[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"project-senior-rlspezialist-reinforcement-learning-mwd":3,"similar-senior-rlspezialist-reinforcement-learning-mwd":36},{"id":4,"slug":5,"title":6,"skills":7,"budget":19,"duration":20,"location":21,"onsitePercent":22,"contractType":23,"foundAt":24,"category":25,"description":29,"rawText":30,"webTitle":31,"webText":32,"language":33,"projectId":34,"sourceUrl":35},6271,"senior-rlspezialist-reinforcement-learning-mwd","Senior RL‑Spezialist (Reinforcement Learning) (m\u002Fw\u002Fd)",[8,9,10,11,12,13,14,15,16,17,18],"Reinforcement Learning","Multi-Armed-Bandit-Verfahren","Contextual Bandits","Thompson Sampling","Off-Policy-Evaluation","Python","Machine Learning","SQL","Scrum","Kanban","SAFe",null,"01.07.2026 bis 31.12.2026 + Option","München",25,"contracting","2026-05-27T12:05:48+00:00",{"id":26,"slug":27,"label":28},3,"ai_ml","AI & Machine Learning","Verantwortung für methodische und technische Ausgestaltung von RL-Lösungen. Konzeption von Exploration und Bandit-Verhalten sowie Entwicklung von RL-Simulationsmodellen. Durchführung von Off-Policy-Evaluations und fachliche Absicherung des Bandit-Livegangs.","Senior RL‑Spezialist (Reinforcement Learning) (m\u002Fw\u002Fd)\n\nRemote (75%) & München\n\nStart: 01.07.2026 (asap)\n\nvor 4 Minuten\n\nJob Typ:\nProjekt\n\nDauer:\nbis 31.12.2026 + Option\n\nArbeitsumfang:\nTeilzeit - (50%)\n\nSprachen:\nDeutsch\n\nID: 178711\n\nJetzt bewerben\n\nWesthouse ist eines der führenden internationalen Recruitment Unternehmen für die Vermittlung von hochqualifizierten Fachexperten in Bereichen wie IT Life Cycle, SAP, Engineering, Kaufmännischem und Fachberatung.\n\nFür unseren Kunden suchen wir aktuell eine\u002Fn Senior RL‑Spezialist (Reinforcement Learning) (m\u002Fw\u002Fd) - Remote (75%) & München.\n\nIhre Aufgaben\n\n- Verantwortung für die methodische und technische Ausgestaltung der RL-Lösung\n- Konzeption und Ausgestaltung der Exploration und Bandit-Verhaltens\n- Gestaltung und Implementierung von Exploration Groups\n- Entwicklung und Implementierung von RL-Simulationsmodellen\n- Durchführung und Betreuung von Off-Policy-Evaluations\n- Definition und Auswahl geeigneter Bandit-Methodiken, z. B.: Thompson Sampling, Epsilon-Greedy, Upper Confidence Bound (UCB))\n- Fachliche Absicherung des Bandit-Livegangs\n\nInteressiert?\n\nPaul Schock\n\nTel.: +49-89-38377225\nEmail: E-Mail: p.schock@westhouse-group.com p.schock@westhouse-group.com\n\nJetzt bewerben\n\nStellenanzeige teilen\n\nFacebook linkedin Xing twitter\n\nIhre Qualifikationen\n\n- Sehr gute praktische Erfahrung im Bereich Reinforcement Learning (RL)\n- Tiefes Verständnis von: Exploration vs. Exploitation, Multi-Armed-Bandit-Verfahren, Contextual Bandits\n- Erfahrung mit Bandit-Methodiken, z. B.: Thompson Sampling\n- Erfahrung mit Off-Policy-Evaluation (OPE)\n- Erfahrung in der Entwicklung und Validierung von Simulationsmodellen\n- sehr gute Kenntnisse in Python & Machine Learning, sowie SQL-Kenntnisse\n- Erfahrung mit agilen Methoden (Scrum, Kanban, SAFe)","Senior RL-Spezialist (Reinforcement Learning)","Wir suchen einen erfahrenen Senior RL-Spezialisten für ein spannendes Projekt im Bereich Reinforcement Learning. Die Position bietet eine flexible Arbeitsgestaltung mit 75% Remote-Anteil und 25% Präsenz vor Ort. Das Projekt startet im Juli 2026 und läuft zunächst bis Ende 2026 mit Verlängerungsoption.\n\nIhre Hauptaufgaben umfassen die methodische und technische Ausgestaltung von RL-Lösungen sowie die Konzeption und Implementierung von Exploration- und Bandit-Verhalten. Sie entwickeln Exploration Groups und implementieren RL-Simulationsmodelle. Ein weiterer Schwerpunkt liegt auf der Durchführung und Betreuung von Off-Policy-Evaluations sowie der Definition und Auswahl geeigneter Bandit-Methodiken wie Thompson Sampling, Epsilon-Greedy oder Upper Confidence Bound (UCB). Die fachliche Absicherung des Bandit-Livegangs rundet Ihr Aufgabenspektrum ab.\n\nWir erwarten sehr gute praktische Erfahrung im Bereich Reinforcement Learning und ein tiefes Verständnis von Exploration vs. Exploitation, Multi-Armed-Bandit-Verfahren und Contextual Bandits. Erfahrung mit Bandit-Methodiken, insbesondere Thompson Sampling, sowie mit Off-Policy-Evaluation sind essentiell. Sie sollten Erfahrung in der Entwicklung und Validierung von Simulationsmodellen mitbringen und sehr gute Kenntnisse in Python, Machine Learning und SQL besitzen. Erfahrung mit agilen Methoden wie Scrum, Kanban oder SAFe rundet Ihr Profil ab.\n\nDie Position ist als Teilzeitprojekt (50%) ausgelegt und erfordert sehr gute Deutschkenntnisse. Dies ist eine hervorragende Gelegenheit, an innovativen RL-Projekten zu arbeiten und Ihre Expertise in einem dynamischen Umfeld einzusetzen.","de","178711","https:\u002F\u002Fwww.westhouse-group.com\u002Fjoblisting\u002Fsenior-rl-spezialist-reinforcement-learning-m-w-d-remote-75-muenchen\u002F",{"items":37},[38,52,72,89,104,124,147,163,179,188,203,216,231,248,263],{"id":39,"slug":40,"title":41,"skills":42,"budget":19,"duration":20,"location":21,"onsitePercent":22,"contractType":23,"foundAt":50,"category":51},6272,"product-manager-rlscope-mwd","Product Manager (RL‑Scope) (m\u002Fw\u002Fd)",[43,8,14,44,45,46,16,17,18,47,48,49],"Product Management","AI\u002FData-Produktlebenszyklen","Anforderungsmanagement","Backlog-Management","Produkt-Roadmaps","Risikomanagement","Stakeholdermanagement","2026-05-27T12:06:04+00:00",{"id":26,"slug":27,"label":28},{"id":53,"slug":54,"title":55,"skills":56,"budget":66,"duration":67,"location":68,"onsitePercent":69,"contractType":23,"foundAt":70,"category":71},6078,"ai-developerarchitect","AI Developer\u002FArchitect",[13,57,58,59,60,61,62,63,64,65],"Azure","AI agents","LLMs","embeddings","RAG","semantic search","C#","Java","English","Negotiable","6 months","Warsaw\u002FKraków",10,"2026-05-26T15:27:06+00:00",{"id":26,"slug":27,"label":28},{"id":73,"slug":74,"title":75,"skills":76,"budget":84,"duration":19,"location":85,"onsitePercent":69,"contractType":86,"foundAt":87,"category":88},5923,"applied-ai-software-engineer-langchain-langgraph-rag-knowledge-graphs","Applied AI Software Engineer - LangChain, LangGraph, RAG, Knowledge Graphs",[13,77,78,61,79,80,81,82,83,14],"LangChain","LangGraph","Knowledge Graphs","Neo4j","LLM APIs","OpenAI","Agentic AI workflows","90.000-110.000 GBP + 15% Bonus","Leeds\u002FBradford","permanent","2026-05-26T08:33:49+00:00",{"id":26,"slug":27,"label":28},{"id":90,"slug":91,"title":92,"skills":93,"budget":19,"duration":101,"location":19,"onsitePercent":69,"contractType":23,"foundAt":102,"category":103},5918,"ai-entwickler-mit-ux-mindset-fuer-ki-assistenten","AI-Entwickler mit UX-Mindset für KI Assistenten",[94,95,96,97,98,64,99,100],"React","TypeScript","VibeCoding","AI\u002FML","UX","Spring Boot","AWS","6 Monate","2026-05-26T08:31:30+00:00",{"id":26,"slug":27,"label":28},{"id":105,"slug":106,"title":107,"skills":108,"budget":19,"duration":101,"location":120,"onsitePercent":121,"contractType":23,"foundAt":122,"category":123},5888,"senior-rag-engineer-python-ai","Senior RAG Engineer-Python\u002F AI",[13,109,110,111,112,113,114,115,100,57,15,116,117,118,119],"FastAPI","Flask","Django","REST APIs","Microservices","Docker","Kubernetes","Asynchronous Programming","WebSocket","CI\u002FCD","System Architecture","Frankfurt",0,"2026-05-26T02:05:48+00:00",{"id":26,"slug":27,"label":28},{"id":125,"slug":126,"title":127,"skills":128,"budget":19,"duration":143,"location":120,"onsitePercent":144,"contractType":23,"foundAt":145,"category":146},5886,"senior-data-analyst-chatbot-ai-specialist-2","Senior Data Analyst \u002F ChatBot AI Specialist",[129,130,131,132,133,134,135,136,137,138,139,140,141,142],"Google Cloud","Gemini","Dialogflow CX\u002FES","NLU","Speech-to-Text","Multi-Agent AI","Agentic AI","Data Analytics","Dashboarding","Prompt Engineering","LLM Testing","Conversational AI","ChatBots","Large Language Models","3 Monate",20,"2026-05-26T02:05:20+00:00",{"id":26,"slug":27,"label":28},{"id":148,"slug":149,"title":150,"skills":151,"budget":19,"duration":160,"location":120,"onsitePercent":121,"contractType":23,"foundAt":161,"category":162},5790,"senior-softwareentwickler-ai-gestuetzte-softwareentwicklung-mwd","Senior Softwareentwickler AI-gestützte Softwareentwicklung (m\u002Fw\u002Fd)",[13,152,153,154,155,156,157,158,159],"Node.js","Golang","JavaScript","Large Language Models (LLMs)","Backend-Entwicklung","Softwarearchitekturen","DevOps","AI-Engineering-Plattformen","12 Monate","2026-05-23T14:52:45+00:00",{"id":26,"slug":27,"label":28},{"id":164,"slug":165,"title":166,"skills":167,"budget":176,"duration":19,"location":19,"onsitePercent":121,"contractType":86,"foundAt":177,"category":178},5770,"deep-learning-innovation-officer","Deep Learning Innovation Officer",[13,168,15,14,169,170,171,172,173,174,175],"R","Statistical Modeling","Data Preprocessing","Tableau","Power BI","Data Visualization","A\u002FB Testing","Data Science","100000\u002FJahr","2026-05-22T18:10:30+00:00",{"id":26,"slug":27,"label":28},{"id":180,"slug":181,"title":182,"skills":183,"budget":19,"duration":19,"location":19,"onsitePercent":19,"contractType":23,"foundAt":186,"category":187},5490,"kiberater-mwd-produktionsumfeld","KI‑Berater (m\u002Fw\u002Fd) – Produktionsumfeld",[184,185],"KI-Beratung","Produktionsumfeld","2026-05-22T13:35:14+00:00",{"id":26,"slug":27,"label":28},{"id":189,"slug":190,"title":191,"skills":192,"budget":19,"duration":19,"location":200,"onsitePercent":19,"contractType":23,"foundAt":201,"category":202},5423,"ai-machine-learning-program-transformation-lead-mfd","AI & Machine Learning Program & Transformation Lead (m\u002Ff\u002Fd)",[193,14,194,195,196,197,198,199],"AI","Generative AI","Automation","Stakeholder Management","Program Management","AI\u002FML Governance","Enterprise Architecture","Stuttgart","2026-05-22T11:07:20+00:00",{"id":26,"slug":27,"label":28},{"id":204,"slug":205,"title":206,"skills":207,"budget":66,"duration":211,"location":212,"onsitePercent":213,"contractType":23,"foundAt":214,"category":215},5390,"ai-consultantengineer","AI consultant\u002FEngineer",[193,208,209,210],"Microsoft AI stack","MS-Foundry","Dutch language","6 months initial contract with intention to extend","Noord Brabant",50,"2026-05-22T09:40:40+00:00",{"id":26,"slug":27,"label":28},{"id":217,"slug":218,"title":219,"skills":220,"budget":19,"duration":228,"location":19,"onsitePercent":121,"contractType":23,"foundAt":229,"category":230},5369,"lead-developer-architekt-mwd-amazon-connect-conversational-ai-remote","Lead Developer\u002F Architekt (m\u002Fw\u002Fd) Amazon Connect \u002F Conversational AI - Remote",[221,140,222,223,224,225,77,78,13,226,227],"Amazon Connect","AWS Connect","AWS Bedrock","AWS AgentCore","AWS Gateway","MCP Server","Customer Service","Juni \u002F ASAP bis 30.08 + Option","2026-05-22T08:11:12+00:00",{"id":26,"slug":27,"label":28},{"id":232,"slug":233,"title":234,"skills":235,"budget":19,"duration":244,"location":245,"onsitePercent":213,"contractType":23,"foundAt":246,"category":247},5360,"data-scientist","Data Scientist",[13,15,14,236,100,237,238,239,240,241,242,243],"Datenanalyse","pandas","polars","Zeitreihenmodelle","Feature Engineering","Modellvalidierung","Cross-Validation","Neural Networks","7 Monate","Berlin","2026-05-22T07:25:35+00:00",{"id":26,"slug":27,"label":28},{"id":249,"slug":250,"title":251,"skills":252,"budget":19,"duration":19,"location":260,"onsitePercent":213,"contractType":23,"foundAt":261,"category":262},5331,"data-scientist-mwd","Data Scientist (m\u002Fw\u002Fd)",[175,13,14,253,236,254,255,256,257,258,259],"Statistik","Modellierung","Mathematik","Informatik","Softwareentwicklung","Optimierung","Analyse","Vöcklamarkt","2026-05-22T02:27:04+00:00",{"id":26,"slug":27,"label":28},{"id":264,"slug":265,"title":266,"skills":267,"budget":19,"duration":281,"location":282,"onsitePercent":213,"contractType":23,"foundAt":283,"category":284},5284,"seniorexpert-ai-infrastructure-search-systems-architect","Senior\u002FExpert AI Infrastructure & Search Systems Architect",[268,269,270,271,272,115,273,274,275,276,277,278,279,280,118],"Elasticsearch","OpenSearch","SolrCloud","Weaviate","HNSW","Terraform","Ansible","Helm","Bash scripting","Performance Tuning","Vector Search","Embeddings","MLOps","zunächst 6 Monate, Option auf Verlängerung","Bayern","2026-05-21T16:38:14+00:00",{"id":26,"slug":27,"label":28}]