diff --git a/src/maxtext/trainers/post_train/rl/utils_rl.py b/src/maxtext/trainers/post_train/rl/utils_rl.py index c37db48c0a..fc21cce202 100644 --- a/src/maxtext/trainers/post_train/rl/utils_rl.py +++ b/src/maxtext/trainers/post_train/rl/utils_rl.py @@ -531,17 +531,18 @@ def _to_str(val): "prompts": model_tokenizer.apply_chat_template( [ { - "role": "user", - "content": template_config["TEMPLATE"].format( - system_prompt=template_config["SYSTEM_PROMPT"].format( - reasoning_start_token=tmvp_config.reasoning_start_token, - reasoning_end_token=tmvp_config.reasoning_end_token, - solution_start_token=tmvp_config.solution_start_token, - solution_end_token=tmvp_config.solution_end_token, - ), - question=question, + "role": "system", + "content": template_config["SYSTEM_PROMPT"].format( + reasoning_start_token=tmvp_config.reasoning_start_token, + reasoning_end_token=tmvp_config.reasoning_end_token, + solution_start_token=tmvp_config.solution_start_token, + solution_end_token=tmvp_config.solution_end_token, ), }, + { + "role": "user", + "content": question, + }, ], tokenize=False, add_generation_prompt=True,