From f9d2dc3d8dd36cb32b38c203141255757e723256 Mon Sep 17 00:00:00 2001 From: YanhuiDua Date: Wed, 27 May 2026 02:43:37 +0000 Subject: [PATCH 1/2] fix rollout ut --- xtuner/v1/rl/rollout/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xtuner/v1/rl/rollout/utils.py b/xtuner/v1/rl/rollout/utils.py index 19c2432a3..0f867e54f 100644 --- a/xtuner/v1/rl/rollout/utils.py +++ b/xtuner/v1/rl/rollout/utils.py @@ -11,7 +11,7 @@ from ray import ObjectRef as RayObjectRef from xtuner.v1.data_proto.rl_data import RolloutState, Status -from xtuner.v1.rl.utils import asyncio_run, free_object_refs +from xtuner.v1.rl.utils import free_object_refs from xtuner.v1.utils import get_logger @@ -179,7 +179,7 @@ def run_once(self) -> None: async def _run_checks() -> list[bool]: return await asyncio.gather(*tasks) - check_results = asyncio_run(_run_checks()) + check_results = asyncio.run(_run_checks()) inactive_workers = [] for (rank, _, _, _), is_healthy in zip(workers_to_check, check_results): if not is_healthy: From dc698d98d152ea1dbb6421e41f543b9c72390ae5 Mon Sep 17 00:00:00 2001 From: YanhuiDua Date: Thu, 28 May 2026 03:14:43 +0000 Subject: [PATCH 2/2] set return_routed_experts as True in sample_params --- xtuner/v1/data_proto/rl_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xtuner/v1/data_proto/rl_data.py b/xtuner/v1/data_proto/rl_data.py index 7c5ede857..c962eda0d 100644 --- a/xtuner/v1/data_proto/rl_data.py +++ b/xtuner/v1/data_proto/rl_data.py @@ -45,7 +45,7 @@ class SampleParams(BaseModel): include_stop_str_in_output: bool = True no_stop_trim: bool = True spaces_between_special_tokens: bool = False - return_routed_experts: bool = False + return_routed_experts: bool = True class Status(Enum):