Skip to content

Commit c2142ac

Browse files
committed
fix issues when running deepeyes using tq (TransferQueue)
Signed-off-by: jianjunzhong <jianjunzhong@foxmail.com>
1 parent ae85409 commit c2142ac

4 files changed

Lines changed: 13 additions & 6 deletions

File tree

recipe/deepeyes/deepeyes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def __getitem__(self, item):
179179
return row_dict
180180

181181

182-
def compute_score(data_source: str, solution_str: str, ground_truth: str, extra_info=None) -> float:
182+
def compute_score(data_source: str, solution_str: str, ground_truth: str, extra_info=None, **kwargs) -> float:
183183
"""
184184
Compute reward score for model solutions with robust handling of various formats.
185185

recipe/transfer_queue/agent_loop.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,11 @@ def generate_sequences(self, prompts: BatchMeta) -> BatchMeta:
3030
BatchMeta: Output batch metadata.
3131
"""
3232

33-
if self.rm_micro_batch_size and len(prompts) % self.rm_micro_batch_size != 0:
34-
raise ValueError(
35-
f"The length of prompts {len(prompts)} cannot divide the world size of rm_wg {self.rm_micro_batch_size}"
36-
)
3733
if self.config.actor_rollout_ref.rollout.free_cache_engine:
3834
self.wake_up()
35+
if self.reward_model_manager and self.config.reward_model.rollout.free_cache_engine:
36+
self.reward_model_manager.wake_up()
37+
3938
chunkes = prompts.chunk(len(self.agent_loop_workers))
4039
outputs = ray.get(
4140
[
@@ -46,6 +45,8 @@ def generate_sequences(self, prompts: BatchMeta) -> BatchMeta:
4645
output = BatchMeta.concat(outputs)
4746
if self.config.actor_rollout_ref.rollout.free_cache_engine:
4847
self.sleep()
48+
if self.reward_model_manager and self.config.reward_model.rollout.free_cache_engine:
49+
self.reward_model_manager.sleep()
4950

5051
# calculate performance metrics
5152
metrics = [output.extra_info.pop("metrics") for output in outputs] # List[List[Dict[str, str]]]

verl/experimental/agent_loop/agent_loop.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,11 @@ def _initialize_llm_servers(self):
734734
def _init_agent_loop_workers(self):
735735
self.agent_loop_workers = []
736736
num_workers = self.config.actor_rollout_ref.rollout.agent.num_workers
737+
runtime_env = {
738+
"env_vars": {
739+
"TRANSFER_QUEUE_ENABLE": "1" if self.config.transfer_queue.enable else "0",
740+
}
741+
}
737742

738743
node_ids = [node["NodeID"] for node in ray.nodes() if node["Alive"] and node["Resources"].get("CPU", 0) > 0]
739744
for i in range(num_workers):
@@ -745,6 +750,7 @@ def _init_agent_loop_workers(self):
745750
scheduling_strategy=ray.util.scheduling_strategies.NodeAffinitySchedulingStrategy(
746751
node_id=node_id, soft=True
747752
),
753+
runtime_env=runtime_env,
748754
).remote(self.config, self.server_handles, self.reward_router_address)
749755
)
750756

verl/utils/transferqueue_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class BatchMeta:
3939

4040
_TRANSFER_QUEUE_CLIENT = None
4141

42-
is_transferqueue_enabled = os.environ.get("TRANSFER_QUEUE_ENABLE", False)
42+
is_transferqueue_enabled = os.environ.get("TRANSFER_QUEUE_ENABLE", "0") == "1"
4343

4444

4545
def create_transferqueue_client(

0 commit comments

Comments (0)