@@ -123,6 +123,8 @@ def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_re
123123 self .to_be_rescheduled_request_id_set .add (preempted_req .request_id )
124124 preempted_reqs .append (preempted_req )
125125 scheduled_reqs .append (self ._prepare_preempt_task (preempted_req ))
126+ main_process_metrics .num_requests_waiting .inc (1 )
127+ main_process_metrics .num_requests_running .dec (1 )
126128 if preempted_req == request :
127129 # No more request to preempt.
128130 can_schedule = False
@@ -369,6 +371,8 @@ def schedule(self):
369371 token_budget -= num_new_tokens
370372 request .num_computed_tokens += num_new_tokens
371373 request .status = RequestStatus .RUNNING
374+ main_process_metrics .num_requests_waiting .dec (1 )
375+ main_process_metrics .num_requests_running .inc (1 )
372376 allocated_position = self .get_available_position ()
373377 request .idx = allocated_position
374378 self .tasks_list [allocated_position ] = request
@@ -399,6 +403,8 @@ def schedule(self):
399403 token_budget -= num_new_tokens
400404 request .num_computed_tokens += num_new_tokens
401405 request .status = RequestStatus .RUNNING
406+ main_process_metrics .num_requests_waiting .dec (1 )
407+ main_process_metrics .num_requests_running .inc (1 )
402408 else :
403409 if self .config .cache_config .enable_prefix_caching :
404410 self ._free_blocks (request )
0 commit comments