From 0159735883d07f65c7dc628f16a0eedd4dd67611 Mon Sep 17 00:00:00 2001 From: zoooo0820 Date: Fri, 22 May 2026 17:41:28 +0800 Subject: [PATCH] tmp fix k3 in step0 --- fastdeploy/worker/gpu_worker.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fastdeploy/worker/gpu_worker.py b/fastdeploy/worker/gpu_worker.py index c8dd0cca535..8119f27b4b5 100644 --- a/fastdeploy/worker/gpu_worker.py +++ b/fastdeploy/worker/gpu_worker.py @@ -242,12 +242,6 @@ def graph_optimize_and_warm_up_model(self) -> None: ): self.model_runner.capture_model_prefill_and_mixed() - # Capture CUDAGraph for decode phase (all modes) - self.model_runner.capture_model() - - # Block-wise CUDA graph capture (independent loop) - self.model_runner.capture_block_wise_graphs() - # Deterministic mode: reset RNG and share_inputs after warmup. # Warmup _dummy_run() calls consume CUDA RNG state and leave stale # data (infer_seed, stop_flags, seq_lens, etc.) in share_inputs. @@ -257,6 +251,12 @@ def graph_optimize_and_warm_up_model(self) -> None: set_random_seed(self.fd_config.model_config.seed) self.model_runner.share_inputs.reset_share_inputs() + # Capture CUDAGraph for decode phase (all modes) + self.model_runner.capture_model() + + # Block-wise CUDA graph capture (independent loop) + self.model_runner.capture_block_wise_graphs() + def check_health(self) -> bool: """ """ return True