From a902b6e58d8336ad2efa53ddd3d2d7021f69984c Mon Sep 17 00:00:00 2001 From: Guo-Yilong Date: Mon, 30 Mar 2026 20:24:54 +0800 Subject: [PATCH] [Qwen3VL] Add clear_grpah_opt_backend method to Qwen3VLForConditionalGeneration Add a clear_grpah_opt_backend method that delegates to the underlying model to clear the CUDA graph optimization backend. The "grpah" spelling is intentional: it matches the name of the method being called on the underlying model. --- fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py b/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py index e60cc3b5314..1ced9bdf332 100644 --- a/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py +++ b/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py @@ -381,6 +381,10 @@ def forward( return hidden_states + def clear_grpah_opt_backend(self): + """Clear graph optimization backend, the captured cuda graph will be cleaned""" + self.model.clear_grpah_opt_backend(fd_config=self.fd_config) + class Qwen3VLPretrainedModel(PretrainedModel): """Utilities for tensor-parallel weight splitting."""