From a902b6e58d8336ad2efa53ddd3d2d7021f69984c Mon Sep 17 00:00:00 2001
From: Guo-Yilong <guoyilong076@gmail.com>
Date: Mon, 30 Mar 2026 20:24:54 +0800
Subject: [PATCH] [Qwen3VL] Add clear_grpah_opt_backend method to
 Qwen3VLForConditionalGeneration

Add a clear_grpah_opt_backend method that delegates to the underlying model
to clear the CUDA graph optimization backend.
---
 fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py b/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py
index e60cc3b5314..1ced9bdf332 100644
--- a/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py
+++ b/fastdeploy/model_executor/models/qwen3_vl/qwen3_vl.py
@@ -381,6 +381,10 @@ def forward(
 
         return hidden_states
 
+    def clear_grpah_opt_backend(self):
+        """Clear the graph optimization backend; the captured CUDA graph will be cleaned up."""
+        self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
+
 
 class Qwen3VLPretrainedModel(PretrainedModel):
     """Utilities for tensor-parallel weight splitting."""