From 4e1afc981f300061134f5ca0fe967def60b211b7 Mon Sep 17 00:00:00 2001
From: Varun Chawla
Date: Sun, 22 Feb 2026 20:17:17 -0800
Subject: [PATCH] Fix CogVideoX dynamic CFG using loop index instead of timestep value

The dynamic CFG calculation was using t.item() (timestep value, range
1-999) where it should use the loop index i (range 0 to
num_inference_steps-1). This caused the cosine schedule to produce
wildly oscillating guidance values instead of a smooth annealing from
guidance_scale+1 down to 1.

Fixes #9641
---
 src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py   | 2 +-
 .../pipelines/cogvideo/pipeline_cogvideox_fun_control.py | 2 +-
 .../pipelines/cogvideo/pipeline_cogvideox_image2video.py | 2 +-
 .../pipelines/cogvideo/pipeline_cogvideox_video2video.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
index b883e10a6732..830969d06a9c 100644
--- a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
+++ b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
@@ -736,7 +736,7 @@ def __call__(
                 # perform guidance
                 if use_dynamic_cfg:
                     self._guidance_scale = 1 + guidance_scale * (
-                        (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
+                        (1 - math.cos(math.pi * ((num_inference_steps - i) / num_inference_steps) ** 5.0)) / 2
                     )
                 if do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
diff --git a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
index de5b969a9adc..d542d4a68fc8 100644
--- a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
+++ b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
@@ -802,7 +802,7 @@ def __call__(
                 # perform guidance
                 if use_dynamic_cfg:
                     self._guidance_scale = 1 + guidance_scale * (
-                        (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
+                        (1 - math.cos(math.pi * ((num_inference_steps - i) / num_inference_steps) ** 5.0)) / 2
                     )
                 if do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
diff --git a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
index 9687d63bc7bf..acd630b751aa 100644
--- a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
+++ b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
@@ -846,7 +846,7 @@ def __call__(
                 # perform guidance
                 if use_dynamic_cfg:
                     self._guidance_scale = 1 + guidance_scale * (
-                        (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
+                        (1 - math.cos(math.pi * ((num_inference_steps - i) / num_inference_steps) ** 5.0)) / 2
                     )
                 if do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
diff --git a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
index e3ce8292fad6..5365eb2fea43 100644
--- a/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
+++ b/src/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
@@ -817,7 +817,7 @@ def __call__(
                 # perform guidance
                 if use_dynamic_cfg:
                     self._guidance_scale = 1 + guidance_scale * (
-                        (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2
+                        (1 - math.cos(math.pi * ((num_inference_steps - i) / num_inference_steps) ** 5.0)) / 2
                     )
                 if do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)