From cc699e984a89ebca953d399af1ed95f4c5e91adc Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Thu, 28 May 2026 23:57:39 -0700 Subject: [PATCH 1/7] refactor autoencoder_kl_temporal_decoder tests --- ..._models_autoencoder_kl_temporal_decoder.py | 68 +++++++++++-------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py b/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py index 93f40f44a919..d350615b0a74 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py @@ -13,48 +13,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import torch from diffusers import AutoencoderKLTemporalDecoder +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import ( - enable_full_determinism, - floats_tensor, - torch_device, -) -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderKLTemporalDecoderTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLTemporalDecoder - main_input_name = "sample" - base_precision = 1e-2 - +class AutoencoderKLTemporalDecoderTesterConfig(BaseModelTesterConfig): @property - def dummy_input(self): - batch_size = 3 - num_channels = 3 - sizes = (32, 32) - - image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) - num_frames = 3 + def model_class(self): + return AutoencoderKLTemporalDecoder - return {"sample": image, "num_frames": num_frames} + @property + def main_input_name(self) -> str: + return "sample" @property - def input_shape(self): + def output_shape(self) -> tuple: return (3, 32, 32) @property - def output_shape(self): - return (3, 32, 32) + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def prepare_init_args_and_inputs_for_common(self): - init_dict = { + def get_init_dict(self) -> dict: + return { "block_out_channels": [32, 64], "in_channels": 3, "out_channels": 3, @@ -62,9 +52,31 @@ def prepare_init_args_and_inputs_for_common(self): "latent_channels": 4, "layers_per_block": 2, } - inputs_dict = self.dummy_input - return init_dict, inputs_dict + def get_dummy_inputs(self) -> dict: + batch_size = 3 + num_channels = 3 + sizes = (32, 32) + image = randn_tensor((batch_size, num_channels, *sizes), generator=self.generator, device=torch_device) + num_frames = 3 + return {"sample": image, "num_frames": num_frames} + + +class TestAutoencoderKLTemporalDecoder(AutoencoderKLTemporalDecoderTesterConfig, ModelTesterMixin): + base_precision = 1e-2 + + +class TestAutoencoderKLTemporalDecoderTraining(AutoencoderKLTemporalDecoderTesterConfig, TrainingTesterMixin): def test_gradient_checkpointing_is_applied(self): expected_set = {"Encoder", "TemporalDecoder", "UNetMidBlock2D"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) + + +class TestAutoencoderKLTemporalDecoderMemory(AutoencoderKLTemporalDecoderTesterConfig, MemoryTesterMixin): + pass + + +class TestAutoencoderKLTemporalDecoderSlicingTiling( + AutoencoderKLTemporalDecoderTesterConfig, NewAutoencoderTesterMixin +): + pass From 1167d1debaaf136b02304992a4f1229dd038a68c Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Fri, 29 May 2026 01:04:32 -0700 Subject: [PATCH 2/7] refactor autoencoder_kl_cosmos tests --- .../test_models_autoencoder_cosmos.py | 73 +++++++++++-------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_cosmos.py b/tests/models/autoencoders/test_models_autoencoder_cosmos.py index 5898ae776a1b..b98d1b123745 100644 --- a/tests/models/autoencoders/test_models_autoencoder_cosmos.py +++ b/tests/models/autoencoders/test_models_autoencoder_cosmos.py @@ -12,24 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import pytest +import torch from diffusers import AutoencoderKLCosmos +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import enable_full_determinism, floats_tensor, torch_device -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderKLCosmosTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLCosmos - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderKLCosmosTesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderKLCosmos + + @property + def main_input_name(self) -> str: + return "sample" - def get_autoencoder_kl_cosmos_config(self): + @property + def output_shape(self) -> tuple: + return (3, 9, 32, 32) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) + + def get_init_dict(self) -> dict: return { "in_channels": 3, "out_channels": 3, @@ -46,38 +60,35 @@ def get_autoencoder_kl_cosmos_config(self): "temporal_compression_ratio": 4, } - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 2 num_frames = 9 num_channels = 3 height = 32 width = 32 - - image = floats_tensor((batch_size, num_channels, num_frames, height, width)).to(torch_device) - + image = randn_tensor( + (batch_size, num_channels, num_frames, height, width), generator=self.generator, device=torch_device + ) return {"sample": image} - @property - def input_shape(self): - return (3, 9, 32, 32) - @property - def output_shape(self): - return (3, 9, 32, 32) +class TestAutoencoderKLCosmos(AutoencoderKLCosmosTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_kl_cosmos_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict +class TestAutoencoderKLCosmosTraining(AutoencoderKLCosmosTesterConfig, TrainingTesterMixin): def test_gradient_checkpointing_is_applied(self): - expected_set = { - "CosmosEncoder3d", - "CosmosDecoder3d", - } + expected_set = {"CosmosEncoder3d", "CosmosDecoder3d"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) - @unittest.skip("Not sure why this test fails. Investigate later.") - def test_effective_gradient_checkpointing(self): - pass + @pytest.mark.skip("Not sure why this test fails. Investigate later.") + def test_gradient_checkpointing_equivalence(self): + super().test_gradient_checkpointing_equivalence() + + +class TestAutoencoderKLCosmosMemory(AutoencoderKLCosmosTesterConfig, MemoryTesterMixin): + pass + + +class TestAutoencoderKLCosmosSlicingTiling(AutoencoderKLCosmosTesterConfig, NewAutoencoderTesterMixin): + pass From 20645265c0b4ac8389393d46f928d6c1b4a975fd Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Fri, 29 May 2026 01:23:16 -0700 Subject: [PATCH 3/7] refactor autoencoder_kl_kvae tests --- .../test_models_autoencoder_kl_kvae.py | 64 +++++++++++-------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py b/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py index adae981f9c76..4852b9f6a9cc 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py @@ -13,24 +13,37 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import torch from diffusers import AutoencoderKLKVAE +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import enable_full_determinism, floats_tensor, torch_device -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderKLKVAETests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLKVAE - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderKLKVAETesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderKLKVAE + + @property + def main_input_name(self) -> str: + return "sample" - def get_autoencoder_kl_kvae_config(self): + @property + def output_shape(self) -> tuple: + return (3, 32, 32) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) + + def get_init_dict(self) -> dict: return { "in_channels": 3, "channels": 32, @@ -42,32 +55,27 @@ def get_autoencoder_kl_kvae_config(self): "sample_size": 32, } - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 2 num_channels = 3 sizes = (32, 32) - - image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) - + image = randn_tensor((batch_size, num_channels, *sizes), generator=self.generator, device=torch_device) return {"sample": image} - @property - def input_shape(self): - return (3, 32, 32) - @property - def output_shape(self): - return (3, 32, 32) +class TestAutoencoderKLKVAE(AutoencoderKLKVAETesterConfig, ModelTesterMixin): + base_precision = 1e-2 - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_kl_kvae_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict +class TestAutoencoderKLKVAETraining(AutoencoderKLKVAETesterConfig, TrainingTesterMixin): def test_gradient_checkpointing_is_applied(self): - expected_set = { - "KVAEEncoder2D", - "KVAEDecoder2D", - } + expected_set = {"KVAEEncoder2D", "KVAEDecoder2D"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) + + +class TestAutoencoderKLKVAEMemory(AutoencoderKLKVAETesterConfig, MemoryTesterMixin): + pass + + +class TestAutoencoderKLKVAESlicingTiling(AutoencoderKLKVAETesterConfig, NewAutoencoderTesterMixin): + pass From a790bae6cf85c38756f22c8a4211a2cf66f06a6c Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Fri, 29 May 2026 01:43:10 -0700 Subject: [PATCH 4/7] fix return_dict propagation in AutoencoderKLMochi.forward --- src/diffusers/models/autoencoders/autoencoder_kl_mochi.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py index d353bc80acb7..d2d51b29cdf2 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py @@ -1110,7 +1110,5 @@ def forward( z = posterior.sample(generator=generator) else: z = posterior.mode() - dec = self.decode(z) - if not return_dict: - return (dec,) + dec = self.decode(z, return_dict=return_dict) return dec From d9b543680db6f9e5f8ee5cc7d0e3608e1c934aee Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Fri, 29 May 2026 01:43:10 -0700 Subject: [PATCH 5/7] refactor autoencoder_kl_mochi tests --- .../test_models_autoencoder_mochi.py | 87 +++++++++---------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_mochi.py b/tests/models/autoencoders/test_models_autoencoder_mochi.py index ab8d429a67f6..f2b52da1c535 100755 --- a/tests/models/autoencoders/test_models_autoencoder_mochi.py +++ b/tests/models/autoencoders/test_models_autoencoder_mochi.py @@ -13,24 +13,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest +import pytest +import torch from diffusers import AutoencoderKLMochi +from diffusers.utils.torch_utils import randn_tensor -from ...testing_utils import enable_full_determinism, floats_tensor, torch_device -from ..test_modeling_common import ModelTesterMixin -from .testing_utils import AutoencoderTesterMixin +from ...testing_utils import enable_full_determinism, torch_device +from ..testing_utils import BaseModelTesterConfig, MemoryTesterMixin, ModelTesterMixin, TrainingTesterMixin +from .testing_utils import NewAutoencoderTesterMixin enable_full_determinism() -class AutoencoderKLMochiTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase): - model_class = AutoencoderKLMochi - main_input_name = "sample" - base_precision = 1e-2 +class AutoencoderKLMochiTesterConfig(BaseModelTesterConfig): + @property + def model_class(self): + return AutoencoderKLMochi + + @property + def main_input_name(self) -> str: + return "sample" + + @property + def output_shape(self) -> tuple: + return (3, 7, 16, 16) + + @property + def generator(self): + return torch.Generator("cpu").manual_seed(0) - def get_autoencoder_kl_mochi_config(self): + def get_init_dict(self) -> dict: return { "in_channels": 15, "out_channels": 3, @@ -42,30 +56,30 @@ def get_autoencoder_kl_mochi_config(self): "scaling_factor": 1, } - @property - def dummy_input(self): + def get_dummy_inputs(self) -> dict: batch_size = 2 num_frames = 7 num_channels = 3 sizes = (16, 16) + image = randn_tensor( + (batch_size, num_channels, num_frames, *sizes), generator=self.generator, device=torch_device + ) + return {"sample": image} - image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device) - return {"sample": image} +class TestAutoencoderKLMochi(AutoencoderKLMochiTesterConfig, ModelTesterMixin): + base_precision = 1e-2 - @property - def input_shape(self): - return (3, 7, 16, 16) + @pytest.mark.skip("Unsupported test.") + def test_model_parallelism(self): + super().test_model_parallelism() - @property - def output_shape(self): - return (3, 7, 16, 16) + @pytest.mark.skip("RuntimeError: values expected sparse tensor layout but got Strided") + def test_outputs_equivalence(self): + super().test_outputs_equivalence() - def prepare_init_args_and_inputs_for_common(self): - init_dict = self.get_autoencoder_kl_mochi_config() - inputs_dict = self.dummy_input - return init_dict, inputs_dict +class TestAutoencoderKLMochiTraining(AutoencoderKLMochiTesterConfig, TrainingTesterMixin): def test_gradient_checkpointing_is_applied(self): expected_set = { "MochiDecoder3D", @@ -76,25 +90,10 @@ def test_gradient_checkpointing_is_applied(self): } super().test_gradient_checkpointing_is_applied(expected_set=expected_set) - @unittest.skip("Unsupported test.") - def test_model_parallelism(self): - """ - tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence - - RuntimeError: values expected sparse tensor layout but got Strided - """ - pass - @unittest.skip("Unsupported test.") - def test_outputs_equivalence(self): - """ - tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_outputs_equivalence - - RuntimeError: values expected sparse tensor layout but got Strided - """ - pass - - @unittest.skip("Unsupported test.") - def test_sharded_checkpoints_device_map(self): - """ - tests/models/autoencoders/test_models_autoencoder_mochi.py::AutoencoderKLMochiTests::test_sharded_checkpoints_device_map - - RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:5! - """ +class TestAutoencoderKLMochiMemory(AutoencoderKLMochiTesterConfig, MemoryTesterMixin): + pass + + +class TestAutoencoderKLMochiSlicingTiling(AutoencoderKLMochiTesterConfig, NewAutoencoderTesterMixin): + pass From f3a0388b752333848c5dc863cc37c19df7508244 Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Fri, 29 May 2026 03:30:12 -0700 Subject: [PATCH 6/7] add docstrings --- tests/models/autoencoders/test_models_autoencoder_cosmos.py | 6 ++++-- .../models/autoencoders/test_models_autoencoder_kl_kvae.py | 6 ++++-- .../test_models_autoencoder_kl_temporal_decoder.py | 6 ++++-- tests/models/autoencoders/test_models_autoencoder_mochi.py | 6 ++++-- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/models/autoencoders/test_models_autoencoder_cosmos.py b/tests/models/autoencoders/test_models_autoencoder_cosmos.py index b98d1b123745..bb7ce4f0bce6 100644 --- a/tests/models/autoencoders/test_models_autoencoder_cosmos.py +++ b/tests/models/autoencoders/test_models_autoencoder_cosmos.py @@ -77,6 +77,8 @@ class TestAutoencoderKLCosmos(AutoencoderKLCosmosTesterConfig, ModelTesterMixin) class TestAutoencoderKLCosmosTraining(AutoencoderKLCosmosTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLCosmos.""" + def test_gradient_checkpointing_is_applied(self): expected_set = {"CosmosEncoder3d", "CosmosDecoder3d"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) @@ -87,8 +89,8 @@ def test_gradient_checkpointing_equivalence(self): class TestAutoencoderKLCosmosMemory(AutoencoderKLCosmosTesterConfig, MemoryTesterMixin): - pass + """Memory optimization tests for AutoencoderKLCosmos.""" class TestAutoencoderKLCosmosSlicingTiling(AutoencoderKLCosmosTesterConfig, NewAutoencoderTesterMixin): - pass + """Slicing and tiling tests for AutoencoderKLCosmos.""" diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py b/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py index 4852b9f6a9cc..24c260477a34 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_kvae.py @@ -68,14 +68,16 @@ class TestAutoencoderKLKVAE(AutoencoderKLKVAETesterConfig, ModelTesterMixin): class TestAutoencoderKLKVAETraining(AutoencoderKLKVAETesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLKVAE.""" + def test_gradient_checkpointing_is_applied(self): expected_set = {"KVAEEncoder2D", "KVAEDecoder2D"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) class TestAutoencoderKLKVAEMemory(AutoencoderKLKVAETesterConfig, MemoryTesterMixin): - pass + """Memory optimization tests for AutoencoderKLKVAE.""" class TestAutoencoderKLKVAESlicingTiling(AutoencoderKLKVAETesterConfig, NewAutoencoderTesterMixin): - pass + """Slicing and tiling tests for AutoencoderKLKVAE.""" diff --git a/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py b/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py index d350615b0a74..860a8dd01599 100644 --- a/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py +++ b/tests/models/autoencoders/test_models_autoencoder_kl_temporal_decoder.py @@ -67,16 +67,18 @@ class TestAutoencoderKLTemporalDecoder(AutoencoderKLTemporalDecoderTesterConfig, class TestAutoencoderKLTemporalDecoderTraining(AutoencoderKLTemporalDecoderTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLTemporalDecoder.""" + def test_gradient_checkpointing_is_applied(self): expected_set = {"Encoder", "TemporalDecoder", "UNetMidBlock2D"} super().test_gradient_checkpointing_is_applied(expected_set=expected_set) class TestAutoencoderKLTemporalDecoderMemory(AutoencoderKLTemporalDecoderTesterConfig, MemoryTesterMixin): - pass + """Memory optimization tests for AutoencoderKLTemporalDecoder.""" class TestAutoencoderKLTemporalDecoderSlicingTiling( AutoencoderKLTemporalDecoderTesterConfig, NewAutoencoderTesterMixin ): - pass + """Slicing and tiling tests for AutoencoderKLTemporalDecoder.""" diff --git a/tests/models/autoencoders/test_models_autoencoder_mochi.py b/tests/models/autoencoders/test_models_autoencoder_mochi.py index f2b52da1c535..2a87d37c6ee2 100755 --- a/tests/models/autoencoders/test_models_autoencoder_mochi.py +++ b/tests/models/autoencoders/test_models_autoencoder_mochi.py @@ -80,6 +80,8 @@ def test_outputs_equivalence(self): class TestAutoencoderKLMochiTraining(AutoencoderKLMochiTesterConfig, TrainingTesterMixin): + """Training tests for AutoencoderKLMochi.""" + def test_gradient_checkpointing_is_applied(self): expected_set = { "MochiDecoder3D", @@ -92,8 +94,8 @@ def test_gradient_checkpointing_is_applied(self): class TestAutoencoderKLMochiMemory(AutoencoderKLMochiTesterConfig, MemoryTesterMixin): - pass + """Memory optimization tests for AutoencoderKLMochi.""" class TestAutoencoderKLMochiSlicingTiling(AutoencoderKLMochiTesterConfig, NewAutoencoderTesterMixin): - pass + """Slicing and tiling tests for AutoencoderKLMochi.""" From f373fa9b707a05fb0f199bd4782cf18f00ebb0e5 Mon Sep 17 00:00:00 2001 From: Akshan Krithick Date: Fri, 29 May 2026 03:41:02 -0700 Subject: [PATCH 7/7] fix return type annotation --- src/diffusers/models/autoencoders/autoencoder_kl_mochi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py index d2d51b29cdf2..ca40e93260b3 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_mochi.py @@ -1092,7 +1092,7 @@ def forward( sample_posterior: bool = False, return_dict: bool = True, generator: torch.Generator | None = None, - ) -> torch.Tensor | torch.Tensor: + ) -> DecoderOutput | torch.Tensor: r""" Args: sample (`torch.Tensor`): Input sample.