modelscope
diff --git a/README.md b/README.md
Lines changed: 6 additions & 0 deletions
diff --git a/diffsynth_engine/algorithm/sampler/flow_match/flow_match_euler.py b/diffsynth_engine/algorithm/sampler/flow_match/flow_match_euler.py
Lines changed: 2 additions & 3 deletions
diff --git a/diffsynth_engine/conf/models/wan/dit/14b-i2v.json b/diffsynth_engine/conf/models/wan/dit/wan2.1-flf2v-14b.json
diffsynth_engine/conf/models/wan/dit/14b-i2v.json renamed to diffsynth_engine/conf/models/wan/dit/wan2.1-flf2v-14b.json
Lines changed: 5 additions & 2 deletions
diff --git a/diffsynth_engine/conf/models/wan/dit/14b-flf2v.json b/diffsynth_engine/conf/models/wan/dit/wan2.1-i2v-14b.json
diffsynth_engine/conf/models/wan/dit/14b-flf2v.json renamed to diffsynth_engine/conf/models/wan/dit/wan2.1-i2v-14b.json
Lines changed: 2 additions & 2 deletions
diff --git a/diffsynth_engine/conf/models/wan/dit/1.3b-t2v.json b/diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-1.3b.json
diffsynth_engine/conf/models/wan/dit/1.3b-t2v.json renamed to diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-1.3b.json
Lines changed: 0 additions & 1 deletion
diff --git a/diffsynth_engine/conf/models/wan/dit/14b-t2v.json b/diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-14b.json
diffsynth_engine/conf/models/wan/dit/14b-t2v.json renamed to diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-14b.json
Lines changed: 0 additions & 1 deletion
diff --git a/diffsynth_engine/conf/models/wan/dit/wan2.2-i2v-a14b.json b/diffsynth_engine/conf/models/wan/dit/wan2.2-i2v-a14b.json
Lines changed: 16 additions & 0 deletions
diff --git a/diffsynth_engine/conf/models/wan/dit/wan2.2-t2v-a14b.json b/diffsynth_engine/conf/models/wan/dit/wan2.2-t2v-a14b.json
Lines changed: 16 additions & 0 deletions
diff --git a/diffsynth_engine/conf/models/wan/dit/wan2.2-ti2v-5b.json b/diffsynth_engine/conf/models/wan/dit/wan2.2-ti2v-5b.json
Lines changed: 14 additions & 0 deletions
diff --git a/diffsynth_engine/conf/models/wan/vae/wan2.1-vae.json b/diffsynth_engine/conf/models/wan/vae/wan2.1-vae.json
Lines changed: 48 additions & 0 deletions
@@ -21,6 +21,12 @@ and offloading strategies, enabling loading of larger diffusion models (e.g., Fl
 
 - **Cross-Platform Support:** Runnable on Windows, macOS (Apple Silicon), and Linux, ensuring a smooth experience across different operating systems.
 
+## News
+
+- **[v0.4.0](https://github.com/modelscope/DiffSynth-Engine/releases/tag/v0.4.0)** | **August 1, 2025**:
+  - 🔥 Supports the [Wan2.2](https://modelscope.cn/collections/tongyiwanxiang-22--shipinshengcheng-2bb5b1adef2840) video generation model
+  - ⚠️ [**Breaking Change**] Improved pipeline initialization via the `from_pretrained` method
+
 ## Quick Start
 ### Requirements
 
 
@@ -9,13 +9,12 @@ def initialize(self, init_latents, timesteps, sigmas, mask=None):
         self.mask = mask
 
     def step(self, latents, model_outputs, i):
-        if self.mask is not None:
-            model_outputs = model_outputs * self.mask + self.init_latents * (1 - self.mask)
-
         dt = self.sigmas[i + 1] - self.sigmas[i]
         latents = latents.to(dtype=torch.float32)
         latents = latents + model_outputs * dt
         latents = latents.to(dtype=model_outputs.dtype)
+        if self.mask is not None:
+            latents = latents * self.mask + self.init_latents * (1 - self.mask)
         return latents
 
     def add_noise(self, latents, noise, sigma):
 
@@ -1,5 +1,7 @@
 {
-    "has_image_input": true,
+    "has_clip_feature": true,
+    "has_vae_feature": true,
+    "flf_pos_emb": true,
     "patch_size": [1, 2, 2],
     "in_dim": 36,
     "dim": 5120,
@@ -9,5 +11,6 @@
     "out_dim": 16,
     "num_heads": 40,
     "num_layers": 40,
-    "eps": 1e-6
+    "eps": 1e-6,
+    "shift": 16.0
 }
@@ -1,6 +1,6 @@
 {
-    "has_image_input": true,
-    "flf_pos_emb": true,
+    "has_clip_feature": true,
+    "has_vae_feature": true,
     "patch_size": [1, 2, 2],
     "in_dim": 36,
     "dim": 5120,
 
@@ -1,5 +1,4 @@
 {
-    "has_image_input": false,
     "patch_size": [1, 2, 2],
     "in_dim": 16,
     "dim": 1536,
 
@@ -1,5 +1,4 @@
 {
-    "has_image_input": false,
     "patch_size": [1, 2, 2],
     "in_dim": 16,
     "dim": 5120,
 
@@ -0,0 +1,16 @@
+{
+    "has_vae_feature": true,
+    "patch_size": [1, 2, 2],
+    "in_dim": 36,
+    "dim": 5120,
+    "ffn_dim": 13824,
+    "freq_dim": 256,
+    "text_dim": 4096,
+    "out_dim": 16,
+    "num_heads": 40,
+    "num_layers": 40,
+    "eps": 1e-6,
+    "boundary": 0.900,
+    "cfg_scale": [3.5, 3.5],
+    "num_inference_steps": 40
+}
@@ -0,0 +1,16 @@
+{
+    "patch_size": [1, 2, 2],
+    "in_dim": 16,
+    "dim": 5120,
+    "ffn_dim": 13824,
+    "freq_dim": 256,
+    "text_dim": 4096,
+    "out_dim": 16,
+    "num_heads": 40,
+    "num_layers": 40,
+    "eps": 1e-6,
+    "boundary": 0.875,
+    "shift": 12.0,
+    "cfg_scale": [3.0, 4.0],
+    "num_inference_steps": 40
+}
@@ -0,0 +1,14 @@
+{
+    "fuse_image_latents": true,
+    "patch_size": [1, 2, 2],
+    "in_dim": 48,
+    "dim": 3072,
+    "ffn_dim": 14336,
+    "freq_dim": 256,
+    "text_dim": 4096,
+    "out_dim": 48,
+    "num_heads": 24,
+    "num_layers": 30,
+    "eps": 1e-6,
+    "fps": 24
+}
@@ -0,0 +1,48 @@
+{
+    "in_channels": 3,
+    "out_channels": 3,
+    "encoder_dim": 96,
+    "decoder_dim": 96,
+    "z_dim": 16,
+    "dim_mult": [1, 2, 4, 4],
+    "num_res_blocks": 2,
+    "temperal_downsample": [false, true, true],
+    "dropout": 0.0,
+    "patch_size": 1,
+    "mean": [
+        -0.7571,
+        -0.7089,
+        -0.9113,
+        0.1075,
+        -0.1745,
+        0.9653,
+        -0.1517,
+        1.5508,
+        0.4134,
+        -0.0715,
+        0.5517,
+        -0.3632,
+        -0.1922,
+        -0.9497,
+        0.2503,
+        -0.2921
+    ],
+    "std": [
+        2.8184,
+        1.4541,
+        2.3275,
+        2.6558,
+        1.2196,
+        1.7708,
+        2.6052,
+        2.0743,
+        3.2687,
+        2.1526,
+        2.8652,
+        1.5579,
+        1.6382,
+        1.1253,
+        2.8251,
+        1.9160
+    ]
+}
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`		`- "has_image_input": true,`
`3`		`- "flf_pos_emb": true,`
	`2`	`+ "has_clip_feature": true,`
	`3`	`+ "has_vae_feature": true,`
`4`	`4`	`"patch_size": [1, 2, 2],`
`5`	`5`	`"in_dim": 36,`
`6`	`6`	`"dim": 5120,`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`{`
`2`		`- "has_image_input": false,`
`3`	`2`	`"patch_size": [1, 2, 2],`
`4`	`3`	`"in_dim": 16,`
`5`	`4`	`"dim": 1536,`