From 2f10d834adafdddd011758e0a9f132e2f902c2ba Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Fri, 27 Mar 2026 19:56:22 -0400
Subject: [PATCH 01/13] feat: add Qwen Image 2512 txt2img support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shares the QwenImageEdit base type and infrastructure with the edit model.
Key changes:

- Text encoder: auto-selects prompt template based on reference images —
  edit template (drop_idx=64) when images present, generate template
  (drop_idx=34) when absent
- Denoise: detects zero_cond_t to determine whether to concatenate
  reference latents; txt2img models pass only noisy patches with a
  single-entry img_shapes
- Model config: accept QwenImagePipeline in addition to
  QwenImageEditPlusPipeline
- LoRA: handle "transformer." key prefix from some training frameworks,
  add to config detection
- Starter models: Qwen-Image-2512 full + 4 GGUF variants + Lightning
  V2.0 LoRAs (4-step, 8-step); the full model, the Q4_K_M GGUF and both
  LoRAs are also added to the Qwen Image Edit bundle

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../app/invocations/qwen_image_denoise.py     | 63 +++++++++++------
 .../invocations/qwen_image_text_encoder.py    | 47 +++++++++----
 .../backend/model_manager/starter_models.py   | 67 +++++++++++++++++++
 3 files changed, 142 insertions(+), 35 deletions(-)

diff --git a/invokeai/app/invocations/qwen_image_denoise.py b/invokeai/app/invocations/qwen_image_denoise.py
index cd3ff917596..4b9fb207680 100644
--- a/invokeai/app/invocations/qwen_image_denoise.py
+++ b/invokeai/app/invocations/qwen_image_denoise.py
@@ -353,29 +353,44 @@ def _run_diffusion(self, context: InvocationContext):
         # Pack latents into 2x2 patches: (B, C, H, W) -> (B, H/2*W/2, C*4)
         latents = self._pack_latents(latents, 1, out_channels, latent_height, latent_width)
 
-        # Pack reference image latents and concatenate along the sequence dimension.
-        # The edit transformer always expects [noisy_patches ; ref_patches] in its sequence.
-        if ref_latents is not None:
-            _, ref_ch, rh, rw = ref_latents.shape
-            if rh != latent_height or rw != latent_width:
-                ref_latents = torch.nn.functional.interpolate(
-                    ref_latents, size=(latent_height, latent_width), mode="bilinear"
+        # Determine whether the model uses reference latent conditioning (zero_cond_t).
+        # Edit models (zero_cond_t=True) expect [noisy_patches ; ref_patches] in the sequence.
+        # Txt2img models (zero_cond_t=False) only take noisy patches.
+        has_zero_cond_t = getattr(transformer_info.model, "zero_cond_t", False) or getattr(
+            transformer_info.model.config, "zero_cond_t", False
+        )
+        use_ref_latents = has_zero_cond_t
+
+        ref_latents_packed = None
+        if use_ref_latents:
+            if ref_latents is not None:
+                _, ref_ch, rh, rw = ref_latents.shape
+                if rh != latent_height or rw != latent_width:
+                    ref_latents = torch.nn.functional.interpolate(
+                        ref_latents, size=(latent_height, latent_width), mode="bilinear"
+                    )
+            else:
+                # No reference image provided — use zeros so the model still gets the
+                # expected sequence layout.
+                ref_latents = torch.zeros(
+                    1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
                 )
+            ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)
+
+        # img_shapes tells the transformer the spatial layout of patches.
+        if use_ref_latents:
+            img_shapes = [
+                [
+                    (1, latent_height // 2, latent_width // 2),
+                    (1, latent_height // 2, latent_width // 2),
+                ]
+            ]
         else:
-            # No reference image provided — use zeros so the model still gets the
-            # expected sequence layout.
-            ref_latents = torch.zeros(
-                1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
-            )
-        ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)
-
-        # img_shapes tells the transformer the spatial layout of noisy and reference patches.
-        img_shapes = [
-            [
-                (1, latent_height // 2, latent_width // 2),
-                (1, latent_height // 2, latent_width // 2),
+            img_shapes = [
+                [
+                    (1, latent_height // 2, latent_width // 2),
+                ]
             ]
-        ]
 
         # Prepare inpaint extension (operates in 4D space, so unpack/repack around it)
         inpaint_mask = self._prep_inpaint_mask(context, noise)  # noise has the right 4D shape
@@ -428,8 +443,12 @@ def _run_diffusion(self, context: InvocationContext):
                 # The pipeline passes timestep / 1000 to the transformer
                 timestep = t.expand(latents.shape[0]).to(inference_dtype)
 
-                # Concatenate noisy and reference patches along the sequence dim
-                model_input = torch.cat([latents, ref_latents_packed], dim=1)
+                # For edit models: concatenate noisy and reference patches along the sequence dim
+                # For txt2img models: just use noisy patches
+                if ref_latents_packed is not None:
+                    model_input = torch.cat([latents, ref_latents_packed], dim=1)
+                else:
+                    model_input = latents
 
                 noise_pred_cond = transformer(
                     hidden_states=model_input,
diff --git a/invokeai/app/invocations/qwen_image_text_encoder.py b/invokeai/app/invocations/qwen_image_text_encoder.py
index 641e8c4d388..74670735877 100644
--- a/invokeai/app/invocations/qwen_image_text_encoder.py
+++ b/invokeai/app/invocations/qwen_image_text_encoder.py
@@ -20,26 +20,44 @@
     QwenImageConditioningInfo,
 )
 
-# The Qwen Image Edit pipeline uses a specific system prompt and drops the first
-# N tokens (the system prompt prefix) from the embeddings.  These constants are
-# taken directly from the diffusers QwenImagePipeline.
-_SYSTEM_PROMPT = (
+# Prompt templates and drop indices for the two Qwen Image model modes.
+# These are taken directly from the diffusers pipelines.
+
+# Image editing mode (QwenImageEditPlusPipeline)
+_EDIT_SYSTEM_PROMPT = (
     "Describe the key features of the input image (color, shape, size, texture, objects, background), "
     "then explain how the user's text instruction should alter or modify the image. "
     "Generate a new image that meets the user's requirements while maintaining consistency "
     "with the original input where appropriate."
 )
+_EDIT_DROP_IDX = 64
+
+# Text-to-image mode (QwenImagePipeline)
+_GENERATE_SYSTEM_PROMPT = (
+    "Describe the image by detailing the color, shape, size, texture, quantity, "
+    "text, spatial relationships of the objects and background:"
+)
+_GENERATE_DROP_IDX = 34
+
 _IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"
-_DROP_IDX = 64
 
 
 def _build_prompt(user_prompt: str, num_images: int) -> str:
-    """Build the full prompt with one vision placeholder per reference image."""
-    image_tokens = _IMAGE_PLACEHOLDER * max(num_images, 1)
-    return (
-        f"<|im_start|>system\n{_SYSTEM_PROMPT}<|im_end|>\n"
-        f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n"
-        "<|im_start|>assistant\n"
+    """Build the full prompt with the appropriate template based on whether reference images are provided."""
+    if num_images > 0:
+        # Edit mode: include vision placeholders for reference images
+        image_tokens = _IMAGE_PLACEHOLDER * num_images
+        return (
+            f"<|im_start|>system\n{_EDIT_SYSTEM_PROMPT}<|im_end|>\n"
+            f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
+    else:
+        # Generate mode: text-only prompt
+        return (
+            f"<|im_start|>system\n{_GENERATE_SYSTEM_PROMPT}<|im_end|>\n"
+            f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
+            "<|im_start|>assistant\n"
     )
 
 
@@ -188,7 +206,10 @@ def _encode(
             hidden_states = outputs.hidden_states[-1]
 
             # Extract valid (non-padding) tokens using the attention mask,
-            # then drop the first _DROP_IDX tokens (system prompt prefix).
+            # then drop the system prompt prefix tokens.
+            # The drop index differs between edit mode (64) and generate mode (34).
+            drop_idx = _EDIT_DROP_IDX if images else _GENERATE_DROP_IDX
+
             attn_mask = model_inputs.attention_mask
             bool_mask = attn_mask.bool()
             valid_lengths = bool_mask.sum(dim=1)
@@ -196,7 +217,7 @@ def _encode(
             split_hidden = torch.split(selected, valid_lengths.tolist(), dim=0)
 
             # Drop system prefix tokens and build padded output
-            trimmed = [h[_DROP_IDX:] for h in split_hidden]
+            trimmed = [h[drop_idx:] for h in split_hidden]
             attn_mask_list = [torch.ones(h.size(0), dtype=torch.long, device=device) for h in trimmed]
             max_seq_len = max(h.size(0) for h in trimmed)
 
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index de5f1e1b8b6..d049a52eee7 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -711,6 +711,69 @@ class StarterModelBundle(BaseModel):
     "Settings: Steps=8, CFG=1, Shift Override=3.",
     type=ModelType.LoRA,
 )
+
+# Qwen Image (txt2img)
+qwen_image = StarterModel(
+    name="Qwen Image 2512",
+    base=BaseModelType.QwenImage,
+    source="Qwen/Qwen-Image-2512",
+    description="Qwen Image 2512 full diffusers model. High-quality text-to-image generation. (~40GB)",
+    type=ModelType.Main,
+)
+
+qwen_image_gguf_q4_k_m = StarterModel(
+    name="Qwen Image 2512 (Q4_K_M)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q4_K_M.gguf",
+    description="Qwen Image 2512 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_gguf_q2_k = StarterModel(
+    name="Qwen Image 2512 (Q2_K)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q2_K.gguf",
+    description="Qwen Image 2512 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_gguf_q6_k = StarterModel(
+    name="Qwen Image 2512 (Q6_K)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q6_K.gguf",
+    description="Qwen Image 2512 - Q6_K quantized transformer. Near-lossless quality. (~17GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_gguf_q8_0 = StarterModel(
+    name="Qwen Image 2512 (Q8_0)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q8_0.gguf",
+    description="Qwen Image 2512 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_lightning_4step = StarterModel(
+    name="Qwen Image Lightning (4-step, V2.0, bf16)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors",
+    description="Lightning distillation LoRA for Qwen Image — enables generation in just 4 steps. "
+    "Settings: Steps=4, CFG=1, Shift Override=3.",
+    type=ModelType.LoRA,
+)
+
+qwen_image_lightning_8step = StarterModel(
+    name="Qwen Image Lightning (8-step, V2.0, bf16)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors",
+    description="Lightning distillation LoRA for Qwen Image — enables generation in 8 steps with better quality. "
+    "Settings: Steps=8, CFG=1, Shift Override=3.",
+    type=ModelType.LoRA,
+)
 # endregion
 
 # region SigLIP
@@ -1102,6 +1165,10 @@ class StarterModelBundle(BaseModel):
     qwen_image_gguf_q8_0,
     qwen_image_lightning_4step,
     qwen_image_lightning_8step,
+    qwen_image,
+    qwen_image_gguf_q4_k_m,
+    qwen_image_lightning_4step,
+    qwen_image_lightning_8step,
 ]
 
 STARTER_BUNDLES: dict[str, StarterModelBundle] = {

From 8b9e36f05aad8035f0a6c52f146ec37219d97dc7 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Fri, 27 Mar 2026 22:57:03 -0400
Subject: [PATCH 02/13] chore: ruff & lint:prettier

---
 .../invocations/qwen_image_text_encoder.py    |  2 +-
 .../model_records/model_records_base.py       | 10 +++++--
 invokeai/backend/model_manager/taxonomy.py    | 26 ++++++++++++++++---
 .../controlLayers/hooks/addLayerHooks.ts      |  6 +----
 .../Advanced/ParamQwenImageQuantization.tsx   |  5 +---
 5 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/invokeai/app/invocations/qwen_image_text_encoder.py b/invokeai/app/invocations/qwen_image_text_encoder.py
index 74670735877..9e3f5723ba5 100644
--- a/invokeai/app/invocations/qwen_image_text_encoder.py
+++ b/invokeai/app/invocations/qwen_image_text_encoder.py
@@ -58,7 +58,7 @@ def _build_prompt(user_prompt: str, num_images: int) -> str:
             f"<|im_start|>system\n{_GENERATE_SYSTEM_PROMPT}<|im_end|>\n"
             f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
             "<|im_start|>assistant\n"
-    )
+        )
 
 
 @invocation(
diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py
index ea5b9ef7546..dcdc0ce5956 100644
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -25,8 +25,8 @@
     ModelSourceType,
     ModelType,
     ModelVariantType,
-    QwenImageVariantType,
     Qwen3VariantType,
+    QwenImageVariantType,
     SchedulerPredictionType,
     ZImageVariantType,
 )
@@ -95,7 +95,13 @@ class ModelRecordChanges(BaseModelExcludeNull):
     # Checkpoint-specific changes
     # TODO(MM2): Should we expose these? Feels footgun-y...
     variant: Optional[
-        ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType
+        ModelVariantType
+        | ClipVariantType
+        | FluxVariantType
+        | Flux2VariantType
+        | ZImageVariantType
+        | QwenImageVariantType
+        | Qwen3VariantType
     ] = Field(description="The variant of the model.", default=None)
     prediction_type: Optional[SchedulerPredictionType] = Field(
         description="The prediction type of the model.", default=None
diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py
index 9250310a29a..587c0b0625f 100644
--- a/invokeai/backend/model_manager/taxonomy.py
+++ b/invokeai/backend/model_manager/taxonomy.py
@@ -225,8 +225,28 @@ class FluxLoRAFormat(str, Enum):
 
 
 AnyVariant: TypeAlias = Union[
-    ModelVariantType, ClipVariantType, FluxVariantType, Flux2VariantType, ZImageVariantType, QwenImageVariantType, Qwen3VariantType
+    ModelVariantType,
+    ClipVariantType,
+    FluxVariantType,
+    Flux2VariantType,
+    ZImageVariantType,
+    QwenImageVariantType,
+    Qwen3VariantType,
 ]
 variant_type_adapter = TypeAdapter[
-    ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType
-](ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType)
+    ModelVariantType
+    | ClipVariantType
+    | FluxVariantType
+    | Flux2VariantType
+    | ZImageVariantType
+    | QwenImageVariantType
+    | Qwen3VariantType
+](
+    ModelVariantType
+    | ClipVariantType
+    | FluxVariantType
+    | Flux2VariantType
+    | ZImageVariantType
+    | QwenImageVariantType
+    | Qwen3VariantType
+)
diff --git a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
index 3cd28b5f2a0..2027ff41741 100644
--- a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
+++ b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
@@ -80,11 +80,7 @@ export const selectDefaultControlAdapter = createSelector(
 
 export const getDefaultRefImageConfig = (
   getState: AppGetState
-):
-  | IPAdapterConfig
-  | FluxKontextReferenceImageConfig
-  | Flux2ReferenceImageConfig
-  | QwenImageReferenceImageConfig => {
+): IPAdapterConfig | FluxKontextReferenceImageConfig | Flux2ReferenceImageConfig | QwenImageReferenceImageConfig => {
   const state = getState();
 
   const mainModelConfig = selectMainModelConfig(state);
diff --git a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx
index 46025d95867..3d086e6ec4a 100644
--- a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx
+++ b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx
@@ -1,10 +1,7 @@
 import type { ComboboxOnChange, ComboboxOption } from '@invoke-ai/ui-library';
 import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library';
 import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
-import {
-  qwenImageQuantizationChanged,
-  selectQwenImageQuantization,
-} from 'features/controlLayers/store/paramsSlice';
+import { qwenImageQuantizationChanged, selectQwenImageQuantization } from 'features/controlLayers/store/paramsSlice';
 import { memo, useCallback, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
 

From 25b45ca7582a2ac80c90709a6ead381d86ffe125 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Fri, 27 Mar 2026 23:07:53 -0400
Subject: [PATCH 03/13] fix: remove unused frontend exports
 (zQwenImageVariantType, isQwenImageEditMainModelConfig)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 invokeai/frontend/web/src/features/nodes/types/common.ts | 2 +-
 invokeai/frontend/web/src/services/api/types.ts          | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts
index ca1d42c5a44..10afd6e44bb 100644
--- a/invokeai/frontend/web/src/features/nodes/types/common.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/common.ts
@@ -153,7 +153,7 @@ export const zModelVariantType = z.enum(['normal', 'inpaint', 'depth']);
 export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']);
 export const zFlux2VariantType = z.enum(['klein_4b', 'klein_9b', 'klein_9b_base']);
 export const zZImageVariantType = z.enum(['turbo', 'zbase']);
-export const zQwenImageVariantType = z.enum(['generate', 'edit']);
+const zQwenImageVariantType = z.enum(['generate', 'edit']);
 export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b']);
 export const zAnyModelVariant = z.union([
   zModelVariantType,
diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts
index cfeb672d95e..c8aeda6c760 100644
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -330,9 +330,6 @@ export const isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con
   return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers';
 };
 
-export const isQwenImageEditMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
-  return config.type === 'main' && config.base === 'qwen-image' && 'variant' in config && config.variant === 'edit';
-};
 
 export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
   return config.type === 'embedding';

From 66e9f873c5cc9d7836ec48ca741ea540b658ee74 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Fri, 27 Mar 2026 23:17:27 -0400
Subject: [PATCH 04/13] fix: make QwenImage variant optional to fix model
 detection tags

The variant field with a default value was appended to the discriminator
tag (e.g. main.gguf_quantized.qwen-image.generate), breaking model
detection for GGUF and Diffusers models. Making variant optional with
default=None restores the correct tags (main.gguf_quantized.qwen-image).

The variant is still set during Diffusers model probing via
_get_qwen_image_variant() and can be manually set for GGUF models.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 invokeai/backend/model_manager/configs/main.py             | 4 ++--
 .../src/features/parameters/components/Prompts/Prompts.tsx | 7 +++++--
 invokeai/frontend/web/src/services/api/schema.ts           | 6 ++----
 invokeai/frontend/web/src/services/api/types.ts            | 1 -
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py
index 484a95f4bb8..6ec0611fdf3 100644
--- a/invokeai/backend/model_manager/configs/main.py
+++ b/invokeai/backend/model_manager/configs/main.py
@@ -1208,7 +1208,7 @@ class Main_Diffusers_QwenImage_Config(Diffusers_Config_Base, Main_Config_Base, C
     """Model config for Qwen Image diffusers models (both txt2img and edit)."""
 
     base: Literal[BaseModelType.QwenImage] = Field(BaseModelType.QwenImage)
-    variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate)
+    variant: QwenImageVariantType | None = Field(default=None)
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -1269,7 +1269,7 @@ class Main_GGUF_QwenImage_Config(Checkpoint_Config_Base, Main_Config_Base, Confi
 
     base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage)
     format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
-    variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate)
+    variant: QwenImageVariantType | None = Field(default=None)
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
diff --git a/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx b/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx
index 18f5c4c4dd8..c93841d77b7 100644
--- a/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx
+++ b/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx
@@ -22,8 +22,11 @@ export const Prompts = memo(() => {
     if (!modelSupportsRefImages) {
       return false;
     }
-    if (modelConfig?.base === 'qwen-image' && 'variant' in modelConfig && modelConfig.variant !== 'edit') {
-      return false;
+    if (modelConfig?.base === 'qwen-image') {
+      const variant = 'variant' in modelConfig ? modelConfig.variant : null;
+      if (variant !== 'edit') {
+        return false;
+      }
     }
     return true;
   }, [modelSupportsRefImages, modelConfig]);
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index a23217c3a81..2a8a3d243b7 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -18500,8 +18500,7 @@ export type components = {
              * @constant
              */
             base: "qwen-image";
-            /** @default generate */
-            variant: components["schemas"]["QwenImageVariantType"];
+            variant: components["schemas"]["QwenImageVariantType"] | null;
         };
         /** Main_Diffusers_SD1_Config */
         Main_Diffusers_SD1_Config: {
@@ -19234,8 +19233,7 @@ export type components = {
              * @constant
              */
             format: "gguf_quantized";
-            /** @default generate */
-            variant: components["schemas"]["QwenImageVariantType"];
+            variant: components["schemas"]["QwenImageVariantType"] | null;
         };
         /**
          * Main_GGUF_ZImage_Config
diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts
index c8aeda6c760..b447f9debbe 100644
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -330,7 +330,6 @@ export const isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con
   return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers';
 };
 
-
 export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
   return config.type === 'embedding';
 };

From 556db02c45e12ff5f14a58f34673aeedea45bddd Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Fri, 27 Mar 2026 23:33:18 -0400
Subject: [PATCH 05/13] fix: restore Qwen Image Edit starter models with
 distinct variable names

The rename from qwen_image_edit -> qwen_image caused variable name
collisions with the txt2img starter models. Give edit models the
qwen_image_edit_* prefix to distinguish from qwen_image_* (txt2img).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../backend/model_manager/starter_models.py   | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index d049a52eee7..ca0076cbadd 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -650,7 +650,7 @@ class StarterModelBundle(BaseModel):
 # endregion
 
 # region Qwen Image Edit
-qwen_image = StarterModel(
+qwen_image_edit = StarterModel(
     name="Qwen Image Edit 2511",
     base=BaseModelType.QwenImage,
     source="Qwen/Qwen-Image-Edit-2511",
@@ -658,7 +658,7 @@ class StarterModelBundle(BaseModel):
     type=ModelType.Main,
 )
 
-qwen_image_gguf_q4_k_m = StarterModel(
+qwen_image_edit_gguf_q4_k_m = StarterModel(
     name="Qwen Image Edit 2511 (Q4_K_M)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q4_K_M.gguf",
@@ -667,7 +667,7 @@ class StarterModelBundle(BaseModel):
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_gguf_q2_k = StarterModel(
+qwen_image_edit_gguf_q2_k = StarterModel(
     name="Qwen Image Edit 2511 (Q2_K)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q2_K.gguf",
@@ -676,7 +676,7 @@ class StarterModelBundle(BaseModel):
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_gguf_q6_k = StarterModel(
+qwen_image_edit_gguf_q6_k = StarterModel(
     name="Qwen Image Edit 2511 (Q6_K)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q6_K.gguf",
@@ -685,7 +685,7 @@ class StarterModelBundle(BaseModel):
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_gguf_q8_0 = StarterModel(
+qwen_image_edit_gguf_q8_0 = StarterModel(
     name="Qwen Image Edit 2511 (Q8_0)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q8_0.gguf",
@@ -694,7 +694,7 @@ class StarterModelBundle(BaseModel):
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_lightning_4step = StarterModel(
+qwen_image_edit_lightning_4step = StarterModel(
     name="Qwen Image Edit Lightning (4-step, bf16)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
@@ -703,7 +703,7 @@ class StarterModelBundle(BaseModel):
     type=ModelType.LoRA,
 )
 
-qwen_image_lightning_8step = StarterModel(
+qwen_image_edit_lightning_8step = StarterModel(
     name="Qwen Image Edit Lightning (8-step, bf16)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-8steps-V1.0-bf16.safetensors",
@@ -1075,6 +1075,13 @@ class StarterModelBundle(BaseModel):
     flux2_klein_qwen3_4b_encoder,
     flux2_klein_qwen3_8b_encoder,
     cogview4,
+    qwen_image_edit,
+    qwen_image_edit_gguf_q2_k,
+    qwen_image_edit_gguf_q4_k_m,
+    qwen_image_edit_gguf_q6_k,
+    qwen_image_edit_gguf_q8_0,
+    qwen_image_edit_lightning_4step,
+    qwen_image_edit_lightning_8step,
     qwen_image,
     qwen_image_gguf_q2_k,
     qwen_image_gguf_q4_k_m,
@@ -1160,11 +1167,11 @@ class StarterModelBundle(BaseModel):
 ]
 
 qwen_image_bundle: list[StarterModel] = [
-    qwen_image,
-    qwen_image_gguf_q4_k_m,
-    qwen_image_gguf_q8_0,
-    qwen_image_lightning_4step,
-    qwen_image_lightning_8step,
+    qwen_image_edit,
+    qwen_image_edit_gguf_q4_k_m,
+    qwen_image_edit_gguf_q8_0,
+    qwen_image_edit_lightning_4step,
+    qwen_image_edit_lightning_8step,
     qwen_image,
     qwen_image_gguf_q4_k_m,
     qwen_image_lightning_4step,

From f3dfbd5d4473c54583cc29a53d73f67b0b39f995 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Fri, 27 Mar 2026 23:53:09 -0400
Subject: [PATCH 06/13] fix: restore correct GGUF filenames in Qwen Image Edit
 starter model URLs

The global sed rename also rewrote 'qwen-image-edit-2511' to
'qwen-image-2511' inside the HuggingFace URLs, but the actual files on
HF still have 'edit' in their names.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 invokeai/backend/model_manager/starter_models.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index ca0076cbadd..ef7b25431a0 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -661,7 +661,7 @@ class StarterModelBundle(BaseModel):
 qwen_image_edit_gguf_q4_k_m = StarterModel(
     name="Qwen Image Edit 2511 (Q4_K_M)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q4_K_M.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q4_K_M.gguf",
     description="Qwen Image Edit 2511 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
@@ -670,7 +670,7 @@ class StarterModelBundle(BaseModel):
 qwen_image_edit_gguf_q2_k = StarterModel(
     name="Qwen Image Edit 2511 (Q2_K)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q2_K.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q2_K.gguf",
     description="Qwen Image Edit 2511 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
@@ -679,7 +679,7 @@ class StarterModelBundle(BaseModel):
 qwen_image_edit_gguf_q6_k = StarterModel(
     name="Qwen Image Edit 2511 (Q6_K)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q6_K.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q6_K.gguf",
     description="Qwen Image Edit 2511 - Q6_K quantized transformer. Near-lossless quality. (~17GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
@@ -688,7 +688,7 @@ class StarterModelBundle(BaseModel):
 qwen_image_edit_gguf_q8_0 = StarterModel(
     name="Qwen Image Edit 2511 (Q8_0)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q8_0.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q8_0.gguf",
     description="Qwen Image Edit 2511 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,

From 6a19ad57f3a49c4c26328fcca6b9d75f292c01e4 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 00:36:28 -0400
Subject: [PATCH 07/13] fix: skip reference images in graph for non-edit Qwen
 Image models

When switching from an edit model to a generate model, reference images
remain in state but the panel is hidden. Prevent them from being passed
to the text encoder and VAE encoder by checking the model variant.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../graph/generation/buildQwenImageGraph.ts   | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
index e7c04744d4e..28a9b253485 100644
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
@@ -103,14 +103,18 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
   // Add Qwen Image Edit LoRAs if any are enabled
   addQwenImageLoRAs(state, g, denoise, modelLoader);
 
-  // Collect enabled Qwen Image Edit reference images that have an image set (image is optional for txt2img)
-  const validRefImageConfigs = selectRefImagesSlice(state).entities.filter(
-    (entity) =>
-      entity.isEnabled &&
-      isQwenImageReferenceImageConfig(entity.config) &&
-      entity.config.image !== null &&
-      getGlobalReferenceImageWarnings(entity, model).length === 0
-  );
+  // Only collect reference images for edit-variant models.
+  // For txt2img (generate) models, reference images are not used even if they exist in state.
+  const isEditModel = 'variant' in model && model.variant === 'edit';
+  const validRefImageConfigs = isEditModel
+    ? selectRefImagesSlice(state).entities.filter(
+        (entity) =>
+          entity.isEnabled &&
+          isQwenImageReferenceImageConfig(entity.config) &&
+          entity.config.image !== null &&
+          getGlobalReferenceImageWarnings(entity, model).length === 0
+      )
+    : [];
 
   if (validRefImageConfigs.length > 0) {
     const refImgCollect = g.addNode({

From 058df877c039b4ef09d9e96e7aafb2c58aae9576 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 00:41:10 -0400
Subject: [PATCH 08/13] fix: only set zero_cond_t=True for edit-variant GGUF
 models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The txt2img model doesn't use zero_cond_t — setting it causes the
transformer to double the timestep batch and create modulation indices
for non-existent reference patches, producing noise output. Now checks
the config variant before enabling it.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../model_manager/load/model_loaders/qwen_image.py        | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
index 15fcedba166..a025e727945 100644
--- a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
+++ b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
@@ -15,6 +15,7 @@
     BaseModelType,
     ModelFormat,
     ModelType,
+    QwenImageVariantType,
     SubModelType,
 )
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
@@ -160,10 +161,13 @@ def _load_from_singlefile(self, config: AnyModelConfig) -> AnyModel:
             "axes_dims_rope": (16, 56, 56),
         }
 
-        # zero_cond_t was added in diffusers 0.37+; skip it on older versions
+        # zero_cond_t is only used by edit-variant models. It enables dual modulation
+        # for noisy vs reference patches. Setting it on txt2img models produces garbage.
+        # Also requires diffusers 0.37+ (the parameter doesn't exist in older versions).
         import inspect
 
-        if "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters:
+        is_edit = getattr(config, "variant", None) == QwenImageVariantType.Edit
+        if is_edit and "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters:
             model_config["zero_cond_t"] = True
 
         with accelerate.init_empty_weights():

From b41bee72bcb6836b01a50854de8e3fa437d9e39e Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 01:04:28 -0400
Subject: [PATCH 09/13] fix: recall Qwen Image advanced params (component
 source, quantization, shift)

- Save qwen_image_component_source, qwen_image_quantization, and
  qwen_image_shift in generation metadata
- Add metadata recall handlers so remix/recall restores these settings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../web/src/features/metadata/parsing.tsx     | 83 +++++++++++++++++++
 .../graph/generation/buildQwenImageGraph.ts   |  3 +
 2 files changed, 86 insertions(+)

diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx
index 7d1d511a3c2..58f3aaab07d 100644
--- a/invokeai/frontend/web/src/features/metadata/parsing.tsx
+++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx
@@ -39,6 +39,9 @@ import {
   setZImageSeedVarianceEnabled,
   setZImageSeedVarianceRandomizePercent,
   setZImageSeedVarianceStrength,
+  qwenImageComponentSourceSelected,
+  qwenImageQuantizationChanged,
+  qwenImageShiftChanged,
   vaeSelected,
   widthChanged,
   zImageQwen3EncoderModelSelected,
@@ -677,6 +680,83 @@ const ZImageSeedVarianceRandomizePercent: SingleMetadataHandler<number> = {
 };
 //#endregion ZImageSeedVarianceRandomizePercent
 
+//#region QwenImageComponentSource
+const QwenImageComponentSource: SingleMetadataHandler<ModelIdentifierField | null> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageComponentSource',
+  parse: async (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_component_source');
+      if (raw === null || raw === undefined) {
+        return Promise.resolve(null);
+      }
+      return Promise.resolve(zModelIdentifierField.parse(raw));
+    } catch {
+      return Promise.resolve(null);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageComponentSourceSelected(value));
+  },
+  i18nKey: 'modelManager.qwenImageComponentSource',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<ModelIdentifierField | null>) => (
+    <MetadataPrimitiveValue value={value ? value.name : 'None'} />
+  ),
+};
+//#endregion QwenImageComponentSource
+
+//#region QwenImageQuantization
+const QwenImageQuantization: SingleMetadataHandler<'none' | 'int8' | 'nf4'> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageQuantization',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_quantization');
+      const parsed = z.enum(['none', 'int8', 'nf4']).parse(raw);
+      return Promise.resolve(parsed);
+    } catch {
+      return Promise.resolve('none' as const);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageQuantizationChanged(value));
+  },
+  i18nKey: 'modelManager.qwenImageQuantization',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<'none' | 'int8' | 'nf4'>) => (
+    <MetadataPrimitiveValue value={value} />
+  ),
+};
+//#endregion QwenImageQuantization
+
+//#region QwenImageShift
+const QwenImageShift: SingleMetadataHandler<number | null> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageShift',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_shift');
+      if (raw === null || raw === undefined) {
+        return Promise.resolve(null);
+      }
+      const parsed = z.number().parse(raw);
+      return Promise.resolve(parsed);
+    } catch {
+      return Promise.resolve(null);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageShiftChanged(value));
+  },
+  i18nKey: 'modelManager.qwenImageShift',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<number | null>) => (
+    <MetadataPrimitiveValue value={value ?? 'Default'} />
+  ),
+};
+//#endregion QwenImageShift
+
 //#region RefinerModel
 const RefinerModel: SingleMetadataHandler<ParameterSDXLRefinerModel> = {
   [SingleMetadataKey]: true,
@@ -1233,6 +1313,9 @@ export const ImageMetadataHandlers = {
   ZImageSeedVarianceEnabled,
   ZImageSeedVarianceStrength,
   ZImageSeedVarianceRandomizePercent,
+  QwenImageComponentSource,
+  QwenImageQuantization,
+  QwenImageShift,
   LoRAs,
   CanvasLayers,
   RefImages,
diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
index 28a9b253485..8f1cb5362cd 100644
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
@@ -167,6 +167,9 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
     cfg_scale,
     negative_prompt: prompts.negative,
     model: Graph.getModelMetadataField(modelConfig),
+    qwen_image_component_source: params.qwenImageComponentSource,
+    qwen_image_quantization: params.qwenImageQuantization,
+    qwen_image_shift: params.qwenImageShift,
     steps,
   });
   g.addEdgeToMetadata(seed, 'value', 'seed');

From 18d038c10a21f34d0d53ca2313c20337af676a5e Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 01:12:12 -0400
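Subject: [PATCH 10/13] fix: remove unnecessary async from
 QwenImageComponentSource parse

Every branch of the parse handler already returns Promise.resolve(...),
so the async keyword was redundant. Also move the Qwen Image action
imports into alphabetical order within the import list.
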
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 invokeai/frontend/web/src/features/metadata/parsing.tsx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx
index 58f3aaab07d..4f179d6b017 100644
--- a/invokeai/frontend/web/src/features/metadata/parsing.tsx
+++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx
@@ -13,6 +13,9 @@ import {
   kleinVaeModelSelected,
   negativePromptChanged,
   positivePromptChanged,
+  qwenImageComponentSourceSelected,
+  qwenImageQuantizationChanged,
+  qwenImageShiftChanged,
   refinerModelChanged,
   selectBase,
   setCfgRescaleMultiplier,
@@ -39,9 +42,6 @@ import {
   setZImageSeedVarianceEnabled,
   setZImageSeedVarianceRandomizePercent,
   setZImageSeedVarianceStrength,
-  qwenImageComponentSourceSelected,
-  qwenImageQuantizationChanged,
-  qwenImageShiftChanged,
   vaeSelected,
   widthChanged,
   zImageQwen3EncoderModelSelected,
@@ -684,7 +684,7 @@ const ZImageSeedVarianceRandomizePercent: SingleMetadataHandler<number> = {
 const QwenImageComponentSource: SingleMetadataHandler<ModelIdentifierField | null> = {
   [SingleMetadataKey]: true,
   type: 'QwenImageComponentSource',
-  parse: async (metadata, _store) => {
+  parse: (metadata, _store) => {
     try {
       const raw = getProperty(metadata, 'qwen_image_component_source');
       if (raw === null || raw === undefined) {

From 2aeb2fdd3a57ecc3d8003b6a761983aaa7e3c481 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 01:30:52 -0400
Subject: [PATCH 11/13] fix: prevent Flux LoRAs from being detected as Qwen
 Image LoRAs

Flux PEFT LoRAs use transformer.single_transformer_blocks.* keys which
contain "transformer_blocks." as a substring, falsely matching the
Qwen Image LoRA detection. Add single_transformer_blocks to the Flux
exclusion set. Also rename "Qwen Image Edit LoRA" to "Qwen Image LoRA"
in the NotAMatchError message.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 invokeai/backend/model_manager/configs/lora.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py
index a5b9f40631d..f305bbddee8 100644
--- a/invokeai/backend/model_manager/configs/lora.py
+++ b/invokeai/backend/model_manager/configs/lora.py
@@ -775,14 +775,19 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
             state_dict,
             {"lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight", "dora_scale"},
         )
-        # Must NOT have diffusion_model.layers (Z-Image) or double_blocks/single_blocks (Flux)
+        # Must NOT have diffusion_model.layers (Z-Image) or Flux-style keys.
+        # Flux LoRAs can have transformer.single_transformer_blocks or transformer.transformer_blocks
+        # (with the "transformer." prefix and "single_" variant) which would falsely match our check.
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
-        has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."})
+        has_flux_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."},
+        )
 
         if has_qwen_ie_keys and has_lora_suffix and not has_z_image_keys and not has_flux_keys:
             return
 
-        raise NotAMatchError("model does not match Qwen Image Edit LoRA heuristics")
+        raise NotAMatchError("model does not match Qwen Image LoRA heuristics")
 
     @classmethod
     def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
@@ -791,7 +796,10 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
             state_dict, {"transformer_blocks.", "transformer.transformer_blocks."}
         )
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
-        has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."})
+        has_flux_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."},
+        )
 
         if has_qwen_ie_keys and not has_z_image_keys and not has_flux_keys:
             return BaseModelType.QwenImage

From 5c6ca302b037edc0f0947aa5c0228141a80a1ed3 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 01:41:33 -0400
Subject: [PATCH 12/13] chore: ruff

---
 invokeai/backend/model_manager/configs/lora.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py
index f305bbddee8..f2e6f3b34fa 100644
--- a/invokeai/backend/model_manager/configs/lora.py
+++ b/invokeai/backend/model_manager/configs/lora.py
@@ -781,7 +781,12 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
         has_flux_keys = state_dict_has_any_keys_starting_with(
             state_dict,
-            {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."},
+            {
+                "double_blocks.",
+                "single_blocks.",
+                "single_transformer_blocks.",
+                "transformer.single_transformer_blocks.",
+            },
         )
 
         if has_qwen_ie_keys and has_lora_suffix and not has_z_image_keys and not has_flux_keys:
@@ -798,7 +803,12 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
         has_flux_keys = state_dict_has_any_keys_starting_with(
             state_dict,
-            {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."},
+            {
+                "double_blocks.",
+                "single_blocks.",
+                "single_transformer_blocks.",
+                "transformer.single_transformer_blocks.",
+            },
         )
 
         if has_qwen_ie_keys and not has_z_image_keys and not has_flux_keys:

From 2fcedc72dfa091ad169e5927ac5d3ac779e4a96b Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Sat, 28 Mar 2026 10:15:08 -0400
Subject: [PATCH 13/13] fix: don't force reference image to output aspect ratio
 in VAE encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the graph builder passed the output canvas dimensions to the
I2L node, which resized the reference image to match — distorting its
aspect ratio when they differed. Now the reference is encoded at its
native size. The denoise node already handles dimension mismatches via
bilinear interpolation in latent space.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../util/graph/generation/buildQwenImageGraph.ts   | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
index 8f1cb5362cd..1ea20a377e6 100644
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
@@ -15,11 +15,7 @@ import { addQwenImageLoRAs } from 'features/nodes/util/graph/generation/addQwenI
 import { addTextToImage } from 'features/nodes/util/graph/generation/addTextToImage';
 import { addWatermarker } from 'features/nodes/util/graph/generation/addWatermarker';
 import { Graph } from 'features/nodes/util/graph/generation/Graph';
-import {
-  getOriginalAndScaledSizesForTextToImage,
-  selectCanvasOutputFields,
-  selectPresetModifiedPrompts,
-} from 'features/nodes/util/graph/graphBuilderUtils';
+import { selectCanvasOutputFields, selectPresetModifiedPrompts } from 'features/nodes/util/graph/graphBuilderUtils';
 import type { GraphBuilderArg, GraphBuilderReturn, ImageOutputNodes } from 'features/nodes/util/graph/types';
 import { selectActiveTab } from 'features/ui/store/uiSelectors';
 import type { Invocation } from 'services/api/types';
@@ -139,14 +135,12 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
     const firstImgField = zImageField.parse(
       firstConfig.config.image?.crop?.image ?? firstConfig.config.image?.original.image
     );
-    // Resize the reference image to the generation dimensions before VAE encoding,
-    // matching the diffusers pipeline which resizes in pixel space, not latent space.
-    const { scaledSize } = getOriginalAndScaledSizesForTextToImage(state);
+    // Don't force-resize the reference image to the output dimensions — that would
+    // distort the aspect ratio when they differ. The I2L encodes at the image's
+    // native size; the denoise node handles dimension mismatches via interpolation.
     const refI2l = g.addNode({
       type: 'qwen_image_i2l',
       id: getPrefixedId('qwen_ref_i2l'),
-      width: scaledSize.width,
-      height: scaledSize.height,
     });
     const refImageNode = g.addNode({
       type: 'image',