diff --git a/invokeai/app/invocations/qwen_image_denoise.py b/invokeai/app/invocations/qwen_image_denoise.py
index cd3ff917596..4b9fb207680 100644
--- a/invokeai/app/invocations/qwen_image_denoise.py
+++ b/invokeai/app/invocations/qwen_image_denoise.py
@@ -353,29 +353,44 @@ def _run_diffusion(self, context: InvocationContext):
         # Pack latents into 2x2 patches: (B, C, H, W) -> (B, H/2*W/2, C*4)
         latents = self._pack_latents(latents, 1, out_channels, latent_height, latent_width)

-        # Pack reference image latents and concatenate along the sequence dimension.
-        # The edit transformer always expects [noisy_patches ; ref_patches] in its sequence.
-        if ref_latents is not None:
-            _, ref_ch, rh, rw = ref_latents.shape
-            if rh != latent_height or rw != latent_width:
-                ref_latents = torch.nn.functional.interpolate(
-                    ref_latents, size=(latent_height, latent_width), mode="bilinear"
+        # Determine whether the model uses reference latent conditioning (zero_cond_t).
+        # Edit models (zero_cond_t=True) expect [noisy_patches ; ref_patches] in the sequence.
+        # Txt2img models (zero_cond_t=False) only take noisy patches.
+        has_zero_cond_t = getattr(transformer_info.model, "zero_cond_t", False) or getattr(
+            transformer_info.model.config, "zero_cond_t", False
+        )
+        use_ref_latents = has_zero_cond_t
+
+        ref_latents_packed = None
+        if use_ref_latents:
+            if ref_latents is not None:
+                _, ref_ch, rh, rw = ref_latents.shape
+                if rh != latent_height or rw != latent_width:
+                    ref_latents = torch.nn.functional.interpolate(
+                        ref_latents, size=(latent_height, latent_width), mode="bilinear"
+                    )
+            else:
+                # No reference image provided — use zeros so the model still gets the
+                # expected sequence layout.
+                ref_latents = torch.zeros(
+                    1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
                 )
+            ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)
+
+        # img_shapes tells the transformer the spatial layout of patches.
+        if use_ref_latents:
+            img_shapes = [
+                [
+                    (1, latent_height // 2, latent_width // 2),
+                    (1, latent_height // 2, latent_width // 2),
+                ]
+            ]
         else:
-            # No reference image provided — use zeros so the model still gets the
-            # expected sequence layout.
-            ref_latents = torch.zeros(
-                1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
-            )
-        ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)
-
-        # img_shapes tells the transformer the spatial layout of noisy and reference patches.
-        img_shapes = [
-            [
-                (1, latent_height // 2, latent_width // 2),
-                (1, latent_height // 2, latent_width // 2),
+            img_shapes = [
+                [
+                    (1, latent_height // 2, latent_width // 2),
+                ]
             ]
-        ]

         # Prepare inpaint extension (operates in 4D space, so unpack/repack around it)
         inpaint_mask = self._prep_inpaint_mask(context, noise)  # noise has the right 4D shape
@@ -428,8 +443,12 @@ def _run_diffusion(self, context: InvocationContext):
             # The pipeline passes timestep / 1000 to the transformer
             timestep = t.expand(latents.shape[0]).to(inference_dtype)

-            # Concatenate noisy and reference patches along the sequence dim
-            model_input = torch.cat([latents, ref_latents_packed], dim=1)
+            # For edit models: concatenate noisy and reference patches along the sequence dim
+            # For txt2img models: just use noisy patches
+            if ref_latents_packed is not None:
+                model_input = torch.cat([latents, ref_latents_packed], dim=1)
+            else:
+                model_input = latents

             noise_pred_cond = transformer(
                 hidden_states=model_input,
diff --git a/invokeai/app/invocations/qwen_image_text_encoder.py b/invokeai/app/invocations/qwen_image_text_encoder.py
index 641e8c4d388..9e3f5723ba5 100644
--- a/invokeai/app/invocations/qwen_image_text_encoder.py
+++ b/invokeai/app/invocations/qwen_image_text_encoder.py
@@ -20,27 +20,45 @@
     QwenImageConditioningInfo,
 )

-# The Qwen Image Edit pipeline uses a specific system prompt and drops the first
-# N tokens (the system prompt prefix) from the embeddings. These constants are
-# taken directly from the diffusers QwenImagePipeline.
-_SYSTEM_PROMPT = (
+# Prompt templates and drop indices for the two Qwen Image model modes.
+# These are taken directly from the diffusers pipelines.
+
+# Image editing mode (QwenImagePipeline)
+_EDIT_SYSTEM_PROMPT = (
     "Describe the key features of the input image (color, shape, size, texture, objects, background), "
     "then explain how the user's text instruction should alter or modify the image. "
     "Generate a new image that meets the user's requirements while maintaining consistency "
     "with the original input where appropriate."
 )
+_EDIT_DROP_IDX = 64
+
+# Text-to-image mode (QwenImagePipeline)
+_GENERATE_SYSTEM_PROMPT = (
+    "Describe the image by detailing the color, shape, size, texture, quantity, "
+    "text, spatial relationships of the objects and background:"
+)
+_GENERATE_DROP_IDX = 34
+
 _IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"
-_DROP_IDX = 64


 def _build_prompt(user_prompt: str, num_images: int) -> str:
-    """Build the full prompt with one vision placeholder per reference image."""
-    image_tokens = _IMAGE_PLACEHOLDER * max(num_images, 1)
-    return (
-        f"<|im_start|>system\n{_SYSTEM_PROMPT}<|im_end|>\n"
-        f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n"
-        "<|im_start|>assistant\n"
-    )
+    """Build the full prompt with the appropriate template based on whether reference images are provided."""
+    if num_images > 0:
+        # Edit mode: include vision placeholders for reference images
+        image_tokens = _IMAGE_PLACEHOLDER * num_images
+        return (
+            f"<|im_start|>system\n{_EDIT_SYSTEM_PROMPT}<|im_end|>\n"
+            f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
+    else:
+        # Generate mode: text-only prompt
+        return (
+            f"<|im_start|>system\n{_GENERATE_SYSTEM_PROMPT}<|im_end|>\n"
+            f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )


 @invocation(
@@ -188,7 +206,10 @@ def _encode(
         hidden_states = outputs.hidden_states[-1]

         # Extract valid (non-padding) tokens using the attention mask,
-        # then drop the first _DROP_IDX tokens (system prompt prefix).
+        # then drop the system prompt prefix tokens.
+        # The drop index differs between edit mode (64) and generate mode (34).
+        drop_idx = _EDIT_DROP_IDX if images else _GENERATE_DROP_IDX
+
         attn_mask = model_inputs.attention_mask
         bool_mask = attn_mask.bool()
         valid_lengths = bool_mask.sum(dim=1)
@@ -196,7 +217,7 @@ def _encode(
         split_hidden = torch.split(selected, valid_lengths.tolist(), dim=0)

         # Drop system prefix tokens and build padded output
-        trimmed = [h[_DROP_IDX:] for h in split_hidden]
+        trimmed = [h[drop_idx:] for h in split_hidden]
         attn_mask_list = [torch.ones(h.size(0), dtype=torch.long, device=device) for h in trimmed]

         max_seq_len = max(h.size(0) for h in trimmed)
diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py
index ea5b9ef7546..dcdc0ce5956 100644
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -25,8 +25,8 @@
     ModelSourceType,
     ModelType,
     ModelVariantType,
-    QwenImageVariantType,
     Qwen3VariantType,
+    QwenImageVariantType,
     SchedulerPredictionType,
     ZImageVariantType,
 )
@@ -95,7 +95,13 @@ class ModelRecordChanges(BaseModelExcludeNull):
     # Checkpoint-specific changes
     # TODO(MM2): Should we expose these? Feels footgun-y...
     variant: Optional[
-        ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType
+        ModelVariantType
+        | ClipVariantType
+        | FluxVariantType
+        | Flux2VariantType
+        | ZImageVariantType
+        | QwenImageVariantType
+        | Qwen3VariantType
     ] = Field(description="The variant of the model.", default=None)
     prediction_type: Optional[SchedulerPredictionType] = Field(
         description="The prediction type of the model.", default=None
diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py
index a5b9f40631d..f2e6f3b34fa 100644
--- a/invokeai/backend/model_manager/configs/lora.py
+++ b/invokeai/backend/model_manager/configs/lora.py
@@ -775,14 +775,24 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
             state_dict,
             {"lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight", "dora_scale"},
         )
-        # Must NOT have diffusion_model.layers (Z-Image) or double_blocks/single_blocks (Flux)
+        # Must NOT have diffusion_model.layers (Z-Image) or Flux-style keys.
+        # Flux LoRAs can have transformer.single_transformer_blocks or transformer.transformer_blocks
+        # (with the "transformer." prefix and "single_" variant) which would falsely match our check.
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
-        has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."})
+        has_flux_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {
+                "double_blocks.",
+                "single_blocks.",
+                "single_transformer_blocks.",
+                "transformer.single_transformer_blocks.",
+            },
+        )

         if has_qwen_ie_keys and has_lora_suffix and not has_z_image_keys and not has_flux_keys:
             return

-        raise NotAMatchError("model does not match Qwen Image Edit LoRA heuristics")
+        raise NotAMatchError("model does not match Qwen Image LoRA heuristics")

     @classmethod
     def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
@@ -791,7 +801,15 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
             state_dict, {"transformer_blocks.", "transformer.transformer_blocks."}
         )
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
-        has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."})
+        has_flux_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {
+                "double_blocks.",
+                "single_blocks.",
+                "single_transformer_blocks.",
+                "transformer.single_transformer_blocks.",
+            },
+        )

         if has_qwen_ie_keys and not has_z_image_keys and not has_flux_keys:
             return BaseModelType.QwenImage
diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py
index 484a95f4bb8..6ec0611fdf3 100644
--- a/invokeai/backend/model_manager/configs/main.py
+++ b/invokeai/backend/model_manager/configs/main.py
@@ -1208,7 +1208,7 @@ class Main_Diffusers_QwenImage_Config(Diffusers_Config_Base, Main_Config_Base, C
     """Model config for Qwen Image diffusers models (both txt2img and edit)."""

     base: Literal[BaseModelType.QwenImage] = Field(BaseModelType.QwenImage)
-    variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate)
+    variant: QwenImageVariantType | None = Field(default=None)

     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -1269,7 +1269,7 @@ class Main_GGUF_QwenImage_Config(Checkpoint_Config_Base, Main_Config_Base, Confi
     base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage)
     format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
-    variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate)
+    variant: QwenImageVariantType | None = Field(default=None)

     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
diff --git a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
index 15fcedba166..a025e727945 100644
--- a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
+++ b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
@@ -15,6 +15,7 @@
     BaseModelType,
     ModelFormat,
     ModelType,
+    QwenImageVariantType,
     SubModelType,
 )
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
@@ -160,10 +161,13 @@ def _load_from_singlefile(self, config: AnyModelConfig) -> AnyModel:
             "axes_dims_rope": (16, 56, 56),
         }

-        # zero_cond_t was added in diffusers 0.37+; skip it on older versions
+        # zero_cond_t is only used by edit-variant models. It enables dual modulation
+        # for noisy vs reference patches. Setting it on txt2img models produces garbage.
+        # Also requires diffusers 0.37+ (the parameter doesn't exist in older versions).
         import inspect

-        if "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters:
+        is_edit = getattr(config, "variant", None) == QwenImageVariantType.Edit
+        if is_edit and "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters:
             model_config["zero_cond_t"] = True

         with accelerate.init_empty_weights():
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index de5f1e1b8b6..ef7b25431a0 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -650,7 +650,7 @@ class StarterModelBundle(BaseModel):
 # endregion

 # region Qwen Image Edit
-qwen_image = StarterModel(
+qwen_image_edit = StarterModel(
     name="Qwen Image Edit 2511",
     base=BaseModelType.QwenImage,
     source="Qwen/Qwen-Image-Edit-2511",
     type=ModelType.Main,
 )

-qwen_image_gguf_q4_k_m = StarterModel(
+qwen_image_edit_gguf_q4_k_m = StarterModel(
     name="Qwen Image Edit 2511 (Q4_K_M)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q4_K_M.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q4_K_M.gguf",
     description="Qwen Image Edit 2511 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
 )

-qwen_image_gguf_q2_k = StarterModel(
+qwen_image_edit_gguf_q2_k = StarterModel(
     name="Qwen Image Edit 2511 (Q2_K)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q2_K.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q2_K.gguf",
     description="Qwen Image Edit 2511 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)",
(~7.5GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, ) -qwen_image_gguf_q6_k = StarterModel( +qwen_image_edit_gguf_q6_k = StarterModel( name="Qwen Image Edit 2511 (Q6_K)", base=BaseModelType.QwenImage, - source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q6_K.gguf", + source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q6_K.gguf", description="Qwen Image Edit 2511 - Q6_K quantized transformer. Near-lossless quality. (~17GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, ) -qwen_image_gguf_q8_0 = StarterModel( +qwen_image_edit_gguf_q8_0 = StarterModel( name="Qwen Image Edit 2511 (Q8_0)", base=BaseModelType.QwenImage, - source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q8_0.gguf", + source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q8_0.gguf", description="Qwen Image Edit 2511 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, ) -qwen_image_lightning_4step = StarterModel( +qwen_image_edit_lightning_4step = StarterModel( name="Qwen Image Edit Lightning (4-step, bf16)", base=BaseModelType.QwenImage, source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors", @@ -703,7 +703,7 @@ class StarterModelBundle(BaseModel): type=ModelType.LoRA, ) -qwen_image_lightning_8step = StarterModel( +qwen_image_edit_lightning_8step = StarterModel( name="Qwen Image Edit Lightning (8-step, bf16)", base=BaseModelType.QwenImage, source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-8steps-V1.0-bf16.safetensors", @@ -711,6 +711,69 @@ class StarterModelBundle(BaseModel): "Settings: Steps=8, CFG=1, Shift Override=3.", type=ModelType.LoRA, ) + +# Qwen Image (txt2img) +qwen_image = StarterModel( + name="Qwen Image 2512", + base=BaseModelType.QwenImage, + source="Qwen/Qwen-Image-2512", + description="Qwen Image 2512 full diffusers model. High-quality text-to-image generation. (~40GB)", + type=ModelType.Main, +) + +qwen_image_gguf_q4_k_m = StarterModel( + name="Qwen Image 2512 (Q4_K_M)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q4_K_M.gguf", + description="Qwen Image 2512 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_gguf_q2_k = StarterModel( + name="Qwen Image 2512 (Q2_K)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q2_K.gguf", + description="Qwen Image 2512 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_gguf_q6_k = StarterModel( + name="Qwen Image 2512 (Q6_K)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q6_K.gguf", + description="Qwen Image 2512 - Q6_K quantized transformer. Near-lossless quality. 
(~17GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_gguf_q8_0 = StarterModel( + name="Qwen Image 2512 (Q8_0)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q8_0.gguf", + description="Qwen Image 2512 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_lightning_4step = StarterModel( + name="Qwen Image Lightning (4-step, V2.0, bf16)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors", + description="Lightning distillation LoRA for Qwen Image — enables generation in just 4 steps. " + "Settings: Steps=4, CFG=1, Shift Override=3.", + type=ModelType.LoRA, +) + +qwen_image_lightning_8step = StarterModel( + name="Qwen Image Lightning (8-step, V2.0, bf16)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors", + description="Lightning distillation LoRA for Qwen Image — enables generation in 8 steps with better quality. " + "Settings: Steps=8, CFG=1, Shift Override=3.", + type=ModelType.LoRA, +) # endregion # region SigLIP @@ -1012,6 +1075,13 @@ class StarterModelBundle(BaseModel): flux2_klein_qwen3_4b_encoder, flux2_klein_qwen3_8b_encoder, cogview4, + qwen_image_edit, + qwen_image_edit_gguf_q2_k, + qwen_image_edit_gguf_q4_k_m, + qwen_image_edit_gguf_q6_k, + qwen_image_edit_gguf_q8_0, + qwen_image_edit_lightning_4step, + qwen_image_edit_lightning_8step, qwen_image, qwen_image_gguf_q2_k, qwen_image_gguf_q4_k_m, @@ -1097,9 +1167,13 @@ class StarterModelBundle(BaseModel): ] qwen_image_bundle: list[StarterModel] = [ + qwen_image_edit, + qwen_image_edit_gguf_q4_k_m, + qwen_image_edit_gguf_q8_0, + qwen_image_edit_lightning_4step, + qwen_image_edit_lightning_8step, qwen_image, qwen_image_gguf_q4_k_m, - qwen_image_gguf_q8_0, qwen_image_lightning_4step, qwen_image_lightning_8step, ] diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py index 9250310a29a..587c0b0625f 100644 --- a/invokeai/backend/model_manager/taxonomy.py +++ b/invokeai/backend/model_manager/taxonomy.py @@ -225,8 +225,28 @@ class FluxLoRAFormat(str, Enum): AnyVariant: TypeAlias = Union[ - ModelVariantType, ClipVariantType, FluxVariantType, Flux2VariantType, ZImageVariantType, QwenImageVariantType, Qwen3VariantType + ModelVariantType, + ClipVariantType, + FluxVariantType, + Flux2VariantType, + ZImageVariantType, + QwenImageVariantType, + Qwen3VariantType, ] variant_type_adapter = TypeAdapter[ - ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType -](ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType) + ModelVariantType + | ClipVariantType + | FluxVariantType + | Flux2VariantType + | ZImageVariantType + | QwenImageVariantType + | Qwen3VariantType +]( + ModelVariantType + | ClipVariantType + | FluxVariantType + | Flux2VariantType + | ZImageVariantType + | QwenImageVariantType + | Qwen3VariantType +) diff --git a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts index 3cd28b5f2a0..2027ff41741 100644 --- 
+++ b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
@@ -80,11 +80,7 @@ export const selectDefaultControlAdapter = createSelector(

 export const getDefaultRefImageConfig = (
   getState: AppGetState
-):
-  | IPAdapterConfig
-  | FluxKontextReferenceImageConfig
-  | Flux2ReferenceImageConfig
-  | QwenImageReferenceImageConfig => {
+): IPAdapterConfig | FluxKontextReferenceImageConfig | Flux2ReferenceImageConfig | QwenImageReferenceImageConfig => {
   const state = getState();

   const mainModelConfig = selectMainModelConfig(state);
diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx
index 7d1d511a3c2..4f179d6b017 100644
--- a/invokeai/frontend/web/src/features/metadata/parsing.tsx
+++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx
@@ -13,6 +13,9 @@ import {
   kleinVaeModelSelected,
   negativePromptChanged,
   positivePromptChanged,
+  qwenImageComponentSourceSelected,
+  qwenImageQuantizationChanged,
+  qwenImageShiftChanged,
   refinerModelChanged,
   selectBase,
   setCfgRescaleMultiplier,
@@ -677,6 +680,83 @@ const ZImageSeedVarianceRandomizePercent: SingleMetadataHandler = {
 };
 //#endregion ZImageSeedVarianceRandomizePercent

+//#region QwenImageComponentSource
+const QwenImageComponentSource: SingleMetadataHandler = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageComponentSource',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_component_source');
+      if (raw === null || raw === undefined) {
+        return Promise.resolve(null);
+      }
+      return Promise.resolve(zModelIdentifierField.parse(raw));
+    } catch {
+      return Promise.resolve(null);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageComponentSourceSelected(value));
+  },
+  i18nKey: 'modelManager.qwenImageComponentSource',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps) => (
+
+  ),
+};
+//#endregion QwenImageComponentSource
+
+//#region QwenImageQuantization
+const QwenImageQuantization: SingleMetadataHandler<'none' | 'int8' | 'nf4'> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageQuantization',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_quantization');
+      const parsed = z.enum(['none', 'int8', 'nf4']).parse(raw);
+      return Promise.resolve(parsed);
+    } catch {
+      return Promise.resolve('none' as const);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageQuantizationChanged(value));
+  },
+  i18nKey: 'modelManager.qwenImageQuantization',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<'none' | 'int8' | 'nf4'>) => (
+
+  ),
+};
+//#endregion QwenImageQuantization
+
+//#region QwenImageShift
+const QwenImageShift: SingleMetadataHandler = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageShift',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_shift');
+      if (raw === null || raw === undefined) {
+        return Promise.resolve(null);
+      }
+      const parsed = z.number().parse(raw);
+      return Promise.resolve(parsed);
+    } catch {
+      return Promise.resolve(null);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageShiftChanged(value));
+  },
+  i18nKey: 'modelManager.qwenImageShift',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps) => (
+
+  ),
+};
+//#endregion QwenImageShift
+
 //#region RefinerModel
 const RefinerModel: SingleMetadataHandler = {
   [SingleMetadataKey]: true,
@@ -1233,6 +1313,9 @@ export const ImageMetadataHandlers = {
   ZImageSeedVarianceEnabled,
   ZImageSeedVarianceStrength,
   ZImageSeedVarianceRandomizePercent,
+  QwenImageComponentSource,
+  QwenImageQuantization,
+  QwenImageShift,
   LoRAs,
   CanvasLayers,
   RefImages,
diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts
index ca1d42c5a44..10afd6e44bb 100644
--- a/invokeai/frontend/web/src/features/nodes/types/common.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/common.ts
@@ -153,7 +153,7 @@ export const zModelVariantType = z.enum(['normal', 'inpaint', 'depth']);
 export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']);
 export const zFlux2VariantType = z.enum(['klein_4b', 'klein_9b', 'klein_9b_base']);
 export const zZImageVariantType = z.enum(['turbo', 'zbase']);
-export const zQwenImageVariantType = z.enum(['generate', 'edit']);
+const zQwenImageVariantType = z.enum(['generate', 'edit']);
 export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b']);
 export const zAnyModelVariant = z.union([
   zModelVariantType,
diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
index e7c04744d4e..1ea20a377e6 100644
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
@@ -15,11 +15,7 @@ import { addQwenImageLoRAs } from 'features/nodes/util/graph/generation/addQwenI
 import { addTextToImage } from 'features/nodes/util/graph/generation/addTextToImage';
 import { addWatermarker } from 'features/nodes/util/graph/generation/addWatermarker';
 import { Graph } from 'features/nodes/util/graph/generation/Graph';
-import {
-  getOriginalAndScaledSizesForTextToImage,
-  selectCanvasOutputFields,
-  selectPresetModifiedPrompts,
-} from 'features/nodes/util/graph/graphBuilderUtils';
+import { selectCanvasOutputFields, selectPresetModifiedPrompts } from 'features/nodes/util/graph/graphBuilderUtils';
 import type { GraphBuilderArg, GraphBuilderReturn, ImageOutputNodes } from 'features/nodes/util/graph/types';
 import { selectActiveTab } from 'features/ui/store/uiSelectors';
 import type { Invocation } from 'services/api/types';
@@ -103,14 +99,18 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise
-  const validRefImageConfigs = selectRefImagesSlice(state).entities.filter(
-    (entity) =>
-      entity.isEnabled &&
-      isQwenImageReferenceImageConfig(entity.config) &&
-      entity.config.image !== null &&
-      getGlobalReferenceImageWarnings(entity, model).length === 0
-  );
+  // Only collect reference images for edit-variant models.
+  // For txt2img (generate) models, reference images are not used even if they exist in state.
+  const isEditModel = 'variant' in model && model.variant === 'edit';
+  const validRefImageConfigs = isEditModel
+    ? selectRefImagesSlice(state).entities.filter(
+        (entity) =>
+          entity.isEnabled &&
+          isQwenImageReferenceImageConfig(entity.config) &&
+          entity.config.image !== null &&
+          getGlobalReferenceImageWarnings(entity, model).length === 0
+      )
+    : [];

   if (validRefImageConfigs.length > 0) {
     const refImgCollect = g.addNode({
@@ -135,14 +135,12 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise {
     if (!modelSupportsRefImages) {
       return false;
     }
-    if (modelConfig?.base === 'qwen-image' && 'variant' in modelConfig && modelConfig.variant !== 'edit') {
-      return false;
+    if (modelConfig?.base === 'qwen-image') {
+      const variant = 'variant' in modelConfig ? modelConfig.variant : null;
+      if (variant !== 'edit') {
+        return false;
+      }
     }
     return true;
   }, [modelSupportsRefImages, modelConfig]);
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index a23217c3a81..2a8a3d243b7 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -18500,8 +18500,7 @@ export type components = {
      * @constant
      */
     base: "qwen-image";
-    /** @default generate */
-    variant: components["schemas"]["QwenImageVariantType"];
+    variant: components["schemas"]["QwenImageVariantType"] | null;
   };
   /** Main_Diffusers_SD1_Config */
   Main_Diffusers_SD1_Config: {
@@ -19234,8 +19233,7 @@ export type components = {
      * @constant
      */
     format: "gguf_quantized";
-    /** @default generate */
-    variant: components["schemas"]["QwenImageVariantType"];
+    variant: components["schemas"]["QwenImageVariantType"] | null;
   };
   /**
    * Main_GGUF_ZImage_Config
diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts
index cfeb672d95e..b447f9debbe 100644
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -330,10 +330,6 @@ export const isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con
   return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers';
 };

-export const isQwenImageEditMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
-  return config.type === 'main' && config.base === 'qwen-image' && 'variant' in config && config.variant === 'edit';
-};
-
 export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
   return config.type === 'embedding';
 };
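
Note on the denoise hunks above: the 2x2 patch packing that self._pack_latents performs (the "(B, C, H, W) -> (B, H/2*W/2, C*4)" reshape named in the comment) can be sketched roughly as below. This is an illustrative sketch based on that shape comment, not the exact InvokeAI helper; the function names are assumptions.

# Illustrative sketch of the 2x2 latent packing described by the comment
# "(B, C, H, W) -> (B, H/2*W/2, C*4)". The real implementation is
# self._pack_latents in qwen_image_denoise.py.
import torch

def pack_latents(latents: torch.Tensor) -> torch.Tensor:
    b, c, h, w = latents.shape
    x = latents.view(b, c, h // 2, 2, w // 2, 2)
    x = x.permute(0, 2, 4, 1, 3, 5)  # (B, H/2, W/2, C, 2, 2)
    return x.reshape(b, (h // 2) * (w // 2), c * 4)

def unpack_latents(packed: torch.Tensor, c: int, h: int, w: int) -> torch.Tensor:
    b = packed.shape[0]
    x = packed.view(b, h // 2, w // 2, c, 2, 2)
    x = x.permute(0, 3, 1, 4, 2, 5)  # (B, C, H/2, 2, W/2, 2)
    return x.reshape(b, c, h, w)

For edit models the packed reference latents are then concatenated after the noisy patches along dim=1, which doubles the sequence length and is why img_shapes lists two identical (1, latent_height // 2, latent_width // 2) entries; txt2img models pass only the noisy patches and a single entry.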
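
On the text-encoder change: _encode selects each sample's valid tokens and drops the tokenized system-prompt prefix, using 64 tokens for the edit template and 34 for the generate template (the constants introduced above). A condensed sketch of that trim-and-pad step, with illustrative tensor names rather than the invocation's exact variables:

# Condensed sketch of the prefix trimming in qwen_image_text_encoder.py.
import torch
import torch.nn.functional as F

def trim_system_prefix(hidden_states: torch.Tensor, attention_mask: torch.Tensor, drop_idx: int):
    # hidden_states: (B, T, D) last hidden layer; attention_mask: (B, T) of 0/1
    bool_mask = attention_mask.bool()
    valid_lengths = bool_mask.sum(dim=1)
    selected = hidden_states[bool_mask]  # (sum(valid_lengths), D)
    split_hidden = torch.split(selected, valid_lengths.tolist(), dim=0)
    trimmed = [h[drop_idx:] for h in split_hidden]  # drop the system-prompt tokens
    max_seq_len = max(h.size(0) for h in trimmed)
    padded = torch.stack([F.pad(h, (0, 0, 0, max_seq_len - h.size(0))) for h in trimmed])
    mask = torch.stack(
        [F.pad(torch.ones(h.size(0), dtype=torch.long), (0, max_seq_len - h.size(0))) for h in trimmed]
    )
    return padded, mask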
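
On the LoRA heuristics in configs/lora.py: the change only broadens the set of key prefixes that mark a state dict as Flux-style, so diffusers-format Flux LoRAs (which use a "transformer." prefix and the "single_transformer_blocks" block name) no longer pass the Qwen Image match. Assuming the prefix helper behaves roughly like the sketch below (an assumption; the real helper lives in the model_manager config utilities):

# Assumed behavior of the prefix check used by the LoRA heuristics.
def state_dict_has_any_keys_starting_with(state_dict: dict, prefixes: set[str]) -> bool:
    return any(key.startswith(tuple(prefixes)) for key in state_dict)

# A diffusers-format Flux LoRA key is now excluded from the Qwen Image match,
# while a key that only uses "transformer.transformer_blocks." still passes.
flux_like = {"transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": 0}
qwen_like = {"transformer.transformer_blocks.0.attn.to_q.lora_A.weight": 0}
assert state_dict_has_any_keys_starting_with(flux_like, {"transformer.single_transformer_blocks."})
assert not state_dict_has_any_keys_starting_with(qwen_like, {"transformer.single_transformer_blocks."})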
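
Finally, the loader change gates zero_cond_t on both the model variant and the installed diffusers version. A minimal sketch of that gate, assuming a diffusers release that exports QwenImageTransformer2DModel at the top level (zero_cond_t itself only exists in 0.37+); the wrapper function here is hypothetical:

# Sketch of the variant- and version-gated flag from the loader change.
import inspect

from diffusers import QwenImageTransformer2DModel

def maybe_enable_zero_cond_t(model_config: dict, is_edit: bool) -> dict:
    accepts_flag = "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters
    if is_edit and accepts_flag:
        model_config["zero_cond_t"] = True
    return model_config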