From 2f10d834adafdddd011758e0a9f132e2f902c2ba Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 27 Mar 2026 19:56:22 -0400 Subject: [PATCH 01/13] feat: add Qwen Image 2512 txt2img support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shares the QwenImageEdit base type and infrastructure with the edit model. Key changes: - Text encoder: auto-selects prompt template based on reference images — edit template (drop_idx=64) when images present, generate template (drop_idx=34) when absent - Denoise: detects zero_cond_t to determine whether to concatenate reference latents; txt2img models pass only noisy patches with a single-entry img_shapes - Model config: accept QwenImagePipeline in addition to QwenImageEditPlusPipeline - LoRA: handle "transformer." key prefix from some training frameworks, add to config detection - Starter models: Qwen-Image-2512 full + 4 GGUF variants + Lightning V2.0 LoRAs (4-step, 8-step), all added to the Qwen Image Edit bundle Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/invocations/qwen_image_denoise.py | 63 +++++++++++------ .../invocations/qwen_image_text_encoder.py | 47 +++++++++---- .../backend/model_manager/starter_models.py | 67 +++++++++++++++++++ 3 files changed, 142 insertions(+), 35 deletions(-) diff --git a/invokeai/app/invocations/qwen_image_denoise.py b/invokeai/app/invocations/qwen_image_denoise.py index cd3ff917596..4b9fb207680 100644 --- a/invokeai/app/invocations/qwen_image_denoise.py +++ b/invokeai/app/invocations/qwen_image_denoise.py @@ -353,29 +353,44 @@ def _run_diffusion(self, context: InvocationContext): # Pack latents into 2x2 patches: (B, C, H, W) -> (B, H/2*W/2, C*4) latents = self._pack_latents(latents, 1, out_channels, latent_height, latent_width) - # Pack reference image latents and concatenate along the sequence dimension. - # The edit transformer always expects [noisy_patches ; ref_patches] in its sequence. 
- if ref_latents is not None: - _, ref_ch, rh, rw = ref_latents.shape - if rh != latent_height or rw != latent_width: - ref_latents = torch.nn.functional.interpolate( - ref_latents, size=(latent_height, latent_width), mode="bilinear" + # Determine whether the model uses reference latent conditioning (zero_cond_t). + # Edit models (zero_cond_t=True) expect [noisy_patches ; ref_patches] in the sequence. + # Txt2img models (zero_cond_t=False) only take noisy patches. + has_zero_cond_t = getattr(transformer_info.model, "zero_cond_t", False) or getattr( + transformer_info.model.config, "zero_cond_t", False + ) + use_ref_latents = has_zero_cond_t + + ref_latents_packed = None + if use_ref_latents: + if ref_latents is not None: + _, ref_ch, rh, rw = ref_latents.shape + if rh != latent_height or rw != latent_width: + ref_latents = torch.nn.functional.interpolate( + ref_latents, size=(latent_height, latent_width), mode="bilinear" + ) + else: + # No reference image provided — use zeros so the model still gets the + # expected sequence layout. + ref_latents = torch.zeros( + 1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype ) + ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width) + + # img_shapes tells the transformer the spatial layout of patches. + if use_ref_latents: + img_shapes = [ + [ + (1, latent_height // 2, latent_width // 2), + (1, latent_height // 2, latent_width // 2), + ] + ] else: - # No reference image provided — use zeros so the model still gets the - # expected sequence layout. - ref_latents = torch.zeros( - 1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype - ) - ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width) - - # img_shapes tells the transformer the spatial layout of noisy and reference patches. 
- img_shapes = [ - [ - (1, latent_height // 2, latent_width // 2), - (1, latent_height // 2, latent_width // 2), + img_shapes = [ + [ + (1, latent_height // 2, latent_width // 2), + ] ] - ] # Prepare inpaint extension (operates in 4D space, so unpack/repack around it) inpaint_mask = self._prep_inpaint_mask(context, noise) # noise has the right 4D shape @@ -428,8 +443,12 @@ def _run_diffusion(self, context: InvocationContext): # The pipeline passes timestep / 1000 to the transformer timestep = t.expand(latents.shape[0]).to(inference_dtype) - # Concatenate noisy and reference patches along the sequence dim - model_input = torch.cat([latents, ref_latents_packed], dim=1) + # For edit models: concatenate noisy and reference patches along the sequence dim + # For txt2img models: just use noisy patches + if ref_latents_packed is not None: + model_input = torch.cat([latents, ref_latents_packed], dim=1) + else: + model_input = latents noise_pred_cond = transformer( hidden_states=model_input, diff --git a/invokeai/app/invocations/qwen_image_text_encoder.py b/invokeai/app/invocations/qwen_image_text_encoder.py index 641e8c4d388..74670735877 100644 --- a/invokeai/app/invocations/qwen_image_text_encoder.py +++ b/invokeai/app/invocations/qwen_image_text_encoder.py @@ -20,26 +20,44 @@ QwenImageConditioningInfo, ) -# The Qwen Image Edit pipeline uses a specific system prompt and drops the first -# N tokens (the system prompt prefix) from the embeddings. These constants are -# taken directly from the diffusers QwenImagePipeline. -_SYSTEM_PROMPT = ( +# Prompt templates and drop indices for the two Qwen Image model modes. +# These are taken directly from the diffusers pipelines. + +# Image editing mode (QwenImageEditPlusPipeline) +_EDIT_SYSTEM_PROMPT = ( "Describe the key features of the input image (color, shape, size, texture, objects, background), " "then explain how the user's text instruction should alter or modify the image. 
" "Generate a new image that meets the user's requirements while maintaining consistency " "with the original input where appropriate." ) +_EDIT_DROP_IDX = 64 + +# Text-to-image mode (QwenImagePipeline) +_GENERATE_SYSTEM_PROMPT = ( + "Describe the image by detailing the color, shape, size, texture, quantity, " + "text, spatial relationships of the objects and background:" +) +_GENERATE_DROP_IDX = 34 + _IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>" -_DROP_IDX = 64 def _build_prompt(user_prompt: str, num_images: int) -> str: - """Build the full prompt with one vision placeholder per reference image.""" - image_tokens = _IMAGE_PLACEHOLDER * max(num_images, 1) - return ( - f"<|im_start|>system\n{_SYSTEM_PROMPT}<|im_end|>\n" - f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n" - "<|im_start|>assistant\n" + """Build the full prompt with the appropriate template based on whether reference images are provided.""" + if num_images > 0: + # Edit mode: include vision placeholders for reference images + image_tokens = _IMAGE_PLACEHOLDER * num_images + return ( + f"<|im_start|>system\n{_EDIT_SYSTEM_PROMPT}<|im_end|>\n" + f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n" + "<|im_start|>assistant\n" + ) + else: + # Generate mode: text-only prompt + return ( + f"<|im_start|>system\n{_GENERATE_SYSTEM_PROMPT}<|im_end|>\n" + f"<|im_start|>user\n{user_prompt}<|im_end|>\n" + "<|im_start|>assistant\n" ) @@ -188,7 +206,10 @@ def _encode( hidden_states = outputs.hidden_states[-1] # Extract valid (non-padding) tokens using the attention mask, - # then drop the first _DROP_IDX tokens (system prompt prefix). + # then drop the system prompt prefix tokens. + # The drop index differs between edit mode (64) and generate mode (34). 
+ drop_idx = _EDIT_DROP_IDX if images else _GENERATE_DROP_IDX + attn_mask = model_inputs.attention_mask bool_mask = attn_mask.bool() valid_lengths = bool_mask.sum(dim=1) @@ -196,7 +217,7 @@ def _encode( split_hidden = torch.split(selected, valid_lengths.tolist(), dim=0) # Drop system prefix tokens and build padded output - trimmed = [h[_DROP_IDX:] for h in split_hidden] + trimmed = [h[drop_idx:] for h in split_hidden] attn_mask_list = [torch.ones(h.size(0), dtype=torch.long, device=device) for h in trimmed] max_seq_len = max(h.size(0) for h in trimmed) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index de5f1e1b8b6..d049a52eee7 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -711,6 +711,69 @@ class StarterModelBundle(BaseModel): "Settings: Steps=8, CFG=1, Shift Override=3.", type=ModelType.LoRA, ) + +# Qwen Image (txt2img) +qwen_image = StarterModel( + name="Qwen Image 2512", + base=BaseModelType.QwenImage, + source="Qwen/Qwen-Image-2512", + description="Qwen Image 2512 full diffusers model. High-quality text-to-image generation. (~40GB)", + type=ModelType.Main, +) + +qwen_image_gguf_q4_k_m = StarterModel( + name="Qwen Image 2512 (Q4_K_M)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q4_K_M.gguf", + description="Qwen Image 2512 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_gguf_q2_k = StarterModel( + name="Qwen Image 2512 (Q2_K)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q2_K.gguf", + description="Qwen Image 2512 - Q2_K heavily quantized transformer. Smallest size, lower quality. 
(~7.5GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_gguf_q6_k = StarterModel( + name="Qwen Image 2512 (Q6_K)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q6_K.gguf", + description="Qwen Image 2512 - Q6_K quantized transformer. Near-lossless quality. (~17GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_gguf_q8_0 = StarterModel( + name="Qwen Image 2512 (Q8_0)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q8_0.gguf", + description="Qwen Image 2512 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, +) + +qwen_image_lightning_4step = StarterModel( + name="Qwen Image Lightning (4-step, V2.0, bf16)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors", + description="Lightning distillation LoRA for Qwen Image — enables generation in just 4 steps. " + "Settings: Steps=4, CFG=1, Shift Override=3.", + type=ModelType.LoRA, +) + +qwen_image_lightning_8step = StarterModel( + name="Qwen Image Lightning (8-step, V2.0, bf16)", + base=BaseModelType.QwenImage, + source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors", + description="Lightning distillation LoRA for Qwen Image — enables generation in 8 steps with better quality. 
" + "Settings: Steps=8, CFG=1, Shift Override=3.", + type=ModelType.LoRA, +) # endregion # region SigLIP @@ -1102,6 +1165,10 @@ class StarterModelBundle(BaseModel): qwen_image_gguf_q8_0, qwen_image_lightning_4step, qwen_image_lightning_8step, + qwen_image, + qwen_image_gguf_q4_k_m, + qwen_image_lightning_4step, + qwen_image_lightning_8step, ] STARTER_BUNDLES: dict[str, StarterModelBundle] = { From 8b9e36f05aad8035f0a6c52f146ec37219d97dc7 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 27 Mar 2026 22:57:03 -0400 Subject: [PATCH 02/13] chore: ruff & lint:prettier --- .../invocations/qwen_image_text_encoder.py | 2 +- .../model_records/model_records_base.py | 10 +++++-- invokeai/backend/model_manager/taxonomy.py | 26 ++++++++++++++++--- .../controlLayers/hooks/addLayerHooks.ts | 6 +---- .../Advanced/ParamQwenImageQuantization.tsx | 5 +--- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/invokeai/app/invocations/qwen_image_text_encoder.py b/invokeai/app/invocations/qwen_image_text_encoder.py index 74670735877..9e3f5723ba5 100644 --- a/invokeai/app/invocations/qwen_image_text_encoder.py +++ b/invokeai/app/invocations/qwen_image_text_encoder.py @@ -58,7 +58,7 @@ def _build_prompt(user_prompt: str, num_images: int) -> str: f"<|im_start|>system\n{_GENERATE_SYSTEM_PROMPT}<|im_end|>\n" f"<|im_start|>user\n{user_prompt}<|im_end|>\n" "<|im_start|>assistant\n" - ) + ) @invocation( diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py index ea5b9ef7546..dcdc0ce5956 100644 --- a/invokeai/app/services/model_records/model_records_base.py +++ b/invokeai/app/services/model_records/model_records_base.py @@ -25,8 +25,8 @@ ModelSourceType, ModelType, ModelVariantType, - QwenImageVariantType, Qwen3VariantType, + QwenImageVariantType, SchedulerPredictionType, ZImageVariantType, ) @@ -95,7 +95,13 @@ class ModelRecordChanges(BaseModelExcludeNull): # Checkpoint-specific changes # TODO(MM2): 
Should we expose these? Feels footgun-y... variant: Optional[ - ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType + ModelVariantType + | ClipVariantType + | FluxVariantType + | Flux2VariantType + | ZImageVariantType + | QwenImageVariantType + | Qwen3VariantType ] = Field(description="The variant of the model.", default=None) prediction_type: Optional[SchedulerPredictionType] = Field( description="The prediction type of the model.", default=None diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py index 9250310a29a..587c0b0625f 100644 --- a/invokeai/backend/model_manager/taxonomy.py +++ b/invokeai/backend/model_manager/taxonomy.py @@ -225,8 +225,28 @@ class FluxLoRAFormat(str, Enum): AnyVariant: TypeAlias = Union[ - ModelVariantType, ClipVariantType, FluxVariantType, Flux2VariantType, ZImageVariantType, QwenImageVariantType, Qwen3VariantType + ModelVariantType, + ClipVariantType, + FluxVariantType, + Flux2VariantType, + ZImageVariantType, + QwenImageVariantType, + Qwen3VariantType, ] variant_type_adapter = TypeAdapter[ - ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType -](ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType) + ModelVariantType + | ClipVariantType + | FluxVariantType + | Flux2VariantType + | ZImageVariantType + | QwenImageVariantType + | Qwen3VariantType +]( + ModelVariantType + | ClipVariantType + | FluxVariantType + | Flux2VariantType + | ZImageVariantType + | QwenImageVariantType + | Qwen3VariantType +) diff --git a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts index 3cd28b5f2a0..2027ff41741 100644 --- 
a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts +++ b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts @@ -80,11 +80,7 @@ export const selectDefaultControlAdapter = createSelector( export const getDefaultRefImageConfig = ( getState: AppGetState -): - | IPAdapterConfig - | FluxKontextReferenceImageConfig - | Flux2ReferenceImageConfig - | QwenImageReferenceImageConfig => { +): IPAdapterConfig | FluxKontextReferenceImageConfig | Flux2ReferenceImageConfig | QwenImageReferenceImageConfig => { const state = getState(); const mainModelConfig = selectMainModelConfig(state); diff --git a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx index 46025d95867..3d086e6ec4a 100644 --- a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx +++ b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx @@ -1,10 +1,7 @@ import type { ComboboxOnChange, ComboboxOption } from '@invoke-ai/ui-library'; import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; -import { - qwenImageQuantizationChanged, - selectQwenImageQuantization, -} from 'features/controlLayers/store/paramsSlice'; +import { qwenImageQuantizationChanged, selectQwenImageQuantization } from 'features/controlLayers/store/paramsSlice'; import { memo, useCallback, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; From 25b45ca7582a2ac80c90709a6ead381d86ffe125 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 27 Mar 2026 23:07:53 -0400 Subject: [PATCH 03/13] fix: remove unused frontend exports (zQwenImageVariantType, isQwenImageEditMainModelConfig) Co-Authored-By: Claude Opus 4.6 (1M context) --- 
invokeai/frontend/web/src/features/nodes/types/common.ts | 2 +- invokeai/frontend/web/src/services/api/types.ts | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index ca1d42c5a44..10afd6e44bb 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -153,7 +153,7 @@ export const zModelVariantType = z.enum(['normal', 'inpaint', 'depth']); export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']); export const zFlux2VariantType = z.enum(['klein_4b', 'klein_9b', 'klein_9b_base']); export const zZImageVariantType = z.enum(['turbo', 'zbase']); -export const zQwenImageVariantType = z.enum(['generate', 'edit']); +const zQwenImageVariantType = z.enum(['generate', 'edit']); export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b']); export const zAnyModelVariant = z.union([ zModelVariantType, diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index cfeb672d95e..c8aeda6c760 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -330,9 +330,6 @@ export const isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers'; }; -export const isQwenImageEditMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => { - return config.type === 'main' && config.base === 'qwen-image' && 'variant' in config && config.variant === 'edit'; -}; export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => { return config.type === 'embedding'; From 66e9f873c5cc9d7836ec48ca741ea540b658ee74 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 27 Mar 2026 23:17:27 -0400 Subject: [PATCH 04/13] fix: make 
QwenImage variant optional to fix model detection tags The variant field with a default value was appended to the discriminator tag (e.g. main.gguf_quantized.qwen-image.generate), breaking model detection for GGUF and Diffusers models. Making variant optional with default=None restores the correct tags (main.gguf_quantized.qwen-image). The variant is still set during Diffusers model probing via _get_qwen_image_variant() and can be manually set for GGUF models. Co-Authored-By: Claude Opus 4.6 (1M context) --- invokeai/backend/model_manager/configs/main.py | 4 ++-- .../src/features/parameters/components/Prompts/Prompts.tsx | 7 +++++-- invokeai/frontend/web/src/services/api/schema.ts | 6 ++---- invokeai/frontend/web/src/services/api/types.ts | 1 - 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py index 484a95f4bb8..6ec0611fdf3 100644 --- a/invokeai/backend/model_manager/configs/main.py +++ b/invokeai/backend/model_manager/configs/main.py @@ -1208,7 +1208,7 @@ class Main_Diffusers_QwenImage_Config(Diffusers_Config_Base, Main_Config_Base, C """Model config for Qwen Image diffusers models (both txt2img and edit).""" base: Literal[BaseModelType.QwenImage] = Field(BaseModelType.QwenImage) - variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate) + variant: QwenImageVariantType | None = Field(default=None) @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: @@ -1269,7 +1269,7 @@ class Main_GGUF_QwenImage_Config(Checkpoint_Config_Base, Main_Config_Base, Confi base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage) format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized) - variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate) + variant: QwenImageVariantType | None = Field(default=None) @classmethod def from_model_on_disk(cls, 
mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: diff --git a/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx b/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx index 18f5c4c4dd8..c93841d77b7 100644 --- a/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx +++ b/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx @@ -22,8 +22,11 @@ export const Prompts = memo(() => { if (!modelSupportsRefImages) { return false; } - if (modelConfig?.base === 'qwen-image' && 'variant' in modelConfig && modelConfig.variant !== 'edit') { - return false; + if (modelConfig?.base === 'qwen-image') { + const variant = 'variant' in modelConfig ? modelConfig.variant : null; + if (variant !== 'edit') { + return false; + } } return true; }, [modelSupportsRefImages, modelConfig]); diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index a23217c3a81..2a8a3d243b7 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -18500,8 +18500,7 @@ export type components = { * @constant */ base: "qwen-image"; - /** @default generate */ - variant: components["schemas"]["QwenImageVariantType"]; + variant: components["schemas"]["QwenImageVariantType"] | null; }; /** Main_Diffusers_SD1_Config */ Main_Diffusers_SD1_Config: { @@ -19234,8 +19233,7 @@ export type components = { * @constant */ format: "gguf_quantized"; - /** @default generate */ - variant: components["schemas"]["QwenImageVariantType"]; + variant: components["schemas"]["QwenImageVariantType"] | null; }; /** * Main_GGUF_ZImage_Config diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index c8aeda6c760..b447f9debbe 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -330,7 +330,6 @@ 
export const isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers'; }; - export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => { return config.type === 'embedding'; }; From 556db02c45e12ff5f14a58f34673aeedea45bddd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 27 Mar 2026 23:33:18 -0400 Subject: [PATCH 05/13] fix: restore Qwen Image Edit starter models with distinct variable names The rename from qwen_image_edit -> qwen_image caused variable name collisions with the txt2img starter models. Give edit models the qwen_image_edit_* prefix to distinguish from qwen_image_* (txt2img). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/model_manager/starter_models.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index d049a52eee7..ca0076cbadd 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -650,7 +650,7 @@ class StarterModelBundle(BaseModel): # endregion # region Qwen Image Edit -qwen_image = StarterModel( +qwen_image_edit = StarterModel( name="Qwen Image Edit 2511", base=BaseModelType.QwenImage, source="Qwen/Qwen-Image-Edit-2511", @@ -658,7 +658,7 @@ class StarterModelBundle(BaseModel): type=ModelType.Main, ) -qwen_image_gguf_q4_k_m = StarterModel( +qwen_image_edit_gguf_q4_k_m = StarterModel( name="Qwen Image Edit 2511 (Q4_K_M)", base=BaseModelType.QwenImage, source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q4_K_M.gguf", @@ -667,7 +667,7 @@ class StarterModelBundle(BaseModel): format=ModelFormat.GGUFQuantized, ) -qwen_image_gguf_q2_k = StarterModel( +qwen_image_edit_gguf_q2_k = StarterModel( name="Qwen Image Edit 2511 (Q2_K)", base=BaseModelType.QwenImage, 
source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q2_K.gguf", @@ -676,7 +676,7 @@ class StarterModelBundle(BaseModel): format=ModelFormat.GGUFQuantized, ) -qwen_image_gguf_q6_k = StarterModel( +qwen_image_edit_gguf_q6_k = StarterModel( name="Qwen Image Edit 2511 (Q6_K)", base=BaseModelType.QwenImage, source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q6_K.gguf", @@ -685,7 +685,7 @@ class StarterModelBundle(BaseModel): format=ModelFormat.GGUFQuantized, ) -qwen_image_gguf_q8_0 = StarterModel( +qwen_image_edit_gguf_q8_0 = StarterModel( name="Qwen Image Edit 2511 (Q8_0)", base=BaseModelType.QwenImage, source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q8_0.gguf", @@ -694,7 +694,7 @@ class StarterModelBundle(BaseModel): format=ModelFormat.GGUFQuantized, ) -qwen_image_lightning_4step = StarterModel( +qwen_image_edit_lightning_4step = StarterModel( name="Qwen Image Edit Lightning (4-step, bf16)", base=BaseModelType.QwenImage, source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors", @@ -703,7 +703,7 @@ class StarterModelBundle(BaseModel): type=ModelType.LoRA, ) -qwen_image_lightning_8step = StarterModel( +qwen_image_edit_lightning_8step = StarterModel( name="Qwen Image Edit Lightning (8-step, bf16)", base=BaseModelType.QwenImage, source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-8steps-V1.0-bf16.safetensors", @@ -1075,6 +1075,13 @@ class StarterModelBundle(BaseModel): flux2_klein_qwen3_4b_encoder, flux2_klein_qwen3_8b_encoder, cogview4, + qwen_image_edit, + qwen_image_edit_gguf_q2_k, + qwen_image_edit_gguf_q4_k_m, + qwen_image_edit_gguf_q6_k, + qwen_image_edit_gguf_q8_0, + qwen_image_edit_lightning_4step, + qwen_image_edit_lightning_8step, qwen_image, qwen_image_gguf_q2_k, 
qwen_image_gguf_q4_k_m, @@ -1160,11 +1167,11 @@ class StarterModelBundle(BaseModel): ] qwen_image_bundle: list[StarterModel] = [ - qwen_image, - qwen_image_gguf_q4_k_m, - qwen_image_gguf_q8_0, - qwen_image_lightning_4step, - qwen_image_lightning_8step, + qwen_image_edit, + qwen_image_edit_gguf_q4_k_m, + qwen_image_edit_gguf_q8_0, + qwen_image_edit_lightning_4step, + qwen_image_edit_lightning_8step, qwen_image, qwen_image_gguf_q4_k_m, qwen_image_lightning_4step, From f3dfbd5d4473c54583cc29a53d73f67b0b39f995 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 27 Mar 2026 23:53:09 -0400 Subject: [PATCH 06/13] fix: restore correct GGUF filenames in Qwen Image Edit starter model URLs The global rename sed changed 'qwen-image-edit-2511' to 'qwen-image-2511' inside the HuggingFace URLs, but the actual files on HF still have 'edit' in their names. Co-Authored-By: Claude Opus 4.6 (1M context) --- invokeai/backend/model_manager/starter_models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index ca0076cbadd..ef7b25431a0 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -661,7 +661,7 @@ class StarterModelBundle(BaseModel): qwen_image_edit_gguf_q4_k_m = StarterModel( name="Qwen Image Edit 2511 (Q4_K_M)", base=BaseModelType.QwenImage, - source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q4_K_M.gguf", + source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q4_K_M.gguf", description="Qwen Image Edit 2511 - Q4_K_M quantized transformer. Good quality/size balance. 
(~13GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, @@ -670,7 +670,7 @@ class StarterModelBundle(BaseModel): qwen_image_edit_gguf_q2_k = StarterModel( name="Qwen Image Edit 2511 (Q2_K)", base=BaseModelType.QwenImage, - source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q2_K.gguf", + source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q2_K.gguf", description="Qwen Image Edit 2511 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, @@ -679,7 +679,7 @@ class StarterModelBundle(BaseModel): qwen_image_edit_gguf_q6_k = StarterModel( name="Qwen Image Edit 2511 (Q6_K)", base=BaseModelType.QwenImage, - source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q6_K.gguf", + source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q6_K.gguf", description="Qwen Image Edit 2511 - Q6_K quantized transformer. Near-lossless quality. (~17GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, @@ -688,7 +688,7 @@ class StarterModelBundle(BaseModel): qwen_image_edit_gguf_q8_0 = StarterModel( name="Qwen Image Edit 2511 (Q8_0)", base=BaseModelType.QwenImage, - source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q8_0.gguf", + source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q8_0.gguf", description="Qwen Image Edit 2511 - Q8_0 quantized transformer. Highest quality quantization. 
(~22GB)", type=ModelType.Main, format=ModelFormat.GGUFQuantized, From 6a19ad57f3a49c4c26328fcca6b9d75f292c01e4 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 28 Mar 2026 00:36:28 -0400 Subject: [PATCH 07/13] fix: skip reference images in graph for non-edit Qwen Image models When switching from an edit model to a generate model, reference images remain in state but the panel is hidden. Prevent them from being passed to the text encoder and VAE encoder by checking the model variant. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../graph/generation/buildQwenImageGraph.ts | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts index e7c04744d4e..28a9b253485 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts @@ -103,14 +103,18 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise - entity.isEnabled && - isQwenImageReferenceImageConfig(entity.config) && - entity.config.image !== null && - getGlobalReferenceImageWarnings(entity, model).length === 0 - ); + // Only collect reference images for edit-variant models. + // For txt2img (generate) models, reference images are not used even if they exist in state. + const isEditModel = 'variant' in model && model.variant === 'edit'; + const validRefImageConfigs = isEditModel + ? 
selectRefImagesSlice(state).entities.filter( + (entity) => + entity.isEnabled && + isQwenImageReferenceImageConfig(entity.config) && + entity.config.image !== null && + getGlobalReferenceImageWarnings(entity, model).length === 0 + ) + : []; if (validRefImageConfigs.length > 0) { const refImgCollect = g.addNode({ From 058df877c039b4ef09d9e96e7aafb2c58aae9576 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 28 Mar 2026 00:41:10 -0400 Subject: [PATCH 08/13] fix: only set zero_cond_t=True for edit-variant GGUF models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The txt2img model doesn't use zero_cond_t — setting it causes the transformer to double the timestep batch and create modulation indices for non-existent reference patches, producing noise output. Now checks the config variant before enabling it. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../model_manager/load/model_loaders/qwen_image.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py index 15fcedba166..a025e727945 100644 --- a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py +++ b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py @@ -15,6 +15,7 @@ BaseModelType, ModelFormat, ModelType, + QwenImageVariantType, SubModelType, ) from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor @@ -160,10 +161,13 @@ def _load_from_singlefile(self, config: AnyModelConfig) -> AnyModel: "axes_dims_rope": (16, 56, 56), } - # zero_cond_t was added in diffusers 0.37+; skip it on older versions + # zero_cond_t is only used by edit-variant models. It enables dual modulation + # for noisy vs reference patches. Setting it on txt2img models produces garbage. + # Also requires diffusers 0.37+ (the parameter doesn't exist in older versions). 
import inspect - if "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters: + is_edit = getattr(config, "variant", None) == QwenImageVariantType.Edit + if is_edit and "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters: model_config["zero_cond_t"] = True with accelerate.init_empty_weights(): From b41bee72bcb6836b01a50854de8e3fa437d9e39e Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 28 Mar 2026 01:04:28 -0400 Subject: [PATCH 09/13] fix: recall Qwen Image advanced params (component source, quantization, shift) - Save qwen_image_component_source, qwen_image_quantization, and qwen_image_shift in generation metadata - Add metadata recall handlers so remix/recall restores these settings Co-Authored-By: Claude Opus 4.6 (1M context) --- .../web/src/features/metadata/parsing.tsx | 83 +++++++++++++++++++ .../graph/generation/buildQwenImageGraph.ts | 3 + 2 files changed, 86 insertions(+) diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx index 7d1d511a3c2..58f3aaab07d 100644 --- a/invokeai/frontend/web/src/features/metadata/parsing.tsx +++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx @@ -39,6 +39,9 @@ import { setZImageSeedVarianceEnabled, setZImageSeedVarianceRandomizePercent, setZImageSeedVarianceStrength, + qwenImageComponentSourceSelected, + qwenImageQuantizationChanged, + qwenImageShiftChanged, vaeSelected, widthChanged, zImageQwen3EncoderModelSelected, @@ -677,6 +680,83 @@ const ZImageSeedVarianceRandomizePercent: SingleMetadataHandler = { }; //#endregion ZImageSeedVarianceRandomizePercent +//#region QwenImageComponentSource +const QwenImageComponentSource: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'QwenImageComponentSource', + parse: async (metadata, _store) => { + try { + const raw = getProperty(metadata, 'qwen_image_component_source'); + if (raw === null || raw === undefined) { + return 
Promise.resolve(null); + } + return Promise.resolve(zModelIdentifierField.parse(raw)); + } catch { + return Promise.resolve(null); + } + }, + recall: (value, store) => { + store.dispatch(qwenImageComponentSourceSelected(value)); + }, + i18nKey: 'modelManager.qwenImageComponentSource', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion QwenImageComponentSource + +//#region QwenImageQuantization +const QwenImageQuantization: SingleMetadataHandler<'none' | 'int8' | 'nf4'> = { + [SingleMetadataKey]: true, + type: 'QwenImageQuantization', + parse: (metadata, _store) => { + try { + const raw = getProperty(metadata, 'qwen_image_quantization'); + const parsed = z.enum(['none', 'int8', 'nf4']).parse(raw); + return Promise.resolve(parsed); + } catch { + return Promise.resolve('none' as const); + } + }, + recall: (value, store) => { + store.dispatch(qwenImageQuantizationChanged(value)); + }, + i18nKey: 'modelManager.qwenImageQuantization', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps<'none' | 'int8' | 'nf4'>) => ( + + ), +}; +//#endregion QwenImageQuantization + +//#region QwenImageShift +const QwenImageShift: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'QwenImageShift', + parse: (metadata, _store) => { + try { + const raw = getProperty(metadata, 'qwen_image_shift'); + if (raw === null || raw === undefined) { + return Promise.resolve(null); + } + const parsed = z.number().parse(raw); + return Promise.resolve(parsed); + } catch { + return Promise.resolve(null); + } + }, + recall: (value, store) => { + store.dispatch(qwenImageShiftChanged(value)); + }, + i18nKey: 'modelManager.qwenImageShift', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion QwenImageShift + //#region RefinerModel const RefinerModel: SingleMetadataHandler = { [SingleMetadataKey]: true, @@ -1233,6 +1313,9 @@ export 
const ImageMetadataHandlers = { ZImageSeedVarianceEnabled, ZImageSeedVarianceStrength, ZImageSeedVarianceRandomizePercent, + QwenImageComponentSource, + QwenImageQuantization, + QwenImageShift, LoRAs, CanvasLayers, RefImages, diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts index 28a9b253485..8f1cb5362cd 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts @@ -167,6 +167,9 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise Date: Sat, 28 Mar 2026 01:12:12 -0400 Subject: [PATCH 10/13] fix: remove unnecessary async from QwenImageComponentSource parse Co-Authored-By: Claude Opus 4.6 (1M context) --- invokeai/frontend/web/src/features/metadata/parsing.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx index 58f3aaab07d..4f179d6b017 100644 --- a/invokeai/frontend/web/src/features/metadata/parsing.tsx +++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx @@ -13,6 +13,9 @@ import { kleinVaeModelSelected, negativePromptChanged, positivePromptChanged, + qwenImageComponentSourceSelected, + qwenImageQuantizationChanged, + qwenImageShiftChanged, refinerModelChanged, selectBase, setCfgRescaleMultiplier, @@ -39,9 +42,6 @@ import { setZImageSeedVarianceEnabled, setZImageSeedVarianceRandomizePercent, setZImageSeedVarianceStrength, - qwenImageComponentSourceSelected, - qwenImageQuantizationChanged, - qwenImageShiftChanged, vaeSelected, widthChanged, zImageQwen3EncoderModelSelected, @@ -684,7 +684,7 @@ const ZImageSeedVarianceRandomizePercent: SingleMetadataHandler = { const QwenImageComponentSource: SingleMetadataHandler = { 
[SingleMetadataKey]: true, type: 'QwenImageComponentSource', - parse: async (metadata, _store) => { + parse: (metadata, _store) => { try { const raw = getProperty(metadata, 'qwen_image_component_source'); if (raw === null || raw === undefined) { From 2aeb2fdd3a57ecc3d8003b6a761983aaa7e3c481 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 28 Mar 2026 01:30:52 -0400 Subject: [PATCH 11/13] fix: prevent Flux LoRAs from being detected as Qwen Image LoRAs Flux PEFT LoRAs use transformer.single_transformer_blocks.* keys which contain "transformer_blocks." as a substring, falsely matching the Qwen Image LoRA detection. Add single_transformer_blocks to the Flux exclusion set. Co-Authored-By: Claude Opus 4.6 (1M context) --- invokeai/backend/model_manager/configs/lora.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py index a5b9f40631d..f305bbddee8 100644 --- a/invokeai/backend/model_manager/configs/lora.py +++ b/invokeai/backend/model_manager/configs/lora.py @@ -775,14 +775,19 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None: state_dict, {"lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight", "dora_scale"}, ) - # Must NOT have diffusion_model.layers (Z-Image) or double_blocks/single_blocks (Flux) + # Must NOT have diffusion_model.layers (Z-Image) or Flux-style keys. + # Flux LoRAs can have transformer.single_transformer_blocks or transformer.transformer_blocks + # (with the "transformer." prefix and "single_" variant) which would falsely match our check. 
has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."}) - has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."}) + has_flux_keys = state_dict_has_any_keys_starting_with( + state_dict, + {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."}, + ) if has_qwen_ie_keys and has_lora_suffix and not has_z_image_keys and not has_flux_keys: return - raise NotAMatchError("model does not match Qwen Image Edit LoRA heuristics") + raise NotAMatchError("model does not match Qwen Image LoRA heuristics") @classmethod def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: @@ -791,7 +796,10 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: state_dict, {"transformer_blocks.", "transformer.transformer_blocks."} ) has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."}) - has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."}) + has_flux_keys = state_dict_has_any_keys_starting_with( + state_dict, + {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."}, + ) if has_qwen_ie_keys and not has_z_image_keys and not has_flux_keys: return BaseModelType.QwenImage From 5c6ca302b037edc0f0947aa5c0228141a80a1ed3 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 28 Mar 2026 01:41:33 -0400 Subject: [PATCH 12/13] chore: ruff --- invokeai/backend/model_manager/configs/lora.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py index f305bbddee8..f2e6f3b34fa 100644 --- a/invokeai/backend/model_manager/configs/lora.py +++ b/invokeai/backend/model_manager/configs/lora.py @@ -781,7 +781,12 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None: 
has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."}) has_flux_keys = state_dict_has_any_keys_starting_with( state_dict, - {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."}, + { + "double_blocks.", + "single_blocks.", + "single_transformer_blocks.", + "transformer.single_transformer_blocks.", + }, ) if has_qwen_ie_keys and has_lora_suffix and not has_z_image_keys and not has_flux_keys: @@ -798,7 +803,12 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."}) has_flux_keys = state_dict_has_any_keys_starting_with( state_dict, - {"double_blocks.", "single_blocks.", "single_transformer_blocks.", "transformer.single_transformer_blocks."}, + { + "double_blocks.", + "single_blocks.", + "single_transformer_blocks.", + "transformer.single_transformer_blocks.", + }, ) if has_qwen_ie_keys and not has_z_image_keys and not has_flux_keys: From 2fcedc72dfa091ad169e5927ac5d3ac779e4a96b Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 28 Mar 2026 10:15:08 -0400 Subject: [PATCH 13/13] fix: don't force reference image to output aspect ratio in VAE encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the graph builder passed the output canvas dimensions to the I2L node, which resized the reference image to match — distorting its aspect ratio when they differed. Now the reference is encoded at its native size. The denoise node already handles dimension mismatches via bilinear interpolation in latent space. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../util/graph/generation/buildQwenImageGraph.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts index 8f1cb5362cd..1ea20a377e6 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts @@ -15,11 +15,7 @@ import { addQwenImageLoRAs } from 'features/nodes/util/graph/generation/addQwenI import { addTextToImage } from 'features/nodes/util/graph/generation/addTextToImage'; import { addWatermarker } from 'features/nodes/util/graph/generation/addWatermarker'; import { Graph } from 'features/nodes/util/graph/generation/Graph'; -import { - getOriginalAndScaledSizesForTextToImage, - selectCanvasOutputFields, - selectPresetModifiedPrompts, -} from 'features/nodes/util/graph/graphBuilderUtils'; +import { selectCanvasOutputFields, selectPresetModifiedPrompts } from 'features/nodes/util/graph/graphBuilderUtils'; import type { GraphBuilderArg, GraphBuilderReturn, ImageOutputNodes } from 'features/nodes/util/graph/types'; import { selectActiveTab } from 'features/ui/store/uiSelectors'; import type { Invocation } from 'services/api/types'; @@ -139,14 +135,12 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise