diff --git a/.dockerignore b/.dockerignore index ab4fdc7ab2..bc085a874a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,7 +1,23 @@ .venv .venv-style **/.venv +.pytest_cache +.devcontainer +.vscode +.vs +.idea +.gdb_history out +bazel-bin +bazel-model_server/ +bazel-openvino-model-server/ +bazel-out +bazel-ovms +bazel-ovms-c +bazel-testlogs demos/continuous_batching demos/embeddings -demos/common/export_models/models \ No newline at end of file +demos/common/export_models/models +*.log +*.img +models diff --git a/.gitignore b/.gitignore index 62dca37b89..9db061a1e9 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ __pycache__/ report.json trace.json bazel-bin +bazel-model_server/ +bazel-openvino-model-server/ bazel-out bazel-ovms bazel-ovms-c @@ -28,8 +30,6 @@ tags src/test/llm_testing node_modules/ yarn.* -bazel-openvino-model-server/ -bazel-model_server/ out .user.bazelrc *.log diff --git a/demos/image_generation/README.md b/demos/image_generation/README.md index 70a569ac5d..5dcf183c18 100644 --- a/demos/image_generation/README.md +++ b/demos/image_generation/README.md @@ -3,6 +3,12 @@ This demo shows how to deploy image generation models (Stable Diffusion/Stable Diffusion 3/Stable Diffusion XL/FLUX) to create and edit images with the OpenVINO Model Server. Image generation pipelines are exposed via [OpenAI API](https://platform.openai.com/docs/api-reference/images/create) `images/generations` and `images/edits` endpoints. 
+Supported workloads: +- **Text-to-image** — generate an image from a text prompt (`/v3/images/generations`) +- **Image-to-image** — transform an existing image guided by a prompt (`/v3/images/edits`) +- **Inpainting** — repaint a masked region of an image (`/v3/images/edits` with `mask` field) +- **Outpainting** — extend an image beyond its original borders (`/v3/images/edits` with `mask` field and larger canvas) + Check [supported models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models). > **Note:** Please note that FLUX models are not supported on NPU. @@ -364,35 +370,32 @@ ovms --rest_port 8000 ^ Wait for the model to load. You can check the status with a simple command: ```console -curl http://localhost:8000/v1/config +curl http://localhost:8000/v3/models ``` ```json { - "OpenVINO/stable-diffusion-v1-5-int8-ov" : - { - "model_version_status": [ - { - "version": "1", - "state": "AVAILABLE", - "status": { - "error_code": "OK", - "error_message": "OK" + "object": "list", + "data": [ + { + "id": "OpenVINO/stable-diffusion-v1-5-int8-ov", + "object": "model", + "created": 0, + "owned_by": "openvinotoolkit" } - } ] - } } ``` ## Request Generation -A single servable exposes following endpoints: -- text to image: `images/generations` -- image to image: `images/edits` +A single servable exposes the following endpoints: +- **Text-to-image**: `images/generations` — JSON body with `prompt` +- **Image-to-image**: `images/edits` — multipart form with `image` + `prompt` (no mask) +- **Inpainting**: `images/edits` — multipart form with `image` + `mask` + `prompt` +- **Outpainting**: `images/edits` — multipart form with `image` + `mask` + `prompt` (image placed on larger canvas, mask marks the area to fill) -Endpoints unsupported for now: -- inpainting: `images/edits` with `mask` field +> **Note:** For inpainting/outpainting, dedicated inpainting models (e.g. 
`stable-diffusion-v1-5/stable-diffusion-inpainting`) only support the `images/edits` endpoint. Check [supported models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models). All requests are processed in unary format, with no streaming capabilities. @@ -519,6 +522,194 @@ image.save('edit_output.png') Output file (`edit_output.png`): ![edit_output](./edit_output.png) +### Requesting inpainting with cURL + +Inpainting replaces a masked region in an image based on the prompt. The `mask` is a black-and-white image where white pixels mark the area to repaint. The requests below target the `diffusers/stable-diffusion-xl-1.0-inpainting-0.1` servable, so the server must already be serving a model under that name — for example by adapting the `--source_model` value in the deployment command under *Using dedicated inpainting models* below. + +![cat](./cat.png) ![cat_mask](./cat_mask.png) + +::::{tab-set} +:::{tab-item} Linux +:sync: linux +```bash +curl http://localhost:8000/v3/images/edits \ + -F "model=diffusers/stable-diffusion-xl-1.0-inpainting-0.1" \ + -F "prompt=a golden retriever dog sitting on a bench in a sunny park" \ + -F "image=@cat.png" \ + -F "mask=@cat_mask.png" \ + -F "num_inference_steps=50" \ + -F "size=1024x1024" | jq -r '.data[0].b64_json' | base64 --decode > inpaint_output.png +``` +::: + +:::{tab-item} Windows Command Prompt +:sync: windows +```bat +curl http://localhost:8000/v3/images/edits ^ + -F "model=diffusers/stable-diffusion-xl-1.0-inpainting-0.1" ^ + -F "prompt=a golden retriever dog sitting on a bench in a sunny park" ^ + -F "image=@cat.png" ^ + -F "mask=@cat_mask.png" ^ + -F "num_inference_steps=50" ^ + -F "size=1024x1024" +``` +::: + +:::: + +Expected output (`inpaint_output.png`): + +![inpaint_output](./inpaint_output.png) + +### Requesting inpainting with OpenAI Python package + +```python +from openai import OpenAI +import base64 +from io import BytesIO +from PIL import Image + +client = OpenAI( + base_url="http://localhost:8000/v3", + api_key="unused" +) + +response = client.images.edit( + model="diffusers/stable-diffusion-xl-1.0-inpainting-0.1", + image=open("cat.png", "rb"), + mask=open("cat_mask.png", "rb"), + prompt="a golden retriever dog sitting on a bench in a 
sunny park", + extra_body={ + "num_inference_steps": 50, + "size": "1024x1024" + } + ) +base64_image = response.data[0].b64_json + +image_data = base64.b64decode(base64_image) +image = Image.open(BytesIO(image_data)) +image.save('inpaint_output.png') +``` + +### Requesting outpainting with cURL + +Outpainting extends an image beyond its original borders. The requests below target the `stable-diffusion-v1-5/stable-diffusion-inpainting` servable, so start the server with that model first (see the deployment command under *Using dedicated inpainting models* below). Prepare two images: +- **outpaint_input.png** — the original image centered on a larger canvas (e.g. 768×768) with black borders +- **outpaint_mask.png** — white where the new content should be generated (the borders), black where the original image is + +![outpaint_input](./outpaint_input.png) ![outpaint_mask](./outpaint_mask.png) + +::::{tab-set} +:::{tab-item} Linux +:sync: linux +```bash +curl http://localhost:8000/v3/images/edits \ + -F "model=stable-diffusion-v1-5/stable-diffusion-inpainting" \ + -F "prompt=a cat sitting on a bench in a park" \ + -F "image=@outpaint_input.png" \ + -F "mask=@outpaint_mask.png" \ + -F "num_inference_steps=50" \ + -F "size=768x768" | jq -r '.data[0].b64_json' | base64 --decode > outpaint_output.png +``` +::: + +:::{tab-item} Windows Command Prompt +:sync: windows +```bat +curl http://localhost:8000/v3/images/edits ^ + -F "model=stable-diffusion-v1-5/stable-diffusion-inpainting" ^ + -F "prompt=a cat sitting on a bench in a park" ^ + -F "image=@outpaint_input.png" ^ + -F "mask=@outpaint_mask.png" ^ + -F "num_inference_steps=50" ^ + -F "size=768x768" +``` +::: + +:::: + +Expected output (`outpaint_output.png`): + +![outpaint_output](./outpaint_output.png) + +### Requesting outpainting with OpenAI Python package + +```python +from openai import OpenAI +import base64 +from io import BytesIO +from PIL import Image + +client = OpenAI( + base_url="http://localhost:8000/v3", + api_key="unused" +) + +response = client.images.edit( + model="stable-diffusion-v1-5/stable-diffusion-inpainting", + image=open("outpaint_input.png", "rb"), + mask=open("outpaint_mask.png", "rb"), + prompt="a cat 
sitting on a bench in a park", + extra_body={ + "num_inference_steps": 50, + "size": "768x768" + } + ) +base64_image = response.data[0].b64_json + +image_data = base64.b64decode(base64_image) +image = Image.open(BytesIO(image_data)) +image.save('outpaint_output.png') +``` + +### Using dedicated inpainting models + +For best inpainting/outpainting quality, use a dedicated inpainting model. These models have a 9-channel UNet specifically trained for masked generation. + +Example models for inpainting: +- `stable-diffusion-v1-5/stable-diffusion-inpainting` — SD 1.5 based, 512×512 native resolution +- `diffusers/stable-diffusion-xl-1.0-inpainting-0.1` — SDXL based, 1024×1024 native resolution + +For the full list see [supported image generation models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models). + +> **Note:** Dedicated inpainting models only expose the `images/edits` endpoint (with mask). Text-to-image and image-to-image requests will return an error indicating the pipeline is not available for this model. Base models (e.g. `stable-diffusion-v1-5/stable-diffusion-v1-5`) support all endpoints including inpainting. 
+ +::::{tab-set} +:::{tab-item} Docker (Linux) — GPU +:sync: docker-gpu +```bash +mkdir -p models + +docker run -d --rm -p 8000:8000 -v $(pwd)/models:/models/:rw \ + --user $(id -u):$(id -g) --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) \ + -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy \ + openvino/model_server:latest-gpu \ + --rest_port 8000 \ + --model_repository_path /models/ \ + --task image_generation \ + --source_model stable-diffusion-v1-5/stable-diffusion-inpainting \ + --weight-format int8 \ + --target_device GPU +``` +::: + +:::{tab-item} Bare metal (Windows) +:sync: bare-metal +```bat +mkdir models + +ovms --rest_port 8000 ^ + --model_repository_path ./models/ ^ + --task image_generation ^ + --source_model stable-diffusion-v1-5/stable-diffusion-inpainting ^ + --weight-format int8 ^ + --target_device GPU +``` +::: + +:::: + + ### Strength influence on final damage ![strength](./strength.png) diff --git a/demos/image_generation/cat.png b/demos/image_generation/cat.png new file mode 100644 index 0000000000..a344d4aeb8 Binary files /dev/null and b/demos/image_generation/cat.png differ diff --git a/demos/image_generation/cat_mask.png b/demos/image_generation/cat_mask.png new file mode 100644 index 0000000000..1d70b248ce Binary files /dev/null and b/demos/image_generation/cat_mask.png differ diff --git a/demos/image_generation/inpaint_output.png b/demos/image_generation/inpaint_output.png new file mode 100644 index 0000000000..c7fd8b62a7 Binary files /dev/null and b/demos/image_generation/inpaint_output.png differ diff --git a/demos/image_generation/outpaint_input.png b/demos/image_generation/outpaint_input.png new file mode 100644 index 0000000000..d94745e1d2 Binary files /dev/null and b/demos/image_generation/outpaint_input.png differ diff --git a/demos/image_generation/outpaint_mask.png b/demos/image_generation/outpaint_mask.png new file mode 100644 index 0000000000..245dbb3c4a Binary files 
/dev/null and b/demos/image_generation/outpaint_mask.png differ diff --git a/demos/image_generation/outpaint_output.png b/demos/image_generation/outpaint_output.png new file mode 100644 index 0000000000..e94ec625a0 Binary files /dev/null and b/demos/image_generation/outpaint_output.png differ diff --git a/docs/model_server_rest_api_image_edit.md b/docs/model_server_rest_api_image_edit.md index 9409528fab..154d921a3a 100644 --- a/docs/model_server_rest_api_image_edit.md +++ b/docs/model_server_rest_api_image_edit.md @@ -53,7 +53,7 @@ curl -X POST http://localhost:8000/v3/images/edits \ |-----|----------|----------|---------|-----| | model | ✅ | ✅ | string (required) | Name of the model to use. Name assigned to a MediaPipe graph configured to schedule generation using desired embedding model. **Note**: This can also be omitted to fall back to URI based routing. Read more on routing topic **TODO** | | image | ⚠️ | ✅ | string or array of strings (required) | The image to edit. Must be a single image (⚠️**Note**: Array of strings is not supported for now.) | -| mask | ❌ | ✅ | string | Triggers inpainting pipeline type. An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. Not supported for now. | +| mask | ✅ | ✅ | string | Triggers inpainting pipeline. An additional image where white pixels mark the area to repaint. Send as a multipart file field alongside `image`. | | prompt | ✅ | ✅ | string (required) | A text description of the desired image(s). | | size | ✅ | ✅ | string or null (default: auto) | The size of the generated images. Must be in WxH format, example: `1024x768`. Default model W/H will be used when using `auto`. | | n | ✅ | ✅ | integer or null (default: `1`) | A number of images to generate. 
If you want to generate multiple images for the same combination of generation parameters and text prompts, you can use this parameter for better performance as internally computations will be performed with batch for Unet / Transformer models and text embeddings tensors will also be computed only once. | diff --git a/src/http_frontend/multi_part_parser_drogon_impl.cpp b/src/http_frontend/multi_part_parser_drogon_impl.cpp index 7282535df4..17d05967e2 100644 --- a/src/http_frontend/multi_part_parser_drogon_impl.cpp +++ b/src/http_frontend/multi_part_parser_drogon_impl.cpp @@ -50,6 +50,17 @@ std::string_view DrogonMultiPartParser::getFileContentByFieldName(const std::str return it->second.fileContent(); } +std::vector DrogonMultiPartParser::getFilesArrayByFieldName(const std::string& name) const { + const std::vector& files = this->parser->getFiles(); + std::vector result; + for (const drogon::HttpFile& file : files) { + if (file.getItemName() == name) { + result.push_back(file.fileContent()); + } + } + return result; +} + std::set DrogonMultiPartParser::getAllFieldNames() const { std::set fieldNames; auto fileMap = this->parser->getFilesMap(); diff --git a/src/http_frontend/multi_part_parser_drogon_impl.hpp b/src/http_frontend/multi_part_parser_drogon_impl.hpp index a37de000f0..7741b82946 100644 --- a/src/http_frontend/multi_part_parser_drogon_impl.hpp +++ b/src/http_frontend/multi_part_parser_drogon_impl.hpp @@ -47,6 +47,7 @@ class DrogonMultiPartParser : public MultiPartParser { std::string getFieldByName(const std::string& name) const override; std::vector getArrayFieldByName(const std::string& name) const override; std::string_view getFileContentByFieldName(const std::string& name) const override; + std::vector getFilesArrayByFieldName(const std::string& name) const override; std::set getAllFieldNames() const override; }; diff --git a/src/image_conversion.cpp b/src/image_conversion.cpp index 6a750cc2c6..9f633745bd 100644 --- a/src/image_conversion.cpp +++ 
b/src/image_conversion.cpp @@ -74,7 +74,7 @@ ov::Tensor loadImageStbi(unsigned char* image, const int x, const int y, const i SharedImageAllocator(image, desiredChannels, y, x)); } -ov::Tensor loadImageStbiFromMemory(const std::string& imageBytes) { +ov::Tensor loadImageStbiFromMemory(std::string_view imageBytes) { int x = 0, y = 0, channelsInFile = 0; constexpr int desiredChannels = 3; unsigned char* image = stbi_load_from_memory( diff --git a/src/image_conversion.hpp b/src/image_conversion.hpp index 5dd8ee85df..74236be8f7 100644 --- a/src/image_conversion.hpp +++ b/src/image_conversion.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -23,7 +24,7 @@ namespace ovms { ov::Tensor loadImageStbi(unsigned char* image, const int x, const int y, const int desiredChannels); -ov::Tensor loadImageStbiFromMemory(const std::string& imageBytes); +ov::Tensor loadImageStbiFromMemory(std::string_view imageBytes); ov::Tensor loadImageStbiFromFile(const char* filename); std::vector saveImagesStbi(const ov::Tensor& tensor); diff --git a/src/image_gen/BUILD b/src/image_gen/BUILD index 87056d4934..38c44983f5 100644 --- a/src/image_gen/BUILD +++ b/src/image_gen/BUILD @@ -24,6 +24,7 @@ ovms_cc_library( deps = [ "imagegenpipelineargs", "//src:libovmslogging", + "//src:libovms_queue", "//src:libovmsstring_utils", "//third_party:genai",], visibility = ["//visibility:public"], diff --git a/src/image_gen/http_image_gen_calculator.cc b/src/image_gen/http_image_gen_calculator.cc index 8aa5912fe9..ffe625e040 100644 --- a/src/image_gen/http_image_gen_calculator.cc +++ b/src/image_gen/http_image_gen_calculator.cc @@ -49,6 +49,7 @@ static bool progress_bar(size_t step, size_t num_steps, ov::Tensor&) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Image Generation Step: {}/{}", step + 1, num_steps); return false; } + // written out separately to avoid msvc crashing when using try-catch in process method ... 
static absl::Status generateTensor(ov::genai::Text2ImagePipeline& request, const std::string& prompt, ov::AnyMap& requestOptions, @@ -94,7 +95,29 @@ static absl::Status generateTensorImg2Img(ov::genai::Image2ImagePipeline& reques return absl::OkStatus(); } // written out separately to avoid msvc crashing when using try-catch in process method ... -static absl::Status makeTensorFromString(const std::string& filePayload, ov::Tensor& imageTensor) { +static absl::Status generateTensorInpainting(ov::genai::InpaintingPipeline& request, + const std::string& prompt, ov::Tensor image, ov::Tensor mask, ov::AnyMap& requestOptions, + std::unique_ptr& images) { + try { + requestOptions.insert(ov::genai::callback(progress_bar)); + images = std::make_unique(request.generate(prompt, image, mask, requestOptions)); + auto dims = images->get_shape(); + std::stringstream ss; + for (const auto& dim : dims) { + ss << dim << " "; + } + ss << " element type: " << images->get_element_type().get_type_name(); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator generated inpainting tensor: {}", ss.str()); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(llm_calculator_logger, "ImageGenCalculator Inpainting Error: {}", e.what()); + return absl::InternalError("Error during inpainting generation"); + } catch (...) { + return absl::InternalError("Unknown error during inpainting generation"); + } + return absl::OkStatus(); +} +// written out separately to avoid msvc crashing when using try-catch in process method ... 
+static absl::Status makeTensorFromString(std::string_view filePayload, ov::Tensor& imageTensor) { try { imageTensor = loadImageStbiFromMemory(filePayload); } catch (std::runtime_error& e) { @@ -140,10 +163,12 @@ class ImageGenCalculator : public CalculatorBase { auto pipe = it->second; auto payload = cc->Inputs().Tag(INPUT_TAG_NAME).Get(); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Request URI: {}", cc->NodeName(), payload.uri); std::unique_ptr images; // output if (absl::StartsWith(payload.uri, "/v3/images/generations")) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Routed to image generations path", cc->NodeName()); if (payload.parsedJson->HasParseError()) return absl::InvalidArgumentError("Failed to parse JSON"); @@ -154,13 +179,15 @@ class ImageGenCalculator : public CalculatorBase { SET_OR_RETURN(std::string, prompt, getPromptField(*payload.parsedJson)); SET_OR_RETURN(ov::AnyMap, requestOptions, getImageGenerationRequestOptions(*payload.parsedJson, pipe->args)); - ov::genai::Text2ImagePipeline request = pipe->text2ImagePipeline->clone(); - - auto status = generateTensor(request, prompt, requestOptions, images); + if (!pipe->text2ImagePipeline) + return absl::FailedPreconditionError("Text-to-image pipeline is not available for this model"); + auto t2i = pipe->text2ImagePipeline->clone(); + auto status = generateTensor(t2i, prompt, requestOptions, images); if (!status.ok()) { return status; } } else if (absl::StartsWith(payload.uri, "/v3/images/edits")) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Routed to image edits path", cc->NodeName()); if (payload.multipartParser->hasParseError()) return absl::InvalidArgumentError("Failed to parse multipart data"); @@ -169,15 +196,37 @@ class ImageGenCalculator : public CalculatorBase { RET_CHECK(image.has_value() && !image.value().empty()) << "Image field is missing in multipart body"; ov::Tensor imageTensor; - auto status 
= makeTensorFromString(std::string(image.value()), imageTensor); + auto status = makeTensorFromString(image.value(), imageTensor); if (!status.ok()) { return status; } SET_OR_RETURN(ov::AnyMap, requestOptions, getImageEditRequestOptions(*payload.multipartParser, pipe->args)); - ov::genai::Image2ImagePipeline request = pipe->image2ImagePipeline->clone(); - status = generateTensorImg2Img(request, prompt, imageTensor, requestOptions, images); + SET_OR_RETURN(std::optional, mask, getFileFromPayload(*payload.multipartParser, "mask")); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Mask present: {}", cc->NodeName(), mask.has_value() && !mask.value().empty()); + + if (mask.has_value() && !mask.value().empty()) { + if (!pipe->inpaintingPipeline) + return absl::FailedPreconditionError("Inpainting pipeline is not available for this model"); + // Inpainting path — uses the pre-built InpaintingPipeline that was loaded from disk + // during initialization. Do NOT derive InpaintingPipeline from Image2ImagePipeline + ov::Tensor maskTensor; + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: decoding mask tensor", cc->NodeName()); + status = makeTensorFromString(mask.value(), maskTensor); + if (!status.ok()) { + return status; + } + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: mask tensor decoded, acquiring inpainting queue slot", cc->NodeName()); + InpaintingQueueGuard inpaintingGuard(*pipe->inpaintingQueue); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "ImageGenCalculator [Node: {}] Inpainting: queue slot acquired, invoking generate()", cc->NodeName()); + status = generateTensorInpainting(*pipe->inpaintingPipeline, prompt, imageTensor, maskTensor, requestOptions, images); + } else { + if (!pipe->image2ImagePipeline) + return absl::FailedPreconditionError("Image-to-image pipeline is not available for this model"); + auto i2i = pipe->image2ImagePipeline->clone(); + status = 
generateTensorImg2Img(i2i, prompt, imageTensor, requestOptions, images); + } if (!status.ok()) { return status; } diff --git a/src/image_gen/imagegenutils.cpp b/src/image_gen/imagegenutils.cpp index 4a11e3e80c..2235e9fa44 100644 --- a/src/image_gen/imagegenutils.cpp +++ b/src/image_gen/imagegenutils.cpp @@ -441,7 +441,7 @@ std::variant getImageEditRequestOptions(const ovms::Mu // prompt REQUIRED // image string (REQUIRED) or array (NOT SUPPORTED) // background REJECTED string NO optional default=auto - // mask file NO + // mask file DONE // model string NO optional default=dall-e-2 // n optional default=1 ----> num_images_per_prompt // output_compression REJECTED int NO optional default=100 @@ -527,6 +527,7 @@ std::variant getImageEditRequestOptions(const ovms::Mu static std::set acceptedFields{ "prompt", "prompt_2", "prompt_3", "image", + "mask", "negative_prompt", "negative_prompt_2", "negative_prompt_3", "size", "height", "width", "n", "num_images_per_prompt", diff --git a/src/image_gen/pipelines.cpp b/src/image_gen/pipelines.cpp index 54c7ac1e1b..2d07936fc2 100644 --- a/src/image_gen/pipelines.cpp +++ b/src/image_gen/pipelines.cpp @@ -17,11 +17,44 @@ #include -#include "../logging.hpp" -#include "../stringutils.hpp" +#include +#include +#include + +#include "src/logging.hpp" +#include "src/stringutils.hpp" namespace ovms { +// Reshape and compile a pipeline that was loaded from disk. +// Derived (weight-sharing) pipelines inherit the compiled state from the parent and skip this. 
+template +static void reshapeAndCompile(PipelineT& pipeline, + const ImageGenPipelineArgs& args, + const std::vector& device) { + if (args.staticReshapeSettings.has_value() && args.staticReshapeSettings.value().resolution.size() == 1) { + auto numImagesPerPrompt = args.staticReshapeSettings.value().numImagesPerPrompt.value_or(ov::genai::ImageGenerationConfig().num_images_per_prompt); + auto guidanceScale = args.staticReshapeSettings.value().guidanceScale.value_or(ov::genai::ImageGenerationConfig().guidance_scale); + + SPDLOG_DEBUG("Image Generation Pipeline reshape to static {}x{} resolution, batch: {}, guidance scale: {}", + args.staticReshapeSettings.value().resolution[0].first, args.staticReshapeSettings.value().resolution[0].second, numImagesPerPrompt, guidanceScale); + + pipeline.reshape( + numImagesPerPrompt, + args.staticReshapeSettings.value().resolution[0].first, + args.staticReshapeSettings.value().resolution[0].second, + guidanceScale); + } + + if (device.size() == 1) { + SPDLOG_DEBUG("Image Generation Pipeline compiling to device: {}", device[0]); + pipeline.compile(device[0], args.pluginConfig); + } else { + SPDLOG_DEBUG("Image Generation Pipeline compiling to devices: text_encode={} denoise={} vae={}", device[0], device[1], device[2]); + pipeline.compile(device[0], device[1], device[2], args.pluginConfig); + } +} + ImageGenerationPipelines::ImageGenerationPipelines(const ImageGenPipelineArgs& args) : args(args) { std::vector device; @@ -33,30 +66,87 @@ ImageGenerationPipelines::ImageGenerationPipelines(const ImageGenPipelineArgs& a SPDLOG_DEBUG("Image Generation Pipelines weights loading from: {}", args.modelsPath); - image2ImagePipeline = std::make_unique(args.modelsPath); + // Pipeline construction strategy: + // Preferred chain (weight-sharing, single model load): + // INP(disk) → reshape+compile → I2I(INP) → T2I(I2I) + // + // Some models don't support all derivation directions (e.g. 
inpainting-specific + // models reject I2I(INP) with "Cannot create Image2ImagePipeline from InpaintingPipeline + // with inpainting model"). When derivation fails, fall back to loading from disk + // (separate model load + reshape+compile). We WARN on individual failures and only + // throw if no pipeline could be created at all. - if (args.staticReshapeSettings.has_value() && args.staticReshapeSettings.value().resolution.size() == 1) { - auto numImagesPerPrompt = args.staticReshapeSettings.value().numImagesPerPrompt.value_or(ov::genai::ImageGenerationConfig().num_images_per_prompt); - auto guidanceScale = args.staticReshapeSettings.value().guidanceScale.value_or(ov::genai::ImageGenerationConfig().guidance_scale); + // --- Step 1: InpaintingPipeline from disk --- + try { + inpaintingPipeline = std::make_unique(args.modelsPath); + reshapeAndCompile(*inpaintingPipeline, args, device); + SPDLOG_DEBUG("InpaintingPipeline created from disk"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to create InpaintingPipeline from disk: {}", e.what()); + inpaintingPipeline.reset(); + } - SPDLOG_DEBUG("Image Generation Pipelines will be reshaped to static {}x{} resolution, batch: {}, guidance scale: {}", - args.staticReshapeSettings.value().resolution[0].first, args.staticReshapeSettings.value().resolution[0].second, numImagesPerPrompt, guidanceScale); + // --- Step 2: Image2ImagePipeline — derive from INP, fallback to disk --- + if (inpaintingPipeline) { + try { + image2ImagePipeline = std::make_unique(*inpaintingPipeline); + SPDLOG_DEBUG("Image2ImagePipeline derived from InpaintingPipeline"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to derive Image2ImagePipeline from InpaintingPipeline: {}", e.what()); + } + } + if (!image2ImagePipeline) { + try { + image2ImagePipeline = std::make_unique(args.modelsPath); + reshapeAndCompile(*image2ImagePipeline, args, device); + SPDLOG_DEBUG("Image2ImagePipeline created from disk (fallback)"); + } catch (const 
std::exception& e) { + SPDLOG_WARN("Failed to create Image2ImagePipeline from disk: {}", e.what()); + image2ImagePipeline.reset(); + } + } - image2ImagePipeline->reshape( - numImagesPerPrompt, - args.staticReshapeSettings.value().resolution[0].first, // at this point it should be validated for existence - args.staticReshapeSettings.value().resolution[0].second, // at this point it should be validated for existence - guidanceScale); + // --- Step 3: Text2ImagePipeline — derive from I2I or INP, fallback to disk --- + if (image2ImagePipeline) { + try { + text2ImagePipeline = std::make_unique(*image2ImagePipeline); + SPDLOG_DEBUG("Text2ImagePipeline derived from Image2ImagePipeline"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to derive Text2ImagePipeline from Image2ImagePipeline: {}", e.what()); + } + } + if (!text2ImagePipeline && inpaintingPipeline) { + try { + text2ImagePipeline = std::make_unique(*inpaintingPipeline); + SPDLOG_DEBUG("Text2ImagePipeline derived from InpaintingPipeline"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to derive Text2ImagePipeline from InpaintingPipeline: {}", e.what()); + } + } + if (!text2ImagePipeline) { + try { + text2ImagePipeline = std::make_unique(args.modelsPath); + reshapeAndCompile(*text2ImagePipeline, args, device); + SPDLOG_DEBUG("Text2ImagePipeline created from disk (fallback)"); + } catch (const std::exception& e) { + SPDLOG_WARN("Failed to create Text2ImagePipeline from disk: {}", e.what()); + text2ImagePipeline.reset(); + } } - if (device.size() == 1) { - SPDLOG_DEBUG("Image Generation Pipelines compiling to devices: text_encode={} denoise={} vae={}", device[0], device[0], device[0]); - image2ImagePipeline->compile(device[0], args.pluginConfig); - } else { - SPDLOG_DEBUG("Image Generation Pipelines compiling to devices: text_encode={} denoise={} vae={}", device[0], device[1], device[2]); - image2ImagePipeline->compile(device[0], device[1], device[2], args.pluginConfig); + if 
(!inpaintingPipeline && !image2ImagePipeline && !text2ImagePipeline) { + throw std::runtime_error("Failed to create any image generation pipeline from: " + args.modelsPath); + } + + // InpaintingPipeline does not support clone(), so concurrent inpainting + // requests must be serialized. Queue size = 1 acts as a mutex. + if (inpaintingPipeline) { + inpaintingQueue = std::make_unique>(1); } - text2ImagePipeline = std::make_unique(*image2ImagePipeline); + SPDLOG_INFO("Image Generation Pipelines ready — T2I: {} | I2I: {} | INP: {}", + text2ImagePipeline ? "OK" : "N/A", + image2ImagePipeline ? "OK" : "N/A", + inpaintingPipeline ? "OK" : "N/A"); } } // namespace ovms diff --git a/src/image_gen/pipelines.hpp b/src/image_gen/pipelines.hpp index 7c83bdc2e9..cda14396a7 100644 --- a/src/image_gen/pipelines.hpp +++ b/src/image_gen/pipelines.hpp @@ -18,17 +18,45 @@ #include #include -#include #include +#include +#include #include "imagegenpipelineargs.hpp" +#include "src/queue.hpp" namespace ovms { + +// RAII guard that acquires a slot from a Queue(1) on construction +// and returns it on destruction, serializing concurrent inpainting requests. +class InpaintingQueueGuard { +public: + // Blocks until an inpainting slot becomes available. + explicit InpaintingQueueGuard(Queue& queue) : + queue_(queue), + streamId_(queue_.getIdleStream().get()) {} + ~InpaintingQueueGuard() { + queue_.returnStream(streamId_); + } + + InpaintingQueueGuard(const InpaintingQueueGuard&) = delete; + InpaintingQueueGuard& operator=(const InpaintingQueueGuard&) = delete; + +private: + Queue& queue_; + int streamId_; +}; + struct ImageGenerationPipelines { std::unique_ptr image2ImagePipeline; std::unique_ptr text2ImagePipeline; + std::unique_ptr inpaintingPipeline; ImageGenPipelineArgs args; + // Serializes concurrent inpainting requests (InpaintingPipeline lacks clone()). + // Queue size = 1: only one inpainting inference runs at a time. 
+ std::unique_ptr> inpaintingQueue; + ImageGenerationPipelines() = delete; ImageGenerationPipelines(const ImageGenPipelineArgs& args); ImageGenerationPipelines(const ImageGenerationPipelines&) = delete; diff --git a/src/multi_part_parser.hpp b/src/multi_part_parser.hpp index 19f28f51ba..d5e376b9aa 100644 --- a/src/multi_part_parser.hpp +++ b/src/multi_part_parser.hpp @@ -16,6 +16,7 @@ #include #include +#include #include namespace ovms { @@ -38,6 +39,9 @@ class MultiPartParser { // Returns empty string if file is not found. virtual std::string_view getFileContentByFieldName(const std::string& name) const = 0; + // API for MP calculators to get all file contents for a given array field name (e.g. "image[]"). + virtual std::vector getFilesArrayByFieldName(const std::string& name) const = 0; + // API for MP calculators to get all field names. virtual std::set getAllFieldNames() const = 0; }; diff --git a/src/pull_module/optimum_export.cpp b/src/pull_module/optimum_export.cpp index 2c8fde8426..c41c614920 100644 --- a/src/pull_module/optimum_export.cpp +++ b/src/pull_module/optimum_export.cpp @@ -54,6 +54,9 @@ std::string OptimumDownloader::getExportCmdEmbeddings() { oss << "--disable-convert-tokenizer --task feature-extraction --library sentence_transformers"; oss << " --model " << this->sourceModel << " --trust-remote-code "; oss << " --weight-format " << this->exportSettings.precision; + if (this->exportSettings.extraQuantizationParams.has_value()) { + oss << " " << this->exportSettings.extraQuantizationParams.value(); + } oss << " " << this->downloadPath; // clang-format on @@ -69,6 +72,9 @@ std::string OptimumDownloader::getExportCmdTextToSpeech() { } oss << "--model " << this->sourceModel << " --trust-remote-code "; oss << " --weight-format " << this->exportSettings.precision; + if (this->exportSettings.extraQuantizationParams.has_value()) { + oss << " " << this->exportSettings.extraQuantizationParams.value(); + } oss << " " << this->downloadPath; // clang-format 
on @@ -81,6 +87,9 @@ std::string OptimumDownloader::getExportCmdSpeechToText() { oss << this->OPTIMUM_CLI_EXPORT_COMMAND; oss << "--model " << this->sourceModel << " --trust-remote-code "; oss << " --weight-format " << this->exportSettings.precision; + if (this->exportSettings.extraQuantizationParams.has_value()) { + oss << " " << this->exportSettings.extraQuantizationParams.value(); + } oss << " " << this->downloadPath; // clang-format on @@ -95,6 +104,9 @@ std::string OptimumDownloader::getExportCmdRerank() { oss << " --trust-remote-code "; oss << " --weight-format " << this->exportSettings.precision; oss << " --task text-classification "; + if (this->exportSettings.extraQuantizationParams.has_value()) { + oss << " " << this->exportSettings.extraQuantizationParams.value(); + } oss << " " << this->downloadPath; // clang-format on @@ -107,6 +119,9 @@ std::string OptimumDownloader::getExportCmdImageGeneration() { oss << this->OPTIMUM_CLI_EXPORT_COMMAND; oss << "--model " << this->sourceModel; oss << " --weight-format " << this->exportSettings.precision; + if (this->exportSettings.extraQuantizationParams.has_value()) { + oss << " " << this->exportSettings.extraQuantizationParams.value(); + } oss << " " << this->downloadPath; // clang-format on diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b29bbee326..215f2ee41f 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -345,7 +345,7 @@ class TestOptimumDownloaderSetup : public ::testing::Test { inHfSettings.sourceModel = "model/name"; inHfSettings.downloadPath = "/path/to/Download"; inHfSettings.exportSettings.precision = "fp64"; - inHfSettings.exportSettings.extraQuantizationParams = "--param --param value"; + inHfSettings.exportSettings.extraQuantizationParams = "--someOptimumParam --anotherOptParam value"; inHfSettings.task = ovms::TEXT_GENERATION_GRAPH; inHfSettings.downloadType = ovms::OPTIMUM_CLI_DOWNLOAD; #ifdef _WIN32 @@ -371,7 +371,7 @@ class 
TestOptimumDownloaderSetupWithFile : public TestOptimumDownloaderSetup { TEST_F(TestOptimumDownloaderSetup, Methods) { std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); std::string expectedPath = inHfSettings.downloadPath + "/" + inHfSettings.sourceModel; - std::string expectedCmd = "optimum-cli export openvino --model model/name --trust-remote-code --weight-format fp64 --param --param value \\path\\to\\Download\\model\\name"; + std::string expectedCmd = "optimum-cli export openvino --model model/name --trust-remote-code --weight-format fp64 --someOptimumParam --anotherOptParam value \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = "convert_tokenizer model/name --with-detokenizer -o \\path\\to\\Download\\model\\name"; #ifdef _WIN32 std::replace(expectedPath.begin(), expectedPath.end(), '/', '\\'); @@ -388,7 +388,7 @@ TEST_F(TestOptimumDownloaderSetup, Methods) { TEST_F(TestOptimumDownloaderSetup, RerankExportCmd) { inHfSettings.task = ovms::RERANK_GRAPH; std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); - std::string expectedCmd = "optimum-cli export openvino --disable-convert-tokenizer --model model/name --trust-remote-code --weight-format fp64 --task text-classification \\path\\to\\Download\\model\\name"; + std::string expectedCmd = "optimum-cli export openvino --disable-convert-tokenizer --model model/name --trust-remote-code --weight-format fp64 --task text-classification --someOptimumParam --anotherOptParam value \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name"; #ifdef __linux__ std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/'); @@ -401,6 +401,19 @@ TEST_F(TestOptimumDownloaderSetup, RerankExportCmd) { TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmd) { inHfSettings.task = ovms::IMAGE_GENERATION_GRAPH; std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); + std::string expectedCmd = "optimum-cli 
export openvino --model model/name --weight-format fp64 --someOptimumParam --anotherOptParam value \\path\\to\\Download\\model\\name"; + std::string expectedCmd2 = ""; +#ifdef __linux__ + std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/'); +#endif + ASSERT_EQ(optimumDownloader->getExportCmd(), expectedCmd); + ASSERT_EQ(optimumDownloader->getConvertCmd(), expectedCmd2); +} + +TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmdNoExtraParams) { + inHfSettings.task = ovms::IMAGE_GENERATION_GRAPH; + inHfSettings.exportSettings.extraQuantizationParams = std::nullopt; + std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); std::string expectedCmd = "optimum-cli export openvino --model model/name --weight-format fp64 \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = ""; #ifdef __linux__ @@ -413,7 +426,7 @@ TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmd) { TEST_F(TestOptimumDownloaderSetup, EmbeddingsExportCmd) { inHfSettings.task = ovms::EMBEDDINGS_GRAPH; std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); - std::string expectedCmd = "optimum-cli export openvino --disable-convert-tokenizer --task feature-extraction --library sentence_transformers --model model/name --trust-remote-code --weight-format fp64 \\path\\to\\Download\\model\\name"; + std::string expectedCmd = "optimum-cli export openvino --disable-convert-tokenizer --task feature-extraction --library sentence_transformers --model model/name --trust-remote-code --weight-format fp64 --someOptimumParam --anotherOptParam value \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name"; #ifdef __linux__ std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/'); @@ -427,7 +440,7 @@ TEST_F(TestOptimumDownloaderSetup, TextToSpeechExportCmd) { inHfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; inHfSettings.exportSettings.vocoder = "microsoft/speecht5_hifigan"; std::unique_ptr 
optimumDownloader = std::make_unique(inHfSettings); - std::string expectedCmd = "optimum-cli export openvino --model-kwargs \"{\"vocoder\": \"microsoft/speecht5_hifigan\"}\" --model model/name --trust-remote-code --weight-format fp64 \\path\\to\\Download\\model\\name"; + std::string expectedCmd = "optimum-cli export openvino --model-kwargs \"{\"vocoder\": \"microsoft/speecht5_hifigan\"}\" --model model/name --trust-remote-code --weight-format fp64 --someOptimumParam --anotherOptParam value \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name"; #ifdef __linux__ std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/'); @@ -440,7 +453,7 @@ TEST_F(TestOptimumDownloaderSetup, TextToSpeechExportCmd) { TEST_F(TestOptimumDownloaderSetup, SpeechToTextExportCmd) { inHfSettings.task = ovms::SPEECH_TO_TEXT_GRAPH; std::unique_ptr optimumDownloader = std::make_unique(inHfSettings); - std::string expectedCmd = "optimum-cli export openvino --model model/name --trust-remote-code --weight-format fp64 \\path\\to\\Download\\model\\name"; + std::string expectedCmd = "optimum-cli export openvino --model model/name --trust-remote-code --weight-format fp64 --someOptimumParam --anotherOptParam value \\path\\to\\Download\\model\\name"; std::string expectedCmd2 = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name"; #ifdef __linux__ std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/'); diff --git a/src/test/test_http_utils.hpp b/src/test/test_http_utils.hpp index 83138b3b5f..62ca393c40 100644 --- a/src/test/test_http_utils.hpp +++ b/src/test/test_http_utils.hpp @@ -49,6 +49,7 @@ class MockedMultiPartParser final : public ovms::MultiPartParser { MOCK_METHOD(std::vector, getArrayFieldByName, (const std::string&), (const override)); MOCK_METHOD(std::string, getFieldByName, (const std::string&), (const override)); MOCK_METHOD(std::string_view, getFileContentByFieldName, (const 
std::string&), (const override)); + MOCK_METHOD(std::vector, getFilesArrayByFieldName, (const std::string&), (const, override)); MOCK_METHOD(std::set, getAllFieldNames, (), (const, override)); }; diff --git a/src/test/text2image_test.cpp b/src/test/text2image_test.cpp index 0f7916ac60..3d61d3e3cd 100644 --- a/src/test/text2image_test.cpp +++ b/src/test/text2image_test.cpp @@ -48,6 +48,7 @@ class MockedMultiPartParser final : public ovms::MultiPartParser { MOCK_METHOD(std::vector, getArrayFieldByName, (const std::string& name), (const, override)); MOCK_METHOD(std::string, getFieldByName, (const std::string& name), (const, override)); MOCK_METHOD(std::string_view, getFileContentByFieldName, (const std::string& name), (const, override)); + MOCK_METHOD(std::vector, getFilesArrayByFieldName, (const std::string& name), (const, override)); MOCK_METHOD(std::set, getAllFieldNames, (), (const, override)); }; @@ -796,6 +797,115 @@ TEST(Image2ImageTest, getImageEditRequestOptionsRejectedFields) { ASSERT_FALSE(std::holds_alternative(requestOptions)); } +TEST(Image2ImageTest, getImageEditRequestOptionsMaskAccepted) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getAllFieldNames()).WillByDefault(Return(std::set{"prompt", "mask"})); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsUnknownFieldRejected) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getAllFieldNames()).WillByDefault(Return(std::set{"prompt", "nonexistent_field"})); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); + 
EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::get(requestOptions).message(), ::testing::HasSubstr("nonexistent_field")); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsAllExplicitlyRejectedOpenAIFields) { + for (const auto& field : {"background", "moderation", "output_compression", "output_format", "quality", "style"}) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName(field)).WillByDefault(Return("some_value")); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)) << "Expected rejection for field: " << field; + EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::get(requestOptions).message(), ::testing::HasSubstr(field)); + } +} + +TEST(Image2ImageTest, getImageEditRequestOptionsUnsupportedResponseFormat) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName("response_format")).WillByDefault(Return("url")); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); + EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); + EXPECT_THAT(std::get(requestOptions).message(), ::testing::HasSubstr("response_format")); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsAllAcceptedFieldsPassValidation) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName("prompt_2")).WillByDefault(Return("prompt 2")); + ON_CALL(multipartParser, getFieldByName("size")).WillByDefault(Return("512x1024")); + 
ON_CALL(multipartParser, getFieldByName("n")).WillByDefault(Return("2")); + ON_CALL(multipartParser, getFieldByName("response_format")).WillByDefault(Return("b64_json")); + ON_CALL(multipartParser, getFieldByName("model")).WillByDefault(Return("test_model")); + ON_CALL(multipartParser, getAllFieldNames()).WillByDefault(Return(std::set{ + "prompt", "prompt_2", "prompt_3", + "image", "mask", + "negative_prompt", "negative_prompt_2", "negative_prompt_3", + "size", "height", "width", + "n", "num_images_per_prompt", + "response_format", + "num_inference_steps", "rng_seed", "strength", "guidance_scale", "max_sequence_length", "model"})); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)) << std::get(requestOptions).message(); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsDefaultSizeBehavior) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + // no size, width, or height set — default resolution should be applied + auto argsWithDefaults = DEFAULTIMAGE_GEN_ARGS; + argsWithDefaults.defaultResolution = std::make_pair(512, 256); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, argsWithDefaults); + ASSERT_TRUE(std::holds_alternative(requestOptions)) << std::get(requestOptions).message(); + auto& options = std::get(requestOptions); + EXPECT_EQ(options.at("width").as(), 512); + EXPECT_EQ(options.at("height").as(), 256); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsZeroWidth) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName("width")).WillByDefault(Return("0")); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); + 
EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsZeroHeight) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName("height")).WillByDefault(Return("0")); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); + EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsZeroN) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName("n")).WillByDefault(Return("0")); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); + EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); +} + +TEST(Image2ImageTest, getImageEditRequestOptionsZeroNumInferenceSteps) { + MockedMultiPartParser multipartParser; + ON_CALL(multipartParser, getFieldByName("prompt")).WillByDefault(Return("test prompt")); + ON_CALL(multipartParser, getFieldByName("num_inference_steps")).WillByDefault(Return("0")); + auto requestOptions = ovms::getImageEditRequestOptions(multipartParser, DEFAULTIMAGE_GEN_ARGS); + ASSERT_TRUE(std::holds_alternative(requestOptions)); + EXPECT_EQ(std::get(requestOptions).code(), absl::StatusCode::kInvalidArgument); +} + using mediapipe::CalculatorGraphConfig; using ovms::ImageGenPipelineArgs; TEST(ImageGenCalculatorOptionsTest, PositiveAllfields) { @@ -1387,5 +1497,4 @@ TEST(Text2ImageTest, ResponseFromOvTensorBatch3) { testResponseFromOvTensor(n); } // TODO: -// -> test for all unhandled OpenAI fields define what to do - ignore/error imageEdit // -> test for all 
unhandled OpenAI fields define what to do - ignore/error imageVariation