From dbd0842a2719cb33ddda580465e84d152dacf852 Mon Sep 17 00:00:00 2001 From: Arik Horodniceanu Date: Fri, 24 Apr 2026 11:56:18 -0700 Subject: [PATCH] Qualcomm AI Engine Direct - [GenAI Pipeline] PR1: Core Data Model, Engine Routing & Exceptions --- backends/qualcomm/.coveragerc | 18 ++ backends/qualcomm/genai_pipeline/__init__.py | 49 ++++++ .../genai_pipeline/configs/__init__.py | 41 +++++ .../configs/compilation_input_config.py | 38 +++++ .../configs/compilation_output_config.py | 29 ++++ .../configs/inference_input_config.py | 36 ++++ .../configs/inference_output_config.py | 30 ++++ .../configs/model_preparation_input_config.py | 23 +++ .../model_preparation_output_config.py | 33 ++++ .../configs/quantization_input_config.py | 38 +++++ .../configs/quantization_output_config.py | 19 +++ .../qualcomm/genai_pipeline/engine_proxy.py | 74 +++++++++ .../qualcomm/genai_pipeline/exceptions.py | 48 ++++++ .../genai_pipeline/pipeline_context.py | 109 ++++++++++++ .../qualcomm/genai_pipeline/pipeline_types.py | 29 ++++ .../configs/test_compilation_input_config.py | 28 ++++ .../configs/test_inference_input_config.py | 29 ++++ .../test_model_preparation_input_config.py | 28 ++++ .../configs/test_quantization_input_config.py | 29 ++++ .../genai_pipeline/tests/test_engine_proxy.py | 80 +++++++++ .../genai_pipeline/tests/test_exceptions.py | 49 ++++++ .../tests/test_pipeline_context.py | 157 ++++++++++++++++++ .../genai_pipeline/tests/test_utils.py | 27 +++ 23 files changed, 1041 insertions(+) create mode 100644 backends/qualcomm/.coveragerc create mode 100644 backends/qualcomm/genai_pipeline/__init__.py create mode 100644 backends/qualcomm/genai_pipeline/configs/__init__.py create mode 100644 backends/qualcomm/genai_pipeline/configs/compilation_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/configs/compilation_output_config.py create mode 100644 backends/qualcomm/genai_pipeline/configs/inference_input_config.py create mode 100644 
backends/qualcomm/genai_pipeline/configs/inference_output_config.py create mode 100644 backends/qualcomm/genai_pipeline/configs/model_preparation_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/configs/model_preparation_output_config.py create mode 100644 backends/qualcomm/genai_pipeline/configs/quantization_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/configs/quantization_output_config.py create mode 100644 backends/qualcomm/genai_pipeline/engine_proxy.py create mode 100644 backends/qualcomm/genai_pipeline/exceptions.py create mode 100644 backends/qualcomm/genai_pipeline/pipeline_context.py create mode 100644 backends/qualcomm/genai_pipeline/pipeline_types.py create mode 100644 backends/qualcomm/genai_pipeline/tests/configs/test_compilation_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/tests/configs/test_inference_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/tests/configs/test_model_preparation_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/tests/configs/test_quantization_input_config.py create mode 100644 backends/qualcomm/genai_pipeline/tests/test_engine_proxy.py create mode 100644 backends/qualcomm/genai_pipeline/tests/test_exceptions.py create mode 100644 backends/qualcomm/genai_pipeline/tests/test_pipeline_context.py create mode 100644 backends/qualcomm/genai_pipeline/tests/test_utils.py diff --git a/backends/qualcomm/.coveragerc b/backends/qualcomm/.coveragerc new file mode 100644 index 00000000000..b6abc86cef9 --- /dev/null +++ b/backends/qualcomm/.coveragerc @@ -0,0 +1,18 @@ +[run] +branch = True +source = backends/qualcomm/genai_pipeline + +omit = + */__init__.py + */tests/* + */__pycache__/* + */pipeline_types.py + +[report] +show_missing = True + +omit = + */__init__.py + */tests/* + */__pycache__/* + */pipeline_types.py diff --git a/backends/qualcomm/genai_pipeline/__init__.py b/backends/qualcomm/genai_pipeline/__init__.py new file mode 100644 index 
00000000000..adb170bab25 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/__init__.py @@ -0,0 +1,49 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +__version__ = "1.0.0" + +from backends.qualcomm.genai_pipeline.configs import ( + CompilationInputConfig, + CompilationOutputConfig, + InferenceInputConfig, + InferenceOutputConfig, + ModelPreparationInputConfig, + ModelPreparationOutputConfig, + QuantizationInputConfig, + QuantizationOutputConfig, +) +from backends.qualcomm.genai_pipeline.engine_proxy import EngineProxy +from backends.qualcomm.genai_pipeline.exceptions import ( + ConfigValidationError, + EngineNotAvailableError, + PipelineError, + StageError, +) +from backends.qualcomm.genai_pipeline.pipeline_context import ( + PipelineContext, + PipelineContextBuilder, +) +from backends.qualcomm.genai_pipeline.pipeline_types import EngineType + +__all__ = [ + "CompilationInputConfig", + "CompilationOutputConfig", + "ConfigValidationError", + "EngineNotAvailableError", + "EngineProxy", + "EngineType", + "InferenceInputConfig", + "InferenceOutputConfig", + "ModelPreparationInputConfig", + "ModelPreparationOutputConfig", + "PipelineContext", + "PipelineContextBuilder", + "PipelineError", + "QuantizationInputConfig", + "QuantizationOutputConfig", + "StageError", +] diff --git a/backends/qualcomm/genai_pipeline/configs/__init__.py b/backends/qualcomm/genai_pipeline/configs/__init__.py new file mode 100644 index 00000000000..1ddf7b4d24b --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/__init__.py @@ -0,0 +1,41 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from backends.qualcomm.genai_pipeline.configs.compilation_input_config import ( + CompilationInputConfig, +) +from backends.qualcomm.genai_pipeline.configs.compilation_output_config import ( + CompilationOutputConfig, +) +from backends.qualcomm.genai_pipeline.configs.inference_input_config import ( + InferenceInputConfig, +) +from backends.qualcomm.genai_pipeline.configs.inference_output_config import ( + InferenceOutputConfig, +) +from backends.qualcomm.genai_pipeline.configs.model_preparation_input_config import ( + ModelPreparationInputConfig, +) +from backends.qualcomm.genai_pipeline.configs.model_preparation_output_config import ( + ModelPreparationOutputConfig, +) +from backends.qualcomm.genai_pipeline.configs.quantization_input_config import ( + QuantizationInputConfig, +) +from backends.qualcomm.genai_pipeline.configs.quantization_output_config import ( + QuantizationOutputConfig, +) + +__all__ = [ + "CompilationInputConfig", + "CompilationOutputConfig", + "InferenceInputConfig", + "InferenceOutputConfig", + "ModelPreparationInputConfig", + "ModelPreparationOutputConfig", + "QuantizationInputConfig", + "QuantizationOutputConfig", +] diff --git a/backends/qualcomm/genai_pipeline/configs/compilation_input_config.py b/backends/qualcomm/genai_pipeline/configs/compilation_input_config.py new file mode 100644 index 00000000000..8066f6c7aa1 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/compilation_input_config.py @@ -0,0 +1,38 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from executorch.backends.qualcomm.serialization.qc_schema import ( + QcomChipset, + QnnExecuTorchBackendType, + ) + from executorch.exir.backend.compile_spec import CompileSpec + from torch import nn + + +@dataclass +class CompilationInputConfig: + """Input configuration for the compilation stage. + + Attributes: + soc_model: The target SoC (e.g., QcomChipset.SM8750). Required. + backend_type: QNN backend type (HTP, GPU, LPAI, etc.). Required. + model: The nn.Module to compile (quantized or original for FP16 mode). + artifact_dir: Directory to store compiled artifacts. + compile_specs: QNN compiler specifications for backend delegation. + """ + + soc_model: "QcomChipset" + backend_type: "QnnExecuTorchBackendType" + model: Optional["nn.Module"] = None + artifact_dir: Path = field(default_factory=lambda: Path(".")) + compile_specs: Optional[List["CompileSpec"]] = None diff --git a/backends/qualcomm/genai_pipeline/configs/compilation_output_config.py b/backends/qualcomm/genai_pipeline/configs/compilation_output_config.py new file mode 100644 index 00000000000..f68b0471c3f --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/compilation_output_config.py @@ -0,0 +1,29 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from executorch.devtools.etrecord import ETRecord + + +@dataclass +class CompilationOutputConfig: + """Output produced by the compilation stage. + + Attributes: + artifact_paths: Paths to the compiled artifacts (.pte files). 
+ List to support multi-split models where compilation produces + multiple .pte files (e.g., prefill + decode). + etrecord: Optional ETRecord for debugging. ExecuTorch engine only. + """ + + artifact_paths: Optional[List[Path]] = None + etrecord: Optional["ETRecord"] = None diff --git a/backends/qualcomm/genai_pipeline/configs/inference_input_config.py b/backends/qualcomm/genai_pipeline/configs/inference_input_config.py new file mode 100644 index 00000000000..19612b15a2c --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/inference_input_config.py @@ -0,0 +1,36 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset + + +@dataclass +class InferenceInputConfig: + """Input configuration for the inference stage. + + Attributes: + soc_model: The target SoC (e.g., QcomChipset.SM8750). Required. + artifact_paths: Paths to compiled model artifacts (.pte files). + List to support multi-split models (e.g., prefill + decode). + tokenizer: The tokenizer instance for encoding/decoding. + runtime_tokenizer_path: Path to runtime tokenizer for on-device use. + prompt: The user prompt(s) for text generation. + inference_options: Engine-specific inference options. 
+ """ + + soc_model: "QcomChipset" + artifact_paths: Optional[List[Path]] = None + tokenizer: Any = None + runtime_tokenizer_path: Optional[Path] = None + prompt: Optional[List[str]] = None + inference_options: Dict[str, Any] = field(default_factory=dict) diff --git a/backends/qualcomm/genai_pipeline/configs/inference_output_config.py b/backends/qualcomm/genai_pipeline/configs/inference_output_config.py new file mode 100644 index 00000000000..22b68b0f636 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/inference_output_config.py @@ -0,0 +1,30 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from executorch.devtools.etdump.schema_flatcc import ETDump + + +@dataclass +class InferenceOutputConfig: + """Output produced by the inference stage. + + Attributes: + inference_results: Generated text output(s) from the model. + performance_metrics: Performance data (e.g., TTFT, tokens/sec). + eval_results: Evaluation metric results (e.g., SQNR, perplexity). + etdump: Optional ETDump for debugging. ExecuTorch engine only. + """ + + inference_results: Optional[List[str]] = None + performance_metrics: Dict[str, Any] = field(default_factory=dict) + eval_results: Dict[str, Any] = field(default_factory=dict) + etdump: Optional["ETDump"] = None diff --git a/backends/qualcomm/genai_pipeline/configs/model_preparation_input_config.py b/backends/qualcomm/genai_pipeline/configs/model_preparation_input_config.py new file mode 100644 index 00000000000..fe3b42cf040 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/model_preparation_input_config.py @@ -0,0 +1,23 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. 
+# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass, field +from typing import Any, Dict + + +@dataclass +class ModelPreparationInputConfig: + """Input configuration for the model preparation stage. + + Attributes: + model_name: Model identifier (e.g., "llama3_2-1b_instruct"). Required. + soc_model: Target SoC (e.g., "SM8750"). Required. + extra_options: Additional model-preparation-specific options. + """ + + model_name: str + soc_model: str + extra_options: Dict[str, Any] = field(default_factory=dict) diff --git a/backends/qualcomm/genai_pipeline/configs/model_preparation_output_config.py b/backends/qualcomm/genai_pipeline/configs/model_preparation_output_config.py new file mode 100644 index 00000000000..3638984a493 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/model_preparation_output_config.py @@ -0,0 +1,33 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from torch import nn + + +@dataclass +class ModelPreparationOutputConfig: + """Output produced by the model preparation stage. + + Attributes: + model_module: The prepared nn.Module ready for quantization. + tokenizer: The tokenizer instance for encoding/decoding text. + calibration_data: Dataset samples for calibration during quantization. + runtime_tokenizer_path: Path to runtime tokenizer for on-device inference. + chat_template: Optional chat template for instruct models. 
+ """ + + model_module: Optional["nn.Module"] = None + tokenizer: Any = None + calibration_data: Optional[List[Any]] = None + runtime_tokenizer_path: Optional[Path] = None + chat_template: Optional[str] = None diff --git a/backends/qualcomm/genai_pipeline/configs/quantization_input_config.py b/backends/qualcomm/genai_pipeline/configs/quantization_input_config.py new file mode 100644 index 00000000000..04ef57e11e0 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/quantization_input_config.py @@ -0,0 +1,38 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from executorch.backends.qualcomm.serialization.qc_schema import ( + QcomChipset, + QnnExecuTorchBackendType, + ) + from torch import nn + + +@dataclass +class QuantizationInputConfig: + """Input configuration for the quantization stage. + + Attributes: + soc_model: The target SoC (e.g., QcomChipset.SM8750). Required. + backend_type: QNN backend type (HTP, GPU, LPAI, etc.). Required. + model_module: The nn.Module to quantize. + calibration_data: Calibration dataset samples. + quant_recipe: Quantization recipe (per-layer bit widths, group sizes, etc.). + extra_options: Additional quantization-specific options. 
+ """ + + soc_model: "QcomChipset" + backend_type: "QnnExecuTorchBackendType" + model_module: Optional["nn.Module"] = None + calibration_data: Optional[List[Any]] = None + quant_recipe: Any = None + extra_options: Dict[str, Any] = field(default_factory=dict) diff --git a/backends/qualcomm/genai_pipeline/configs/quantization_output_config.py b/backends/qualcomm/genai_pipeline/configs/quantization_output_config.py new file mode 100644 index 00000000000..7852a1b90fb --- /dev/null +++ b/backends/qualcomm/genai_pipeline/configs/quantization_output_config.py @@ -0,0 +1,19 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass +from typing import Any + + +@dataclass +class QuantizationOutputConfig: + """Output produced by the quantization stage. + + Attributes: + quantized_model: The quantized nn.Module or path to saved QDQ model. + """ + + quantized_model: Any = None diff --git a/backends/qualcomm/genai_pipeline/engine_proxy.py b/backends/qualcomm/genai_pipeline/engine_proxy.py new file mode 100644 index 00000000000..d622c596a2b --- /dev/null +++ b/backends/qualcomm/genai_pipeline/engine_proxy.py @@ -0,0 +1,74 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from __future__ import annotations + +from typing import Dict, TYPE_CHECKING + +from backends.qualcomm.genai_pipeline.pipeline_types import ALL_STAGES, EngineType + +if TYPE_CHECKING: + from executorch.backends.qualcomm.serialization.qc_schema import ( + QnnExecuTorchBackendType, + ) + + +class EngineProxy: + """Routes each pipeline stage to the appropriate engine strategy. + + Validates stage-engine combinations at construction time. 
+ + Args: + stage_engines: Mapping of stage name to EngineType. + backend_type: The QNN backend type (HTP, GPU, LPAI, etc.). + + Raises: + ValueError: If stage_engines contains a stage name not in ALL_STAGES. + """ + + def __init__( + self, + stage_engines: Dict[str, EngineType], + backend_type: "QnnExecuTorchBackendType", + ): + self._validate(stage_engines) + self._stage_engines = dict(stage_engines) + self._backend_type = backend_type + + def _validate( + self, + stage_engines: Dict[str, EngineType], + ) -> None: + """Validate the stage names (keys) of stage_engines; values are not checked. + + Raises: + ValueError: On invalid stage names. + """ + for stage in stage_engines: + if stage not in ALL_STAGES: + raise ValueError( + f"Unknown stage '{stage}'. " f"Valid stages: {sorted(ALL_STAGES)}" + ) + + def get_engine(self, stage: str) -> EngineType: + """Get the engine type for a given stage. + + Args: + stage: The pipeline stage name (not validated by this method). + + Returns: + The EngineType for the stage, defaulting to EXECUTORCH + if not explicitly configured. + """ + return self._stage_engines.get(stage, EngineType.EXECUTORCH) + + @property + def backend_type(self) -> "QnnExecuTorchBackendType": + return self._backend_type + + @property + def stage_engines(self) -> Dict[str, EngineType]: + return dict(self._stage_engines) diff --git a/backends/qualcomm/genai_pipeline/exceptions.py b/backends/qualcomm/genai_pipeline/exceptions.py new file mode 100644 index 00000000000..1bc772b52a4 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/exceptions.py @@ -0,0 +1,48 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Optional + + +class PipelineError(Exception): + """Base exception for all GenAI pipeline errors.""" + + +class StageError(PipelineError): + """Error during pipeline stage execution. 
+ + Attributes: + stage_name: The name of the stage that failed. + original_exception: The underlying exception that caused the failure. + """ + + def __init__( + self, + stage_name: str, + message: str, + original_exception: Optional[Exception] = None, + ) -> None: + self.stage_name = stage_name + self.original_exception = original_exception + full_message = f"[{stage_name}] {message}" + if original_exception is not None:  # a falsy exception is still a cause + full_message += f" (caused by {type(original_exception).__name__}: {original_exception})" + super().__init__(full_message) + + +class ConfigValidationError(PipelineError): + """Error during config validation at build time (before any stage runs).""" + + +class EngineNotAvailableError(PipelineError): + """Raised when a requested engine is not installed or available.""" + + def __init__(self, engine_name: str, message: Optional[str] = None) -> None: + self.engine_name = engine_name + full_message = f"Engine '{engine_name}' is not available" + if message: + full_message += f": {message}" + super().__init__(full_message) diff --git a/backends/qualcomm/genai_pipeline/pipeline_context.py b/backends/qualcomm/genai_pipeline/pipeline_context.py new file mode 100644 index 00000000000..53113d17338 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/pipeline_context.py @@ -0,0 +1,109 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass(frozen=True) +class PipelineContext: + """Immutable context holding all user-provided inputs for the pipeline. + + Attributes: + model_name: Model identifier (e.g., "llama3_2-1b_instruct"). + soc_model: Target SoC (e.g., "SM8750"). + prompt: User prompt(s) for text generation. + artifact_dir: Directory for storing compiled artifacts. 
+ extra_options: Additional options passed through to stages. + """ + + model_name: str + soc_model: str + prompt: List[str] + artifact_dir: str = "./genai_artifacts" + extra_options: Dict[str, Any] = field(default_factory=dict) + + @staticmethod + def builder() -> "PipelineContextBuilder": + return PipelineContextBuilder() + + +class PipelineContextBuilder: + """Builder for constructing a PipelineContext with validation. + + Example usage: + + context = ( + PipelineContext.builder() + .with_model("llama3_2-1b_instruct") + .with_soc("SM8750") + .with_prompt("What is the capital of France?") + .build() + ) + """ + + def __init__(self) -> None: + self._model_name: Optional[str] = None + self._soc_model: Optional[str] = None + self._prompt: Optional[List[str]] = None + self._artifact_dir: str = "./genai_artifacts" + self._extra_options: Dict[str, Any] = {} + + def with_model(self, model_name: str) -> "PipelineContextBuilder": + self._model_name = model_name + return self + + def with_soc(self, soc_model: str) -> "PipelineContextBuilder": + self._soc_model = soc_model + return self + + def with_prompt(self, prompt: str | List[str]) -> "PipelineContextBuilder": + if isinstance(prompt, str): + self._prompt = [prompt] + else: + self._prompt = list(prompt) + return self + + def with_artifact_dir(self, artifact_dir: str) -> "PipelineContextBuilder": + self._artifact_dir = artifact_dir + return self + + def with_extra_options( + self, extra_options: Dict[str, Any] + ) -> "PipelineContextBuilder": + self._extra_options = dict(extra_options) + return self + + def build(self) -> PipelineContext: + """Build and validate the PipelineContext. + + Returns: + A validated PipelineContext instance. + + Raises: + ValueError: If required fields are missing. 
+ """ + missing = [] + if self._model_name is None: + missing.append("model_name (use .with_model())") + if self._soc_model is None: + missing.append("soc_model (use .with_soc())") + if self._prompt is None: + missing.append("prompt (use .with_prompt())") + + if missing: + raise ValueError( + f"Cannot build PipelineContext, missing required fields: " + f"{', '.join(missing)}" + ) + + return PipelineContext( + model_name=self._model_name, + soc_model=self._soc_model, + prompt=list(self._prompt),  # copy: contexts must not alias builder state + artifact_dir=self._artifact_dir, + extra_options=dict(self._extra_options),  # copy, same reason + ) diff --git a/backends/qualcomm/genai_pipeline/pipeline_types.py b/backends/qualcomm/genai_pipeline/pipeline_types.py new file mode 100644 index 00000000000..69f1ed376ae --- /dev/null +++ b/backends/qualcomm/genai_pipeline/pipeline_types.py @@ -0,0 +1,29 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from enum import Enum + + +class EngineType(Enum): + """Selects the inference framework for a pipeline stage.""" + + EXECUTORCH = "executorch" + + +# Pipeline stage names +STAGE_MODEL_PREPARATION = "model_preparation" +STAGE_QUANTIZATION = "quantization" +STAGE_COMPILATION = "compilation" +STAGE_INFERENCE = "inference" + +ALL_STAGES = frozenset( + { + STAGE_MODEL_PREPARATION, + STAGE_QUANTIZATION, + STAGE_COMPILATION, + STAGE_INFERENCE, + } +) diff --git a/backends/qualcomm/genai_pipeline/tests/configs/test_compilation_input_config.py b/backends/qualcomm/genai_pipeline/tests/configs/test_compilation_input_config.py new file mode 100644 index 00000000000..08cb25830d7 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/tests/configs/test_compilation_input_config.py @@ -0,0 +1,28 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. 
+# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from pathlib import Path +from unittest.mock import MagicMock + +from backends.qualcomm.genai_pipeline.configs.compilation_input_config import ( + CompilationInputConfig, +) + + +class TestCompilationInputConfig(unittest.TestCase): + + def test_artifact_dir_default(self): + config = CompilationInputConfig(soc_model=MagicMock(), backend_type=MagicMock()) + self.assertEqual(config.artifact_dir, Path(".")) + + def test_required_fields(self): + with self.assertRaises(TypeError): + CompilationInputConfig() + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/qualcomm/genai_pipeline/tests/configs/test_inference_input_config.py b/backends/qualcomm/genai_pipeline/tests/configs/test_inference_input_config.py new file mode 100644 index 00000000000..32462c930b8 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/tests/configs/test_inference_input_config.py @@ -0,0 +1,29 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest +from unittest.mock import MagicMock + +from backends.qualcomm.genai_pipeline.configs.inference_input_config import ( + InferenceInputConfig, +) + + +class TestInferenceInputConfig(unittest.TestCase): + + def test_inference_options_default_factory(self): + c1 = InferenceInputConfig(soc_model=MagicMock()) + c2 = InferenceInputConfig(soc_model=MagicMock()) + c1.inference_options["key"] = "val" + self.assertEqual(c2.inference_options, {}) + + def test_required_fields(self): + with self.assertRaises(TypeError): + InferenceInputConfig() + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/qualcomm/genai_pipeline/tests/configs/test_model_preparation_input_config.py b/backends/qualcomm/genai_pipeline/tests/configs/test_model_preparation_input_config.py new file mode 100644 index 00000000000..1ef29c6f842 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/tests/configs/test_model_preparation_input_config.py @@ -0,0 +1,28 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest + +from backends.qualcomm.genai_pipeline.configs.model_preparation_input_config import ( + ModelPreparationInputConfig, +) + + +class TestModelPreparationInputConfig(unittest.TestCase): + + def test_extra_options_default_factory(self): + c1 = ModelPreparationInputConfig(model_name="model_a", soc_model="SM8750") + c2 = ModelPreparationInputConfig(model_name="model_b", soc_model="SM8650") + c1.extra_options["key"] = "val" + self.assertEqual(c2.extra_options, {}) + + def test_required_fields(self): + with self.assertRaises(TypeError): + ModelPreparationInputConfig() + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/qualcomm/genai_pipeline/tests/configs/test_quantization_input_config.py b/backends/qualcomm/genai_pipeline/tests/configs/test_quantization_input_config.py new file mode 100644 index 00000000000..25cb1034b4c --- /dev/null +++ b/backends/qualcomm/genai_pipeline/tests/configs/test_quantization_input_config.py @@ -0,0 +1,29 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest +from unittest.mock import MagicMock + +from backends.qualcomm.genai_pipeline.configs.quantization_input_config import ( + QuantizationInputConfig, +) + + +class TestQuantizationInputConfig(unittest.TestCase): + + def test_extra_options_default_factory(self): + c1 = QuantizationInputConfig(soc_model=MagicMock(), backend_type=MagicMock()) + c2 = QuantizationInputConfig(soc_model=MagicMock(), backend_type=MagicMock()) + c1.extra_options["key"] = "val" + self.assertEqual(c2.extra_options, {}) + + def test_required_fields(self): + with self.assertRaises(TypeError): + QuantizationInputConfig() + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/qualcomm/genai_pipeline/tests/test_engine_proxy.py b/backends/qualcomm/genai_pipeline/tests/test_engine_proxy.py new file mode 100644 index 00000000000..dcc530e4c03 --- /dev/null +++ b/backends/qualcomm/genai_pipeline/tests/test_engine_proxy.py @@ -0,0 +1,80 @@ +# Copyright (c) Qualcomm Innovation Center, Inc. +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
import unittest
from unittest.mock import MagicMock

from backends.qualcomm.genai_pipeline.engine_proxy import EngineProxy
from backends.qualcomm.genai_pipeline.pipeline_types import (
    EngineType,
    STAGE_COMPILATION,
    STAGE_INFERENCE,
    STAGE_MODEL_PREPARATION,
    STAGE_QUANTIZATION,
)


class TestEngineProxy(unittest.TestCase):
    """Tests for the stage-to-engine lookup exposed by ``EngineProxy``."""

    def _assert_executorch_for(self, proxy, stages):
        # Check the stages one by one, in the order given by the caller.
        for stage in stages:
            self.assertEqual(proxy.get_engine(stage), EngineType.EXECUTORCH)

    def test_full_executorch_workflow(self):
        # Every stage is mapped explicitly; each lookup must return that engine.
        ordered_stages = (
            STAGE_MODEL_PREPARATION,
            STAGE_QUANTIZATION,
            STAGE_COMPILATION,
            STAGE_INFERENCE,
        )
        routing = {stage: EngineType.EXECUTORCH for stage in ordered_stages}
        proxy = EngineProxy(routing, backend_type=MagicMock(name="kHtpBackend"))
        self._assert_executorch_for(proxy, ordered_stages)

    def test_default_engine_is_executorch(self):
        # No stage is mapped; every lookup is still expected to yield EXECUTORCH.
        proxy = EngineProxy({}, backend_type=MagicMock(name="kHtpBackend"))
        self._assert_executorch_for(
            proxy,
            (
                STAGE_MODEL_PREPARATION,
                STAGE_QUANTIZATION,
                STAGE_COMPILATION,
                STAGE_INFERENCE,
            ),
        )

    def test_backend_type_is_stored(self):
        # The exact object passed as ``backend_type`` must be exposed again.
        expected_backend = MagicMock(name="kHtpBackend")
        routing = {STAGE_INFERENCE: EngineType.EXECUTORCH}
        proxy = EngineProxy(routing, backend_type=expected_backend)
        self.assertIs(proxy.backend_type, expected_backend)

    def test_stage_engines_returns_copy(self):
        # Writing into the mapping returned by ``stage_engines`` must not
        # change what ``get_engine`` reports afterwards.
        proxy = EngineProxy(
            {STAGE_INFERENCE: EngineType.EXECUTORCH},
            backend_type=MagicMock(name="kHtpBackend"),
        )
        proxy.stage_engines[STAGE_INFERENCE] = None
        self.assertEqual(proxy.get_engine(STAGE_INFERENCE), EngineType.EXECUTORCH)

    def test_invalid_stage_name_raises(self):
        # An unrecognised stage key must be rejected at construction time,
        # and the error text must name the offending key.
        with self.assertRaises(ValueError) as caught:
            EngineProxy(
                {"invalid_stage": EngineType.EXECUTORCH},
                backend_type=MagicMock(name="kHtpBackend"),
            )
        message = str(caught.exception)
        for fragment in ("Unknown stage", "invalid_stage"):
            self.assertIn(fragment, message)

    def test_empty_stage_engines(self):
        # An empty mapping still resolves the two stages checked here.
        proxy = EngineProxy({}, backend_type=MagicMock(name="kHtpBackend"))
        self._assert_executorch_for(proxy, (STAGE_QUANTIZATION, STAGE_INFERENCE))


if __name__ == "__main__":
    unittest.main()

# ---- patch boundary: original diff metadata for the next file ----
# diff --git a/backends/qualcomm/genai_pipeline/tests/test_exceptions.py b/backends/qualcomm/genai_pipeline/tests/test_exceptions.py
# new file mode 100644
# index 00000000000..ca96ed0e09f
# --- /dev/null
# +++ b/backends/qualcomm/genai_pipeline/tests/test_exceptions.py
# @@ -0,0 +1,49 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import unittest

from backends.qualcomm.genai_pipeline.exceptions import (
    ConfigValidationError,
    EngineNotAvailableError,
    PipelineError,
    StageError,
)


class TestExceptionHierarchy(unittest.TestCase):
    """Checks that the pipeline exceptions share ``PipelineError`` as a base."""

    def test_all_exceptions_inherit_from_pipeline_error(self):
        # Each entry is (exception class, positional constructor arguments);
        # instances are built one at a time, right before being checked.
        cases = (
            (StageError, ("s", "m")),
            (ConfigValidationError, ("m",)),
            (EngineNotAvailableError, ("e",)),
        )
        for exc_type, ctor_args in cases:
            self.assertIsInstance(exc_type(*ctor_args), PipelineError)


class TestStageError(unittest.TestCase):
    """Checks the text and attributes produced by ``StageError``."""

    def test_message_includes_stage_name(self):
        # The rendered message carries the bracketed stage name and the detail.
        rendered = str(StageError("quantization", "model failed"))
        for fragment in ("[quantization]", "model failed"):
            self.assertIn(fragment, rendered)

    def test_original_exception_chained(self):
        # The wrapped exception is kept by identity, and both its type name
        # and its message appear in the rendered text.
        root_cause = RuntimeError("out of memory")
        wrapped = StageError("quantization", "failed", original_exception=root_cause)
        self.assertIs(wrapped.original_exception, root_cause)
        rendered = str(wrapped)
        for fragment in ("RuntimeError", "out of memory"):
            self.assertIn(fragment, rendered)


class TestEngineNotAvailableError(unittest.TestCase):
    """Checks the text produced by ``EngineNotAvailableError``."""

    def test_custom_message(self):
        # Both constructor arguments must show up in the rendered message.
        rendered = str(EngineNotAvailableError("test_engine", "SDK not installed"))
        for fragment in ("test_engine", "SDK not installed"):
            self.assertIn(fragment, rendered)


if __name__ == "__main__":
    unittest.main()

# ---- patch boundary: original diff metadata for the next file ----
# diff --git a/backends/qualcomm/genai_pipeline/tests/test_pipeline_context.py b/backends/qualcomm/genai_pipeline/tests/test_pipeline_context.py
# new file mode 100644
# index 00000000000..26412fe1c4b
# --- /dev/null
# +++ b/backends/qualcomm/genai_pipeline/tests/test_pipeline_context.py
# @@ -0,0 +1,157 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import unittest

from backends.qualcomm.genai_pipeline.pipeline_context import (
    PipelineContext,
    PipelineContextBuilder,
)

TEST_MODEL_NAME = "llama3_2-1b_instruct"
TEST_SOC_MODEL = "SM8750"
TEST_PROMPT = "What is AI?"
TEST_ARTIFACT_DIR = "/tmp/artifacts"
TEST_DEFAULT_ARTIFACT_DIR = "./genai_artifacts"


class TestPipelineContext(unittest.TestCase):
    """Tests that construct ``PipelineContext`` directly."""

    def test_create_with_all_fields(self):
        # Every field is supplied explicitly and must be read back unchanged.
        ctx = PipelineContext(
            model_name=TEST_MODEL_NAME,
            soc_model=TEST_SOC_MODEL,
            prompt=["Hello world"],
            artifact_dir=TEST_ARTIFACT_DIR,
            extra_options={"key": "value"},
        )
        self.assertEqual(ctx.model_name, TEST_MODEL_NAME)
        self.assertEqual(ctx.soc_model, TEST_SOC_MODEL)
        self.assertEqual(ctx.prompt, ["Hello world"])
        self.assertEqual(ctx.artifact_dir, TEST_ARTIFACT_DIR)
        self.assertEqual(ctx.extra_options, {"key": "value"})

    def test_default_artifact_dir(self):
        # Omitting ``artifact_dir`` must fall back to the expected default path.
        ctx = PipelineContext(
            model_name="test", soc_model=TEST_SOC_MODEL, prompt=["test"]
        )
        self.assertEqual(ctx.artifact_dir, TEST_DEFAULT_ARTIFACT_DIR)

    def test_default_extra_options(self):
        # Omitting ``extra_options`` must yield an empty mapping.
        ctx = PipelineContext(
            model_name="test", soc_model=TEST_SOC_MODEL, prompt=["test"]
        )
        self.assertEqual(ctx.extra_options, {})

    def test_frozen(self):
        # Assigning to a field after construction is expected to raise.
        ctx = PipelineContext(
            model_name="test", soc_model=TEST_SOC_MODEL, prompt=["test"]
        )
        with self.assertRaises(AttributeError):
            ctx.model_name = "changed"

    def test_builder_static_method(self):
        # ``PipelineContext.builder()`` must hand back a builder instance.
        builder = PipelineContext.builder()
        self.assertIsInstance(builder, PipelineContextBuilder)


class TestPipelineContextBuilder(unittest.TestCase):
    """Tests for the fluent ``PipelineContextBuilder`` API."""

    def test_build_with_all_required_fields(self):
        # A string prompt is expected to come back wrapped in a list.
        ctx = (
            PipelineContext.builder()
            .with_model(TEST_MODEL_NAME)
            .with_soc(TEST_SOC_MODEL)
            .with_prompt(TEST_PROMPT)
            .build()
        )
        self.assertEqual(ctx.model_name, TEST_MODEL_NAME)
        self.assertEqual(ctx.soc_model, TEST_SOC_MODEL)
        self.assertEqual(ctx.prompt, [TEST_PROMPT])

    def test_string_prompt_becomes_list(self):
        ctx = (
            PipelineContext.builder()
            .with_model("test")
            .with_soc(TEST_SOC_MODEL)
            .with_prompt("single prompt")
            .build()
        )
        self.assertEqual(ctx.prompt, ["single prompt"])

    def test_list_prompt_preserved(self):
        # A list prompt must compare equal to the list that was passed in.
        test_prompts = ["prompt1", "prompt2"]
        ctx = (
            PipelineContext.builder()
            .with_model("test")
            .with_soc(TEST_SOC_MODEL)
            .with_prompt(test_prompts)
            .build()
        )
        self.assertEqual(ctx.prompt, test_prompts)

    def test_with_artifact_dir(self):
        custom_dir = "/custom/path"
        ctx = (
            PipelineContext.builder()
            .with_model("test")
            .with_soc(TEST_SOC_MODEL)
            .with_prompt("test")
            .with_artifact_dir(custom_dir)
            .build()
        )
        self.assertEqual(ctx.artifact_dir, custom_dir)

    def test_with_extra_options(self):
        extra = {"temperature": 0.8}
        ctx = (
            PipelineContext.builder()
            .with_model("test")
            .with_soc(TEST_SOC_MODEL)
            .with_prompt("test")
            .with_extra_options(extra)
            .build()
        )
        self.assertEqual(ctx.extra_options, extra)

    def test_missing_model_raises(self):
        # The error text must name the field that was never set.
        with self.assertRaises(ValueError) as cm:
            PipelineContext.builder().with_soc(TEST_SOC_MODEL).with_prompt(
                "test"
            ).build()
        self.assertIn("model_name", str(cm.exception))

    def test_missing_soc_raises(self):
        with self.assertRaises(ValueError) as cm:
            PipelineContext.builder().with_model("test").with_prompt("test").build()
        self.assertIn("soc_model", str(cm.exception))

    def test_missing_prompt_raises(self):
        with self.assertRaises(ValueError) as cm:
            PipelineContext.builder().with_model("test").with_soc(
                TEST_SOC_MODEL
            ).build()
        self.assertIn("prompt", str(cm.exception))

    def test_missing_all_raises_with_all_fields(self):
        # With nothing set, a single error must mention all three fields.
        with self.assertRaises(ValueError) as cm:
            PipelineContext.builder().build()
        msg = str(cm.exception)
        self.assertIn("model_name", msg)
        self.assertIn("soc_model", msg)
        self.assertIn("prompt", msg)

    def test_builder_chaining(self):
        # Every ``with_*`` call must return the very same builder object.
        builder = PipelineContextBuilder()
        self.assertIs(builder.with_model("test"), builder)
        self.assertIs(builder.with_soc(TEST_SOC_MODEL), builder)
        self.assertIs(builder.with_prompt("test"), builder)
        self.assertIs(builder.with_artifact_dir("/tmp"), builder)
        self.assertIs(builder.with_extra_options({"k": "v"}), builder)


if __name__ == "__main__":
    unittest.main()

# ---- patch boundary: original diff metadata for the next file ----
# NOTE: everything below belongs to a separate file (test_utils.py) in the
# patch; its constants are that file's own and are unrelated to the ones above.
# diff --git a/backends/qualcomm/genai_pipeline/tests/test_utils.py b/backends/qualcomm/genai_pipeline/tests/test_utils.py
# new file mode 100644
# index 00000000000..aaf1fdc2865
# --- /dev/null
# +++ b/backends/qualcomm/genai_pipeline/tests/test_utils.py
# @@ -0,0 +1,27 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from pathlib import Path

from backends.qualcomm.genai_pipeline.pipeline_context import PipelineContext

# Shared test constants
TEST_MODEL_NAME = "test_model"
TEST_SOC_MODEL = "SM8750"
TEST_PROMPT = ["test"]
TEST_ARTIFACT_DIR = "/tmp/test_artifacts"
TEST_PTE_PATH = Path("/tmp/test.pte")


def make_test_context(**kwargs) -> PipelineContext:
    """Build a ``PipelineContext`` from the shared test constants.

    Args:
        **kwargs: Field overrides; a key given here replaces the matching
            default before the context is constructed.

    Returns:
        A ``PipelineContext`` built from the merged field values.
    """
    defaults = {
        "model_name": TEST_MODEL_NAME,
        "soc_model": TEST_SOC_MODEL,
        # Hand out a fresh list per call: passing the module-level list itself
        # would let an in-place mutation through one context change the
        # constant seen by every later test.
        "prompt": list(TEST_PROMPT),
        "artifact_dir": TEST_ARTIFACT_DIR,
    }
    defaults.update(kwargs)
    return PipelineContext(**defaults)