if [[ "$FLOW" == *cuda* ]]; then
    # CUDA flows run on the PyTorch test-infra Docker image (which has nvcc).
    # Install executorch directly - it auto-detects CUDA and installs a
    # CUDA-enabled PyTorch. setup-linux.sh is skipped further down (see
    # CUDA_SETUP_DONE) because it expects the custom Docker image with a
    # pre-built pinned-commit torch.
    echo "Installing ExecuTorch with CUDA support..."
    ./install_executorch.sh --editable

    # Fail fast if the install did not produce a CUDA-capable PyTorch.
    python -c "import torch; assert torch.cuda.is_available(), 'PyTorch CUDA not available after reinstall'; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" || {
        echo "ERROR: PyTorch was not installed with CUDA support"
        exit 1
    }

    # Fix libstdc++ GLIBCXX version for the CUDA backend.
    # The embedded .so files in the CUDA blob require GLIBCXX_3.4.30,
    # which the default conda libstdc++ doesn't have.
    echo "Installing newer libstdc++ for CUDA backend..."
    conda install -y -c conda-forge 'libstdcxx-ng>=12'
    # Append the previous value only when it is non-empty. A bare trailing ':'
    # creates an empty entry, which the dynamic loader interprets as the
    # current working directory.
    export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

    source .ci/scripts/utils.sh
    CMAKE_ARGS="$EXTRA_BUILD_ARGS" build_executorch_runner cmake Release
    # Tells the generic setup step below that the build is already done.
    CUDA_SETUP_DONE=1
fi
@@ -78,12 +104,14 @@ if [[ "$FLOW" == *arm* ]]; then fi fi -if [[ $IS_MACOS -eq 1 ]]; then - SETUP_SCRIPT=.ci/scripts/setup-macos.sh -else - SETUP_SCRIPT=.ci/scripts/setup-linux.sh +if [[ "${CUDA_SETUP_DONE:-0}" != "1" ]]; then + if [[ $IS_MACOS -eq 1 ]]; then + SETUP_SCRIPT=.ci/scripts/setup-macos.sh + else + SETUP_SCRIPT=.ci/scripts/setup-linux.sh + fi + CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true fi -CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true GOLDEN_DIR="${ARTIFACT_DIR}/golden-artifacts" export GOLDEN_ARTIFACTS_DIR="${GOLDEN_DIR}" diff --git a/.github/workflows/test-backend-cuda.yml b/.github/workflows/test-backend-cuda.yml new file mode 100644 index 00000000000..220bbc3a673 --- /dev/null +++ b/.github/workflows/test-backend-cuda.yml @@ -0,0 +1,41 @@ +name: Test CUDA Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-cuda.yml + - .ci/scripts/test_backend.sh + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-cuda: + strategy: + fail-fast: false + matrix: + suite: [models, operators] + + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + runner: linux.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: '12.6' + use-custom-docker-registry: false + submodules: recursive + timeout: 120 + upload-artifact: test-report-cuda-${{ matrix.suite }} + script: | + set -eux + + source .ci/scripts/test_backend.sh "${{ matrix.suite }}" "cuda" "${RUNNER_ARTIFACT_DIR}" diff --git a/backends/cuda/test/tester.py 
def _create_default_partitioner() -> CudaPartitioner:
    """Create a CudaPartitioner with default compile specs.

    The only compile spec is the one returned by
    ``CudaBackend.generate_method_name_compile_spec("forward")``.
    """
    compile_specs = [CudaBackend.generate_method_name_compile_spec("forward")]
    return CudaPartitioner(compile_specs)


class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
    """CUDA-specific ToEdgeTransformAndLower stage.

    Args:
        partitioners: Partitioners to lower with. When ``None``, a single
            default CudaPartitioner is used.
        edge_compile_config: Edge compile configuration. When falsy, an
            ``EdgeCompileConfig`` with ``_check_ir_validity=False`` is used.
    """

    def __init__(
        self,
        partitioners: Optional[List[Partitioner]] = None,
        edge_compile_config: Optional[EdgeCompileConfig] = None,
    ):
        if partitioners is None:
            partitioners = [_create_default_partitioner()]

        super().__init__(
            default_partitioner_cls=_create_default_partitioner,
            partitioners=partitioners,
            edge_compile_config=edge_compile_config
            or EdgeCompileConfig(_check_ir_validity=False),
        )


class CudaTester(TesterBase):
    """
    Tester subclass for CUDA backend.

    This tester defines the recipe for lowering models to the CUDA backend
    using AOTInductor compilation.

    Args:
        module: The module under test.
        example_inputs: Example inputs forwarded to the base tester. Any
            number of tensors is accepted.
        dynamic_shapes: Optional dynamic-shape specification forwarded
            unchanged to the base tester.
    """

    def __init__(
        self,
        module: torch.nn.Module,
        # Variadic tuple: ``Tuple[torch.Tensor]`` would mean exactly one tensor.
        example_inputs: Tuple[torch.Tensor, ...],
        dynamic_shapes: Optional[Tuple[Any, ...]] = None,
    ):
        # Start from the harness defaults and swap in the CUDA lowering stage.
        stage_classes = TesterBase.default_stage_classes() | {
            StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
        }

        super().__init__(
            module=module,
            stage_classes=stage_classes,
            example_inputs=example_inputs,
            dynamic_shapes=dynamic_shapes,
        )
len(self.data_files) == 1: + data_map_buffer = list(self.data_files.values())[0] + else: + # For multiple files, we use the first one and warn + # This is a limitation - proper handling would need runtime support + logger.warning( + f"Multiple external data files found ({list(self.data_files.keys())}). " + f"Using the first one. This may not work correctly for all backends." + ) + data_map_buffer = list(self.data_files.values())[0] + executorch_module = _load_for_executorch_from_buffer( - self.buffer, program_verification=Verification.Minimal + self.buffer, + data_map_buffer=data_map_buffer, + program_verification=Verification.Minimal, ) executorch_output = copy.deepcopy( executorch_module.run_method("forward", tuple(inputs_flattened)) diff --git a/backends/test/suite/conftest.py b/backends/test/suite/conftest.py index 6de1e59a6b1..340e6c9ae2b 100644 --- a/backends/test/suite/conftest.py +++ b/backends/test/suite/conftest.py @@ -3,7 +3,6 @@ import pytest import torch - from executorch.backends.test.suite.flow import all_flows, TestFlow from executorch.backends.test.suite.reporting import _sum_op_counts from executorch.backends.test.suite.runner import run_test @@ -103,7 +102,14 @@ def lower_and_run_model( ids=str, ) def test_runner(request): - return TestRunner(request.param, request.node.name, request.node.originalname) + flow = request.param + test_name = request.node.name + + # Check if this test should be skipped based on the flow's skip_patterns + if flow.should_skip_test(test_name): + pytest.skip(f"Test '{test_name}' matches skip pattern for flow '{flow.name}'") + + return TestRunner(flow, test_name, request.node.originalname) @pytest.hookimpl(optionalhook=True) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index f3c9ee75083..5c071d025c1 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -4,7 +4,6 @@ # LICENSE file in the root directory of this source tree. 
def _try_import_flows(
    module_path: str, flow_names: list[str], backend_name: str
) -> list[TestFlow]:
    """
    Load named TestFlow objects from a backend flow module, if it is usable.

    Args:
        module_path: Dotted path of the module that defines the flows.
        flow_names: Attribute names to read from that module, in order.
        backend_name: Label used in the log message when loading fails.

    Returns:
        The requested attributes in ``flow_names`` order, or an empty list if
        importing the module or reading any attribute raises an exception.
    """
    try:
        import importlib

        backend_module = importlib.import_module(module_path)
        resolved = []
        for attr_name in flow_names:
            resolved.append(getattr(backend_module, attr_name))
    except Exception as e:
        # Best effort: a backend that cannot be loaded is skipped as a whole.
        logger.info(f"Skipping {backend_name} flow registration: {e}")
        return []
    return resolved


# Optional backend flows, in registration order.
# Each entry is (module_path, flow_names, backend_name).
_FLOW_REGISTRY: list[tuple[str, list[str], str]] = [
    (
        "executorch.backends.test.suite.flows.xnnpack",
        [
            "XNNPACK_TEST_FLOW",
            "XNNPACK_DYNAMIC_INT8_PER_CHANNEL_TEST_FLOW",
            "XNNPACK_STATIC_INT8_PER_CHANNEL_TEST_FLOW",
            "XNNPACK_STATIC_INT8_PER_TENSOR_TEST_FLOW",
        ],
        "XNNPACK",
    ),
    (
        "executorch.backends.test.suite.flows.coreml",
        ["COREML_TEST_FLOW", "COREML_STATIC_INT8_TEST_FLOW"],
        "Core ML",
    ),
    (
        "executorch.backends.test.suite.flows.vulkan",
        ["VULKAN_TEST_FLOW", "VULKAN_STATIC_INT8_PER_CHANNEL_TEST_FLOW"],
        "Vulkan",
    ),
    (
        "executorch.backends.test.suite.flows.qualcomm",
        [
            "QNN_TEST_FLOW",
            "QNN_16A16W_TEST_FLOW",
            "QNN_16A8W_TEST_FLOW",
            "QNN_16A4W_TEST_FLOW",
            "QNN_16A4W_BLOCK_TEST_FLOW",
            "QNN_8A8W_TEST_FLOW",
        ],
        "QNN",
    ),
    (
        "executorch.backends.test.suite.flows.arm",
        [
            "ARM_TOSA_FP_FLOW",
            "ARM_TOSA_INT_FLOW",
            "ARM_ETHOS_U55_FLOW",
            "ARM_ETHOS_U85_FLOW",
            "ARM_VGF_FP_FLOW",
            "ARM_VGF_INT_FLOW",
        ],
        "ARM",
    ),
    (
        "executorch.backends.test.suite.flows.cuda",
        ["CUDA_TEST_FLOW"],
        "CUDA",
    ),
]


def all_flows() -> dict[str, TestFlow]:
    """Return every available test flow, keyed by flow name.

    The portable flow is imported unconditionally. Each backend listed in
    ``_FLOW_REGISTRY`` is added only if its flows can be loaded.
    """
    from executorch.backends.test.suite.flows.portable import PORTABLE_TEST_FLOW

    collected = [PORTABLE_TEST_FLOW]
    for registry_entry in _FLOW_REGISTRY:
        collected += _try_import_flows(*registry_entry)

    by_name = {}
    for flow in collected:
        if flow is not None:
            by_name[flow.name] = flow
    return by_name
def _create_cuda_flow(name: str = "cuda") -> TestFlow:
    """Build the TestFlow that runs the suite through the CUDA tester.

    The CUDA backend stores its data outside the program file (the .so and
    the weights blob live in a .ptd file). The harness serialize stage passes
    such external data to ``_load_for_executorch_from_buffer`` through its
    ``data_map_buffer`` parameter.
    """
    cuda_flow = TestFlow(
        name,
        backend="cuda",
        tester_factory=CudaTester,
        quantize=False,
    )
    return cuda_flow


CUDA_TEST_FLOW = _create_cuda_flow("cuda")
@property
def data_files(self) -> Dict[str, bytes]:
    """Return the external data files as a filename-to-bytes mapping.

    External data files (e.g., .ptd files) hold tensor data that is stored
    separately from the main .pte file. Backends such as CUDA serialize their
    weights this way.

    Returns:
        Dict[str, bytes]: One entry per file in ``self._tensor_data``, with
        each value converted via ``bytes(...)``. Empty when
        ``self._tensor_data`` is ``None``.
    """
    external_data = self._tensor_data
    if external_data is None:
        return {}

    materialized: Dict[str, bytes] = {}
    for filename, cord in external_data.items():
        materialized[filename] = bytes(cord)
    return materialized