Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions .ci/scripts/test_backend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ echo "Running backend test job for suite $SUITE, flow $FLOW."
echo "Saving job artifacts to $ARTIFACT_DIR."

eval "$(conda shell.bash hook)"
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
CONDA_ENV=$(conda env list --json | python -c "import sys, json; print(json.load(sys.stdin)['envs'][-1])")
conda activate "${CONDA_ENV}"

if [[ "$(uname)" == "Darwin" ]]; then
Expand Down Expand Up @@ -56,6 +56,32 @@ if [[ "$FLOW" == *vulkan* ]]; then
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
fi

if [[ "$FLOW" == *cuda* ]]; then
  # When running with the PyTorch test-infra Docker image (which has nvcc),
  # install executorch directly — it will auto-detect CUDA and install
  # CUDA-enabled PyTorch. Skip setup-linux.sh which expects the custom
  # Docker image with pre-built pinned-commit torch.
  echo "Installing ExecuTorch with CUDA support..."
  ./install_executorch.sh --editable

  # Verify PyTorch was installed with CUDA support; fail fast with a clear
  # message instead of letting later test steps fail obscurely.
  python -c "import torch; assert torch.cuda.is_available(), 'PyTorch CUDA not available after reinstall'; print(f'PyTorch {torch.__version__} with CUDA {torch.version.cuda}')" || {
    echo "ERROR: PyTorch was not installed with CUDA support"
    exit 1
  }

  # Fix libstdc++ GLIBCXX version for CUDA backend.
  # The embedded .so files in the CUDA blob require GLIBCXX_3.4.30
  # which the default conda libstdc++ doesn't have.
  echo "Installing newer libstdc++ for CUDA backend..."
  conda install -y -c conda-forge 'libstdcxx-ng>=12'
  # Make the conda-provided libstdc++ win over the system one at load time.
  export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}"

  source .ci/scripts/utils.sh
  CMAKE_ARGS="$EXTRA_BUILD_ARGS" build_executorch_runner cmake Release
  # Signal the generic setup section below that install+build already happened.
  CUDA_SETUP_DONE=1
fi

if [[ "$FLOW" == *arm* ]]; then

# Setup ARM deps.
Expand All @@ -78,12 +104,14 @@ if [[ "$FLOW" == *arm* ]]; then
fi
fi

# Run the generic per-OS setup unless the CUDA branch above already installed
# and built everything (setup-linux.sh assumes the pinned-torch Docker image,
# which the CUDA flow does not use).
if [[ "${CUDA_SETUP_DONE:-0}" != "1" ]]; then
  if [[ $IS_MACOS -eq 1 ]]; then
    SETUP_SCRIPT=.ci/scripts/setup-macos.sh
  else
    SETUP_SCRIPT=.ci/scripts/setup-linux.sh
  fi
  CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true
fi

GOLDEN_DIR="${ARTIFACT_DIR}/golden-artifacts"
export GOLDEN_ARTIFACTS_DIR="${GOLDEN_DIR}"
Expand Down
41 changes: 41 additions & 0 deletions .github/workflows/test-backend-cuda.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: Test CUDA Backend

on:
  # Nightly run at 02:00 UTC.
  schedule:
    - cron: '0 2 * * *'
  push:
    branches:
      - release/*
    tags:
      - ciflow/nightly/*
  # On PRs, only run when this workflow or the shared backend script changes.
  pull_request:
    paths:
      - .github/workflows/test-backend-cuda.yml
      - .ci/scripts/test_backend.sh
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

jobs:
  test-cuda:
    strategy:
      fail-fast: false
      matrix:
        suite: [models, operators]

    # Reusable GPU job from pytorch/test-infra.
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: '12.6'
      use-custom-docker-registry: false
      submodules: recursive
      timeout: 120
      upload-artifact: test-report-cuda-${{ matrix.suite }}
      script: |
        set -eux

        source .ci/scripts/test_backend.sh "${{ matrix.suite }}" "cuda" "${RUNNER_ARTIFACT_DIR}"
71 changes: 71 additions & 0 deletions backends/cuda/test/tester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Any, List, Optional, Tuple

import executorch
import executorch.backends.test.harness.stages as BaseStages
import torch
from executorch.backends.cuda.cuda_backend import CudaBackend
from executorch.backends.cuda.cuda_partitioner import CudaPartitioner
from executorch.backends.test.harness import Tester as TesterBase
from executorch.backends.test.harness.stages import StageType
from executorch.exir import EdgeCompileConfig
from executorch.exir.backend.partitioner import Partitioner


def _create_default_partitioner() -> CudaPartitioner:
    """Build a CudaPartitioner whose compile specs target the "forward" method."""
    method_spec = CudaBackend.generate_method_name_compile_spec("forward")
    return CudaPartitioner([method_spec])


class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
    """CUDA-specific ToEdgeTransformAndLower stage."""

    def __init__(
        self,
        partitioners: Optional[List[Partitioner]] = None,
        edge_compile_config: Optional[EdgeCompileConfig] = None,
    ):
        # When the caller supplies no partitioners, fall back to a single
        # CUDA partitioner targeting the "forward" method.
        if partitioners is None:
            active_partitioners: List[Partitioner] = [_create_default_partitioner()]
        else:
            active_partitioners = partitioners

        # IR validity checking is disabled by default for CUDA lowering.
        config = edge_compile_config or EdgeCompileConfig(_check_ir_validity=False)

        super().__init__(
            default_partitioner_cls=_create_default_partitioner,
            partitioners=active_partitioners,
            edge_compile_config=config,
        )


class CudaTester(TesterBase):
    """
    Tester subclass for CUDA backend.
    This tester defines the recipe for lowering models to the CUDA backend
    using AOTInductor compilation.
    """

    def __init__(
        self,
        module: torch.nn.Module,
        example_inputs: Tuple[torch.Tensor, ...],
        dynamic_shapes: Optional[Tuple[Any, ...]] = None,
    ):
        # Start from the harness's default stage recipe and swap in the
        # CUDA-specific to_edge_transform_and_lower stage defined above.
        stage_classes = (
            executorch.backends.test.harness.Tester.default_stage_classes()
            | {
                StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
            }
        )

        super().__init__(
            module=module,
            stage_classes=stage_classes,
            example_inputs=example_inputs,
            dynamic_shapes=dynamic_shapes,
        )
30 changes: 26 additions & 4 deletions backends/test/harness/stages/serialize.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import copy
import logging

from typing import Optional
from typing import Dict, Optional

from executorch.backends.test.harness.stages.stage import Stage, StageType
from executorch.exir import ExecutorchProgramManager

from torch.utils._pytree import tree_flatten

logger = logging.getLogger(__name__)
Expand All @@ -23,12 +21,15 @@
class Serialize(Stage):
def __init__(self):
    """Initialize an empty Serialize stage; state is populated by run()."""
    # Serialized program bytes, set from the artifact in run().
    self.buffer = None
    # External data blobs keyed by name (e.g., .ptd files), set in run().
    self.data_files: Dict[str, bytes] = {}

def stage_type(self) -> StageType:
    """Identify this stage as the SERIALIZE stage."""
    return StageType.SERIALIZE

def run(self, artifact: ExecutorchProgramManager, inputs=None) -> None:
    """Record the serialized program and its external data from *artifact*.

    Args:
        artifact: Program manager whose serialized buffer is captured.
        inputs: Unused; accepted for Stage interface compatibility.
    """
    self.buffer = artifact.buffer
    # Capture external data files (e.g., .ptd files for CUDA backend)
    self.data_files = artifact.data_files

@property
def artifact(self) -> bytes:
Expand All @@ -40,8 +41,29 @@ def graph_module(self) -> None:

def run_artifact(self, inputs):
    """Load the serialized program from memory and run its "forward" method.

    Flattens *inputs* via pytree, passes any captured external data file as
    the runtime's data map buffer, and executes the loaded module.
    """
    inputs_flattened, _ = tree_flatten(inputs)

    # Pass external data (e.g., .ptd files for CUDA backend) to the runtime.
    # Only a single external data file is supported here; when several were
    # captured, use the first and warn — proper multi-file handling would
    # need runtime support.
    data_map_buffer = None
    if self.data_files:
        if len(self.data_files) > 1:
            logger.warning(
                f"Multiple external data files found ({list(self.data_files.keys())}). "
                f"Using the first one. This may not work correctly for all backends."
            )
        data_map_buffer = next(iter(self.data_files.values()))

    executorch_module = _load_for_executorch_from_buffer(
        self.buffer,
        data_map_buffer=data_map_buffer,
        program_verification=Verification.Minimal,
    )
executorch_output = copy.deepcopy(
executorch_module.run_method("forward", tuple(inputs_flattened))
Expand Down
10 changes: 8 additions & 2 deletions backends/test/suite/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import pytest
import torch

from executorch.backends.test.suite.flow import all_flows, TestFlow
from executorch.backends.test.suite.reporting import _sum_op_counts
from executorch.backends.test.suite.runner import run_test
Expand Down Expand Up @@ -103,7 +102,14 @@ def lower_and_run_model(
ids=str,
)
def test_runner(request):
    """Parametrized fixture yielding a TestRunner for the requested flow.

    Skips the test up front when the test name matches one of the flow's
    skip patterns, so skipped cases never reach lowering/execution.
    """
    flow = request.param
    test_name = request.node.name

    # Check if this test should be skipped based on the flow's skip_patterns
    if flow.should_skip_test(test_name):
        pytest.skip(f"Test '{test_name}' matches skip pattern for flow '{flow.name}'")

    return TestRunner(flow, test_name, request.node.originalname)


@pytest.hookimpl(optionalhook=True)
Expand Down
Loading
Loading