Skip to content

Commit 1656c34

Browse files
authored
Merge branch 'ml-explore:main' into feat/padding
2 parents 37b0c3c + df7f7db commit 1656c34

429 files changed

Lines changed: 27951 additions & 8009 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/actions/build-cuda-release/action.yml

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
name: 'Build CUDA wheel'
22
description: 'Build CUDA wheel'
33

4+
inputs:
5+
arch:
6+
description: 'Platform architecture tag'
7+
required: true
8+
type: choice
9+
options:
10+
- x86_64
11+
- aarch64
12+
413
runs:
514
using: "composite"
615
steps:
@@ -11,5 +20,12 @@ runs:
1120
run: |
1221
pip install auditwheel build patchelf setuptools
1322
python setup.py clean --all
14-
MLX_BUILD_STAGE=2 python -m build -w
15-
bash python/scripts/repair_cuda.sh
23+
MLX_DISABLE_SM90A_KERNELS=1 MLX_BUILD_STAGE=2 python -m build -w
24+
25+
auditwheel repair dist/mlx_cuda*.whl \
26+
--plat manylinux_2_35_${{ inputs.arch }} \
27+
--exclude libcublas* \
28+
--exclude libcuda* \
29+
--exclude libcudnn* \
30+
--exclude libnccl* \
31+
--exclude libnvrtc*

.github/actions/build-linux-release/action.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,21 @@ inputs:
1818
runs:
1919
using: "composite"
2020
steps:
21-
- name: Generate package stubs
21+
- name: Build MLX
2222
shell: bash
23-
run: |
24-
pip install -e ".[dev]" -v
25-
pip install typing_extensions
26-
python setup.py generate_stubs
23+
run: pip install -e . -v
24+
2725
- name: Build Python package
2826
shell: bash
2927
run: |
3028
pip install auditwheel patchelf build
3129
python setup.py clean --all
3230
MLX_BUILD_STAGE=1 python -m build -w
33-
bash python/scripts/repair_linux.sh ${{ inputs.arch }}
31+
auditwheel repair dist/mlx-*.whl \
32+
--plat manylinux_2_35_${{ inputs.arch }} \
33+
--exclude libmlx.so* \
34+
--only-plat
35+
3436
- name: Build backend package
3537
if: ${{ inputs.build-backend }}
3638
shell: bash

.github/actions/build-linux/action.yml

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ inputs:
99
runs:
1010
using: "composite"
1111
steps:
12+
1213
- name: Install Python package
1314
id: python_build
1415
shell: sh
@@ -20,20 +21,16 @@ runs:
2021
run: |
2122
if ${{ startsWith(inputs.toolkit, 'cuda') && runner.arch == 'arm64' }} ; then
2223
# There is no GPU on the arm64 runner, so use a common arch.
23-
CMAKE_ARGS="$CMAKE_ARGS -DMLX_CUDA_ARCHITECTURES=90a"
24-
# Can not build tests when the built executables can not run.
25-
CMAKE_ARGS="$CMAKE_ARGS -DMLX_BUILD_TESTS=OFF"
24+
CMAKE_ARGS="$CMAKE_ARGS -DMLX_CUDA_ARCHITECTURES=80"
25+
# Cannot build tests or stubs when the built executables cannot run.
26+
CMAKE_ARGS="$CMAKE_ARGS -DMLX_BUILD_TESTS=OFF -DMLX_BUILD_PYTHON_STUBS=OFF"
2627
fi
28+
# Install cpu-only torch to save space
29+
pip install torch --index-url https://download.pytorch.org/whl/cpu
2730
pip install --no-build-isolation -e ".[dev]" -v
2831
# Pass CMAKE_ARGS to the following steps.
2932
echo CMAKE_ARGS="$CMAKE_ARGS" >> $GITHUB_OUTPUT
3033
31-
- name: Generate package stubs
32-
shell: sh
33-
run: |
34-
pip install typing_extensions
35-
python setup.py generate_stubs
36-
3734
- name: Build CPP only
3835
shell: bash
3936
run: |

.github/actions/build-macos-release/action.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ runs:
1818
- name: Build Python package
1919
shell: bash -l {0}
2020
env:
21+
DEVELOPER_DIR: /Applications/Xcode-latest.app
2122
MACOSX_DEPLOYMENT_TARGET: ${{ inputs.macos-target }}
2223
run: |
2324
pip install build
@@ -28,6 +29,7 @@ runs:
2829
if: ${{ inputs.build-backend }}
2930
shell: bash -l {0}
3031
env:
32+
DEVELOPER_DIR: /Applications/Xcode-latest.app
3133
MACOSX_DEPLOYMENT_TARGET: ${{ inputs.macos-target }}
3234
run: |
3335
python setup.py clean --all

.github/actions/build-macos/action.yml

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,21 @@ runs:
1111
shell: bash -l {0}
1212
run: |
1313
pip install --upgrade pip
14-
pip install cmake setuptools nanobind==2.4.0
15-
pip install -e . -v
16-
17-
- name: Generate package stubs
18-
shell: bash -l {0}
19-
run: |
20-
pip install typing_extensions
21-
python setup.py generate_stubs
14+
pip install cmake setuptools typing_extensions
15+
pip install -e ".[dev]" -v
2216
2317
- name: Install tests dependencies
2418
shell: bash -l {0}
2519
run: |
26-
pip install numpy torch tensorflow unittest-xml-reporting
20+
pip install tensorflow
2721
2822
- name: Run Python tests
2923
shell: bash -l {0}
3024
env:
3125
LOW_MEMORY: 1
3226
run: |
33-
DEVICE=cpu python -m xmlrunner discover -v python/tests -o test-results/cpu
34-
DEVICE=gpu METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 python -m xmlrunner discover -v python/tests -o test-results/gpu
27+
DEVICE=cpu python -m unittest discover -v python/tests
28+
DEVICE=gpu METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 python -m unittest discover -v python/tests
3529
mpirun --bind-to none -host localhost:8 -np 8 -x DYLD_LIBRARY_PATH=/opt/homebrew/lib/ python python/tests/mpi_test_distributed.py
3630
mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
3731
# Fail when the ring test logged a warning. Test grep's exit status directly:
# wrapping it in $(...) would execute the matched log line as a command.
if grep -q "\[WARN\]" stderr.log; then echo "Distributed ring test failed"; exit 1; fi
@@ -83,6 +77,4 @@ runs:
8377
run: |
8478
CMAKE_ARGS="-DMLX_METAL_JIT=ON" \
8579
pip install -e . -v
86-
python -m xmlrunner discover \
87-
-v python/tests \
88-
-o test-results/gpu_jit
80+
python -m unittest discover -v python/tests
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: 'Build on Windows'
2+
3+
runs:
4+
using: 'composite'
5+
steps:
6+
- name: Install Python package
7+
id: python-build
8+
shell: cmd
9+
env:
10+
# For MSVC, Ninja/Release is the only config supported by ccache.
11+
CMAKE_ARGS: >-
12+
-G Ninja
13+
-DCMAKE_BUILD_TYPE=Release
14+
-DCMAKE_C_COMPILER=cl
15+
-DCMAKE_CXX_COMPILER=cl
16+
-DCMAKE_RC_COMPILER=rc
17+
run: |
18+
uv pip install ".[dev]" -v
19+
:: Pass CMAKE_ARGS to the following steps.
20+
>>%GITHUB_OUTPUT% ECHO CMAKE_ARGS=%CMAKE_ARGS%
21+
22+
- name: Build CPP only
23+
shell: cmd
24+
run: |
25+
cmake . -B build ${{ steps.python-build.outputs.CMAKE_ARGS }}
26+
cmake --build build -j %NUMBER_OF_PROCESSORS%

.github/actions/setup-linux/action.yml

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,34 @@ inputs:
99
python-version:
1010
description: 'Version of python to set up'
1111
required: false
12-
default: '3.10'
12+
default: '3.14'
13+
use-ccache:
14+
description: 'Whether to enable ccache'
15+
required: false
16+
default: 'true'
1317

1418
runs:
1519
using: "composite"
1620
steps:
17-
- name: Use ccache
18-
uses: hendrikmuhs/ccache-action@v1.2
19-
with:
20-
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ inputs.toolkit }}-py${{ inputs.python-version }}
21-
max-size: 1GB
22-
2321
- name: Install common dependencies
2422
shell: bash
2523
run: |
24+
echo "::group::Install common dependencies"
2625
sudo apt-get update
27-
sudo apt-get install -y libblas-dev liblapack-dev liblapacke-dev zip
26+
sudo apt-get install -y --no-install-recommends \
27+
zip \
28+
libblas-dev liblapack-dev liblapacke-dev \
29+
openmpi-bin openmpi-common libopenmpi-dev
30+
echo "::endgroup::"
31+
32+
- name: Use ccache
33+
if: ${{ inputs.use-ccache == 'true' }}
34+
uses: hendrikmuhs/ccache-action@v1.2
35+
with:
36+
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ inputs.toolkit }}
37+
max-size: 1GB
38+
# ccache-action bug: running "apt-get update" fails on the large arm runner.
39+
update-package-index: false
2840

2941
- uses: actions/setup-python@v6
3042
with:
@@ -33,16 +45,14 @@ runs:
3345
- name: Setup Python venv
3446
shell: bash
3547
run: |
48+
echo "::group::Setup Python venv"
3649
python -m venv .venv
3750
source .venv/bin/activate
38-
pip install setuptools cmake nanobind==2.4.0
51+
pip install setuptools cmake typing_extensions
3952
echo PATH=$PATH >> $GITHUB_ENV
40-
# Make cmake search .venv for nanobind
53+
# Search for Python packages in .venv
4154
echo PYTHONPATH=`python -c 'import sys; print(sys.path[-1])'` >> $GITHUB_ENV
42-
43-
- name: Install MPI
44-
shell: bash
45-
run: sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
55+
echo "::endgroup::"
4656
4757
- name: Install CUDA toolkit
4858
if: ${{ startsWith(inputs.toolkit, 'cuda') }}
@@ -53,34 +63,31 @@ runs:
5363
# https://docs.nvidia.com/deeplearning/cudnn/backend/latest/reference/support-matrix.html
5464
PACKAGES: |
5565
{
56-
"cuda-12.6": "libcudnn9-dev-cuda-12 cuda-toolkit-12-6",
57-
"cuda-12.9": "libcudnn9-dev-cuda-12 cuda-toolkit-12-9",
58-
"cuda-13.0": "libcudnn9-dev-cuda-13 cuda-toolkit-13-0"
66+
"cuda-12.6": "libcudnn9-dev-cuda-12 cuda-compiler-12-6 cuda-libraries-dev-12-6",
67+
"cuda-12.9": "libcudnn9-dev-cuda-12 cuda-compiler-12-9 cuda-libraries-dev-12-9",
68+
"cuda-13.0": "libcudnn9-dev-cuda-13 cuda-compiler-13-0 cuda-libraries-dev-13-0"
5969
}
6070
run: |
71+
echo "::group::Install CUDA toolkit"
6172
# The CUDA binaries are hosted in the "sbsa" repo, the "arm64" repo is
6273
# Jetson specific. SBSA means Arm Server Base System Architecture.
6374
ARCH=${{ runner.arch == 'arm64' && 'sbsa' || 'x86_64' }}
6475
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$ARCH/cuda-keyring_1.1-1_all.deb
6576
sudo dpkg -i cuda-keyring_1.1-1_all.deb
6677
sudo apt-get update
67-
sudo apt-get install -y \
78+
sudo apt-get install -y --no-install-recommends \
6879
libnccl2 libnccl-dev \
6980
${{ fromJson(env.PACKAGES)[inputs.toolkit] }}
7081
echo "/usr/local/${{ inputs.toolkit }}/bin" >> $GITHUB_PATH
82+
echo "::endgroup::"
7183
7284
- name: CUDA packages and driver report
7385
if: ${{ startsWith(inputs.toolkit, 'cuda') }}
7486
shell: bash
7587
run: |
76-
sudo apt-get install -y ubuntu-drivers-common dkms
77-
echo "NVIDIA Driver Packages Available:"
78-
sudo ubuntu-drivers list --gpgpu
79-
echo "NVIDIA Driver Version:"
80-
cat /proc/driver/nvidia/version || echo "nvidia driver not found"
81-
echo "Installed NVIDIA and CUDA packages:"
88+
echo "::group::Installed NVIDIA and CUDA packages"
8289
# List installed packages whose dpkg entry mentions "cuda" or "nvidia"
# (case-insensitive). Uses grep -E because egrep is obsolescent.
dpkg -l | grep -Ei "cuda|nvidia"
83-
echo "DKMS Status:"
84-
dkms status || echo "dkms not found"
85-
echo "NVIDIA-SMI Status:"
86-
nvidia-smi || echo "nvidia-smi not found"
90+
echo "::endgroup::"
91+
echo "::group::NVIDIA-SMI Status"
92+
nvidia-smi || true
93+
echo "::endgroup::"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: 'Setup Windows environment'
2+
3+
inputs:
4+
python-version:
5+
description: 'Version of python to set up'
6+
required: false
7+
default: '3.14'
8+
use-ccache:
9+
description: 'Whether to enable ccache'
10+
required: false
11+
default: 'true'
12+
13+
runs:
14+
using: 'composite'
15+
steps:
16+
- name: Use ccache
17+
if: ${{ inputs.use-ccache == 'true' }}
18+
uses: hendrikmuhs/ccache-action@v1.2
19+
with:
20+
key: ccache-${{ runner.os }}-${{ runner.arch }}-cpu
21+
max-size: 1GB
22+
23+
- name: Setup Visual Studio cmd
24+
shell: cmd
25+
run: |
26+
:: Find the path to the Visual Studio installation.
27+
pushd "C:\Program Files (x86)\Microsoft Visual Studio\Installer\"
28+
for /f "delims=" %%x in ('.\vswhere.exe -latest -property InstallationPath') do set VSPATH=%%x
29+
popd
30+
:: Import VS vars.
31+
call "%VSPATH%\VC\Auxiliary\Build\vcvarsall.bat" x64
32+
:: Export to all steps.
33+
>>%GITHUB_ENV% set
34+
35+
- uses: astral-sh/setup-uv@v7
36+
37+
- name: Setup Python venv
38+
shell: cmd
39+
run: |
40+
uv venv --python ${{ inputs.python-version }}
41+
call ".venv/Scripts/activate.bat"
42+
>>%GITHUB_ENV% set

.github/actions/test-linux/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,5 @@ runs:
6565
DEVICE: gpu
6666
run: |
6767
echo "::group::CPP tests - GPU"
68-
./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"
68+
./build/tests/tests -sfe="*linalg_tests.cpp"
6969
echo "::endgroup::"
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: 'Run tests on Windows'
2+
3+
runs:
4+
using: 'composite'
5+
steps:
6+
- name: Run Python tests - CPU
7+
shell: bash
8+
run: |
9+
echo "::group::Python tests - CPU"
10+
python -m unittest discover python/tests -v
11+
echo "::endgroup::"
12+
13+
- name: Run CPP tests - CPU
14+
shell: bash
15+
env:
16+
DEVICE: cpu
17+
run: |
18+
echo "::group::CPP tests - CPU"
19+
./build/tests.exe -tce="*gguf*,test random uniform"
20+
echo "::endgroup::"

0 commit comments

Comments
 (0)