From 8fb7a53f0e2d0fa85ccdc6dd7ccda44dec6ee0d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 12 Jan 2026 12:05:47 +0100 Subject: [PATCH 1/4] GH-48827: [CI][Python] Add required xz dependency to emscripten dockerfile (#48828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The emscripten job has been failing in the nightly jobs. ### What changes are included in this PR? Install dependencies slightly earlier in the Dockerfile and add xz, which is now required by `install_emscripten.sh`. ### Are these changes tested? Yes via archery. ### Are there any user-facing changes? No * GitHub Issue: #48827 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- ci/docker/conda-python-emscripten.dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 47ff550cd59..878f918710f 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -39,6 +39,11 @@ RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \ RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \ wget -q "${pyodide_dist_url}" -O- | tar -xj -C / +# install node 20 (needed for async call support) +# and pthread-stubs for build, and unzip needed for chrome build to work +# xz is needed by emsdk to extract node tarballs +RUN conda install nodejs=20 unzip pthread-stubs make xz -c conda-forge + # install correct version of emscripten for this pyodide COPY ci/scripts/install_emscripten.sh /arrow/ci/scripts/ RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide @@ -46,10 +51,6 @@ RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide # make sure zlib is cached in the EMSDK folder RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build 
zlib -# install node 20 (needed for async call support) -# and pthread-stubs for build, and unzip needed for chrome build to work -RUN conda install nodejs=20 unzip pthread-stubs make -c conda-forge - # install chrome for testing browser based runner COPY ci/scripts/install_chromedriver.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_chromedriver.sh "${chrome_version}" From b6362d09204136dbf2167fee8ec8ec2af0efe42d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 12 Jan 2026 12:12:07 +0100 Subject: [PATCH 2/4] GH-48582: [CI][GPU][C++][Python] Add new CUDA jobs using the new self-hosted runners (#48583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The CUDA jobs stopped working when Voltron Data infrastructure went down. We have set up with ASF Infra a [runs-on](https://runs-on.com/runners/gpu/) solution to run CUDA runners. ### What changes are included in this PR? Add the new workflow for `cuda_extra.yml` with CI jobs that use the runs-on CUDA runners. Due to the underlying instances having CUDA 12.9 the jobs to be run are: - AMD64 Ubuntu 22 CUDA 11.7.1 - AMD64 Ubuntu 24 CUDA 12.9.0 - AMD64 Ubuntu 22 CUDA 11.7.1 Python - AMD64 Ubuntu 24 CUDA 12.9.0 Python A follow up issue has been created to add jobs for CUDA 13, see: https://github.com/apache/arrow/issues/48783 A new label `CI: Extra: CUDA` has also been created. ### Are these changes tested? Yes via CI ### Are there any user-facing changes? 
No * GitHub Issue: #48582 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- .github/workflows/cuda_extra.yml | 136 +++++++++++++++++++++++++ dev/tasks/docker-tests/github.cuda.yml | 52 ---------- dev/tasks/tasks.yml | 26 ----- 3 files changed, 136 insertions(+), 78 deletions(-) create mode 100644 .github/workflows/cuda_extra.yml delete mode 100644 dev/tasks/docker-tests/github.cuda.yml diff --git a/.github/workflows/cuda_extra.yml b/.github/workflows/cuda_extra.yml new file mode 100644 index 00000000000..1700d6a8456 --- /dev/null +++ b/.github/workflows/cuda_extra.yml @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: CUDA Extra + +on: + push: + tags: + - '**' + pull_request: + types: + - labeled + - opened + - reopened + - synchronize + schedule: + - cron: | + 0 6 * * * + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + actions: read + contents: read + pull-requests: read + +jobs: + check-labels: + if: github.event_name != 'schedule' || github.repository == 'apache/arrow' + uses: ./.github/workflows/check_labels.yml + secrets: inherit + with: + parent-workflow: cuda_extra + + docker: + needs: check-labels + name: ${{ matrix.title }} + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=capacity-optimized" + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: CUDA') + timeout-minutes: 75 + strategy: + fail-fast: false + matrix: + include: + - cuda: 12.9.0 + ubuntu: 24.04 + image: ubuntu-cuda-cpp + title: AMD64 Ubuntu 24 CUDA 12.9.0 + - cuda: 11.7.1 + ubuntu: 22.04 + image: ubuntu-cuda-cpp + title: AMD64 Ubuntu 22 CUDA 11.7.1 + - cuda: 12.9.0 + ubuntu: 24.04 + image: ubuntu-cuda-python + title: AMD64 Ubuntu 24 CUDA 12.9.0 Python + - cuda: 11.7.1 + ubuntu: 22.04 + image: ubuntu-cuda-python + title: AMD64 Ubuntu 22 CUDA 11.7.1 Python + env: + ARCHERY_DEBUG: 1 + ARROW_ENABLE_TIMING_TESTS: OFF + DOCKER_VOLUME_PREFIX: ".docker/" + steps: + - name: Checkout Arrow + uses: actions/checkout@v6 + with: + fetch-depth: 0 + submodules: recursive + - name: Cache Docker Volumes + uses: actions/cache@v5 + with: + path: .docker + key: extra-${{ matrix.image }}-${{ hashFiles('cpp/**') }} + restore-keys: extra-${{ matrix.image }}- + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: 3 + - name: Setup Archery + run: python3 -m pip install -e 
dev/archery[docker] + - name: Display NVIDIA SMI details + run: | + nvidia-smi + nvidia-smi -L + nvidia-smi -q -d Memory + - name: Execute Docker Build + continue-on-error: ${{ matrix.continue-on-error || false }} + env: + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + CUDA: ${{ matrix.cuda }} + UBUNTU: ${{ matrix.ubuntu }} + run: | + # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes + sudo sysctl -w vm.mmap_rnd_bits=28 + source ci/scripts/util_enable_core_dumps.sh + archery docker run ${{ matrix.run-options || '' }} ${{ matrix.image }} + - name: Docker Push + if: >- + success() && + github.event_name == 'push' && + github.repository == 'apache/arrow' && + github.ref_name == 'main' + env: + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + continue-on-error: true + run: archery docker push ${{ matrix.image }} + + report-extra-cpp: + if: github.event_name == 'schedule' && always() + needs: + - docker + uses: ./.github/workflows/report_ci.yml + secrets: inherit diff --git a/dev/tasks/docker-tests/github.cuda.yml b/dev/tasks/docker-tests/github.cuda.yml deleted file mode 100644 index e65ac457b2e..00000000000 --- a/dev/tasks/docker-tests/github.cuda.yml +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -{% import 'macros.jinja' as macros with context %} - -{{ macros.github_header() }} - -jobs: - test: - name: | - Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} - runs-on: ['self-hosted', 'cuda'] -{{ macros.github_set_env(env) }} - timeout-minutes: {{ timeout|default(60) }} - steps: - {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} - # python 3.10 is installed on the runner, no need to install - - name: Install pip - run: sudo apt update && sudo apt install python3-pip -y - - name: Install archery - run: python3 -m pip install -e arrow/dev/archery[docker] - - name: Execute Docker Build - shell: bash - env: - {{ macros.github_set_sccache_envvars()|indent(8) }} - run: | - source arrow/ci/scripts/util_enable_core_dumps.sh - archery docker run \ - -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ - {{ flags|default("") }} \ - {{ image }} \ - {{ command|default("") }} - {% if arrow.is_default_branch() %} - {{ macros.github_login_dockerhub()|indent }} - - name: Push Docker Image - shell: bash - run: archery docker push {{ image }} - {% endif %} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 266073daff6..2667aa1fb5e 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -41,9 +41,6 @@ groups: {############################# Testing tasks #################################} - cuda: - - test-cuda-* - test: - test-* @@ -762,35 +759,12 @@ tasks: artifacts: - docs.tar.gz - ############################## CUDA tests ################################# - -{% for ubuntu, cuda in 
[("22.04", "11.7.1"), ("24.04", "13.0.2")] %} - test-cuda-cpp-ubuntu-{{ ubuntu }}-cuda-{{ cuda }}: - ci: github - template: docker-tests/github.cuda.yml - params: - env: - CUDA: {{ cuda }} - UBUNTU: {{ ubuntu }} - image: ubuntu-cuda-cpp - - test-cuda-python-ubuntu-{{ ubuntu }}-cuda-{{ cuda }}: - ci: github - template: docker-tests/github.cuda.yml - params: - env: - CUDA: {{ cuda }} - UBUNTU: {{ ubuntu }} - image: ubuntu-cuda-python -{% endfor %} - ############################## Fuzz tests ################################# test-build-cpp-fuzz: ci: github template: fuzz-tests/github.oss-fuzz.yml - ############################## vcpkg tests ################################## test-build-vcpkg-win: From 86d28e9d55f8d0f11634b4a2a19233fa843d9261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 13 Jan 2026 13:01:18 +0100 Subject: [PATCH 3/4] GH-48838: [Release] Use gh cli to download sources for Linux packages and publish draft release before verification (#48839) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change With the change we did for immutable releases we required draft releases to be able to keep uploading artifacts during the release process. This means that the interim URL to download assets isn't the expected one on some of our scripts. ### What changes are included in this PR? Update the `download_rc_archive` task so we use the GitHub cli tool instead of manually building the download URL for the source tar.gz from the release. Update order of release scripts to publish the release before running verification tasks so the URL is the final one. ### Are these changes tested? I have manually tested both the `gh release download` script and that the final URL will be the expected one once we move from draft to published release. 
I've tested creating a new release on my own fork here: https://github.com/raulcd/arrow/releases/tag/test-release-rc2 ### Are there any user-facing changes? No * GitHub Issue: #48838 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- .pre-commit-config.yaml | 2 +- ...ublish-gh-release.sh => 07-publish-gh-release.sh} | 0 .../{07-binary-verify.sh => 08-binary-verify.sh} | 0 dev/tasks/linux-packages/apache-arrow/Rakefile | 12 ++++++++---- docs/source/developers/release.rst | 12 ++++++++---- 5 files changed, 17 insertions(+), 9 deletions(-) rename dev/release/{08-publish-gh-release.sh => 07-publish-gh-release.sh} (100%) rename dev/release/{07-binary-verify.sh => 08-binary-verify.sh} (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 186277edf40..da84abed0d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -353,7 +353,7 @@ repos: ?^cpp/examples/minimal_build/run\.sh$| ?^cpp/examples/tutorial_examples/run\.sh$| ?^dev/release/05-binary-upload\.sh$| - ?^dev/release/07-binary-verify\.sh$| + ?^dev/release/08-binary-verify\.sh$| ?^dev/release/binary-recover\.sh$| ?^dev/release/post-03-binary\.sh$| ?^dev/release/post-08-docs\.sh$| diff --git a/dev/release/08-publish-gh-release.sh b/dev/release/07-publish-gh-release.sh similarity index 100% rename from dev/release/08-publish-gh-release.sh rename to dev/release/07-publish-gh-release.sh diff --git a/dev/release/07-binary-verify.sh b/dev/release/08-binary-verify.sh similarity index 100% rename from dev/release/07-binary-verify.sh rename to dev/release/08-binary-verify.sh diff --git a/dev/tasks/linux-packages/apache-arrow/Rakefile b/dev/tasks/linux-packages/apache-arrow/Rakefile index 7644d2d23fb..cdb77108452 100644 --- a/dev/tasks/linux-packages/apache-arrow/Rakefile +++ b/dev/tasks/linux-packages/apache-arrow/Rakefile @@ -59,11 +59,15 @@ class ApacheArrowPackageTask < PackageTask end def download_rc_archive - base_url = "https://github.com/#{github_repository}" - base_url += 
"/releases/download/apache-arrow-#{@version}" archive_name_no_rc = @archive_name.gsub(/-rc\d+(\.tar\.gz)\z/, "\\1") - url = "#{base_url}/#{archive_name_no_rc}" - download(url, @archive_name) + sh("gh", + "release", + "download", + "apache-arrow-#{@version}", + "--clobber", + "--repo", github_repository, + "--pattern", archive_name_no_rc) + mv(archive_name_no_rc, @archive_name) end def download_released_archive diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index c5efc5f30fc..0ec81c1e6c8 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -246,7 +246,8 @@ Build source and binaries and submit them archery crossbow status # Download the produced binaries - # This will download packages to a directory called packages/release--rc + # This will download packages generated from the archery tasks + # to a directory called packages/release--rc dev/release/04-binary-download.sh # Sign and upload the binaries @@ -263,11 +264,14 @@ Build source and binaries and submit them # NOTE: You need to have GitHub CLI installed to run this script. dev/release/06-matlab-upload.sh + # Move the Release Candidate GitHub Release from draft to published state + # This will update the artifacts download URL which will be available for the + # verification step. 
+ dev/release/07-publish-gh-release.sh + # Start verifications for binaries and wheels - dev/release/07-binary-verify.sh + dev/release/08-binary-verify.sh - # Move the Release Candidate GitHub Release from draft to published state - dev/release/08-publish-gh-release.sh Verify the Release ------------------ From e33512d1d82c28e753004d0d7a76c1dca542b1cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 13 Jan 2026 14:03:11 +0100 Subject: [PATCH 4/4] GH-48841: [Release][Package] Add GH_TOKEN to rake build step on Linux Packaging jobs (#48842) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change With: - https://github.com/apache/arrow/pull/48839 We use `gh release download`. This requires `GH_TOKEN` to be available. ### What changes are included in this PR? Add env with `GH_TOKEN`. I've validated that Rake's `sh` inherits the environment variables that are defined in the calling shell. ### Are these changes tested? No ### Are there any user-facing changes? No * GitHub Issue: #48841 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- .github/workflows/package_linux.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 3e4b7592153..c59784d7f58 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -230,6 +230,8 @@ jobs: ${GITHUB_REF_NAME} \ release_candidate.yml - name: Build + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | pushd dev/tasks/linux-packages rake docker:pull || :