From 7506019f734c79c6b49a6cbda251fcdad3ccec24 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:54:53 +0100 Subject: [PATCH 1/7] add Spark 4.1.1 --- spark-k8s/Dockerfile | 11 ++++-- spark-k8s/boil-config.toml | 18 +++++++++ .../4.1.1/0001-Update-CycloneDX-plugin.patch | 38 +++++++++++++++++++ .../stackable/patches/4.1.1/patchable.toml | 1 + 4 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 spark-k8s/stackable/patches/4.1.1/0001-Update-CycloneDX-plugin.patch create mode 100644 spark-k8s/stackable/patches/4.1.1/patchable.toml diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile index 562a435eb..2afbd7a4d 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile @@ -272,11 +272,16 @@ WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/ext RUN < +Date: Thu, 15 Jan 2026 14:27:23 +0100 +Subject: Update CycloneDX plugin + +--- + dev/make-distribution.sh | 1 - + pom.xml | 5 +++++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh +index 16598bda873..327fa763144 100755 +--- a/dev/make-distribution.sh ++++ b/dev/make-distribution.sh +@@ -185,7 +185,6 @@ else + -Dmaven.javadoc.skip=true \ + -Dmaven.scaladoc.skip=true \ + -Dmaven.source.skip \ +- -Dcyclonedx.skip=true \ + $@) + fi + +diff --git a/pom.xml b/pom.xml +index dc757d78812..05f1af034f3 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -3333,6 +3333,11 @@ + org.cyclonedx + cyclonedx-maven-plugin + 2.9.1 ++ ++ application ++ 1.5 ++ false ++ + + + package diff --git a/spark-k8s/stackable/patches/4.1.1/patchable.toml b/spark-k8s/stackable/patches/4.1.1/patchable.toml new file mode 100644 index 000000000..15b658491 --- /dev/null +++ b/spark-k8s/stackable/patches/4.1.1/patchable.toml @@ -0,0 +1 @@ +base = "c0690c763bafabd08e7079d1137fa0a769a05bae" From 82f8be3038bfe5ed5e1b5fa51ab746aa8670a82e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:56:46 +0100 Subject: [PATCH 2/7] delete 3.5.6 and 4.0.1 --- spark-k8s/boil-config.toml | 36 -------- .../3.5.6/0001-Update-CycloneDX-plugin.patch | 40 --------- ...1311-BUILD-Promote-bcprov-jdk18on-to.patch | 83 ------------------- .../stackable/patches/3.5.6/patchable.toml | 1 - .../4.0.1/0001-Update-CycloneDX-plugin.patch | 38 --------- .../stackable/patches/4.0.1/patchable.toml | 2 - 6 files changed, 200 deletions(-) delete mode 100644 spark-k8s/stackable/patches/3.5.6/0001-Update-CycloneDX-plugin.patch delete mode 100644 spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch delete mode 100644 spark-k8s/stackable/patches/3.5.6/patchable.toml delete mode 100644 spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch delete mode 100644 spark-k8s/stackable/patches/4.0.1/patchable.toml diff --git a/spark-k8s/boil-config.toml b/spark-k8s/boil-config.toml index 8ae2cdece..6cff0d34a 100644 --- a/spark-k8s/boil-config.toml +++ b/spark-k8s/boil-config.toml @@ -1,21 +1,3 @@ -[versions."3.5.6".local-images] -"hadoop/hadoop" = "3.4.2" -java-base = "17" -java-devel = "17" -hbase = "2.6.3" - -[versions."3.5.6".build-arguments] -python-version = "3.11" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop 
-jackson-dataformat-xml-version = "2.15.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.6 -stax2-api-version = "4.2.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 -woodstox-core-version = "6.5.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 -jmx-exporter-version = "1.3.0" -tini-version = "0.19.0" -hbase-connector-version = "1.0.1" - [versions."3.5.7".local-images] "hadoop/hadoop" = "3.4.2" java-base = "17" @@ -34,24 +16,6 @@ jmx-exporter-version = "1.3.0" tini-version = "0.19.0" hbase-connector-version = "1.0.1" -[versions."4.0.1".local-images] -"hadoop/hadoop" = "3.4.2" -java-base = "17" -java-devel = "17" -hbase = "2.6.3" - -[versions."4.0.1".build-arguments] -python-version = "3.11" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.18.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.0.1 -stax2-api-version = "4.2.2" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 -woodstox-core-version = "7.0.0" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 -jmx-exporter-version = "1.3.0" -tini-version = "0.19.0" -hbase-connector-version = "1.0.1" # This is not supported in Spark 4 yet. 
- [versions."4.1.1".local-images] "hadoop/hadoop" = "3.4.2" java-base = "21" diff --git a/spark-k8s/stackable/patches/3.5.6/0001-Update-CycloneDX-plugin.patch b/spark-k8s/stackable/patches/3.5.6/0001-Update-CycloneDX-plugin.patch deleted file mode 100644 index 9ad9eea3c..000000000 --- a/spark-k8s/stackable/patches/3.5.6/0001-Update-CycloneDX-plugin.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 94ccf32b4d0eb7c3191b4e5a646605e7386c39ff Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Tue, 11 Mar 2025 17:29:39 +0200 -Subject: Update CycloneDX plugin - ---- - dev/make-distribution.sh | 1 - - pom.xml | 7 ++++++- - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh -index ef7c010e93..0f4c1c74e4 100755 ---- a/dev/make-distribution.sh -+++ b/dev/make-distribution.sh -@@ -171,7 +171,6 @@ BUILD_COMMAND=("$MVN" clean package \ - -Dmaven.javadoc.skip=true \ - -Dmaven.scaladoc.skip=true \ - -Dmaven.source.skip \ -- -Dcyclonedx.skip=true \ - $@) - - # Actually build the jar -diff --git a/pom.xml b/pom.xml -index 68e2c422a2..6216ebb08e 100644 ---- a/pom.xml -+++ b/pom.xml -@@ -3534,7 +3534,12 @@ - - org.cyclonedx - cyclonedx-maven-plugin -- 2.7.9 -+ 2.8.0 -+ -+ application -+ 1.5 -+ false -+ - - - package diff --git a/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch b/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch deleted file mode 100644 index 1a7029a6f..000000000 --- a/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 37d866706d952702effd640babf891fef349da7d Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Tue, 22 Jul 2025 17:34:03 +0200 -Subject: Backport [SPARK-51311][BUILD] Promote bcprov-jdk18on to compile scope - ---- - LICENSE-binary | 1 + - assembly/pom.xml | 14 ++++++++++++++ - dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 + - licenses-binary/LICENSE-bouncycastle.txt | 13 +++++++++++++ - 4 files changed, 29 insertions(+) - create mode 100644 licenses-binary/LICENSE-bouncycastle.txt - -diff --git a/LICENSE-binary b/LICENSE-binary -index 05645977a0..9834cf333f 100644 ---- a/LICENSE-binary -+++ b/LICENSE-binary -@@ -480,6 +480,7 @@ org.typelevel:algebra_2.12:jar - org.typelevel:cats-kernel_2.12 - org.typelevel:machinist_2.12 - net.razorvine:pickle -+org.bouncycastle:bcprov-jdk18on - org.slf4j:jcl-over-slf4j - org.slf4j:jul-to-slf4j - org.slf4j:slf4j-api -diff --git a/assembly/pom.xml b/assembly/pom.xml -index dcc46b0b82..def40ad52e 100644 ---- a/assembly/pom.xml -+++ b/assembly/pom.xml -@@ -85,8 +85,22 @@ - guava - ${hadoop.deps.scope} - -+ -+ -+ -+ org.bouncycastle -+ bcprov-jdk18on -+ ${hadoop.deps.scope} -+ -+ - - -+ - - - -diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 -index dbf0cb34c5..689f50612b 100644 ---- a/dev/deps/spark-deps-hadoop-3-hive-2.3 -+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 -@@ -28,6 +28,7 @@ aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar - azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar - azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar - azure-storage/7.0.1//azure-storage-7.0.1.jar -+bcprov-jdk18on/1.77//bcprov-jdk18on-1.77.jar - blas/3.0.3//blas-3.0.3.jar - bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar - 
breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar -diff --git a/licenses-binary/LICENSE-bouncycastle.txt b/licenses-binary/LICENSE-bouncycastle.txt -new file mode 100644 -index 0000000000..277dcd1ebb ---- /dev/null -+++ b/licenses-binary/LICENSE-bouncycastle.txt -@@ -0,0 +1,13 @@ -+Copyright (c) 2000-2024 The Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org). -+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -+associated documentation files (the "Software"), to deal in the Software without restriction, -+including without limitation the rights to use, copy, modify, merge, publish, distribute, -+sub license, and/or sell copies of the Software, and to permit persons to whom the Software is -+furnished to do so, subject to the following conditions: The above copyright notice and this -+permission notice shall be included in all copies or substantial portions of the Software. -+ -+**THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -+NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -+OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.** diff --git a/spark-k8s/stackable/patches/3.5.6/patchable.toml b/spark-k8s/stackable/patches/3.5.6/patchable.toml deleted file mode 100644 index 633d26c88..000000000 --- a/spark-k8s/stackable/patches/3.5.6/patchable.toml +++ /dev/null @@ -1 +0,0 @@ -base = "303c18c74664f161b9b969ac343784c088b47593" diff --git a/spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch b/spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch deleted file mode 100644 index 863f280c6..000000000 --- a/spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch +++ /dev/null @@ -1,38 +0,0 @@ -From b5de94e20aff25a394c6095c0649b4fcbaa941aa Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Fri, 4 Jul 2025 15:54:55 +0200 -Subject: Update CycloneDX plugin - ---- - dev/make-distribution.sh | 1 - - pom.xml | 5 +++++ - 2 files changed, 5 insertions(+), 1 deletion(-) - -diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh -index 16607e45ae6..44e345a245d 100755 ---- a/dev/make-distribution.sh -+++ b/dev/make-distribution.sh -@@ -176,7 +176,6 @@ BUILD_COMMAND=("$MVN" clean package \ - -Dmaven.javadoc.skip=true \ - -Dmaven.scaladoc.skip=true \ - -Dmaven.source.skip \ -- -Dcyclonedx.skip=true \ - $@) - - # Actually build the jar -diff --git a/pom.xml b/pom.xml -index 22922143fc3..59c3747c625 100644 ---- a/pom.xml -+++ b/pom.xml -@@ -3327,6 +3327,11 @@ - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 -+ -+ application -+ 1.5 -+ false -+ - - - package diff --git a/spark-k8s/stackable/patches/4.0.1/patchable.toml b/spark-k8s/stackable/patches/4.0.1/patchable.toml deleted file mode 100644 index bd074097c..000000000 --- a/spark-k8s/stackable/patches/4.0.1/patchable.toml +++ /dev/null @@ -1,2 +0,0 @@ -base = "29434ea766b0fc3c3bf6eaadb43a8f931133649e" -mirror = "https://github.com/stackabletech/spark.git" From 802933cbad77955e12e99036c643f5e99f686e3c Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 16 Jan 2026 10:59:47 +0100 Subject: [PATCH 3/7] split Dockerfiles per version --- 
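Note: after this split, each version entry in spark-k8s/boil-config.toml selects
its own Containerfile. A minimal sketch of that selection, assuming the
`containerfile` key shown in the boil-config.toml changes later in this series
(values are illustrative, not the full per-version configuration):

    [versions."3.5.7"]
    containerfile = "Dockerfile.3"  # Spark 3.x keeps the HBase connector build stages

    [versions."4.1.1"]
    containerfile = "Dockerfile.4"  # Spark 4.x uses Java 21 and has no HBase connector yet
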
spark-k8s/{Dockerfile => Dockerfile.3} | 50 +---- spark-k8s/Dockerfile.4 | 263 +++++++++++++++++++++++++ spark-k8s/boil-config.toml | 10 +- 3 files changed, 279 insertions(+), 44 deletions(-) rename spark-k8s/{Dockerfile => Dockerfile.3} (85%) create mode 100644 spark-k8s/Dockerfile.4 diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile.3 similarity index 85% rename from spark-k8s/Dockerfile rename to spark-k8s/Dockerfile.3 index 2afbd7a4d..668242723 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile.3 @@ -62,16 +62,6 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patche RUN <>> Build spark +RUN <]' '{print $3}') + + mkdir -p dist/connect + cd dist/connect + + case "${PRODUCT_VERSION}" in + 4*) + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + ;; + *) + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + ;; + esac + + # This link is needed by the operator and is kept for backwards compatibility. + # TODO: remove it at some time in the future. + ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}.jar" + # Link to the spark-connect jar without the stackable suffix and scala version. + # This link supersedes the previous link. 
+ ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" "spark-connect-${PRODUCT_VERSION}.jar" +EOF + +# <<< Build spark + +WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/jars + +# Copy modules required for s3a:// +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ + /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar \ + ./ + +# Copy modules required for abfs:// +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ + /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar \ + ./ + +COPY spark-k8s/stackable/jmx /stackable/jmx + +WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars + +RUN < Date: Fri, 16 Jan 2026 11:03:08 +0100 Subject: [PATCH 4/7] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad1bd5074..d79133bdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file. - opensearch-dashboards: Add `3.4.0` ([#1392]). - testing-tools: build testing tools subimages in workflow ([#1366]). - kafka: Add `4.1.1` ([#1395]). +- spark: Add `4.1.1`, use one Dockerfile per version and remove all HBase deps from the Spark4 image ([#1402]). ### Changed @@ -33,6 +34,7 @@ All notable changes to this project will be documented in this file. - superset: Remove 4.0.2 and 4.1.2 ([#1394]). - kafka: Remove `3.7.2` and `4.1.0` ([#1395]). - opa: remove 1.4.2 ([#1396]). +- spark: Remove `3.5.6` and `4.0.1` ([#1402]). ### Fixed @@ -60,6 +62,7 @@ All notable changes to this project will be documented in this file. [#1394]: https://github.com/stackabletech/docker-images/pull/1394 [#1395]: https://github.com/stackabletech/docker-images/pull/1395 [#1396]: https://github.com/stackabletech/docker-images/pull/1396 +[#1402]: https://github.com/stackabletech/docker-images/pull/1402 ## [25.11.0] - 2025-11-07 From 73930cb07c404a6316d8ebac4c09a6c62f16624e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 16 Jan 2026 12:14:01 +0100 Subject: [PATCH 5/7] cleanup and successful build --- CHANGELOG.md | 5 +++- spark-k8s/Dockerfile.3 | 14 +++++----- spark-k8s/Dockerfile.4 | 58 ++++++++++++++---------------------------- 3 files changed, 30 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d79133bdc..9c0021eca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ All notable changes to this project will be documented in this file. - opensearch-dashboards: Add `3.4.0` ([#1392]). - testing-tools: build testing tools subimages in workflow ([#1366]). - kafka: Add `4.1.1` ([#1395]). -- spark: Add `4.1.1`, use one Dockerfile per version and remove all HBase deps from the Spark4 image ([#1402]). +- spark: Add `4.1.1` ([#1402]). ### Changed @@ -27,6 +27,9 @@ All notable changes to this project will be documented in this file. - trino: Backport Kafka offset handling to 477 ([#1373]). - ubi: Bumped ubi9 and ubi10 hashes ([#1386]). - vector: Bumped from 0.49.0 to 0.52.0 ([#1387]). 
+- spark: Use one Dockerfile per major product version ([#1402]). + Remove all HBase dependencies from the Spark 4 image. + Pull logging dependencies with `mvn` instead of `curl` to remove manual maintenance in Nexus `packages`. ### Removed diff --git a/spark-k8s/Dockerfile.3 b/spark-k8s/Dockerfile.3 index 668242723..351dab6e9 100644 --- a/spark-k8s/Dockerfile.3 +++ b/spark-k8s/Dockerfile.3 @@ -239,16 +239,16 @@ WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/ext RUN < Date: Fri, 16 Jan 2026 16:06:37 +0100 Subject: [PATCH 6/7] update spark-connect-client versions --- CHANGELOG.md | 2 ++ spark-connect-client/boil-config.toml | 17 +++++------------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c0021eca..69e08958e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ All notable changes to this project will be documented in this file. - testing-tools: build testing tools subimages in workflow ([#1366]). - kafka: Add `4.1.1` ([#1395]). - spark: Add `4.1.1` ([#1402]). +- spark-connect-client: Add `4.1.1` ([#1402]). ### Changed @@ -38,6 +39,7 @@ All notable changes to this project will be documented in this file. - kafka: Remove `3.7.2` and `4.1.0` ([#1395]). - opa: remove 1.4.2 ([#1396]). - spark: Remove `3.5.6` and `4.0.1` ([#1402]). +- spark-connect-client: Remove `3.5.6` and `4.0.1` ([#1402]). ### Fixed diff --git a/spark-connect-client/boil-config.toml b/spark-connect-client/boil-config.toml index c0cfddf69..88d33dfe1 100644 --- a/spark-connect-client/boil-config.toml +++ b/spark-connect-client/boil-config.toml @@ -1,10 +1,3 @@ -[versions."3.5.6".local-images] -spark-k8s = "3.5.6" -java-base = "17" - -[versions."3.5.6".build-arguments] -python-version = "3.11" - [versions."3.5.7".local-images] spark-k8s = "3.5.7" java-base = "17" @@ -12,9 +5,9 @@ java-base = "17" [versions."3.5.7".build-arguments] python-version = "3.11" -[versions."4.0.1".local-images] -spark-k8s = "4.0.1" -java-base = "17" +[versions."4.1.1".local-images] +spark-k8s = "4.1.1" +java-base = "21" -[versions."4.0.1".build-arguments] -python-version = "3.11" +[versions."4.1.1".build-arguments] +python-version = "3.12" From a1446e41faba0ccf0d9ab5eda87d4d4547f39e69 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 16 Jan 2026 20:57:10 +0100 Subject: [PATCH 7/7] new spark-k8s/hbase-connectors image --- spark-k8s/Dockerfile.3 | 91 +------------- spark-k8s/boil-config.toml | 4 +- spark-k8s/hbase-connectors/Dockerfile | 119 ++++++++++++++++++ spark-k8s/hbase-connectors/boil-config.toml | 10 ++ .../0001-Fix-protobuf-on-aarch64.patch | 0 .../patches/{1.0.1 => 1.0.1_3}/patchable.toml | 0 6 files changed, 132 insertions(+), 92 deletions(-) create mode 100644 spark-k8s/hbase-connectors/Dockerfile create mode 100644 spark-k8s/hbase-connectors/boil-config.toml rename spark-k8s/hbase-connectors/stackable/patches/{1.0.1 => 1.0.1_3}/0001-Fix-protobuf-on-aarch64.patch (100%) rename spark-k8s/hbase-connectors/stackable/patches/{1.0.1 => 1.0.1_3}/patchable.toml (100%) diff --git a/spark-k8s/Dockerfile.3 b/spark-k8s/Dockerfile.3 index 351dab6e9..bf9be8cb4 100644 --- a/spark-k8s/Dockerfile.3 +++ b/spark-k8s/Dockerfile.3 @@ -33,96 +33,7 @@ EOF # hbase-connectors-builder: Build the Spark HBase connector and copy # required JARs into /stackable/spark/jars -FROM local-image/java-devel AS hbase-connectors-builder - -ARG PRODUCT_VERSION -ARG RELEASE_VERSION -ARG HADOOP_HADOOP_VERSION -# Reassign the arg to 
`HADOOP_VERSION` for better readability. -ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} -ARG HBASE_VERSION -ARG HBASE_CONNECTOR_VERSION -ARG STACKABLE_USER_UID - -WORKDIR /stackable - -# Copy the pom.xml file from the patched Spark source code to read the -# versions used by Spark. The pom.xml defines child modules which are -# not required and not copied, therefore mvn must be called with the -# parameter --non-recursive. -COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ - /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT_VERSION}/pom.xml \ - spark/ - -# Patch the hbase-connectors source code -WORKDIR /stackable - -COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml -COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} - -RUN <]' '{print $3}') - -# Get the Scala binary version used by Spark -SCALA_BINARY_VERSION=$(grep "scala.binary.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}') - -# Build the Spark HBase connector -# Skip the tests because the MiniHBaseCluster does not get ready for -# whatever reason: -# Caused by: java.lang.RuntimeException: Master not active after 30000ms -# at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221) -# at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177) -# at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407) -# at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250) -mvn \ - --batch-mode \ - --no-transfer-progress \ - --define spark.version="${PRODUCT_VERSION}" \ - --define scala.version="${SCALA_VERSION}" \ - --define scala.binary.version="${SCALA_BINARY_VERSION}" \ - --define hadoop-three.version="${HADOOP_VERSION}" \ - --define hbase.version="${HBASE_VERSION}" \ - --define skipTests \ - --define maven.test.skip=true \ - clean package - -mkdir -p /stackable/spark/jars -ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar - -cd /stackable/spark/jars - -# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder -# which is required by the connector. -# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not -# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the -# classpath as long as they have the same version. -mvn --non-recursive --file /stackable/spark/pom.xml \ - dependency:copy \ - -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \ - -DoutputDirectory=./jars -chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz . 
-EOF - +FROM local-image/spark-k8s/hbase-connectors AS hbase-connectors-builder # spark-builder: Build Spark into /stackable/spark-${PRODUCT_VERSION}/dist, # download additional JARs and perform checks diff --git a/spark-k8s/boil-config.toml b/spark-k8s/boil-config.toml index d9a3336b5..9545f617e 100644 --- a/spark-k8s/boil-config.toml +++ b/spark-k8s/boil-config.toml @@ -6,6 +6,7 @@ containerfile = "Dockerfile.3" java-base = "17" java-devel = "17" hbase = "2.6.3" +"spark-k8s/hbase-connectors" = "1.0.1_3" [versions."3.5.7".build-arguments] python-version = "3.11" @@ -17,7 +18,7 @@ stax2-api-version = "4.2.1" # needs to match the jackson version h woodstox-core-version = "6.5.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" -hbase-connector-version = "1.0.1" +hbase-connector-version = "1.0.1_3" [versions."4.1.1"] containerfile = "Dockerfile.4" @@ -38,4 +39,3 @@ stax2-api-version = "4.2.2" # needs to match the jackson version h woodstox-core-version = "7.1.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies jmx-exporter-version = "1.3.0" tini-version = "0.19.0" -# hbase-connector-version = "1.0.1" # Not yet available for Spark 4.x https://github.com/apache/hbase-connectors/pull/130 diff --git a/spark-k8s/hbase-connectors/Dockerfile b/spark-k8s/hbase-connectors/Dockerfile new file mode 100644 index 000000000..f32ae2bcf --- /dev/null +++ b/spark-k8s/hbase-connectors/Dockerfile @@ -0,0 +1,119 @@ +# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# check=error=true + +# The purpose of this stage is to gather jars and environment variables needed in the final stage. +# These are collected in the /stackable/spark directory. 
+FROM local-image/java-devel AS spark-source-builder + +ARG RELEASE_VERSION +ARG SPARK_VERSION +ARG STACKABLE_USER_UID + +WORKDIR /stackable + +COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/patches/patchable.toml /stackable/src/spark-k8s/stackable/patches/patchable.toml +COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/patches/${SPARK_VERSION} /stackable/src/spark-k8s/stackable/patches/${SPARK_VERSION} + +RUN <]' '{print $3}') + +# Get the Scala binary version used by Spark +SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}') + +echo "SCALA_VERSION=${SCALA_VERSION}" > /stackable/spark/env +echo "SCALA_BINARY_VERSION=${SCALA_BINARY_VERSION}" >> /stackable/spark/env +echo "SPARK_VERSION=${SPARK_VERSION}" >> /stackable/spark/env +EOF + +# hbase-connectors-builder: Build the Spark HBase connector and copy +# required JARs into /stackable/spark/jars +FROM local-image/java-devel AS final + +ARG PRODUCT_VERSION +ARG RELEASE_VERSION +ARG HADOOP_VERSION +ARG HBASE_VERSION +ARG STACKABLE_USER_UID + +# Patch the hbase-connectors source code +WORKDIR /stackable + +COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml +COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${PRODUCT_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${PRODUCT_VERSION} + +# Copy jars and env from spark-source-builder +COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ + /stackable/spark/jars \ + spark/jars +COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ + /stackable/spark/env \ + spark/env + +RUN <