diff --git a/CHANGELOG.md b/CHANGELOG.md index ad1bd5074..69e08958e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ All notable changes to this project will be documented in this file. - opensearch-dashboards: Add `3.4.0` ([#1392]). - testing-tools: build testing tools subimages in workflow ([#1366]). - kafka: Add `4.1.1` ([#1395]). +- spark: Add `4.1.1` ([#1402]). +- spark-connect-client: Add `4.1.1` ([#1402]). ### Changed @@ -26,6 +28,9 @@ All notable changes to this project will be documented in this file. - trino: Backport Kafka offset handling to 477 ([#1373]). - ubi: Bumped ubi9 and ubi10 hashes ([#1386]). - vector: Bumped from 0.49.0 to 0.52.0 ([#1387]). +- spark: Use one Dockerfile per major product version ([#1402]). + Remove all HBase dependencies from the Spark 4 image. + Pull logging dependencies with `mvn` instead of `curl` to remove manual maintenance in Nexus `packages`. ### Removed @@ -33,6 +38,8 @@ All notable changes to this project will be documented in this file. - superset: Remove 4.0.2 and 4.1.2 ([#1394]). - kafka: Remove `3.7.2` and `4.1.0` ([#1395]). - opa: remove 1.4.2 ([#1396]). +- spark: Remove `3.5.6` and `4.0.1` ([#1402]). +- spark-connect-client: Remove `3.5.6` and `4.0.1` ([#1402]). ### Fixed @@ -60,6 +67,7 @@ All notable changes to this project will be documented in this file. 
[#1394]: https://github.com/stackabletech/docker-images/pull/1394 [#1395]: https://github.com/stackabletech/docker-images/pull/1395 [#1396]: https://github.com/stackabletech/docker-images/pull/1396 +[#1402]: https://github.com/stackabletech/docker-images/pull/1402 ## [25.11.0] - 2025-11-07 diff --git a/spark-connect-client/boil-config.toml b/spark-connect-client/boil-config.toml index c0cfddf69..88d33dfe1 100644 --- a/spark-connect-client/boil-config.toml +++ b/spark-connect-client/boil-config.toml @@ -1,10 +1,3 @@ -[versions."3.5.6".local-images] -spark-k8s = "3.5.6" -java-base = "17" - -[versions."3.5.6".build-arguments] -python-version = "3.11" - [versions."3.5.7".local-images] spark-k8s = "3.5.7" java-base = "17" @@ -12,9 +5,9 @@ java-base = "17" [versions."3.5.7".build-arguments] python-version = "3.11" -[versions."4.0.1".local-images] -spark-k8s = "4.0.1" -java-base = "17" +[versions."4.1.1".local-images] +spark-k8s = "4.1.1" +java-base = "21" -[versions."4.0.1".build-arguments] -python-version = "3.11" +[versions."4.1.1".build-arguments] +python-version = "3.12" diff --git a/spark-k8s/Dockerfile b/spark-k8s/Dockerfile.3 similarity index 57% rename from spark-k8s/Dockerfile rename to spark-k8s/Dockerfile.3 index 562a435eb..bf9be8cb4 100644 --- a/spark-k8s/Dockerfile +++ b/spark-k8s/Dockerfile.3 @@ -33,106 +33,7 @@ EOF # hbase-connectors-builder: Build the Spark HBase connector and copy # required JARs into /stackable/spark/jars -FROM local-image/java-devel AS hbase-connectors-builder - -ARG PRODUCT_VERSION -ARG RELEASE_VERSION -ARG HADOOP_HADOOP_VERSION -# Reassign the arg to `HADOOP_VERSION` for better readability. -ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} -ARG HBASE_VERSION -ARG HBASE_CONNECTOR_VERSION -ARG STACKABLE_USER_UID - -WORKDIR /stackable - -# Copy the pom.xml file from the patched Spark source code to read the -# versions used by Spark. 
The pom.xml defines child modules which are -# not required and not copied, therefore mvn must be called with the -# parameter --non-recursive. -COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ - /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT_VERSION}/pom.xml \ - spark/ - -# Patch the hbase-connectors source code -WORKDIR /stackable - -COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml -COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} - -RUN <]' '{print $3}') - -# Get the Scala binary version used by Spark -SCALA_BINARY_VERSION=$(grep "scala.binary.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}') - -# Build the Spark HBase connector -# Skip the tests because the MiniHBaseCluster does not get ready for -# whatever reason: -# Caused by: java.lang.RuntimeException: Master not active after 30000ms -# at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221) -# at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177) -# at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407) -# at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250) -mvn \ - --batch-mode \ - --no-transfer-progress \ - --define spark.version="${PRODUCT_VERSION}" \ - --define scala.version="${SCALA_VERSION}" \ - --define scala.binary.version="${SCALA_BINARY_VERSION}" \ - --define hadoop-three.version="${HADOOP_VERSION}" \ - --define hbase.version="${HBASE_VERSION}" \ - --define skipTests \ - --define maven.test.skip=true \ - clean package - -mkdir -p /stackable/spark/jars -ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar" 
/stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar - -cd /stackable/spark/jars - -# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder -# which is required by the connector. -# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not -# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the -# classpath as long as they have the same version. -mvn --non-recursive --file /stackable/spark/pom.xml \ - dependency:copy \ - -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \ - -DoutputDirectory=./jars -chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz . -EOF - +FROM local-image/spark-k8s/hbase-connectors AS hbase-connectors-builder # spark-builder: Build Spark into /stackable/spark-${PRODUCT_VERSION}/dist, # download additional JARs and perform checks @@ -173,26 +74,11 @@ RUN <>> Build spark +RUN <]' '{print $3}') + + mkdir -p dist/connect + cd dist/connect + + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" . + + # This link is needed by the operator and is kept for backwards compatibility. + # TODO: remove it at some time in the future. + ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}.jar" + # Link to the spark-connect jar without the stackable suffix and scala version. 
+ # This link supersedes the previous link. + ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" "spark-connect-${PRODUCT_VERSION}.jar" +EOF + +# <<< Build spark + +WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/jars + +# Copy modules required for s3a:// +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ + /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar \ + ./ + +# Copy modules required for abfs:// +COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ + /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar \ + /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar \ + ./ + +COPY spark-k8s/stackable/jmx /stackable/jmx + +WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars + +RUN <]' '{print $3}') + +# Get the Scala binary version used by Spark +SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}') + +echo "SCALA_VERSION=${SCALA_VERSION}" > /stackable/spark/env +echo "SCALA_BINARY_VERSION=${SCALA_BINARY_VERSION}" >> /stackable/spark/env +echo "SPARK_VERSION=${SPARK_VERSION}" >> /stackable/spark/env +EOF + +# final: Build the Spark HBase connector and copy +# required JARs into /stackable/spark/jars +FROM local-image/java-devel AS final + +ARG PRODUCT_VERSION +ARG RELEASE_VERSION +ARG HADOOP_VERSION +ARG HBASE_VERSION +ARG STACKABLE_USER_UID + +# Patch the hbase-connectors source code +WORKDIR /stackable + +COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml 
/stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml +COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${PRODUCT_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${PRODUCT_VERSION} + +# Copy jars and env from spark-source-builder +COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ + /stackable/spark/jars \ + spark/jars +COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \ + /stackable/spark/env \ + spark/env + +RUN < -Date: Tue, 11 Mar 2025 17:29:39 +0200 -Subject: Update CycloneDX plugin - ---- - dev/make-distribution.sh | 1 - - pom.xml | 7 ++++++- - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh -index ef7c010e93..0f4c1c74e4 100755 ---- a/dev/make-distribution.sh -+++ b/dev/make-distribution.sh -@@ -171,7 +171,6 @@ BUILD_COMMAND=("$MVN" clean package \ - -Dmaven.javadoc.skip=true \ - -Dmaven.scaladoc.skip=true \ - -Dmaven.source.skip \ -- -Dcyclonedx.skip=true \ - $@) - - # Actually build the jar -diff --git a/pom.xml b/pom.xml -index 68e2c422a2..6216ebb08e 100644 ---- a/pom.xml -+++ b/pom.xml -@@ -3534,7 +3534,12 @@ - - org.cyclonedx - cyclonedx-maven-plugin -- 2.7.9 -+ 2.8.0 -+ -+ application -+ 1.5 -+ false -+ - - - package diff --git a/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch b/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch deleted file mode 100644 index 1a7029a6f..000000000 --- a/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 37d866706d952702effd640babf891fef349da7d Mon Sep 17 00:00:00 2001 -From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Tue, 22 Jul 2025 17:34:03 +0200 -Subject: Backport [SPARK-51311][BUILD] Promote bcprov-jdk18on to compile scope - ---- - 
LICENSE-binary | 1 + - assembly/pom.xml | 14 ++++++++++++++ - dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 + - licenses-binary/LICENSE-bouncycastle.txt | 13 +++++++++++++ - 4 files changed, 29 insertions(+) - create mode 100644 licenses-binary/LICENSE-bouncycastle.txt - -diff --git a/LICENSE-binary b/LICENSE-binary -index 05645977a0..9834cf333f 100644 ---- a/LICENSE-binary -+++ b/LICENSE-binary -@@ -480,6 +480,7 @@ org.typelevel:algebra_2.12:jar - org.typelevel:cats-kernel_2.12 - org.typelevel:machinist_2.12 - net.razorvine:pickle -+org.bouncycastle:bcprov-jdk18on - org.slf4j:jcl-over-slf4j - org.slf4j:jul-to-slf4j - org.slf4j:slf4j-api -diff --git a/assembly/pom.xml b/assembly/pom.xml -index dcc46b0b82..def40ad52e 100644 ---- a/assembly/pom.xml -+++ b/assembly/pom.xml -@@ -85,8 +85,22 @@ - guava - ${hadoop.deps.scope} - -+ -+ -+ -+ org.bouncycastle -+ bcprov-jdk18on -+ ${hadoop.deps.scope} -+ -+ - - -+ - - - -diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 -index dbf0cb34c5..689f50612b 100644 ---- a/dev/deps/spark-deps-hadoop-3-hive-2.3 -+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 -@@ -28,6 +28,7 @@ aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar - azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar - azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar - azure-storage/7.0.1//azure-storage-7.0.1.jar -+bcprov-jdk18on/1.77//bcprov-jdk18on-1.77.jar - blas/3.0.3//blas-3.0.3.jar - bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar - breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar -diff --git a/licenses-binary/LICENSE-bouncycastle.txt b/licenses-binary/LICENSE-bouncycastle.txt -new file mode 100644 -index 0000000000..277dcd1ebb ---- /dev/null -+++ b/licenses-binary/LICENSE-bouncycastle.txt -@@ -0,0 +1,13 @@ -+Copyright (c) 2000-2024 The Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org). 
-+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -+associated documentation files (the "Software"), to deal in the Software without restriction, -+including without limitation the rights to use, copy, modify, merge, publish, distribute, -+sub license, and/or sell copies of the Software, and to permit persons to whom the Software is -+furnished to do so, subject to the following conditions: The above copyright notice and this -+permission notice shall be included in all copies or substantial portions of the Software. -+ -+**THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -+NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -+OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.** diff --git a/spark-k8s/stackable/patches/3.5.6/patchable.toml b/spark-k8s/stackable/patches/3.5.6/patchable.toml deleted file mode 100644 index 633d26c88..000000000 --- a/spark-k8s/stackable/patches/3.5.6/patchable.toml +++ /dev/null @@ -1 +0,0 @@ -base = "303c18c74664f161b9b969ac343784c088b47593" diff --git a/spark-k8s/stackable/patches/4.0.1/patchable.toml b/spark-k8s/stackable/patches/4.0.1/patchable.toml deleted file mode 100644 index bd074097c..000000000 --- a/spark-k8s/stackable/patches/4.0.1/patchable.toml +++ /dev/null @@ -1,2 +0,0 @@ -base = "29434ea766b0fc3c3bf6eaadb43a8f931133649e" -mirror = "https://github.com/stackabletech/spark.git" diff --git a/spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch b/spark-k8s/stackable/patches/4.1.1/0001-Update-CycloneDX-plugin.patch similarity index 63% rename from spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch rename to 
spark-k8s/stackable/patches/4.1.1/0001-Update-CycloneDX-plugin.patch index 863f280c6..fc0b0ec9f 100644 --- a/spark-k8s/stackable/patches/4.0.1/0001-Update-CycloneDX-plugin.patch +++ b/spark-k8s/stackable/patches/4.1.1/0001-Update-CycloneDX-plugin.patch @@ -1,6 +1,6 @@ -From b5de94e20aff25a394c6095c0649b4fcbaa941aa Mon Sep 17 00:00:00 2001 +From 81b0e112da483402a453c22b672608779eeb8187 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> -Date: Fri, 4 Jul 2025 15:54:55 +0200 +Date: Thu, 15 Jan 2026 14:27:23 +0100 Subject: Update CycloneDX plugin --- @@ -9,25 +9,25 @@ Subject: Update CycloneDX plugin 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh -index 16607e45ae6..44e345a245d 100755 +index 16598bda873..327fa763144 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh -@@ -176,7 +176,6 @@ BUILD_COMMAND=("$MVN" clean package \ - -Dmaven.javadoc.skip=true \ - -Dmaven.scaladoc.skip=true \ - -Dmaven.source.skip \ -- -Dcyclonedx.skip=true \ - $@) +@@ -185,7 +185,6 @@ else + -Dmaven.javadoc.skip=true \ + -Dmaven.scaladoc.skip=true \ + -Dmaven.source.skip \ +- -Dcyclonedx.skip=true \ + $@) + fi - # Actually build the jar diff --git a/pom.xml b/pom.xml -index 22922143fc3..59c3747c625 100644 +index dc757d78812..05f1af034f3 100644 --- a/pom.xml +++ b/pom.xml -@@ -3327,6 +3327,11 @@ +@@ -3333,6 +3333,11 @@ org.cyclonedx cyclonedx-maven-plugin - 2.8.0 + 2.9.1 + + application + 1.5 diff --git a/spark-k8s/stackable/patches/4.1.1/patchable.toml b/spark-k8s/stackable/patches/4.1.1/patchable.toml new file mode 100644 index 000000000..15b658491 --- /dev/null +++ b/spark-k8s/stackable/patches/4.1.1/patchable.toml @@ -0,0 +1 @@ +base = "c0690c763bafabd08e7079d1137fa0a769a05bae"