Skip to content

Commit d72bb63

Browse files
authored
[GH-2609] Support Spark 4.1 (#2649)
1 parent 7c6c768 commit d72bb63

99 files changed

Lines changed: 4578 additions & 208 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.github/workflows/java.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,17 @@ jobs:
6262
fail-fast: true
6363
matrix:
6464
include:
65-
- spark: 4.0.0
66-
scala: 2.13.8
65+
- spark: 4.1.1
66+
scala: 2.13.17
6767
jdk: '17'
68-
- spark: 3.5.4
69-
scala: 2.12.18
68+
- spark: 4.0.2
69+
scala: 2.13.17
7070
jdk: '17'
71-
- spark: 3.5.0
71+
- spark: 3.5.8
7272
scala: 2.13.8
7373
jdk: '11'
7474
skipTests: ''
75-
- spark: 3.5.0
75+
- spark: 3.5.8
7676
scala: 2.12.15
7777
jdk: '11'
7878
skipTests: ''

.github/workflows/python.yml

Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
strategy:
6161
matrix:
6262
include:
63-
- spark: '4.0.0'
63+
- spark: '4.1.1'
6464
scala: '2.13.8'
6565
java: '17'
6666
python: '3.11'
@@ -69,42 +69,9 @@ jobs:
6969
java: '17'
7070
python: '3.10'
7171
- spark: '3.5.0'
72-
scala: '2.12.8'
73-
java: '11'
74-
python: '3.11'
75-
- spark: '3.5.0'
76-
scala: '2.12.8'
77-
java: '11'
78-
python: '3.10'
79-
shapely: '1'
80-
- spark: '3.5.0'
81-
scala: '2.12.8'
82-
java: '11'
83-
python: '3.10'
84-
- spark: '3.5.0'
85-
scala: '2.12.8'
86-
java: '11'
87-
python: '3.9'
88-
- spark: '3.5.0'
89-
scala: '2.12.8'
90-
java: '11'
91-
python: '3.8'
92-
- spark: '3.4.0'
93-
scala: '2.12.8'
94-
java: '11'
95-
python: '3.11'
96-
- spark: '3.4.0'
97-
scala: '2.12.8'
98-
java: '11'
99-
python: '3.10'
100-
- spark: '3.4.0'
10172
scala: '2.12.8'
10273
java: '11'
10374
python: '3.9'
104-
- spark: '3.4.0'
105-
scala: '2.12.8'
106-
java: '11'
107-
python: '3.8'
10875
- spark: '3.4.0'
10976
scala: '2.12.8'
11077
java: '11'
@@ -149,9 +116,9 @@ jobs:
149116
fi
150117
151118
if [ "${SPARK_VERSION:0:1}" == "4" ]; then
152-
# Spark 4.0 requires Python 3.9+, and we remove flink since it conflicts with pyspark 4.0
119+
# Spark 4.x requires Python 3.10+, and we remove flink since it conflicts with pyspark 4.x
153120
uv remove apache-flink --optional flink
154-
uv add "pyspark==4.0.0; python_version >= '3.9'"
121+
uv add "pyspark==${SPARK_VERSION}; python_version >= '3.10'"
155122
else
156123
# Install specific pyspark version matching matrix
157124
uv add pyspark==${SPARK_VERSION}

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ repos:
288288
- id: clang-format
289289
name: run clang-format
290290
description: format C files with clang-format
291-
args: [--style=file:.github/linters/.clang-format]
291+
args: ['--style=file:.github/linters/.clang-format']
292292
types_or: [c]
293293
- repo: https://github.com/PyCQA/bandit
294294
rev: 1.9.3

docs/community/publish.md

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,13 @@ rm -rf $LOCAL_DIR && git clone --depth 1 --branch $TAG $REPO_URL $LOCAL_DIR && c
119119
MAVEN_PLUGIN_VERSION="2.3.2"
120120

121121
# Define Spark and Scala versions
122-
declare -a SPARK_VERSIONS=("3.4" "3.5" "4.0")
122+
declare -a SPARK_VERSIONS=("3.4" "3.5" "4.0" "4.1")
123123
declare -a SCALA_VERSIONS=("2.12" "2.13")
124124

125125
# Function to get Java version for Spark version
126126
get_java_version() {
127127
local spark_version=$1
128-
if [[ "$spark_version" == "4.0" ]]; then
128+
if [[ "$spark_version" == "4."* ]]; then
129129
echo "17"
130130
else
131131
echo "11"
@@ -217,8 +217,8 @@ verify_java_version() {
217217
# Iterate through Spark and Scala versions
218218
for SPARK in "${SPARK_VERSIONS[@]}"; do
219219
for SCALA in "${SCALA_VERSIONS[@]}"; do
220-
# Skip Spark 4.0 + Scala 2.12 combination as it's not supported
221-
if [[ "$SPARK" == "4.0" && "$SCALA" == "2.12" ]]; then
220+
# Skip Spark 4.x + Scala 2.12 combinations as they're not supported
221+
if [[ "$SPARK" == "4."* && "$SCALA" == "2.12" ]]; then
222222
echo "Skipping Spark $SPARK with Scala $SCALA (not supported)"
223223
continue
224224
fi
@@ -286,7 +286,7 @@ mkdir apache-sedona-${SEDONA_VERSION}-bin
286286
# Function to get Java version for Spark version
287287
get_java_version() {
288288
local spark_version=$1
289-
if [[ "$spark_version" == "4.0" ]]; then
289+
if [[ "$spark_version" == "4."* ]]; then
290290
echo "17"
291291
else
292292
echo "11"
@@ -410,6 +410,15 @@ echo "Compiling for Spark 4.0 with Scala 2.13 using Java $JAVA_VERSION..."
410410
cd apache-sedona-${SEDONA_VERSION}-src && $MVN_WRAPPER clean && $MVN_WRAPPER install -DskipTests -Dspark=4.0 -Dscala=2.13 && cd ..
411411
cp apache-sedona-${SEDONA_VERSION}-src/spark-shaded/target/sedona-*${SEDONA_VERSION}.jar apache-sedona-${SEDONA_VERSION}-bin/
412412

413+
# Compile for Spark 4.1 with Java 17
414+
JAVA_VERSION=$(get_java_version "4.1")
415+
MVN_WRAPPER=$(create_mvn_wrapper $JAVA_VERSION)
416+
verify_java_version $MVN_WRAPPER $JAVA_VERSION
417+
418+
echo "Compiling for Spark 4.1 with Scala 2.13 using Java $JAVA_VERSION..."
419+
cd apache-sedona-${SEDONA_VERSION}-src && $MVN_WRAPPER clean && $MVN_WRAPPER install -DskipTests -Dspark=4.1 -Dscala=2.13 && cd ..
420+
cp apache-sedona-${SEDONA_VERSION}-src/spark-shaded/target/sedona-*${SEDONA_VERSION}.jar apache-sedona-${SEDONA_VERSION}-bin/
421+
413422
# Clean up Maven wrappers
414423
rm -f /tmp/mvn-java11 /tmp/mvn-java17
415424

docs/setup/maven-coordinates.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,22 @@ The optional GeoTools library is required if you want to use raster operators. V
133133
</dependency>
134134
```
135135

136+
=== "Spark 4.1 and Scala 2.13"
137+
138+
```xml
139+
<dependency>
140+
<groupId>org.apache.sedona</groupId>
141+
<artifactId>sedona-spark-shaded-4.1_2.13</artifactId>
142+
<version>{{ sedona.current_version }}</version>
143+
</dependency>
144+
<!-- Optional: https://mvnrepository.com/artifact/org.datasyslab/geotools-wrapper -->
145+
<dependency>
146+
<groupId>org.datasyslab</groupId>
147+
<artifactId>geotools-wrapper</artifactId>
148+
<version>{{ sedona.current_geotools }}</version>
149+
</dependency>
150+
```
151+
136152
!!! abstract "Sedona with Apache Flink"
137153

138154
=== "Flink 1.12+ and Scala 2.12"
@@ -265,6 +281,19 @@ The optional GeoTools library is required if you want to use raster operators. V
265281
<version>{{ sedona.current_geotools }}</version>
266282
</dependency>
267283
```
284+
=== "Spark 4.1 and Scala 2.13"
285+
```xml
286+
<dependency>
287+
<groupId>org.apache.sedona</groupId>
288+
<artifactId>sedona-spark-4.1_2.13</artifactId>
289+
<version>{{ sedona.current_version }}</version>
290+
</dependency>
291+
<dependency>
292+
<groupId>org.datasyslab</groupId>
293+
<artifactId>geotools-wrapper</artifactId>
294+
<version>{{ sedona.current_geotools }}</version>
295+
</dependency>
296+
```
268297

269298
!!! abstract "Sedona with Apache Flink"
270299

docs/setup/platform.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,28 @@ Sedona binary releases are compiled by Java 11/17 and Scala 2.12/2.13 and tested
2222
**Java Requirements:**
2323

2424
- Spark 3.4 & 3.5: Java 11
25-
- Spark 4.0: Java 17
25+
- Spark 4.0 & 4.1: Java 17
2626

2727
**Note:** Java 8 support is dropped since Sedona 1.8.0. Spark 3.3 support is dropped since Sedona 1.8.0.
2828

2929
=== "Sedona Scala/Java"
3030

31-
| | Spark 3.4| Spark 3.5 | Spark 4.0 |
32-
|:---------:|:---------:|:---------:|:---------:|
33-
| Scala 2.12 |✅ |✅ |✅ |
34-
| Scala 2.13 |✅ |✅ |✅ |
31+
| | Spark 3.4| Spark 3.5 | Spark 4.0 | Spark 4.1 |
32+
|:---------:|:---------:|:---------:|:---------:|:---------:|
33+
| Scala 2.12 |✅ |✅ |✅ | |
34+
| Scala 2.13 |✅ |✅ |✅ |✅ |
3535

3636
=== "Sedona Python"
3737

38-
| | Spark 3.4 (Scala 2.12)|Spark 3.5 (Scala 2.12)| Spark 4.0 (Scala 2.12)|
39-
|:---------:|:---------:|:---------:|:---------:|
40-
| Python 3.7 | ✅ | ✅ | |
41-
| Python 3.8 | ✅ | ✅ | |
42-
| Python 3.9 | ✅ | ✅ | ✅ |
43-
| Python 3.10 | ✅ | ✅ | ✅ |
38+
| | Spark 3.4 (Scala 2.12)|Spark 3.5 (Scala 2.12)| Spark 4.0 (Scala 2.13)| Spark 4.1 (Scala 2.13)|
39+
|:---------:|:---------:|:---------:|:---------:|:---------:|
40+
| Python 3.7 | ✅ | ✅ | | |
41+
| Python 3.8 | ✅ | ✅ | | |
42+
| Python 3.9 | ✅ | ✅ | ✅ | ✅ |
43+
| Python 3.10 | ✅ | ✅ | ✅ | ✅ |
4444

4545
=== "Sedona R"
4646

47-
| | Spark 3.4 | Spark 3.5 | Spark 4.0 |
48-
|:---------:|:---------:|:---------:|:---------:|
49-
| Scala 2.12 | ✅ | ✅ | ✅ |
47+
| | Spark 3.4 | Spark 3.5 | Spark 4.0 | Spark 4.1 |
48+
|:---------:|:---------:|:---------:|:---------:|:---------:|
49+
| Scala 2.12 | ✅ | ✅ | ✅ | |

pom.xml

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,30 @@
758758
<log4j.version>2.24.3</log4j.version>
759759
<slf4j.version>2.0.16</slf4j.version>
760760

761-
<scala.version>2.13.12</scala.version>
761+
<scala.version>2.13.17</scala.version>
762+
<scala.compat.version>2.13</scala.compat.version>
763+
764+
<!-- Skip deploying the parent module. It will be deployed with sedona-spark-3.4 -->
765+
<skip.deploy.common.modules>true</skip.deploy.common.modules>
766+
</properties>
767+
</profile>
768+
<profile>
769+
<id>sedona-spark-4.1</id>
770+
<activation>
771+
<property>
772+
<name>spark</name>
773+
<value>4.1</value>
774+
</property>
775+
</activation>
776+
<properties>
777+
<spark.version>4.1.1</spark.version>
778+
<spark.compat.version>4.1</spark.compat.version>
779+
<spark.major.version>4</spark.major.version>
780+
<hadoop.version>3.4.1</hadoop.version>
781+
<log4j.version>2.24.3</log4j.version>
782+
<slf4j.version>2.0.16</slf4j.version>
783+
784+
<scala.version>2.13.17</scala.version>
762785
<scala.compat.version>2.13</scala.compat.version>
763786

764787
<!-- Skip deploying parent module. it will be deployed with sedona-spark-3.4 -->
@@ -775,7 +798,7 @@
775798
<activeByDefault>false</activeByDefault>
776799
</activation>
777800
<properties>
778-
<scala.version>2.13.12</scala.version>
801+
<scala.version>2.13.17</scala.version>
779802
<scala.compat.version>2.13</scala.compat.version>
780803
<scaladoc.arg>-no-java-comments</scaladoc.arg>
781804
<!-- Skip deploying parent module for Scala 2.13 profile, it will be deployed with 2.12 -->

python/pyproject.toml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,17 @@ dependencies = [
3737
]
3838

3939
[project.optional-dependencies]
40-
spark = ["pyspark>=3.4.0,<4.1.0"]
40+
spark = [
41+
"pyspark>=3.4.0,<4.1.0; python_version < '3.10'",
42+
"pyspark>=3.4.0,<4.2.0; python_version >= '3.10'",
43+
]
4144
pydeck-map = ["geopandas", "pydeck==0.8.0"]
4245
kepler-map = ["geopandas", "keplergl==0.3.2"]
4346
flink = ["apache-flink>=1.19.0"]
4447
db = ["sedonadb[geopandas]; python_version >= '3.9'"]
4548
all = [
46-
"pyspark>=3.4.0,<4.1.0",
49+
"pyspark>=3.4.0,<4.1.0; python_version < '3.10'",
50+
"pyspark>=3.4.0,<4.2.0; python_version >= '3.10'",
4751
"geopandas",
4852
"pydeck==0.8.0",
4953
"keplergl==0.3.2",
@@ -71,7 +75,8 @@ dev = [
7175
# cannot set geopandas>=0.14.4 since it doesn't support python 3.8, so we pin fiona to <1.10.0
7276
"fiona<1.10.0",
7377
"pyarrow",
74-
"pyspark>=3.4.0,<4.1.0",
78+
"pyspark>=3.4.0,<4.1.0; python_version < '3.10'",
79+
"pyspark>=3.4.0,<4.2.0; python_version >= '3.10'",
7580
"keplergl==0.3.2",
7681
"pydeck==0.8.0",
7782
"pystac==1.5.0",

python/tests/sql/test_dataframe_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1790,7 +1790,7 @@ def test_dataframe_function(
17901790
elif isinstance(actual_result, Geography):
17911791
# self.assert_geometry_almost_equal(expected_result, actual_result.geometry)
17921792
return
1793-
elif isinstance(actual_result, bytearray):
1793+
elif isinstance(actual_result, (bytes, bytearray)):
17941794
actual_result = actual_result.hex()
17951795
elif isinstance(actual_result, Row):
17961796
actual_result = {

spark/common/pom.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,5 +355,22 @@
355355
</dependency>
356356
</dependencies>
357357
</profile>
358+
<profile>
359+
<id>sedona-spark-4.1</id>
360+
<activation>
361+
<property>
362+
<name>spark</name>
363+
<value>4.1</value>
364+
</property>
365+
</activation>
366+
<dependencies>
367+
<dependency>
368+
<groupId>org.apache.spark</groupId>
369+
<artifactId>spark-sql-api_${scala.compat.version}</artifactId>
370+
<version>${spark.version}</version>
371+
<scope>provided</scope>
372+
</dependency>
373+
</dependencies>
374+
</profile>
358375
</profiles>
359376
</project>

0 commit comments

Comments (0)