@@ -108,24 +108,24 @@ jobs:
108108 runs-on : ubuntu-latest
109109 steps :
110110 - uses : actions/checkout@v4
111-
111+
112112 - name : Cache benchmark data
113113 id : cache-data
114114 uses : actions/cache@v4
115115 with :
116116 path : benchmark-data-sf${{ env.SCALE_FACTOR }}
117117 key : benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
118-
118+
119119 - name : Setup Python
120120 if : steps.cache-data.outputs.cache-hit != 'true'
121121 uses : actions/setup-python@v5
122122 with :
123123 python-version : ' 3.11'
124-
124+
125125 - name : Install huggingface-hub
126126 if : steps.cache-data.outputs.cache-hit != 'true'
127127 run : pip install huggingface-hub
128-
128+
129129 - name : Download benchmark data from Hugging Face
130130 if : steps.cache-data.outputs.cache-hit != 'true'
131131 run : |
@@ -136,36 +136,36 @@ jobs:
136136 else
137137 HF_SF="sf${SF}"
138138 fi
139-
139+
140140 echo "Downloading data from HF: ${{ env.HF_DATASET }}/${{ env.HF_DATA_VERSION }}/${HF_SF}"
141-
141+
142142 python -c "
143143 from huggingface_hub import snapshot_download
144144 import os
145-
145+
146146 sf = os.environ['SCALE_FACTOR']
147147 hf_sf = 'sf0.1' if sf == '0.1' else f'sf{sf}'
148-
148+
149149 snapshot_download(
150150 repo_id='${{ env.HF_DATASET }}',
151151 repo_type='dataset',
152152 local_dir='hf-data',
153153 allow_patterns=[f'${{ env.HF_DATA_VERSION }}/{hf_sf}/**'],
154154 )
155155 "
156-
156+
157157 # Move data to expected location
158158 mkdir -p benchmark-data-sf${{ env.SCALE_FACTOR }}
159-
159+
160160 SF="${{ env.SCALE_FACTOR }}"
161161 if [ "$SF" = "0.1" ]; then
162162 HF_SF="sf0.1"
163163 else
164164 HF_SF="sf${SF}"
165165 fi
166-
166+
167167 cp -r hf-data/${{ env.HF_DATA_VERSION }}/${HF_SF}/* benchmark-data-sf${{ env.SCALE_FACTOR }}/
168-
168+
169169 echo "Downloaded data structure:"
170170 find benchmark-data-sf${{ env.SCALE_FACTOR }} -type f -name "*.parquet" | head -20
171171 echo ""
@@ -174,7 +174,7 @@ jobs:
174174 echo ""
175175 echo "Total size:"
176176 du -sh benchmark-data-sf${{ env.SCALE_FACTOR }}/
177-
177+
178178 - name : Show cached data info
179179 if : steps.cache-data.outputs.cache-hit == 'true'
180180 run : |
@@ -192,19 +192,19 @@ jobs:
192192 if : contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'duckdb')
193193 steps :
194194 - uses : actions/checkout@v4
195-
195+
196196 - name : Restore benchmark data from cache
197197 uses : actions/cache/restore@v4
198198 with :
199199 path : benchmark-data-sf${{ env.SCALE_FACTOR }}
200200 key : benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
201201 fail-on-cache-miss : true
202-
202+
203203 - name : Setup Python
204204 uses : actions/setup-python@v5
205205 with :
206206 python-version : ' 3.11'
207-
207+
208208 - name : Install dependencies
209209 run : |
210210 echo "=== DuckDB Installation Parameters ==="
@@ -221,12 +221,12 @@ jobs:
221221 pip install duckdb pyarrow pandas
222222 fi
223223 echo "Installed DuckDB version: $(python -c 'import duckdb; print(duckdb.__version__)')"
224-
224+
225225 - name : Pre-install DuckDB spatial extension
226226 run : |
227227 # Dev builds don't have spatial extension in core_nightly, so always use default repo
228228 python -c "import duckdb; con = duckdb.connect(); con.execute('INSTALL spatial'); print('DuckDB spatial extension installed')"
229-
229+
230230 - name : Run DuckDB benchmark
231231 run : |
232232 python benchmark/run_benchmark.py \
@@ -236,7 +236,7 @@ jobs:
236236 --runs ${{ env.BENCHMARK_RUNS }} \
237237 --scale-factor ${{ env.SCALE_FACTOR }} \
238238 --output duckdb_results.json
239-
239+
240240 - name : Upload results
241241 uses : actions/upload-artifact@v4
242242 with :
@@ -251,19 +251,19 @@ jobs:
251251 if : contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'geopandas')
252252 steps :
253253 - uses : actions/checkout@v4
254-
254+
255255 - name : Restore benchmark data from cache
256256 uses : actions/cache/restore@v4
257257 with :
258258 path : benchmark-data-sf${{ env.SCALE_FACTOR }}
259259 key : benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
260260 fail-on-cache-miss : true
261-
261+
262262 - name : Setup Python
263263 uses : actions/setup-python@v5
264264 with :
265265 python-version : ' 3.11'
266-
266+
267267 - name : Install dependencies
268268 run : |
269269 if [ -n "${{ env.GEOPANDAS_VERSION }}" ]; then
@@ -272,7 +272,7 @@ jobs:
272272 pip install geopandas pandas pyarrow shapely
273273 fi
274274 echo "Installed GeoPandas version: $(python -c 'from importlib.metadata import version; print(version("geopandas"))')"
275-
275+
276276 - name : Run GeoPandas benchmark
277277 run : |
278278 python benchmark/run_benchmark.py \
@@ -282,7 +282,7 @@ jobs:
282282 --runs ${{ env.BENCHMARK_RUNS }} \
283283 --scale-factor ${{ env.SCALE_FACTOR }} \
284284 --output geopandas_results.json
285-
285+
286286 - name : Upload results
287287 uses : actions/upload-artifact@v4
288288 with :
@@ -297,19 +297,19 @@ jobs:
297297 if : contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'sedonadb')
298298 steps :
299299 - uses : actions/checkout@v4
300-
300+
301301 - name : Restore benchmark data from cache
302302 uses : actions/cache/restore@v4
303303 with :
304304 path : benchmark-data-sf${{ env.SCALE_FACTOR }}
305305 key : benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
306306 fail-on-cache-miss : true
307-
307+
308308 - name : Setup Python
309309 uses : actions/setup-python@v5
310310 with :
311311 python-version : ' 3.11'
312-
312+
313313 - name : Install dependencies
314314 run : |
315315 echo "=== SedonaDB Installation Parameters ==="
@@ -328,7 +328,7 @@ jobs:
328328 pip install "sedonadb[geopandas]" pandas pyarrow pyproj
329329 fi
330330 echo "Installed SedonaDB version: $(python -c 'from importlib.metadata import version; print(version("sedonadb"))')"
331-
331+
332332 - name : Run SedonaDB benchmark
333333 run : |
334334 python benchmark/run_benchmark.py \
@@ -338,7 +338,7 @@ jobs:
338338 --runs ${{ env.BENCHMARK_RUNS }} \
339339 --scale-factor ${{ env.SCALE_FACTOR }} \
340340 --output sedonadb_results.json
341-
341+
342342 - name : Upload results
343343 uses : actions/upload-artifact@v4
344344 with :
@@ -353,19 +353,19 @@ jobs:
353353 if : contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'spatial_polars')
354354 steps :
355355 - uses : actions/checkout@v4
356-
356+
357357 - name : Restore benchmark data from cache
358358 uses : actions/cache/restore@v4
359359 with :
360360 path : benchmark-data-sf${{ env.SCALE_FACTOR }}
361361 key : benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
362362 fail-on-cache-miss : true
363-
363+
364364 - name : Setup Python
365365 uses : actions/setup-python@v5
366366 with :
367367 python-version : ' 3.11'
368-
368+
369369 - name : Install dependencies
370370 run : |
371371 if [ -n "${{ env.SPATIAL_POLARS_VERSION }}" ]; then
@@ -374,7 +374,7 @@ jobs:
374374 pip install "spatial-polars[knn]" pyarrow
375375 fi
376376 echo "Installed Spatial Polars version: $(python -c 'from importlib.metadata import version; print(version("spatial-polars"))')"
377-
377+
378378 - name : Run Spatial Polars benchmark
379379 run : |
380380 python benchmark/run_benchmark.py \
@@ -384,7 +384,7 @@ jobs:
384384 --runs ${{ env.BENCHMARK_RUNS }} \
385385 --scale-factor ${{ env.SCALE_FACTOR }} \
386386 --output spatial_polars_results.json
387-
387+
388388 - name : Upload results
389389 uses : actions/upload-artifact@v4
390390 with :
@@ -399,58 +399,58 @@ jobs:
399399 runs-on : ubuntu-latest
400400 steps :
401401 - uses : actions/checkout@v4
402-
402+
403403 - name : Download DuckDB results
404404 if : needs.benchmark-duckdb.result == 'success'
405405 uses : actions/download-artifact@v4
406406 with :
407407 name : duckdb-results-sf${{ env.SCALE_FACTOR }}
408408 path : results
409409 continue-on-error : true
410-
410+
411411 - name : Download GeoPandas results
412412 if : needs.benchmark-geopandas.result == 'success'
413413 uses : actions/download-artifact@v4
414414 with :
415415 name : geopandas-results-sf${{ env.SCALE_FACTOR }}
416416 path : results
417417 continue-on-error : true
418-
418+
419419 - name : Download SedonaDB results
420420 if : needs.benchmark-sedonadb.result == 'success'
421421 uses : actions/download-artifact@v4
422422 with :
423423 name : sedonadb-results-sf${{ env.SCALE_FACTOR }}
424424 path : results
425425 continue-on-error : true
426-
426+
427427 - name : Download Spatial Polars results
428428 if : needs.benchmark-spatial-polars.result == 'success'
429429 uses : actions/download-artifact@v4
430430 with :
431431 name : spatial_polars-results-sf${{ env.SCALE_FACTOR }}
432432 path : results
433433 continue-on-error : true
434-
434+
435435 - name : Setup Python
436436 uses : actions/setup-python@v5
437437 with :
438438 python-version : ' 3.11'
439-
439+
440440 - name : Generate summary
441441 run : |
442442 python benchmark/summarize_results.py \
443443 --results-dir results \
444444 --timeout ${{ env.QUERY_TIMEOUT }} \
445445 --runs ${{ env.BENCHMARK_RUNS }} \
446446 --output benchmark_summary.md
447-
447+
448448 - name : Display summary
449449 run : cat benchmark_summary.md
450-
450+
451451 - name : Add summary to job output
452452 run : cat benchmark_summary.md >> $GITHUB_STEP_SUMMARY
453-
453+
454454 - name : Upload combined results
455455 uses : actions/upload-artifact@v4
456456 with :
0 commit comments