Skip to content

Commit e681aa7

Browse files
authored
[CI] Add pre-commit hook to trim trailing whitespace (#79)
1 parent 9dc2391 commit e681aa7

10 files changed

Lines changed: 376 additions & 372 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 43 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -108,24 +108,24 @@ jobs:
108108
runs-on: ubuntu-latest
109109
steps:
110110
- uses: actions/checkout@v4
111-
111+
112112
- name: Cache benchmark data
113113
id: cache-data
114114
uses: actions/cache@v4
115115
with:
116116
path: benchmark-data-sf${{ env.SCALE_FACTOR }}
117117
key: benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
118-
118+
119119
- name: Setup Python
120120
if: steps.cache-data.outputs.cache-hit != 'true'
121121
uses: actions/setup-python@v5
122122
with:
123123
python-version: '3.11'
124-
124+
125125
- name: Install huggingface-hub
126126
if: steps.cache-data.outputs.cache-hit != 'true'
127127
run: pip install huggingface-hub
128-
128+
129129
- name: Download benchmark data from Hugging Face
130130
if: steps.cache-data.outputs.cache-hit != 'true'
131131
run: |
@@ -136,36 +136,36 @@ jobs:
136136
else
137137
HF_SF="sf${SF}"
138138
fi
139-
139+
140140
echo "Downloading data from HF: ${{ env.HF_DATASET }}/${{ env.HF_DATA_VERSION }}/${HF_SF}"
141-
141+
142142
python -c "
143143
from huggingface_hub import snapshot_download
144144
import os
145-
145+
146146
sf = os.environ['SCALE_FACTOR']
147147
hf_sf = 'sf0.1' if sf == '0.1' else f'sf{sf}'
148-
148+
149149
snapshot_download(
150150
repo_id='${{ env.HF_DATASET }}',
151151
repo_type='dataset',
152152
local_dir='hf-data',
153153
allow_patterns=[f'${{ env.HF_DATA_VERSION }}/{hf_sf}/**'],
154154
)
155155
"
156-
156+
157157
# Move data to expected location
158158
mkdir -p benchmark-data-sf${{ env.SCALE_FACTOR }}
159-
159+
160160
SF="${{ env.SCALE_FACTOR }}"
161161
if [ "$SF" = "0.1" ]; then
162162
HF_SF="sf0.1"
163163
else
164164
HF_SF="sf${SF}"
165165
fi
166-
166+
167167
cp -r hf-data/${{ env.HF_DATA_VERSION }}/${HF_SF}/* benchmark-data-sf${{ env.SCALE_FACTOR }}/
168-
168+
169169
echo "Downloaded data structure:"
170170
find benchmark-data-sf${{ env.SCALE_FACTOR }} -type f -name "*.parquet" | head -20
171171
echo ""
@@ -174,7 +174,7 @@ jobs:
174174
echo ""
175175
echo "Total size:"
176176
du -sh benchmark-data-sf${{ env.SCALE_FACTOR }}/
177-
177+
178178
- name: Show cached data info
179179
if: steps.cache-data.outputs.cache-hit == 'true'
180180
run: |
@@ -192,19 +192,19 @@ jobs:
192192
if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'duckdb')
193193
steps:
194194
- uses: actions/checkout@v4
195-
195+
196196
- name: Restore benchmark data from cache
197197
uses: actions/cache/restore@v4
198198
with:
199199
path: benchmark-data-sf${{ env.SCALE_FACTOR }}
200200
key: benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
201201
fail-on-cache-miss: true
202-
202+
203203
- name: Setup Python
204204
uses: actions/setup-python@v5
205205
with:
206206
python-version: '3.11'
207-
207+
208208
- name: Install dependencies
209209
run: |
210210
echo "=== DuckDB Installation Parameters ==="
@@ -221,12 +221,12 @@ jobs:
221221
pip install duckdb pyarrow pandas
222222
fi
223223
echo "Installed DuckDB version: $(python -c 'import duckdb; print(duckdb.__version__)')"
224-
224+
225225
- name: Pre-install DuckDB spatial extension
226226
run: |
227227
# Dev builds don't have spatial extension in core_nightly, so always use default repo
228228
python -c "import duckdb; con = duckdb.connect(); con.execute('INSTALL spatial'); print('DuckDB spatial extension installed')"
229-
229+
230230
- name: Run DuckDB benchmark
231231
run: |
232232
python benchmark/run_benchmark.py \
@@ -236,7 +236,7 @@ jobs:
236236
--runs ${{ env.BENCHMARK_RUNS }} \
237237
--scale-factor ${{ env.SCALE_FACTOR }} \
238238
--output duckdb_results.json
239-
239+
240240
- name: Upload results
241241
uses: actions/upload-artifact@v4
242242
with:
@@ -251,19 +251,19 @@ jobs:
251251
if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'geopandas')
252252
steps:
253253
- uses: actions/checkout@v4
254-
254+
255255
- name: Restore benchmark data from cache
256256
uses: actions/cache/restore@v4
257257
with:
258258
path: benchmark-data-sf${{ env.SCALE_FACTOR }}
259259
key: benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
260260
fail-on-cache-miss: true
261-
261+
262262
- name: Setup Python
263263
uses: actions/setup-python@v5
264264
with:
265265
python-version: '3.11'
266-
266+
267267
- name: Install dependencies
268268
run: |
269269
if [ -n "${{ env.GEOPANDAS_VERSION }}" ]; then
@@ -272,7 +272,7 @@ jobs:
272272
pip install geopandas pandas pyarrow shapely
273273
fi
274274
echo "Installed GeoPandas version: $(python -c 'from importlib.metadata import version; print(version("geopandas"))')"
275-
275+
276276
- name: Run GeoPandas benchmark
277277
run: |
278278
python benchmark/run_benchmark.py \
@@ -282,7 +282,7 @@ jobs:
282282
--runs ${{ env.BENCHMARK_RUNS }} \
283283
--scale-factor ${{ env.SCALE_FACTOR }} \
284284
--output geopandas_results.json
285-
285+
286286
- name: Upload results
287287
uses: actions/upload-artifact@v4
288288
with:
@@ -297,19 +297,19 @@ jobs:
297297
if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'sedonadb')
298298
steps:
299299
- uses: actions/checkout@v4
300-
300+
301301
- name: Restore benchmark data from cache
302302
uses: actions/cache/restore@v4
303303
with:
304304
path: benchmark-data-sf${{ env.SCALE_FACTOR }}
305305
key: benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
306306
fail-on-cache-miss: true
307-
307+
308308
- name: Setup Python
309309
uses: actions/setup-python@v5
310310
with:
311311
python-version: '3.11'
312-
312+
313313
- name: Install dependencies
314314
run: |
315315
echo "=== SedonaDB Installation Parameters ==="
@@ -328,7 +328,7 @@ jobs:
328328
pip install "sedonadb[geopandas]" pandas pyarrow pyproj
329329
fi
330330
echo "Installed SedonaDB version: $(python -c 'from importlib.metadata import version; print(version("sedonadb"))')"
331-
331+
332332
- name: Run SedonaDB benchmark
333333
run: |
334334
python benchmark/run_benchmark.py \
@@ -338,7 +338,7 @@ jobs:
338338
--runs ${{ env.BENCHMARK_RUNS }} \
339339
--scale-factor ${{ env.SCALE_FACTOR }} \
340340
--output sedonadb_results.json
341-
341+
342342
- name: Upload results
343343
uses: actions/upload-artifact@v4
344344
with:
@@ -353,19 +353,19 @@ jobs:
353353
if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb,spatial_polars', 'spatial_polars')
354354
steps:
355355
- uses: actions/checkout@v4
356-
356+
357357
- name: Restore benchmark data from cache
358358
uses: actions/cache/restore@v4
359359
with:
360360
path: benchmark-data-sf${{ env.SCALE_FACTOR }}
361361
key: benchmark-data-${{ env.HF_DATA_VERSION }}-sf${{ env.SCALE_FACTOR }}
362362
fail-on-cache-miss: true
363-
363+
364364
- name: Setup Python
365365
uses: actions/setup-python@v5
366366
with:
367367
python-version: '3.11'
368-
368+
369369
- name: Install dependencies
370370
run: |
371371
if [ -n "${{ env.SPATIAL_POLARS_VERSION }}" ]; then
@@ -374,7 +374,7 @@ jobs:
374374
pip install "spatial-polars[knn]" pyarrow
375375
fi
376376
echo "Installed Spatial Polars version: $(python -c 'from importlib.metadata import version; print(version("spatial-polars"))')"
377-
377+
378378
- name: Run Spatial Polars benchmark
379379
run: |
380380
python benchmark/run_benchmark.py \
@@ -384,7 +384,7 @@ jobs:
384384
--runs ${{ env.BENCHMARK_RUNS }} \
385385
--scale-factor ${{ env.SCALE_FACTOR }} \
386386
--output spatial_polars_results.json
387-
387+
388388
- name: Upload results
389389
uses: actions/upload-artifact@v4
390390
with:
@@ -399,58 +399,58 @@ jobs:
399399
runs-on: ubuntu-latest
400400
steps:
401401
- uses: actions/checkout@v4
402-
402+
403403
- name: Download DuckDB results
404404
if: needs.benchmark-duckdb.result == 'success'
405405
uses: actions/download-artifact@v4
406406
with:
407407
name: duckdb-results-sf${{ env.SCALE_FACTOR }}
408408
path: results
409409
continue-on-error: true
410-
410+
411411
- name: Download GeoPandas results
412412
if: needs.benchmark-geopandas.result == 'success'
413413
uses: actions/download-artifact@v4
414414
with:
415415
name: geopandas-results-sf${{ env.SCALE_FACTOR }}
416416
path: results
417417
continue-on-error: true
418-
418+
419419
- name: Download SedonaDB results
420420
if: needs.benchmark-sedonadb.result == 'success'
421421
uses: actions/download-artifact@v4
422422
with:
423423
name: sedonadb-results-sf${{ env.SCALE_FACTOR }}
424424
path: results
425425
continue-on-error: true
426-
426+
427427
- name: Download Spatial Polars results
428428
if: needs.benchmark-spatial-polars.result == 'success'
429429
uses: actions/download-artifact@v4
430430
with:
431431
name: spatial_polars-results-sf${{ env.SCALE_FACTOR }}
432432
path: results
433433
continue-on-error: true
434-
434+
435435
- name: Setup Python
436436
uses: actions/setup-python@v5
437437
with:
438438
python-version: '3.11'
439-
439+
440440
- name: Generate summary
441441
run: |
442442
python benchmark/summarize_results.py \
443443
--results-dir results \
444444
--timeout ${{ env.QUERY_TIMEOUT }} \
445445
--runs ${{ env.BENCHMARK_RUNS }} \
446446
--output benchmark_summary.md
447-
447+
448448
- name: Display summary
449449
run: cat benchmark_summary.md
450-
450+
451451
- name: Add summary to job output
452452
run: cat benchmark_summary.md >> $GITHUB_STEP_SUMMARY
453-
453+
454454
- name: Upload combined results
455455
uses: actions/upload-artifact@v4
456456
with:

.pre-commit-config.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,3 +89,7 @@ repos:
8989
- id: forbid-submodules
9090
name: run forbid-submodules
9191
description: forbids any submodules in the repository
92+
- id: trailing-whitespace
93+
name: run trailing-whitespace
94+
description: trims trailing whitespace
95+
args: [--markdown-linebreak-ext=md]

ARCHITECTURE.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ has many more dependencies.
2121

2222
## Performance
2323

24-
Speed is a very important aspect of this project, and care has been taken to keep
24+
Speed is a very important aspect of this project, and care has been taken to keep
2525
the code as fast as possible, using some of the following techniques:
2626
1. Avoiding heap allocations during data generation
2727
2. Integer arithmetic and display instead of floating point arithmetic and display

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ writing "idiomatic" pandas code. We would be interested in hearing feedback on t
2929
hand-optimized" version of the queries.
3030

3131
[Spatial Polars](https://atl2001.github.io/spatial_polars), like Geopandas, is not SQL-based. It uses shapely to extend
32-
polars, enabling it to work with geospatial data similar to how Geopandas extends pandas. It is much newer and nowhere
32+
polars, enabling it to work with geospatial data similar to how Geopandas extends pandas. It is much newer and nowhere
3333
near as popular/tested as Geopandas, but is capable of computing all of the spatial bench queries, and has been included.
3434

3535
We welcome contributions and civil discussions on how to improve the queries and their implementations.

dev/release/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ git push upstream sedona-spatialbench-0.1.0-rc1
9191

9292
### Signing Commands
9393

94-
Now the assets need to be signed with signatures.
94+
Now the assets need to be signed with signatures.
9595

9696
**GPG Signing:**
9797

docs/contributors-guide.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ Your first step is to create a personal copy of the repository and connect it to
6767
origin https://github.com/YourUsername/sedona-spatialbench.git (push)
6868
upstream https://github.com/apache/sedona-spatialbench.git (fetch)
6969
upstream https://github.com/apache/sedona-spatialbench.git (push)
70-
```
70+
```
7171
7272
## Development Setup
7373

spatialbench-arrow/README.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,18 +24,18 @@ This crate generates Spatial Bench data directly into [Apache Arrow] format usin
2424
[Apache Arrow]: https://arrow.apache.org/
2525
[arrow]: https://crates.io/crates/arrow
2626

27-
# Example usage:
27+
# Example usage:
2828

2929
See [docs.rs page](https://docs.rs/tpchgen-arrow/latest/tpchgen_arrow/)
3030

3131
# Testing:
3232
This crate ensures correct results using two methods.
3333

3434
1. Basic functional tests are in Rust doc tests in the source code (`cargo test --doc`)
35-
2. The `reparse` integration test ensures that the Arrow generators
36-
produce the same results as parsing the original `tbl` format (`cargo test --test reparse`)
35+
2. The `reparse` integration test ensures that the Arrow generators
36+
produce the same results as parsing the original `tbl` format (`cargo test --test reparse`)
3737

38-
# Contributing:
38+
# Contributing:
3939

4040
Please see [CONTRIBUTING.md] for more information on how to contribute to this project.
4141

spatialbench-queries/spatial_polars.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
import spatial_polars # NOQA:F401 needed to add spatial namespace to polars dataframes
2323

24-
# for Q12 Spatial polars uses scipy's KDtree for KNN joins.
24+
# for Q12 Spatial polars uses scipy's KDtree for KNN joins.
2525
# Scipy must be installed for this to work.
2626
# `pip install spatial-polars[knn]`
2727
# which is essentially the same as

0 commit comments

Comments
 (0)