diff --git a/.github/workflows/asv-benchmarks.yml b/.github/workflows/asv-benchmarks.yml new file mode 100644 index 00000000..294dbff4 --- /dev/null +++ b/.github/workflows/asv-benchmarks.yml @@ -0,0 +1,48 @@ +name: ASV Benchmarks + +on: + workflow_dispatch: + +jobs: + benchmark: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: conda-incubator/setup-miniconda@v3 + with: + activate-environment: asv + python-version: "3.12" + channels: conda-forge + auto-activate-base: false + + - name: Install ASV + shell: bash -l {0} + run: | + conda install -y -c conda-forge asv + + - name: Configure ASV machine + shell: bash -l {0} + run: | + asv machine --yes + + - name: Run ASV benchmarks + shell: bash -l {0} + run: | + asv run + + - name: Publish ASV report + shell: bash -l {0} + run: | + asv publish + + - name: Upload ASV results + uses: actions/upload-artifact@v4 + with: + name: asv-results + path: | + .asv/results/ + .asv/html/ diff --git a/.gitignore b/.gitignore index bcdc980b..1cec6e51 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,8 @@ htmlcov/ .coverage .coverage.* .cache +.asv/* +.asv-openmp/* nosetests.xml coverage.xml *.cover diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 00000000..289cc426 --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,30 @@ +{ + "version": 1, + "project": "GSTools", + "project_url": "https://github.com/jeilealr/GSTools", + "repo": ".", + "branches": ["main"], + "benchmark_dir": "benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html", + "show_commit_url": "https://github.com/jeilealr/GSTools/commit/", + "environment_type": "conda", + "conda_channels": ["conda-forge"], + "pythons": ["3.12"], + "matrix": { + "req": { + "emcee": [""], + "hankel": [""], + "meshio": [""], + "numpy": [""], + "pyevtk": [""], + "scipy": [""], + "gstools-cython": [""] + } + }, + "install_command": [ + "in-dir={env_dir} python -m pip install 
gstools_core>=1.0.0", + "in-dir={env_dir} python -m pip install --no-deps {build_dir}" + ] +} diff --git a/asv.macos-openmp.conf.json b/asv.macos-openmp.conf.json new file mode 100644 index 00000000..9df8067a --- /dev/null +++ b/asv.macos-openmp.conf.json @@ -0,0 +1,36 @@ +{ + "version": 1, + "project": "GSTools", + "project_url": "https://github.com/jeilealr/GSTools", + "repo": ".", + "branches": ["main"], + "benchmark_dir": "benchmarks", + "env_dir": ".asv-openmp/env", + "results_dir": ".asv-openmp/results", + "html_dir": ".asv-openmp/html", + "show_commit_url": "https://github.com/jeilealr/GSTools/commit/", + "environment_type": "conda", + "conda_channels": ["conda-forge"], + "pythons": ["3.12"], + "matrix": { + "req": { + "cython": [""], + "emcee": [""], + "extension-helpers": [""], + "hankel": [""], + "llvm-openmp": [""], + "meshio": [""], + "numpy": [""], + "pyevtk": [""], + "scipy": [""], + "setuptools": [""], + "wheel": [""] + } + }, + "install_command": [ + "in-dir={env_dir} python -m pip install gstools_core>=1.0.0", + "in-dir={env_dir} python {conf_dir}/benchmarks/tools/install_macos_openmp_cython.py {env_dir}", + "in-dir={env_dir} python {conf_dir}/benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp", + "in-dir={env_dir} python -m pip install --no-deps {build_dir}" + ] +} diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..b4df15b6 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,765 @@ +# GSTools Benchmark Guide + +This directory contains the Airspeed Velocity ([ASV](https://github.com/airspeed-velocity/asv/)) benchmark suite for GSTools and a complementary profiling helper implemented with cProfile (part of the Python standard library). + +This guide benchmarks GSTools, inspects the +results, profiles where runtime is spent, and then decides what to optimize. + +Unit tests in `tests/` answer "is the code correct?". 
The ASV benchmarks in +`benchmarks/` answer "how fast is this workflow, how much memory does it use, +and did that change across commits?". The complementary cProfile helper +answers "inside this workflow, which Python functions are taking most of the +time right now?". + +The benchmarks compare two GSTools backends, which gives more context for +deciding where optimization work should go: + +- `cython_fallback`: the default Cython-backed fallback implementation from + [gstools-cython](https://github.com/GeoStat-Framework/GSTools-Cython). +- `rust_core`: the Rust-backed implementation from + [gstools_core](https://github.com/GeoStat-Framework/GSTools-Core). + +## Index + +- [Setup](#setup) +- [Benchmarking Scripts](#benchmarking-scripts) + - [ASV Configuration](#asv-configuration) + - [Benchmark Naming](#benchmark-naming) +- [Benchmark Coverage](#benchmark-coverage) + - [Shared Constants](#shared-constants) + - [Shared Helpers](#shared-helpers) + - [Benchmark Classes](#benchmark-classes) + - [VariogramWorkflowBenchmarks](#variogramworkflowbenchmarks) + - [KrigingWorkflowBenchmarks](#krigingworkflowbenchmarks) + - [RandomFieldWorkflowBenchmarks](#randomfieldworkflowbenchmarks) +- [Running The Benchmarks](#running-the-benchmarks) + - [Baseline Benchmark](#baseline-benchmark) + - [Current Commit Baseline](#current-commit-baseline) + - [Several Commits Baseline](#several-commits-baseline) + - [Summary of Results](#summary-of-results) + - [Visualization of Results](#visualization-of-results) + - [Profiling With cProfile](#profiling-with-cprofile) +- [Optional Parallelisation with OpenMP](#optional-parallelisation-with-openmp) + - [Shared OpenMP Rule](#shared-openmp-rule) + - [macOS Example](#macos-example) + - [What The macOS OpenMP Config Does](#what-the-macos-openmp-config-does) + - [Run On macOS](#run-on-macos) + - [Interpreting The macOS OpenMP Run](#interpreting-the-macos-openmp-run) + - [Windows Example](#windows-example) + - [Linux Example](#linux-example) + - 
[HPC Example](#hpc-example) + - [Profiling With cProfile for Multiple Threads](#profiling-with-cprofile-for-multiple-threads) +- [More ASV Commands](#more-asv-commands) +- [External Reference](#external-reference) + +## Setup + +The regular installation commands in the main `README.md` install GSTools for +normal use. This benchmark guide uses conda because ASV creates isolated +benchmark environments for the commits it measures. + +The default benchmark configuration intentionally compares both backends with +one GSTools thread: + +```text +gstools.config.NUM_THREADS = 1 +``` + +That keeps the first comparison simple: Cython fallback vs Rust core without +parallelism as a confounding factor. Parallel/OpenMP scaling is treated as a +separate optional experiment because the correct Cython OpenMP build depends on +the user's operating system, compiler, and runtime environment. + +To run the benchmark and the optional cProfile helper, follow these steps: + +1. Move to the GSTools repository root: + +```bash +cd /path/to/GSTools +``` + +2. Create and activate a conda environment for local benchmark work: + +```bash +conda create -n gstools-benchmark -c conda-forge python=3.12 asv packaging +conda activate gstools-benchmark +``` + +If you already have a suitable conda environment, activate that instead. + +3. If you use an existing environment, make sure ASV is installed: + +```bash +conda install -c conda-forge asv +``` + +4. Create a machine profile once per computer: + +```bash +asv machine --yes +``` + +The machine profile records local hardware information so ASV can label results correctly. Do not compare absolute times across different machines. + +## Benchmarking Scripts + +The benchmarking setup currently consists of: + +- `asv.conf.json`: tells ASV how to build GSTools, where benchmarks live, where + to store results, and which Python/environment matrix to use. 
+- `asv.macos-openmp.conf.json`: optional macOS-specific ASV configuration that + builds `gstools-cython` from source with OpenMP inside ASV's own environment. +- `benchmarks/benchmark_backends.py`: contains the ASV benchmark classes. +- `benchmarks/README.md`: this practical guide. +- `benchmarks/tools/asv_speedup_summary.py`: reads `.asv/results/` and prints + Rust-vs-Cython speedup ratios. +- `benchmarks/tools/profile_benchmark_workflows.py`: runs one representative + workflow from `benchmark_backends.py` under Python's built-in `cProfile`, so + you can see which functions take time in the current checkout. +- `benchmarks/tools/check_cython_openmp.py`: optional helper for checking + whether the active Python environment's GSTools-Cython extensions detect + OpenMP parallel support. +- `benchmarks/tools/install_macos_openmp_cython.py`: helper used only by + `asv.macos-openmp.conf.json` to compile `gstools-cython` with `llvm-openmp` + on macOS. + + +### ASV Configuration + +The repo root `asv.conf.json` is tailored to this GSTools checkout: + +```json +{ + "repo": ".", + "branches": ["main"], + "benchmark_dir": "benchmarks", + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html", + "environment_type": "conda", + "pythons": ["3.12"], + "matrix": { + "req": { + "emcee": [""], + "hankel": [""], + "meshio": [""], + "numpy": [""], + "pyevtk": [""], + "scipy": [""], + "gstools-cython": [""] + } + }, + "install_command": [ + "in-dir={env_dir} python -m pip install gstools_core>=1.0.0", + "in-dir={env_dir} python -m pip install --no-deps {build_dir}" + ] +} +``` + +Important details: + +- `environment_type: "conda"` means conda is required for the ASV workflow in + this guide. ASV creates isolated conda environments for the commits it + benchmarks. +- `pythons: ["3.12"]` means ASV creates Python 3.12 benchmark environments. + Keep this pinned unless you intentionally validate a newer Python/GSTools + backend stack. 
+- `matrix.req` asks ASV to install GSTools runtime dependencies before + installing the checked-out GSTools source. It includes `gstools-cython` + explicitly because the GSTools commit is installed with `--no-deps`. +- `{build_dir}` is ASV's temporary checkout/build directory for the exact + GSTools commit being benchmarked. +- `install_command` installs the checked-out GSTools revision with `--no-deps`. + It also installs `gstools_core` with pip because `gstools-core` is not + available as a conda package in every solver/platform combination. +- ASV still needs its own `install_command` because it creates isolated + environments for the commits it benchmarks. +- Run the cProfile helper with the Python executable from ASV's isolated + environment, for example `.asv/env/ENV_NAME/bin/python` (`ENV_NAME` is the generated environment directory). In that mode, the + ASV environment provides dependencies while the helper imports the current + checkout through the repo `src/` path. + +ASV creates these generated directories: + +```text +.asv/env/ benchmark environments +.asv/results/ local benchmark result JSON files +.asv/html/ generated local benchmark website +``` + +Those directories are machine-specific generated artifacts. They should +normally stay out of git. + +If needed, users can list more than one branch, Python version, benchmark +directory, and so on. For example: + +```json +"branches": ["main", "my-feature-branch"] +``` + +Users can also benchmark any explicit branch, commit, tag, or range without +changing `asv.conf.json`: + +```bash +asv run my-feature-branch^! --bench benchmark_backends +asv run main..my-feature-branch --bench benchmark_backends +``` + +ASV checks out package code at each git commit being benchmarked. Commit source +changes before benchmarking them with ASV. Otherwise ASV may benchmark the last +committed package code rather than your uncommitted source changes. 
+ +### Benchmark Naming + +ASV recognizes benchmark methods by name: + +- methods starting with `time_` measure runtime +- methods starting with `peakmem_` measure peak memory +- `setup_cache()` creates reusable data once per benchmark environment +- `setup()` can skip or prepare individual parameter combinations + +## Benchmark Coverage + +This section describes what is measured by the ASV suite and how the benchmark +labels map to real GSTools workflows. The goal is to cover representative +operations that are relevant for geostatistical work, not isolated +micro-functions. + +The current suite measures runtime and peak memory for variogram estimation, +global kriging, spatial random field generation, and conditioned random field +generation. Each workflow is run with both backends so the results can show +both absolute performance and Rust-vs-Cython differences. + +### Shared Constants + +```python +BACKENDS = ("cython_fallback", "rust_core") +THREAD_COUNTS = _configured_thread_counts() +VARIOGRAM_CASES = ( + "full_900", + "sampled_5000_to_1500", + "sampled_15000_to_4500", +) +KRIGE_CASES = ("small_30x500", "large_120x2000", "extra_large_360x6000") +FIELD_CASES = ( + "srf_unstructured_randmeth", + "srf_structured_randmeth", + "srf_structured_fourier", + "condsrf_unstructured", +) +``` + +These constants define parameter labels shown in ASV results. + +`BACKENDS` compares: + +- `cython_fallback` +- `rust_core` + +`THREAD_COUNTS` defaults to: + +- `threads_1`: force `gstools.config.NUM_THREADS = 1` + +That is the default because the first benchmark target is a clean Cython-vs-Rust +backend comparison without parallelism. + +### Shared Helpers + +`gstools_backend(use_core, num_threads)` temporarily forces GSTools to use +either the Cython fallback backend or the Rust `gstools_core` backend, and +sets `gstools.config.NUM_THREADS` for that benchmark run. + +`_random_points(seed, count, scale)` creates deterministic 2D point clouds. 
+ +`_smooth_field(x, y)` creates deterministic synthetic values: + +```python +np.sin(x / 10.0) + np.cos(y / 15.0) +``` + +`_make_variogram_data(...)` creates positions, field values, and bins for +variogram estimation. + +`_make_krige_data(...)` creates conditioning points, conditioning values, and +target points for kriging and conditioned random fields. + +The fixed random seeds are intentional. They keep benchmark inputs stable so +changes in results are more likely to come from code changes, not new random +data. + +### Benchmark Classes + +The ASV benchmarking is organized around workflow classes. Each workflow class +compares `cython_fallback` and `rust_core`, and each class includes both +runtime and peak-memory methods. + +The suite currently measures: + +- `VariogramWorkflowBenchmarks`: full pairwise work vs sampled large work +- `KrigingWorkflowBenchmarks`: small vs larger global kriging systems +- `RandomFieldWorkflowBenchmarks`: unstructured SRF, structured SRF, Fourier + SRF, and conditioned SRF + +This keeps the ASV suite focused on representative workflows rather than +separate duplicate backend checks. + +#### VariogramWorkflowBenchmarks + +This class measures variogram estimation cases: + +```text +full_900 +sampled_5000_to_1500 +sampled_15000_to_4500 +``` + +The labels mean: + +- `full_900`: create 900 scattered points and use all 900 points for the + variogram calculation. +- `sampled_5000_to_1500`: create 5,000 scattered points, then randomly select + 1,500 of those points for the variogram calculation. +- `sampled_15000_to_4500`: create 15,000 scattered points, then randomly select + 4,500 of those points for the variogram calculation. + +The sampled cases still represent larger input datasets, but the variogram +calculation is done on the randomly selected subset so the pairwise work stays +practical. 
+ +#### KrigingWorkflowBenchmarks + +This class measures global kriging at three scales: + +```text +small_30x500 +large_120x2000 +extra_large_360x6000 +``` + +The labels mean: + +- `small_30x500`: 30 conditioning points, 500 target points +- `large_120x2000`: 120 conditioning points, 2,000 target points +- `extra_large_360x6000`: 360 conditioning points, 6,000 target points + +#### RandomFieldWorkflowBenchmarks + +This class measures SRF and CondSRF generation workflows: + +```text +srf_unstructured_randmeth +srf_structured_randmeth +srf_structured_fourier +condsrf_unstructured +``` + +The cases are: + +- `srf_unstructured_randmeth`: SRF using RandMeth on 2,000 unstructured points +- `srf_structured_randmeth`: SRF using RandMeth on a 64 by 64 structured grid +- `srf_structured_fourier`: SRF using the Fourier generator on a 64 by 64 + structured grid +- `condsrf_unstructured`: conditioned SRF with 40 conditioning points and 1,000 + target points + +## Running The Benchmarks + +### Baseline Benchmark + +The baseline benchmark is the first result set to create before doing any +optimization work. It uses the default ASV configuration, so each workflow is +measured with `threads_1` for both `cython_fallback` and `rust_core`. + +#### Current Commit Baseline + +- Save a baseline for the current commit: + +```bash +asv run HEAD^! 
--bench benchmark_backends +``` + +#### Several Commits Baseline + +As mentioned previously, ASV can also compare several commits. Here we run the last five commits: + +- Run the last five commits on main branch: + +```bash +asv run HEAD~5..HEAD --bench benchmark_backends +``` + +#### Summary of Results + +After running ASV, inspect the explicit Rust-vs-Cython speedup ratios: + +```bash +python benchmarks/tools/asv_speedup_summary.py +``` + +The helper reads `.asv/results/` and reports ratios per case and thread label: + +```text +speedup = cython_fallback_time / rust_core_time +``` + +Interpret the ratio as: + +- `speedup > 1.0` means Rust is faster +- `speedup = 1.0` means similar performance +- `speedup < 1.0` means Rust is slower + +The speedup helper prints the backend ratio explicitly in the terminal. By +default, the helper skips removed legacy duplicate rows from older saved +results. + +#### Visualization of Results + +You can inspect the results in the ASV browser report by building and opening +the local website: + +```bash +asv publish +asv preview +``` + +Then open the printed local URL, for example: + +```text +http://127.0.0.1:8082/#/ +``` +(or any other `http://127.0.0.1:PORT/#/` URL shown by the running preview, where `PORT` is the port it reports). + +The browser report shows ASV plots and trends. ASV plot views do not draw a line/graph when there is only one x-axis point; therefore, running `asv run HEAD^! --bench benchmark_backends` on its own will most likely not show any graphs. + +For the default benchmark run, the `threads` column should show `threads_1`. +If you later run the +[optional OpenMP scaling experiment](#optional-parallelisation-with-openmp), +the same column can be used to compare several threads. + + +### Profiling With cProfile + +`cProfile` does not update the ASV results shown in the browser report. +Instead, it prints a table in the terminal showing which Python +functions consumed time while one workflow ran. 
+ +The helper script is: + +```text +benchmarks/tools/profile_benchmark_workflows.py +``` + +It imports the ASV benchmark classes from `benchmark_backends.py`, selects one +case, forces one backend, and runs that case under `cProfile`. + +Since ASV has already created an isolated Python environment, select that +environment to execute the profiling helper: + +```bash +ASV_ENV="$(ls -td .asv/env/* | head -n 1)" +ASV_PYTHON="$ASV_ENV/bin/python" +``` + +The helper still profiles the current checkout because +`profile_benchmark_workflows.py` adds the repository `src/` directory to +`sys.path`. The ASV environment provides the installed dependencies, including +`gstools-cython` and `gstools_core`. + +List available cases: + +```bash +"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --list +``` + +Possible profile selected cases: + +```bash +"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case variogram-sampled --backend rust_core --threads threads_1 --limit 10 +"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case variogram-extra-large --backend rust_core --threads threads_1 --limit 10 +"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case krige-large --backend rust_core --threads threads_1 --limit 10 +"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case krige-extra-large --backend rust_core --threads threads_1 --limit 10 +"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case condsrf --backend rust_core --threads threads_1 --limit 10 +``` + +## Optional Parallelisation with OpenMP + +This section collects optional workflows for testing Cython and Rust with +several thread counts. OpenMP setup is platform-dependent, so each operating +system should have its own tested instructions. + +The default setup above remains the recommended baseline: one thread, normal +ASV environment, and no extra OpenMP build steps. 
Use this section only when +you explicitly want to measure backend scaling with multiple thread counts. + +### Shared OpenMP Rule + +The benchmark code can be run with several thread labels by setting for example +`GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16`. That only passes different +`gstools.config.NUM_THREADS` values to GSTools. It does not, by itself, make +the Cython backend parallel. + +For Cython OpenMP scaling, the Cython extension must be compiled with OpenMP +support inside the same ASV environment that runs the benchmark. Always verify +that environment before interpreting Cython scaling results: + +```bash +ASV_ENV="$(ls -td .asv-openmp/env/* | head -n 1)" +"$ASV_ENV/bin/python" benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp +``` + +If the check fails, the benchmark may still run, but the Cython backend should +not be interpreted as an OpenMP-enabled Cython run. + +### macOS Example + +This is the currently tested OpenMP workflow. It is separate from the +default setup above. + +The default ASV configuration, `asv.conf.json`, stays conservative: it is the +one-thread baseline and uses the normal conda-forge `gstools-cython` package. +The default `.asv/env/` environment does not provide Cython OpenMP support. That is why this section uses a second ASV configuration: + +```text +asv.macos-openmp.conf.json +``` + +This OpenMP config creates separate generated directories: + +```text +.asv-openmp/env/ +.asv-openmp/results/ +.asv-openmp/html/ +``` + +That keeps the OpenMP experiment separate from the default `.asv/` baseline. 
+ +#### What The macOS OpenMP Config Does + +`asv.macos-openmp.conf.json` asks conda to install the build/runtime pieces +needed for the macOS OpenMP experiment: + +```text +llvm-openmp +cython +extension-helpers +setuptools +wheel +``` + +During ASV installation, it runs: + +```bash +benchmarks/tools/install_macos_openmp_cython.py +``` + +That helper compiles `gstools-cython` from source inside ASV's own environment, +not inside your active conda environment. This matters because ASV benchmarks +the packages installed under `.asv-openmp/env/`. + +Internally, the helper sets (`ENV_DIR` stands for the ASV environment directory passed to the helper): + +```text +GSTOOLS_BUILD_PARALLEL=1 +CC=ENV_DIR/bin/gstools-asv-clang-openmp +CXX=ENV_DIR/bin/gstools-asv-clang-openmp++ +``` + +These compiler wrappers translate the plain `-fopenmp` flag used by the Cython build into +Apple-clang-compatible compiler and linker arguments that use conda's +`llvm-openmp`. + +#### Run On macOS + +In the previous section, the default config gives a quick overview for both +backends with `threads_1`. In this section, the OpenMP config runs several +thread labels: `threads_1`, `threads_2`, `threads_4`, `threads_8`, and +`threads_16`. + +Start from the GSTools repository root: + +```bash +cd /path/to/GSTools +``` + +Create a clean driver environment. This environment only runs ASV; ASV will +create the real benchmark environment under `.asv-openmp/env/`. + +```bash +conda create -n gstools-benchmark -c conda-forge python=3.12 asv +conda activate gstools-benchmark +``` + +Create the ASV machine profile once: + +```bash +asv --config asv.macos-openmp.conf.json machine --yes +``` + +Run a quick current-commit OpenMP check. This builds the OpenMP-enabled +`gstools-cython` package inside `.asv-openmp/env/` and runs the benchmark suite: + +```bash +GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16 \ +asv --config asv.macos-openmp.conf.json run HEAD^! 
--quick --bench benchmark_backends --show-stderr +``` + +Verify that the ASV OpenMP environment really uses Cython OpenMP: + +```bash +ASV_OPENMP_ENV="$(ls -td .asv-openmp/env/* | head -n 1)" +"$ASV_OPENMP_ENV/bin/python" benchmarks/tools/check_cython_openmp.py --verbose +"$ASV_OPENMP_ENV/bin/python" benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp +``` + +Expected result on the tested Mac M2 setup: + +```text +variogram default None -> 10 +field default None -> 10 +krige default None -> 10 +OpenMP check: PASS +``` + +If that check passes, run the last-five-commits OpenMP benchmark: + +```bash +GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16 \ +asv --config asv.macos-openmp.conf.json run HEAD~5..HEAD --bench benchmark_backends --show-stderr +``` + +Print Rust-vs-Cython ratios from the OpenMP result folder: + +```bash +python benchmarks/tools/asv_speedup_summary.py --results-dir .asv-openmp/results +``` + +Build and preview the OpenMP browser report: + +```bash +asv --config asv.macos-openmp.conf.json publish +asv --config asv.macos-openmp.conf.json preview +``` + +#### Interpreting The macOS OpenMP Run + +- Use default `asv.conf.json` for the reproducible one-thread baseline. +- Use `asv.macos-openmp.conf.json` for the macOS OpenMP experiment. +- Only claim Cython OpenMP scaling if `check_cython_openmp.py` passes inside + `.asv-openmp/env/...`. +- The active `gstools-benchmark` conda environment does not need `gstools` + installed. It only needs ASV. The benchmarked GSTools packages live inside + `.asv-openmp/env/...`. + +This workflow is intended for macOS systems that use Apple clang with conda's +`llvm-openmp`. It should be portable across many macOS machines, including +Apple Silicon and Intel Macs, but it is not guaranteed for every macOS setup. 
+ +It is not guaranteed to run without local changes on: + +- older macOS versions +- systems missing Xcode command-line tools +- systems with a nonstandard compiler setup +- HPC or managed macOS environments +- unusual conda installations + +Do not assume this exact OpenMP setup applies to Linux, Windows, or HPC systems. + +### Windows Example + +### Linux Example + +### HPC Example + +### Profiling With cProfile for Multiple Threads + +To profile how a workflow changes across configured thread counts, run the +same cProfile case several times with the OpenMP ASV environment: + +```bash +ASV_OPENMP_ENV="$(ls -td .asv-openmp/env/* | head -n 1)" +ASV_OPENMP_PYTHON="$ASV_OPENMP_ENV/bin/python" + +for threads in threads_1 threads_2 threads_4 threads_8 threads_16; do + "$ASV_OPENMP_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case krige-extra-large --backend rust_core --threads "$threads" --limit 10 +done +``` + +Useful options: + +- `--case`: choose one workflow, or use `all` +- `--backend`: choose `cython_fallback` or `rust_core` +- `--threads`: choose `threads_1`, `threads_2`, `threads_4`, `threads_8`, + or `threads_16` +- `--limit`: number of function rows to print from the cProfile table +- `--sort cumtime`: sort by cumulative time, usually the best first view +- `--sort tottime`: sort by time spent directly in each function +- `--repeat`: repeat a workflow inside the profiler + +For example, `--limit 10` means "print the top 10 function rows after sorting". + +## More ASV Commands + +Save results for only the current commit: + +```bash +asv run HEAD^! --bench benchmark_backends +``` + +Compare current commit with previous commit: + +```bash +asv run HEAD~1^! --bench benchmark_backends +asv run HEAD^! --bench benchmark_backends +asv compare HEAD~1 HEAD +``` + +Compare local `main` with the current branch tip: + +```bash +asv run main^! --bench benchmark_backends +asv run HEAD^! 
--bench benchmark_backends +asv compare main HEAD +``` + +Compare remote `main` with the current branch tip: + +```bash +git fetch origin main +asv run origin/main^! --bench benchmark_backends +asv run HEAD^! --bench benchmark_backends +asv compare origin/main HEAD +``` + +On a linear branch, `HEAD~5..HEAD` benchmarks: + +```text +HEAD~4 +HEAD~3 +HEAD~2 +HEAD~1 +HEAD +``` + +Run a selected list of commits: + +```bash +git rev-parse HEAD HEAD~3 main e20c88f7 > /tmp/gstools-asv-commits.txt +asv run HASHFILE:/tmp/gstools-asv-commits.txt --bench benchmark_backends +``` + +Use full commit hashes when sharing results. Short hashes and branch names are +fine locally but can become ambiguous later. + +If running ASV from outside the repo root, pass the config explicitly: + +```bash +asv --config /path/to/MPS-Tools/GSTools/asv.conf.json run --quick --bench benchmark_backends +``` + +## External Reference + +For complete ASV command syntax, see: + +```text +https://asv.readthedocs.io/en/stable/commands.html +``` diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 00000000..c94e5d28 --- /dev/null +++ b/benchmarks/__init__.py @@ -0,0 +1 @@ +"""ASV benchmarks for GSTools.""" diff --git a/benchmarks/benchmark_backends.py b/benchmarks/benchmark_backends.py new file mode 100644 index 00000000..c9526eb7 --- /dev/null +++ b/benchmarks/benchmark_backends.py @@ -0,0 +1,307 @@ +"""Workflow benchmarks for GSTools backends. + +Usage: + cd /path/to/MPS-Tools/GSTools + # See benchmarks/README.md for ASV and optional cProfile setup. + asv machine --yes + asv run --quick --show-stderr --bench benchmark_backends + asv run HEAD^! --bench benchmark_backends + asv run + asv publish + asv preview + asv compare HEAD~1 HEAD + +Backend speedup should be interpreted as: + speedup = cython_fallback_time / rust_core_time + +Values greater than 1.0 mean the Rust backend is faster on the same machine +for the same benchmark, commit, and thread label. 
+ +By default the suite uses one GSTools thread. For local OpenMP scaling +experiments, set GSTOOLS_BENCHMARK_THREADS, for example: + GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16 asv run HEAD^! +""" + +from __future__ import annotations + +import contextlib +import os + +import numpy as np + +import gstools as gs + + +BACKENDS = ("cython_fallback", "rust_core") + + +def _configured_thread_counts(): + raw = os.environ.get("GSTOOLS_BENCHMARK_THREADS", "1") + thread_counts = [] + for item in raw.split(","): + item = item.strip() + if not item: + continue + if item.startswith("threads_"): + label = item + value = item.removeprefix("threads_") + else: + label = f"threads_{item}" + value = item + int(value) + thread_counts.append(label) + if not thread_counts: + raise ValueError("GSTOOLS_BENCHMARK_THREADS did not define threads") + return tuple(thread_counts) + + +THREAD_COUNTS = _configured_thread_counts() +VARIOGRAM_CASES = ( + "full_900", + "sampled_5000_to_1500", + "sampled_15000_to_4500", +) +KRIGE_CASES = ("small_30x500", "large_120x2000", "extra_large_360x6000") +FIELD_CASES = ( + "srf_unstructured_randmeth", + "srf_structured_randmeth", + "srf_structured_fourier", + "condsrf_unstructured", +) + + +@contextlib.contextmanager +def gstools_backend(use_core, num_threads): + """Temporarily force backend and GSTools thread count.""" + previous = ( + gs.config._GSTOOLS_CORE_AVAIL, + gs.config.USE_GSTOOLS_CORE, + gs.config.NUM_THREADS, + ) + try: + if use_core: + if not previous[0]: + raise NotImplementedError("gstools_core is not available") + gs.config._GSTOOLS_CORE_AVAIL = True + gs.config.USE_GSTOOLS_CORE = True + else: + gs.config._GSTOOLS_CORE_AVAIL = False + gs.config.USE_GSTOOLS_CORE = False + gs.config.NUM_THREADS = num_threads + yield + finally: + ( + gs.config._GSTOOLS_CORE_AVAIL, + gs.config.USE_GSTOOLS_CORE, + gs.config.NUM_THREADS, + ) = previous + + +def _use_core(backend): + if backend == "rust_core": + return True + if backend == "cython_fallback": + return 
False + raise ValueError(f"Unknown backend: {backend}") + + +def _num_threads(thread_count): + if thread_count.startswith("threads_"): + return int(thread_count.removeprefix("threads_")) + raise ValueError(f"Unknown thread count: {thread_count}") + + +def _random_points(seed, count, scale): + rng = np.random.RandomState(seed) + return rng.rand(count) * scale, rng.rand(count) * scale + + +def _smooth_field(x, y): + return np.sin(x / 10.0) + np.cos(y / 15.0) + + +def _make_variogram_data(seed, count, scale=100.0): + x, y = _random_points(seed, count, scale) + field = _smooth_field(x, y) + bins = np.linspace(0.0, scale * 0.6, 16) + return (x, y), field, bins + + +def _make_krige_data(seed, cond_count, target_count, scale=50.0): + rng = np.random.RandomState(seed) + cond_x = rng.rand(cond_count) * scale + cond_y = rng.rand(cond_count) * scale + cond_val = _smooth_field(cond_x, cond_y) + target_pos = ( + rng.rand(target_count) * scale, + rng.rand(target_count) * scale, + ) + return (cond_x, cond_y), cond_val, target_pos + + +class VariogramWorkflowBenchmarks: + """Variogram workflow benchmarks by case and backend.""" + + params = [VARIOGRAM_CASES, BACKENDS, THREAD_COUNTS] + param_names = ["case", "backend", "threads"] + + def setup_cache(self): + return { + "full_900": _make_variogram_data(20220501, 900), + "sampled_5000_to_1500": _make_variogram_data(20220502, 5000), + "sampled_15000_to_4500": _make_variogram_data(20220503, 15000), + } + + def setup(self, data, case, backend, threads): + if backend == "rust_core" and not gs.config._GSTOOLS_CORE_AVAIL: + raise NotImplementedError("gstools_core is not available") + _num_threads(threads) + + def time_variogram_estimate(self, data, case, backend, threads): + with gstools_backend(_use_core(backend), _num_threads(threads)): + self._run_variogram(data, case) + + def peakmem_variogram_estimate(self, data, case, backend, threads): + with gstools_backend(_use_core(backend), _num_threads(threads)): + self._run_variogram(data, 
class KrigingWorkflowBenchmarks:
    """Benchmark global kriging for every case, backend and thread label."""

    params = [KRIGE_CASES, BACKENDS, THREAD_COUNTS]
    param_names = ["case", "backend", "threads"]

    def setup_cache(self):
        """Build conditioning and target data, keyed by case name."""
        specs = (
            ("small_30x500", 20220506, 30, 500),
            ("large_120x2000", 20220507, 120, 2000),
            ("extra_large_360x6000", 20220508, 360, 6000),
        )
        return {
            name: _make_krige_data(seed, cond_count, target_count)
            for name, seed, cond_count, target_count in specs
        }

    def setup(self, data, case, backend, threads):
        """Reject an unavailable Rust backend and malformed thread labels."""
        wants_core = backend == "rust_core"
        if wants_core and not gs.config._GSTOOLS_CORE_AVAIL:
            raise NotImplementedError("gstools_core is not available")
        # Called only for the ValueError it raises on a bad label.
        _num_threads(threads)

    def time_global_krige(self, data, case, backend, threads):
        """Run the kriging workflow under the requested backend/threads."""
        use_core = _use_core(backend)
        num_threads = _num_threads(threads)
        with gstools_backend(use_core, num_threads):
            self._run_krige(data, case)

    def peakmem_global_krige(self, data, case, backend, threads):
        """Run the same workflow as ``time_global_krige``."""
        use_core = _use_core(backend)
        num_threads = _num_threads(threads)
        with gstools_backend(use_core, num_threads):
            self._run_krige(data, case)

    def _run_krige(self, data, case):
        """Fit a ``gs.Krige`` on the case data and evaluate it at the targets.

        Returns whatever the kriging call returns with ``return_var=True``.
        """
        conditions, values, targets = data[case]
        covariance = gs.Exponential(
            dim=2, var=1.5, len_scale=12.0, nugget=0.05
        )
        interpolator = gs.Krige(
            covariance,
            conditions,
            values,
            exact=False,
            cond_err=0.05,
        )
        return interpolator(
            targets,
            mesh_type="unstructured",
            return_var=True,
            store=False,
        )
100.0), + "structured_pos": ( + np.linspace(0.0, 100.0, 64), + np.linspace(0.0, 100.0, 64), + ), + "condsrf": _make_krige_data(20220510, 40, 1000), + } + + def setup(self, data, case, backend, threads): + if backend == "rust_core" and not gs.config._GSTOOLS_CORE_AVAIL: + raise NotImplementedError("gstools_core is not available") + _num_threads(threads) + + def time_field_generation(self, data, case, backend, threads): + with gstools_backend(_use_core(backend), _num_threads(threads)): + self._run_field(data, case) + + def peakmem_field_generation(self, data, case, backend, threads): + with gstools_backend(_use_core(backend), _num_threads(threads)): + self._run_field(data, case) + + def _run_field(self, data, case): + if case == "srf_unstructured_randmeth": + return self._run_srf_unstructured(data) + if case == "srf_structured_randmeth": + return self._run_srf_structured(data) + if case == "srf_structured_fourier": + return self._run_srf_fourier(data) + if case == "condsrf_unstructured": + return self._run_condsrf(data) + raise ValueError(f"Unknown field benchmark case: {case}") + + def _run_srf_unstructured(self, data): + model = gs.Exponential(dim=2, var=2.0, len_scale=8.0) + srf = gs.SRF(model, mean=1.0, seed=20220508, mode_no=512) + return srf(data["unstructured_pos"], mesh_type="unstructured") + + def _run_srf_structured(self, data): + model = gs.Exponential(dim=2, var=2.0, len_scale=8.0) + srf = gs.SRF(model, mean=1.0, seed=20220509, mode_no=512) + return srf(data["structured_pos"], mesh_type="structured") + + def _run_srf_fourier(self, data): + model = gs.Gaussian(dim=2, var=2.0, len_scale=30.0) + srf = gs.SRF( + model, + generator="Fourier", + period=[100.0, 100.0], + mode_no=[32, 32], + seed=20220510, + ) + return srf(data["structured_pos"], mesh_type="structured") + + def _run_condsrf(self, data): + cond_pos, cond_val, target_pos = data["condsrf"] + model = gs.Exponential(dim=2, var=1.5, len_scale=12.0, nugget=0.05) + krige = gs.Krige( + model, + cond_pos, 
+ cond_val, + exact=False, + cond_err=0.05, + ) + cond_srf = gs.CondSRF(krige, seed=20220511, mode_no=512) + return cond_srf( + target_pos, + mesh_type="unstructured", + seed=20220512, + store=False, + krige_store=False, + ) diff --git a/benchmarks/tools/asv_speedup_summary.py b/benchmarks/tools/asv_speedup_summary.py new file mode 100644 index 00000000..b3239d70 --- /dev/null +++ b/benchmarks/tools/asv_speedup_summary.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python +"""Print Rust-vs-Cython speedups from local ASV result files. + +The summary is optional. ASV itself remains the source of truth for benchmark +storage and visualization. + +Usage: + python benchmarks/tools/asv_speedup_summary.py + python benchmarks/tools/asv_speedup_summary.py --results-dir .asv/results + python benchmarks/tools/asv_speedup_summary.py --include-legacy + +Speedup is calculated as: + cython_fallback_time / rust_core_time + +Values greater than 1.0 mean Rust was faster on the same machine, commit, +environment, benchmark, case, and thread-count combination. 
def is_number(value):
    """Return True when *value* is a usable, finite measurement.

    Accepts ``int`` and ``float`` values only. Rejected are:

    * ``bool`` (a subclass of ``int`` that is never a measurement),
    * ``nan`` and ``+/-inf`` (ratios built from them are meaningless),
    * integers too large to be represented as a ``float``, because callers
      convert accepted values with ``float(value)``.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    try:
        return math.isfinite(value)
    except OverflowError:
        # math.isfinite converts ints to float first; huge ints overflow.
        return False
def backend_rows(entry):
    """Expand one benchmark entry into per-parameter-combination rows.

    The flattened ``result`` values are paired, in order, with the cartesian
    product of ``params``. Each numeric value whose combination names a known
    backend becomes a dict with the keys ``backend``, ``case``, ``threads``
    and ``value``. Returns an empty list when ``result`` is not a list or no
    parameters are present.
    """
    result = entry.get("result")
    params = entry.get("params") or []
    if not (isinstance(result, list) and params):
        return []

    rows = []
    pairs = zip(itertools.product(*params), flatten_values(result))
    for combo, value in pairs:
        if not is_number(value):
            continue
        # Parameter values may carry surrounding quote characters.
        labels = [str(item).strip("'\"") for item in combo]

        backend = None
        for candidate in BACKENDS:
            if candidate in labels:
                backend = candidate
                break
        if backend is None:
            continue

        thread_labels = [
            label for label in labels if label.startswith(THREAD_PREFIX)
        ]
        case_labels = [
            label
            for label in labels
            if label not in BACKENDS and not label.startswith(THREAD_PREFIX)
        ]
        rows.append(
            {
                "backend": backend,
                "case": "/".join(case_labels) if case_labels else "-",
                "threads": thread_labels[0] if thread_labels else "-",
                "value": float(value),
            }
        )
    return rows
def print_table(rows):
    """Print the speedup rows as a left-aligned text table on stdout.

    Prints a single notice line instead when *rows* is empty. Each column is
    as wide as its longest cell, header included.
    """
    if not rows:
        print("No matching Rust-vs-Cython ASV results found.")
        return

    headers = [
        "commit",
        "env",
        "benchmark",
        "case",
        "threads",
        "cython",
        "rust",
        "speedup",
    ]
    body = []
    for row in rows:
        body.append(
            [
                row["commit"],
                row["env"],
                row["benchmark"],
                row["case"],
                row["threads"],
                f"{row['cython']:.6g}",
                f"{row['rust']:.6g}",
                f"{row['speedup']:.3f}x",
            ]
        )

    # Start from the header widths and widen per column as needed.
    widths = [len(str(title)) for title in headers]
    for cells in body:
        for index, cell in enumerate(cells):
            widths[index] = max(widths[index], len(str(cell)))

    def render(cells):
        padded = [str(cell).ljust(width) for cell, width in zip(cells, widths)]
        return " ".join(padded)

    rule = ["-" * width for width in widths]
    for cells in (headers, rule, *body):
        print(render(cells))
def package_version(package_name):
    """Return a printable version string for *package_name*.

    Returns
    -------
    str
        * the package's ``__version__`` when it imports and defines one,
        * ``"unknown"`` when it imports without a ``__version__``,
        * ``"not installed"`` when the package itself cannot be found,
        * ``"import failed (...)"`` when the package is present but importing
          it fails, e.g. because one of its dependencies is missing.
    """
    try:
        package = importlib.import_module(package_name)
    except ModuleNotFoundError as err:
        missing = err.name or package_name
        # Only report "not installed" when the missing module is the package
        # itself; a missing dependency is a different problem.
        if missing.partition(".")[0] == package_name.partition(".")[0]:
            return "not installed"
        return f"import failed (missing {missing})"
    except ImportError as err:
        # Keep this diagnostic helper from aborting on a broken installation.
        return f"import failed ({err})"
    return getattr(package, "__version__", "unknown")
+ ) + + default_values = [] + for label, module_name in MODULES.items(): + try: + label, default_threads, explicit = check_module(label, module_name) + except ModuleNotFoundError as err: + print(f"OpenMP check: FAIL. Missing module: {err.name}") + return 1 + default_values.append(default_threads) + if args.verbose: + explicit_text = ", ".join( + f"{request}->{actual}" for request, actual in explicit.items() + ) + print(f"{label} default None -> {default_threads}") + print(f"{label} explicit -> {explicit_text}") + + if min(default_values) > 1: + print("OpenMP check: PASS") + return 0 + + print( + "OpenMP check: FAIL. GSTools-Cython reports one default thread. " + "Explicit thread values may be accepted by the wrapper, but this does " + "not prove that the compiled extension is using OpenMP." + ) + return 1 if args.fail_if_no_openmp else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmarks/tools/install_macos_openmp_cython.py b/benchmarks/tools/install_macos_openmp_cython.py new file mode 100644 index 00000000..09f0840c --- /dev/null +++ b/benchmarks/tools/install_macos_openmp_cython.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +"""Install GSTools-Cython with OpenMP inside a macOS ASV environment. + +This helper is intentionally macOS-specific. It is called from +``asv.macos-openmp.conf.json`` after ASV has created a conda environment that +contains ``llvm-openmp``. 
def run(command, env=None, check=True):
    """Echo *command* prefixed with ``+`` and execute it.

    The command's parts are joined with spaces for the echo and passed
    unchanged to ``subprocess.run``, whose result object is returned.
    ``env`` and ``check`` are forwarded as given.
    """
    rendered = " ".join(map(str, command))
    print(f"+ {rendered}", flush=True)
    completed = subprocess.run(command, check=check, env=env)
    return completed
Expected " + f"{omp_header} and {omp_lib}.", + file=sys.stderr, + ) + return 2 + + cc_wrapper = env_dir / "bin" / "gstools-asv-clang-openmp" + cxx_wrapper = env_dir / "bin" / "gstools-asv-clang-openmp++" + write_wrapper(cc_wrapper) + write_wrapper(cxx_wrapper, force_cxx=True) + + build_env = os.environ.copy() + build_env.update( + { + "GSTOOLS_BUILD_PARALLEL": "1", + "GSTOOLS_OPENMP_PREFIX": str(env_dir), + "CC": str(cc_wrapper), + "CXX": str(cxx_wrapper), + "CFLAGS": f"-I{include_dir}", + "LDFLAGS": f"-L{lib_dir}", + } + ) + + run( + [ + sys.executable, + "-m", + "pip", + "uninstall", + "-y", + "gstools-cython", + "gstools_cython", + ], + env=build_env, + check=False, + ) + run( + [ + sys.executable, + "-m", + "pip", + "install", + "--no-build-isolation", + "--no-cache-dir", + "--force-reinstall", + "--no-binary=gstools-cython", + "--no-deps", + "gstools-cython", + ], + env=build_env, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/benchmarks/tools/profile_benchmark_workflows.py b/benchmarks/tools/profile_benchmark_workflows.py new file mode 100644 index 00000000..2fd2fdff --- /dev/null +++ b/benchmarks/tools/profile_benchmark_workflows.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +"""Profile the representative GSTools benchmark workflows with cProfile. + +This is a quick measurement helper. ASV remains the source of truth for saved +benchmark results, while this script helps identify the top cumulative Python +call sites before making algorithmic changes. 
+ +Usage: + cd /path/to/MPS-Tools/GSTools + ASV_ENV="$(ls -td .asv/env/* | head -n 1)" + "$ASV_ENV/bin/python" benchmarks/tools/profile_benchmark_workflows.py --list + "$ASV_ENV/bin/python" benchmarks/tools/profile_benchmark_workflows.py \ + --case variogram-sampled + "$ASV_ENV/bin/python" benchmarks/tools/profile_benchmark_workflows.py \ + --case krige-large \ + --backend rust_core --threads threads_1 --limit 30 +""" + +from __future__ import annotations + +import argparse +import cProfile +from pathlib import Path +import pstats +import sys + + +REPO_ROOT = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(REPO_ROOT)) +sys.path.insert(0, str(REPO_ROOT / "src")) + + +CASES = { + "variogram-full": ( + "VariogramWorkflowBenchmarks", + "time_variogram_estimate", + ("full_900",), + ), + "variogram-sampled": ( + "VariogramWorkflowBenchmarks", + "time_variogram_estimate", + ("sampled_5000_to_1500",), + ), + "variogram-extra-large": ( + "VariogramWorkflowBenchmarks", + "time_variogram_estimate", + ("sampled_15000_to_4500",), + ), + "krige-small": ( + "KrigingWorkflowBenchmarks", + "time_global_krige", + ("small_30x500",), + ), + "krige-large": ( + "KrigingWorkflowBenchmarks", + "time_global_krige", + ("large_120x2000",), + ), + "krige-extra-large": ( + "KrigingWorkflowBenchmarks", + "time_global_krige", + ("extra_large_360x6000",), + ), + "srf-unstructured": ( + "RandomFieldWorkflowBenchmarks", + "time_field_generation", + ("srf_unstructured_randmeth",), + ), + "srf-structured": ( + "RandomFieldWorkflowBenchmarks", + "time_field_generation", + ("srf_structured_randmeth",), + ), + "srf-fourier": ( + "RandomFieldWorkflowBenchmarks", + "time_field_generation", + ("srf_structured_fourier",), + ), + "condsrf": ( + "RandomFieldWorkflowBenchmarks", + "time_field_generation", + ("condsrf_unstructured",), + ), +} + +THREAD_COUNTS = ( + "threads_1", + "threads_2", + "threads_4", + "threads_8", + "threads_16", +) + + +def parse_args(): + parser = 
def iter_selected(case):
    """Yield ``(name, spec)`` pairs for the requested workflow selection.

    ``"all"`` yields every entry of ``CASES``; any other value yields the
    single entry stored under that name.
    """
    if case != "all":
        yield case, CASES[case]
        return
    for name, spec in CASES.items():
        yield name, spec
def run_case(
    name,
    class_name,
    method_name,
    params,
    repeat,
    limit,
    sort,
    backend,
    threads,
):
    """Profile one benchmark workflow with cProfile and print the top rows.

    Parameters
    ----------
    name : str
        Label printed in the report header.
    class_name, method_name : str
        Benchmark suite class and the method on it to profile.
    params : tuple
        Positional case parameters passed to the method after the data.
    repeat : int
        Number of calls executed inside the profiled region.
    limit : int
        Number of rows passed to ``print_stats``.
    sort : str
        Sort key passed to ``sort_stats``.
    backend, threads : str
        Backend and thread labels forwarded to the benchmark method.

    Notes
    -----
    Only ``setup_cache`` is called before the workflow method; the suite's
    ``setup`` hook is not invoked here.
    """
    suite = load_suite_class(class_name)()
    data = suite.setup_cache()
    method = getattr(suite, method_name)

    # The context manager disables the profiler even when the workflow raises,
    # so a failing case cannot leave profiling switched on.
    with cProfile.Profile() as profiler:
        for _ in range(repeat):
            method(data, *params, backend, threads)

    print(f"\n== {name} [{backend}, {threads}] ==")
    stats = pstats.Stats(profiler, stream=sys.stdout)
    stats.strip_dirs().sort_stats(sort).print_stats(limit)