diff --git a/.github/workflows/asv-benchmarks.yml b/.github/workflows/asv-benchmarks.yml
new file mode 100644
index 00000000..294dbff4
--- /dev/null
+++ b/.github/workflows/asv-benchmarks.yml
@@ -0,0 +1,56 @@
+# Manually triggered workflow: runs the ASV benchmark suite with the default
+# asv.conf.json and uploads the raw results plus the generated HTML report.
+name: ASV Benchmarks
+
+on:
+  workflow_dispatch:
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Full history instead of the default shallow clone; ASV works from git.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # The later steps use login shells (bash -l) so this env gets activated.
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          activate-environment: asv
+          python-version: "3.12"
+          channels: conda-forge
+          auto-activate-base: false
+
+      - name: Install ASV
+        shell: bash -l {0}
+        run: |
+          conda install -y -c conda-forge asv
+
+      - name: Configure ASV machine
+        shell: bash -l {0}
+        run: |
+          asv machine --yes
+
+      - name: Run ASV benchmarks
+        shell: bash -l {0}
+        run: |
+          asv run
+
+      - name: Publish ASV report
+        shell: bash -l {0}
+        run: |
+          asv publish
+
+      - name: Upload ASV results
+        uses: actions/upload-artifact@v4
+        with:
+          name: asv-results
+          # .asv/ is a dot-directory: upload-artifact v4.4+ skips hidden paths
+          # by default, which would leave this artifact empty.
+          include-hidden-files: true
+          if-no-files-found: error
+          path: |
+            .asv/results/
+            .asv/html/
diff --git a/.gitignore b/.gitignore
index bcdc980b..1cec6e51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,6 +41,8 @@ htmlcov/
 .coverage
 .coverage.*
 .cache
+.asv/*
+.asv-openmp/*
 nosetests.xml
 coverage.xml
 *.cover
diff --git a/asv.conf.json b/asv.conf.json
new file mode 100644
index 00000000..289cc426
--- /dev/null
+++ b/asv.conf.json
@@ -0,0 +1,30 @@
+{
+  "version": 1,
+  "project": "GSTools",
+  "project_url": "https://github.com/jeilealr/GSTools",
+  "repo": ".",
+  "branches": ["main"],
+  "benchmark_dir": "benchmarks",
+  "env_dir": ".asv/env",
+  "results_dir": ".asv/results",
+  "html_dir": ".asv/html",
+  "show_commit_url": "https://github.com/jeilealr/GSTools/commit/",
+  "environment_type": "conda",
+  "conda_channels": ["conda-forge"],
+  "pythons": ["3.12"],
+  "matrix": {
+    "req": {
+      "emcee": [""],
+      "hankel": [""],
+      "meshio": [""],
+      "numpy": [""],
+      "pyevtk": [""],
+      "scipy": [""],
+      "gstools-cython": [""]
+    }
+  },
+  "install_command": [
+    "in-dir={env_dir} python -m pip install gstools_core>=1.0.0",
+    "in-dir={env_dir} python -m pip install --no-deps {build_dir}"
+  ]
+}
diff --git a/asv.macos-openmp.conf.json b/asv.macos-openmp.conf.json
new file mode 100644
index 00000000..9df8067a
--- /dev/null
+++ b/asv.macos-openmp.conf.json
@@ -0,0 +1,36 @@
+{
+  "version": 1,
+  "project": "GSTools",
+  "project_url": "https://github.com/jeilealr/GSTools",
+  "repo": ".",
+  "branches": ["main"],
+  "benchmark_dir": "benchmarks",
+  "env_dir": ".asv-openmp/env",
+  "results_dir": ".asv-openmp/results",
+  "html_dir": ".asv-openmp/html",
+  "show_commit_url": "https://github.com/jeilealr/GSTools/commit/",
+  "environment_type": "conda",
+  "conda_channels": ["conda-forge"],
+  "pythons": ["3.12"],
+  "matrix": {
+    "req": {
+      "cython": [""],
+      "emcee": [""],
+      "extension-helpers": [""],
+      "hankel": [""],
+      "llvm-openmp": [""],
+      "meshio": [""],
+      "numpy": [""],
+      "pyevtk": [""],
+      "scipy": [""],
+      "setuptools": [""],
+      "wheel": [""]
+    }
+  },
+  "install_command": [
+    "in-dir={env_dir} python -m pip install gstools_core>=1.0.0",
+    "in-dir={env_dir} python {conf_dir}/benchmarks/tools/install_macos_openmp_cython.py {env_dir}",
+    "in-dir={env_dir} python {conf_dir}/benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp",
+    "in-dir={env_dir} python -m pip install --no-deps {build_dir}"
+  ]
+}
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..b4df15b6
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,765 @@
+# GSTools Benchmark Guide
+
+This directory contains the Airspeed Velocity ([ASV](https://github.com/airspeed-velocity/asv/)) benchmark suite for GSTools and a complementary profiling helper implemented with cProfile (part of the Python standard library).
+
+This guide explains how to benchmark GSTools, inspect the results, profile
+where runtime is spent, and then decide what to optimize.
+
+Unit tests in `tests/` answer "is the code correct?". The ASV benchmarks in
+`benchmarks/` answer "how fast is this workflow, how much memory does it use,
+and did that change across commits?". The complementary cProfile helper
+answers "inside this workflow, which Python functions are taking most of the
+time right now?".
+
+The benchmarks compare two GSTools backends, which gives more context for
+deciding where optimization work should go:
+
+- `cython_fallback`: the default Cython-backed fallback implementation from
+  [gstools-cython](https://github.com/GeoStat-Framework/GSTools-Cython).
+- `rust_core`: the Rust-backed implementation from
+  [gstools_core](https://github.com/GeoStat-Framework/GSTools-Core).
+
+## Index
+
+- [Setup](#setup)
+- [Benchmarking Scripts](#benchmarking-scripts)
+  - [ASV Configuration](#asv-configuration)
+  - [Benchmark Naming](#benchmark-naming)
+- [Benchmark Coverage](#benchmark-coverage)
+  - [Shared Constants](#shared-constants)
+  - [Shared Helpers](#shared-helpers)
+  - [Benchmark Classes](#benchmark-classes)
+    - [VariogramWorkflowBenchmarks](#variogramworkflowbenchmarks)
+    - [KrigingWorkflowBenchmarks](#krigingworkflowbenchmarks)
+    - [RandomFieldWorkflowBenchmarks](#randomfieldworkflowbenchmarks)
+- [Running The Benchmarks](#running-the-benchmarks)
+  - [Baseline Benchmark](#baseline-benchmark)
+    - [Current Commit Baseline](#current-commit-baseline)
+    - [Several Commits Baseline](#several-commits-baseline)
+    - [Summary of Results](#summary-of-results)
+    - [Visualization of Results](#visualization-of-results)
+  - [Profiling With cProfile](#profiling-with-cprofile)
+- [Optional Parallelisation with OpenMP](#optional-parallelisation-with-openmp)
+  - [Shared OpenMP Rule](#shared-openmp-rule)
+  - [macOS Example](#macos-example)
+    - [What The macOS OpenMP Config Does](#what-the-macos-openmp-config-does)
+    - [Run On macOS](#run-on-macos)
+    - [Interpreting The macOS OpenMP Run](#interpreting-the-macos-openmp-run)
+  - [Windows Example](#windows-example)
+  - [Linux Example](#linux-example)
+  - [HPC Example](#hpc-example)
+  - [Profiling With cProfile for Multiple Threads](#profiling-with-cprofile-for-multiple-threads)
+- [More ASV Commands](#more-asv-commands)
+- [External Reference](#external-reference)
+
+## Setup
+
+The regular installation commands in the main `README.md` install GSTools for
+normal use. This benchmark guide uses conda because ASV creates isolated
+benchmark environments for the commits it measures.
+
+The default benchmark configuration intentionally compares both backends with
+one GSTools thread:
+
+```text
+gstools.config.NUM_THREADS = 1
+```
+
+That keeps the first comparison simple: Cython fallback vs Rust core without
+parallelism as a confounding factor. Parallel/OpenMP scaling is treated as a
+separate optional experiment because the correct Cython OpenMP build depends on
+the user's operating system, compiler, and runtime environment.
+
+To run the benchmark and the optional cProfile helper, follow these steps:
+
+1. Move to the GSTools repository root:
+
+```bash
+cd /path/to/GSTools
+```
+
+2. Create and activate a conda environment for local benchmark work:
+
+```bash
+conda create -n gstools-benchmark -c conda-forge python=3.12 asv packaging
+conda activate gstools-benchmark
+```
+
+If you already have a suitable conda environment, activate that instead.
+
+3. If you use an existing environment, make sure ASV is installed:
+
+```bash
+conda install -c conda-forge asv
+```
+
+4. Create a machine profile once per computer:
+
+```bash
+asv machine --yes
+```
+
+The machine profile records local hardware information so ASV can label results correctly. Do not compare absolute times across different machines.
+
+## Benchmarking Scripts
+
+The benchmarking setup currently consists of:
+
+- `asv.conf.json`: tells ASV how to build GSTools, where benchmarks live, where
+  to store results, and which Python/environment matrix to use.
+- `asv.macos-openmp.conf.json`: optional macOS-specific ASV configuration that
+  builds `gstools-cython` from source with OpenMP inside ASV's own environment.
+- `benchmarks/benchmark_backends.py`: contains the ASV benchmark classes.
+- `benchmarks/README.md`: this practical guide.
+- `benchmarks/tools/asv_speedup_summary.py`: reads `.asv/results/` and prints
+  Rust-vs-Cython speedup ratios.
+- `benchmarks/tools/profile_benchmark_workflows.py`: runs one representative
+  workflow from `benchmark_backends.py` under Python's built-in `cProfile`, so
+  you can see which functions take time in the current checkout.
+- `benchmarks/tools/check_cython_openmp.py`: optional helper for checking
+  whether the active Python environment's GSTools-Cython extensions detect
+  OpenMP parallel support.
+- `benchmarks/tools/install_macos_openmp_cython.py`: helper used only by
+  `asv.macos-openmp.conf.json` to compile `gstools-cython` with `llvm-openmp`
+  on macOS.
+
+
+### ASV Configuration
+
+The repo root `asv.conf.json` is tailored to this GSTools checkout:
+
+```json
+{
+  "repo": ".",
+  "branches": ["main"],
+  "benchmark_dir": "benchmarks",
+  "env_dir": ".asv/env",
+  "results_dir": ".asv/results",
+  "html_dir": ".asv/html",
+  "environment_type": "conda",
+  "pythons": ["3.12"],
+  "matrix": {
+    "req": {
+      "emcee": [""],
+      "hankel": [""],
+      "meshio": [""],
+      "numpy": [""],
+      "pyevtk": [""],
+      "scipy": [""],
+      "gstools-cython": [""]
+    }
+  },
+  "install_command": [
+    "in-dir={env_dir} python -m pip install gstools_core>=1.0.0",
+    "in-dir={env_dir} python -m pip install --no-deps {build_dir}"
+  ]
+}
+```
+
+Important details:
+
+- `environment_type: "conda"` means conda is required for the ASV workflow in
+  this guide. ASV creates isolated conda environments for the commits it
+  benchmarks.
+- `pythons: ["3.12"]` means ASV creates Python 3.12 benchmark environments.
+  Keep this pinned unless you intentionally validate a newer Python/GSTools
+  backend stack.
+- `matrix.req` asks ASV to install GSTools runtime dependencies before
+  installing the checked-out GSTools source. It includes `gstools-cython`
+  explicitly because the GSTools commit is installed with `--no-deps`.
+- `{build_dir}` is ASV's temporary checkout/build directory for the exact
+  GSTools commit being benchmarked.
+- `install_command` installs the checked-out GSTools revision with `--no-deps`.
+  It also installs `gstools_core` with pip because `gstools-core` is not
+  available as a conda package in every solver/platform combination.
+- ASV still needs its own `install_command` because it creates isolated
+  environments for the commits it benchmarks.
+- Run the cProfile helper with the Python executable from ASV's isolated
+  environment, for example `.asv/env/<env-id>/bin/python`. In that mode, the
+  ASV environment provides dependencies while the helper imports the current
+  checkout through the repo `src/` path.
+
+ASV creates these generated directories:
+
+```text
+.asv/env/      benchmark environments
+.asv/results/  local benchmark result JSON files
+.asv/html/     generated local benchmark website
+```
+
+Those directories are machine-specific generated artifacts. They should
+normally stay out of git.
+
+If needed, users can list more than one branch, Python version, benchmark
+directory, and so on. For example:
+
+```json
+"branches": ["main", "my-feature-branch"]
+```
+
+Users can also benchmark any explicit branch, commit, tag, or range without
+changing `asv.conf.json`:
+
+```bash
+asv run my-feature-branch^! --bench benchmark_backends
+asv run main..my-feature-branch --bench benchmark_backends
+```
+
+ASV checks out package code at each git commit being benchmarked. Commit source
+changes before benchmarking them with ASV. Otherwise ASV may benchmark the last
+committed package code rather than your uncommitted source changes.
+
+### Benchmark Naming
+
+ASV recognizes benchmark methods by name:
+
+- methods starting with `time_` measure runtime
+- methods starting with `peakmem_` measure peak memory
+- `setup_cache()` creates reusable data once per benchmark environment
+- `setup()` can skip or prepare individual parameter combinations
+
+## Benchmark Coverage
+
+This section describes what is measured by the ASV suite and how the benchmark
+labels map to real GSTools workflows. The goal is to cover representative
+operations that are relevant for geostatistical work, not isolated
+micro-functions.
+
+The current suite measures runtime and peak memory for variogram estimation,
+global kriging, spatial random field generation, and conditioned random field
+generation. Each workflow is run with both backends so the results can show
+both absolute performance and Rust-vs-Cython differences.
+
+### Shared Constants
+
+```python
+BACKENDS = ("cython_fallback", "rust_core")
+THREAD_COUNTS = _configured_thread_counts()
+VARIOGRAM_CASES = (
+    "full_900",
+    "sampled_5000_to_1500",
+    "sampled_15000_to_4500",
+)
+KRIGE_CASES = ("small_30x500", "large_120x2000", "extra_large_360x6000")
+FIELD_CASES = (
+    "srf_unstructured_randmeth",
+    "srf_structured_randmeth",
+    "srf_structured_fourier",
+    "condsrf_unstructured",
+)
+```
+
+These constants define parameter labels shown in ASV results.
+
+`BACKENDS` compares:
+
+- `cython_fallback`
+- `rust_core`
+
+`THREAD_COUNTS` defaults to:
+
+- `threads_1`: force `gstools.config.NUM_THREADS = 1`
+
+That is the default because the first benchmark target is a clean Cython-vs-Rust
+backend comparison without parallelism.
+
+### Shared Helpers
+
+`gstools_backend(use_core, num_threads)` temporarily forces GSTools to use
+either the Cython fallback backend or the Rust `gstools_core` backend, and
+sets `gstools.config.NUM_THREADS` for that benchmark run.
+
+`_random_points(seed, count, scale)` creates deterministic 2D point clouds.
+
+`_smooth_field(x, y)` creates deterministic synthetic values:
+
+```python
+np.sin(x / 10.0) + np.cos(y / 15.0)
+```
+
+`_make_variogram_data(...)` creates positions, field values, and bins for
+variogram estimation.
+
+`_make_krige_data(...)` creates conditioning points, conditioning values, and
+target points for kriging and conditioned random fields.
+
+The fixed random seeds are intentional. They keep benchmark inputs stable so
+changes in results are more likely to come from code changes, not new random
+data.
+
+### Benchmark Classes
+
+The ASV benchmarking is organized around workflow classes. Each workflow class
+compares `cython_fallback` and `rust_core`, and each class includes both
+runtime and peak-memory methods.
+
+The suite currently measures:
+
+- `VariogramWorkflowBenchmarks`: full pairwise work vs sampled large work
+- `KrigingWorkflowBenchmarks`: small vs larger global kriging systems
+- `RandomFieldWorkflowBenchmarks`: unstructured SRF, structured SRF, Fourier
+  SRF, and conditioned SRF
+
+This keeps the ASV suite focused on representative workflows rather than
+separate duplicate backend checks.
+
+#### VariogramWorkflowBenchmarks
+
+This class measures variogram estimation cases:
+
+```text
+full_900
+sampled_5000_to_1500
+sampled_15000_to_4500
+```
+
+The labels mean:
+
+- `full_900`: create 900 scattered points and use all 900 points for the
+  variogram calculation.
+- `sampled_5000_to_1500`: create 5,000 scattered points, then randomly select
+  1,500 of those points for the variogram calculation.
+- `sampled_15000_to_4500`: create 15,000 scattered points, then randomly select
+  4,500 of those points for the variogram calculation.
+
+The sampled cases still represent larger input datasets, but the variogram
+calculation is done on the randomly selected subset so the pairwise work stays
+practical.
+
+#### KrigingWorkflowBenchmarks
+
+This class measures global kriging at three scales:
+
+```text
+small_30x500
+large_120x2000
+extra_large_360x6000
+```
+
+The labels mean:
+
+- `small_30x500`: 30 conditioning points, 500 target points
+- `large_120x2000`: 120 conditioning points, 2,000 target points
+- `extra_large_360x6000`: 360 conditioning points, 6,000 target points
+
+#### RandomFieldWorkflowBenchmarks
+
+This class measures SRF and CondSRF generation workflows:
+
+```text
+srf_unstructured_randmeth
+srf_structured_randmeth
+srf_structured_fourier
+condsrf_unstructured
+```
+
+The cases are:
+
+- `srf_unstructured_randmeth`: SRF using RandMeth on 2,000 unstructured points
+- `srf_structured_randmeth`: SRF using RandMeth on a 64 by 64 structured grid
+- `srf_structured_fourier`: SRF using the Fourier generator on a 64 by 64
+  structured grid
+- `condsrf_unstructured`: conditioned SRF with 40 conditioning points and 1,000
+  target points
+
+## Running The Benchmarks
+
+### Baseline Benchmark
+
+The baseline benchmark is the first result set to create before doing any
+optimization work. It uses the default ASV configuration, so each workflow is
+measured with `threads_1` for both `cython_fallback` and `rust_core`.
+
+#### Current Commit Baseline
+
+- Save a baseline for the current commit:
+
+```bash
+asv run HEAD^! --bench benchmark_backends
+```
+
+#### Several Commits Baseline
+
+As mentioned previously, ASV can also benchmark several commits. Here we run the last five commits:
+
+- Run the last five commits on the current branch:
+
+```bash
+asv run HEAD~5..HEAD --bench benchmark_backends
+```
+
+#### Summary of Results
+
+After running ASV, inspect the explicit Rust-vs-Cython speedup ratios:
+
+```bash
+python benchmarks/tools/asv_speedup_summary.py
+```
+
+The helper reads `.asv/results/` and reports ratios per case and thread label:
+
+```text
+speedup = cython_fallback_time / rust_core_time
+```
+
+Interpret the ratio as:
+
+- `speedup > 1.0` means Rust is faster
+- `speedup = 1.0` means similar performance
+- `speedup < 1.0` means Rust is slower
+
+The speedup helper prints the backend ratio explicitly in the terminal. By
+default, the helper skips removed legacy duplicate rows from older saved
+results.
+
+#### Visualization of Results
+
+You can inspect the results in the ASV browser report by building and opening
+the local website:
+
+```bash
+asv publish
+asv preview
+```
+
+Then open the printed local URL, for example:
+
+```text
+http://127.0.0.1:8082/#/
+```
+(or any other `http://127.0.0.1:<port>/#/` URL shown by the running preview).
+
+The browser report shows ASV plots and trends. ASV plot views do not draw a line/graph when there is only one x-axis point, so after a single-commit run such as `asv run HEAD^! --bench benchmark_backends` the report will most likely not show any graphs.
+
+For the default benchmark run, the `threads` column should show `threads_1`.
+If you later run the
+[optional OpenMP scaling experiment](#optional-parallelisation-with-openmp),
+the same column can be used to compare several threads.
+
+
+### Profiling With cProfile
+
+`cProfile` does not update the ASV results shown in the browser report.
+Instead, it prints a table in the terminal showing which Python
+functions consumed time while one workflow ran.
+
+The helper script is:
+
+```text
+benchmarks/tools/profile_benchmark_workflows.py
+```
+
+It imports the ASV benchmark classes from `benchmark_backends.py`, selects one
+case, forces one backend, and runs that case under `cProfile`.
+
+Since ASV has already created an isolated Python environment, select that
+environment to execute the profiling helper:
+
+```bash
+ASV_ENV="$(ls -td .asv/env/* | head -n 1)"
+ASV_PYTHON="$ASV_ENV/bin/python"
+```
+
+The helper still profiles the current checkout because
+`profile_benchmark_workflows.py` adds the repository `src/` directory to
+`sys.path`. The ASV environment provides the installed dependencies, including
+`gstools-cython` and `gstools_core`.
+
+List available cases:
+
+```bash
+"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --list
+```
+
+Example commands for profiling selected cases:
+
+```bash
+"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case variogram-sampled --backend rust_core --threads threads_1 --limit 10
+"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case variogram-extra-large --backend rust_core --threads threads_1 --limit 10
+"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case krige-large --backend rust_core --threads threads_1 --limit 10
+"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case krige-extra-large --backend rust_core --threads threads_1 --limit 10
+"$ASV_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case condsrf --backend rust_core --threads threads_1 --limit 10
+```
+
+## Optional Parallelisation with OpenMP
+
+This section collects optional workflows for testing Cython and Rust with
+several thread counts. OpenMP setup is platform-dependent, so each operating
+system should have its own tested instructions.
+
+The default setup above remains the recommended baseline: one thread, normal
+ASV environment, and no extra OpenMP build steps. Use this section only when
+you explicitly want to measure backend scaling with multiple thread counts.
+
+### Shared OpenMP Rule
+
+The benchmark code can be run with several thread labels by setting, for
+example, `GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16`. That only passes different
+`gstools.config.NUM_THREADS` values to GSTools. It does not, by itself, make
+the Cython backend parallel.
+
+For Cython OpenMP scaling, the Cython extension must be compiled with OpenMP
+support inside the same ASV environment that runs the benchmark. Always verify
+that environment before interpreting Cython scaling results:
+
+```bash
+ASV_ENV="$(ls -td .asv-openmp/env/* | head -n 1)"
+"$ASV_ENV/bin/python" benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp
+```
+
+If the check fails, the benchmark may still run, but the Cython backend should
+not be interpreted as an OpenMP-enabled Cython run.
+
+### macOS Example
+
+This is the currently tested OpenMP workflow. It is separate from the
+default setup above.
+
+The default ASV configuration, `asv.conf.json`, stays conservative: it is the
+one-thread baseline and uses the normal conda-forge `gstools-cython` package.
+The default `.asv/env/` environment does not provide Cython OpenMP support. That is why this section uses a second ASV configuration:
+
+```text
+asv.macos-openmp.conf.json
+```
+
+This OpenMP config creates separate generated directories:
+
+```text
+.asv-openmp/env/
+.asv-openmp/results/
+.asv-openmp/html/
+```
+
+That keeps the OpenMP experiment separate from the default `.asv/` baseline.
+
+#### What The macOS OpenMP Config Does
+
+`asv.macos-openmp.conf.json` asks conda to install the build/runtime pieces
+needed for the macOS OpenMP experiment:
+
+```text
+llvm-openmp
+cython
+extension-helpers
+setuptools
+wheel
+```
+
+During ASV installation, it runs:
+
+```text
+benchmarks/tools/install_macos_openmp_cython.py
+```
+
+That helper compiles `gstools-cython` from source inside ASV's own environment,
+not inside your active conda environment. This matters because ASV benchmarks
+the packages installed under `.asv-openmp/env/`.
+
+Internally, the helper sets:
+
+```text
+GSTOOLS_BUILD_PARALLEL=1
+CC=<ASV OpenMP env>/bin/gstools-asv-clang-openmp
+CXX=<ASV OpenMP env>/bin/gstools-asv-clang-openmp++
+```
+
+The wrapper translates the plain `-fopenmp` flag used by the Cython build into
+Apple-clang-compatible compiler and linker arguments that use conda's
+`llvm-openmp`.
+
+#### Run On macOS
+
+In the previous section, the default config gives a quick overview for both
+backends with `threads_1`. In this section, the OpenMP config runs several
+thread labels: `threads_1`, `threads_2`, `threads_4`, `threads_8`, and
+`threads_16`.
+
+Start from the GSTools repository root:
+
+```bash
+cd /path/to/GSTools
+```
+
+Create a clean driver environment. This environment only runs ASV; ASV will
+create the real benchmark environment under `.asv-openmp/env/`.
+
+```bash
+conda create -n gstools-benchmark -c conda-forge python=3.12 asv
+conda activate gstools-benchmark
+```
+
+Create the ASV machine profile once:
+
+```bash
+asv --config asv.macos-openmp.conf.json machine --yes
+```
+
+Run a quick current-commit OpenMP check. This builds the OpenMP-enabled
+`gstools-cython` package inside `.asv-openmp/env/` and runs the benchmark suite:
+
+```bash
+GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16 \
+asv --config asv.macos-openmp.conf.json run HEAD^! --quick --bench benchmark_backends --show-stderr
+```
+
+Verify that the ASV OpenMP environment really uses Cython OpenMP:
+
+```bash
+ASV_OPENMP_ENV="$(ls -td .asv-openmp/env/* | head -n 1)"
+"$ASV_OPENMP_ENV/bin/python" benchmarks/tools/check_cython_openmp.py --verbose
+"$ASV_OPENMP_ENV/bin/python" benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp
+```
+
+Expected result on the tested Mac M2 setup:
+
+```text
+variogram default None -> 10
+field default None -> 10
+krige default None -> 10
+OpenMP check: PASS
+```
+
+If that check passes, run the last-five-commits OpenMP benchmark:
+
+```bash
+GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16 \
+asv --config asv.macos-openmp.conf.json run HEAD~5..HEAD --bench benchmark_backends --show-stderr
+```
+
+Print Rust-vs-Cython ratios from the OpenMP result folder:
+
+```bash
+python benchmarks/tools/asv_speedup_summary.py --results-dir .asv-openmp/results
+```
+
+Build and preview the OpenMP browser report:
+
+```bash
+asv --config asv.macos-openmp.conf.json publish
+asv --config asv.macos-openmp.conf.json preview
+```
+
+#### Interpreting The macOS OpenMP Run
+
+- Use default `asv.conf.json` for the reproducible one-thread baseline.
+- Use `asv.macos-openmp.conf.json` for the macOS OpenMP experiment.
+- Only claim Cython OpenMP scaling if `check_cython_openmp.py` passes inside
+  `.asv-openmp/env/...`.
+- The active `gstools-benchmark` conda environment does not need `gstools`
+  installed. It only needs ASV. The benchmarked GSTools packages live inside
+  `.asv-openmp/env/...`.
+
+This workflow is intended for macOS systems that use Apple clang with conda's
+`llvm-openmp`. It should be portable across many macOS machines, including
+Apple Silicon and Intel Macs, but it is not guaranteed for every macOS setup.
+
+It is not guaranteed to run without local changes on:
+
+- older macOS versions
+- systems missing Xcode command-line tools
+- systems with a nonstandard compiler setup
+- HPC or managed macOS environments
+- unusual conda installations
+
+Do not assume this exact OpenMP setup applies to Linux, Windows, or HPC systems.
+
+### Windows Example
+
+### Linux Example
+
+### HPC Example
+
+### Profiling With cProfile for Multiple Threads
+
+To profile how a workflow changes across configured thread counts, run the
+same cProfile case several times with the OpenMP ASV environment:
+
+```bash
+ASV_OPENMP_ENV="$(ls -td .asv-openmp/env/* | head -n 1)"
+ASV_OPENMP_PYTHON="$ASV_OPENMP_ENV/bin/python"
+
+for threads in threads_1 threads_2 threads_4 threads_8 threads_16; do
+  "$ASV_OPENMP_PYTHON" benchmarks/tools/profile_benchmark_workflows.py --case krige-extra-large --backend rust_core --threads "$threads" --limit 10
+done
+```
+
+Useful options:
+
+- `--case`: choose one workflow, or use `all`
+- `--backend`: choose `cython_fallback` or `rust_core`
+- `--threads`: choose `threads_1`, `threads_2`, `threads_4`, `threads_8`,
+  or `threads_16`
+- `--limit`: number of function rows to print from the cProfile table
+- `--sort cumtime`: sort by cumulative time, usually the best first view
+- `--sort tottime`: sort by time spent directly in each function
+- `--repeat`: repeat a workflow inside the profiler
+
+For example, `--limit 10` means "print the top 10 function rows after sorting".
+
+## More ASV Commands
+
+Save results for only the current commit:
+
+```bash
+asv run HEAD^! --bench benchmark_backends
+```
+
+Compare current commit with previous commit:
+
+```bash
+asv run HEAD~1^! --bench benchmark_backends
+asv run HEAD^! --bench benchmark_backends
+asv compare HEAD~1 HEAD
+```
+
+Compare local `main` with the current branch tip:
+
+```bash
+asv run main^! --bench benchmark_backends
+asv run HEAD^! --bench benchmark_backends
+asv compare main HEAD
+```
+
+Compare remote `main` with the current branch tip:
+
+```bash
+git fetch origin main
+asv run origin/main^! --bench benchmark_backends
+asv run HEAD^! --bench benchmark_backends
+asv compare origin/main HEAD
+```
+
+On a linear branch, `HEAD~5..HEAD` benchmarks:
+
+```text
+HEAD~4
+HEAD~3
+HEAD~2
+HEAD~1
+HEAD
+```
+
+Run a selected list of commits:
+
+```bash
+git rev-parse HEAD HEAD~3 main e20c88f7 > /tmp/gstools-asv-commits.txt
+asv run HASHFILE:/tmp/gstools-asv-commits.txt --bench benchmark_backends
+```
+
+Use full commit hashes when sharing results. Short hashes and branch names are
+fine locally but can become ambiguous later.
+
+If running ASV from outside the repo root, pass the config explicitly:
+
+```bash
+asv --config /path/to/GSTools/asv.conf.json run --quick --bench benchmark_backends
+```
+
+## External Reference
+
+For complete ASV command syntax, see:
+
+```text
+https://asv.readthedocs.io/en/stable/commands.html
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 00000000..c94e5d28
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""ASV benchmarks for GSTools."""
diff --git a/benchmarks/benchmark_backends.py b/benchmarks/benchmark_backends.py
new file mode 100644
index 00000000..c9526eb7
--- /dev/null
+++ b/benchmarks/benchmark_backends.py
@@ -0,0 +1,307 @@
+"""Workflow benchmarks for GSTools backends.
+
+Usage:
+    cd /path/to/MPS-Tools/GSTools
+    # See benchmarks/README.md for ASV and optional cProfile setup.
+    asv machine --yes
+    asv run --quick --show-stderr --bench benchmark_backends
+    asv run HEAD^! --bench benchmark_backends
+    asv run
+    asv publish
+    asv preview
+    asv compare HEAD~1 HEAD
+
+Backend speedup should be interpreted as:
+    speedup = cython_fallback_time / rust_core_time
+
+Values greater than 1.0 mean the Rust backend is faster on the same machine
+for the same benchmark, commit, and thread label.
+
+By default the suite uses one GSTools thread. For local OpenMP scaling
+experiments, set GSTOOLS_BENCHMARK_THREADS, for example:
+    GSTOOLS_BENCHMARK_THREADS=1,2,4,8,16 asv run HEAD^!
+"""
+
+from __future__ import annotations
+
+import contextlib
+import os
+
+import numpy as np
+
+import gstools as gs
+
+
+BACKENDS = ("cython_fallback", "rust_core")
+
+
+def _configured_thread_counts():
+    raw = os.environ.get("GSTOOLS_BENCHMARK_THREADS", "1")
+    thread_counts = []
+    for item in raw.split(","):
+        item = item.strip()
+        if not item:
+            continue
+        if item.startswith("threads_"):
+            label = item
+            value = item.removeprefix("threads_")
+        else:
+            label = f"threads_{item}"
+            value = item
+        int(value)
+        thread_counts.append(label)
+    if not thread_counts:
+        raise ValueError("GSTOOLS_BENCHMARK_THREADS did not define threads")
+    return tuple(thread_counts)
+
+
+THREAD_COUNTS = _configured_thread_counts()  # evaluated once at import time
+VARIOGRAM_CASES = (
+    "full_900",
+    "sampled_5000_to_1500",
+    "sampled_15000_to_4500",
+)
+KRIGE_CASES = ("small_30x500", "large_120x2000", "extra_large_360x6000")
+FIELD_CASES = (
+    "srf_unstructured_randmeth",
+    "srf_structured_randmeth",
+    "srf_structured_fourier",
+    "condsrf_unstructured",
+)
+
+
+@contextlib.contextmanager
+def gstools_backend(use_core, num_threads):
+    """Temporarily force backend and GSTools thread count."""
+    previous = (
+        gs.config._GSTOOLS_CORE_AVAIL,
+        gs.config.USE_GSTOOLS_CORE,
+        gs.config.NUM_THREADS,
+    )
+    try:
+        if use_core:
+            if not previous[0]:
+                raise NotImplementedError("gstools_core is not available")
+            gs.config._GSTOOLS_CORE_AVAIL = True
+            gs.config.USE_GSTOOLS_CORE = True
+        else:
+            gs.config._GSTOOLS_CORE_AVAIL = False
+            gs.config.USE_GSTOOLS_CORE = False
+        gs.config.NUM_THREADS = num_threads
+        yield
+    finally:
+        (
+            gs.config._GSTOOLS_CORE_AVAIL,
+            gs.config.USE_GSTOOLS_CORE,
+            gs.config.NUM_THREADS,
+        ) = previous
+
+
+def _use_core(backend):
+    if backend == "rust_core":
+        return True
+    if backend == "cython_fallback":
+        return False
+    raise ValueError(f"Unknown backend: {backend}")
+
+
+def _num_threads(thread_count):
+    if thread_count.startswith("threads_"):
+        return int(thread_count.removeprefix("threads_"))
+    raise ValueError(f"Unknown thread count: {thread_count}")
+
+
+def _random_points(seed, count, scale):
+    rng = np.random.RandomState(seed)
+    return rng.rand(count) * scale, rng.rand(count) * scale
+
+
+def _smooth_field(x, y):  # deterministic test signal: sin(x/10) + cos(y/15)
+    return np.sin(x / 10.0) + np.cos(y / 15.0)
+
+
+def _make_variogram_data(seed, count, scale=100.0):
+    x, y = _random_points(seed, count, scale)
+    field = _smooth_field(x, y)
+    bins = np.linspace(0.0, scale * 0.6, 16)
+    return (x, y), field, bins
+
+
+def _make_krige_data(seed, cond_count, target_count, scale=50.0):
+    rng = np.random.RandomState(seed)
+    cond_x = rng.rand(cond_count) * scale
+    cond_y = rng.rand(cond_count) * scale
+    cond_val = _smooth_field(cond_x, cond_y)
+    target_pos = (
+        rng.rand(target_count) * scale,
+        rng.rand(target_count) * scale,
+    )
+    return (cond_x, cond_y), cond_val, target_pos
+
+
+class VariogramWorkflowBenchmarks:
+    """Variogram workflow benchmarks by case and backend."""
+
+    params = [VARIOGRAM_CASES, BACKENDS, THREAD_COUNTS]
+    param_names = ["case", "backend", "threads"]
+
+    def setup_cache(self):
+        return {
+            "full_900": _make_variogram_data(20220501, 900),
+            "sampled_5000_to_1500": _make_variogram_data(20220502, 5000),
+            "sampled_15000_to_4500": _make_variogram_data(20220503, 15000),
+        }
+
+    def setup(self, data, case, backend, threads):
+        if backend == "rust_core" and not gs.config._GSTOOLS_CORE_AVAIL:
+            raise NotImplementedError("gstools_core is not available")
+        _num_threads(threads)
+
+    def time_variogram_estimate(self, data, case, backend, threads):
+        with gstools_backend(_use_core(backend), _num_threads(threads)):
+            self._run_variogram(data, case)
+
+    def peakmem_variogram_estimate(self, data, case, backend, threads):
+        with gstools_backend(_use_core(backend), _num_threads(threads)):
+            self._run_variogram(data, case)
+
+    def _run_variogram(self, data, case):
+        pos, field, bins = data[case]
+        kwargs = {}
+        if case == "sampled_5000_to_1500":
+            kwargs = {"sampling_size": 1500, "sampling_seed": 20220504}
+        if case == "sampled_15000_to_4500":
+            kwargs = {"sampling_size": 4500, "sampling_seed": 20220505}
+        return gs.vario_estimate(
+            pos,
+            field,
+            bins,
+            mesh_type="unstructured",
+            return_counts=True,
+            **kwargs,
+        )
+
+
+class KrigingWorkflowBenchmarks:
+    """Global kriging workflow benchmarks by case and backend."""
+
+    params = [KRIGE_CASES, BACKENDS, THREAD_COUNTS]
+    param_names = ["case", "backend", "threads"]
+
+    def setup_cache(self):
+        return {
+            "small_30x500": _make_krige_data(20220506, 30, 500),
+            "large_120x2000": _make_krige_data(20220507, 120, 2000),
+            "extra_large_360x6000": _make_krige_data(20220508, 360, 6000),
+        }
+
+    def setup(self, data, case, backend, threads):
+        if backend == "rust_core" and not gs.config._GSTOOLS_CORE_AVAIL:
+            raise NotImplementedError("gstools_core is not available")
+        _num_threads(threads)
+
+    def time_global_krige(self, data, case, backend, threads):
+        with gstools_backend(_use_core(backend), _num_threads(threads)):
+            self._run_krige(data, case)
+
+    def peakmem_global_krige(self, data, case, backend, threads):
+        with gstools_backend(_use_core(backend), _num_threads(threads)):
+            self._run_krige(data, case)
+
+    def _run_krige(self, data, case):
+        cond_pos, cond_val, target_pos = data[case]
+        model = gs.Exponential(dim=2, var=1.5, len_scale=12.0, nugget=0.05)
+        krige = gs.Krige(
+            model,
+            cond_pos,
+            cond_val,
+            exact=False,
+            cond_err=0.05,
+        )
+        return krige(
+            target_pos,
+            mesh_type="unstructured",
+            return_var=True,
+            store=False,
+        )
+
+
+class RandomFieldWorkflowBenchmarks:
+    """SRF and CondSRF workflow benchmarks by case and backend."""
+
+    params = [FIELD_CASES, BACKENDS, THREAD_COUNTS]
+    param_names = ["case", "backend", "threads"]
+
+    def setup_cache(self):
+        return {
+            "unstructured_pos": _random_points(20220509, 2000, 100.0),
+            "structured_pos": (
+                np.linspace(0.0, 100.0, 64),
+                np.linspace(0.0, 100.0, 64),
+            ),
+            "condsrf": _make_krige_data(20220510, 40, 1000),
+        }
+
+    def setup(self, data, case, backend, threads):
+        if backend == "rust_core" and not gs.config._GSTOOLS_CORE_AVAIL:
+            raise NotImplementedError("gstools_core is not available")
+        _num_threads(threads)
+
+    def time_field_generation(self, data, case, backend, threads):
+        with gstools_backend(_use_core(backend), _num_threads(threads)):
+            self._run_field(data, case)
+
+    def peakmem_field_generation(self, data, case, backend, threads):
+        with gstools_backend(_use_core(backend), _num_threads(threads)):
+            self._run_field(data, case)
+
+    def _run_field(self, data, case):
+        if case == "srf_unstructured_randmeth":
+            return self._run_srf_unstructured(data)
+        if case == "srf_structured_randmeth":
+            return self._run_srf_structured(data)
+        if case == "srf_structured_fourier":
+            return self._run_srf_fourier(data)
+        if case == "condsrf_unstructured":
+            return self._run_condsrf(data)
+        raise ValueError(f"Unknown field benchmark case: {case}")
+
+    def _run_srf_unstructured(self, data):
+        model = gs.Exponential(dim=2, var=2.0, len_scale=8.0)
+        srf = gs.SRF(model, mean=1.0, seed=20220508, mode_no=512)
+        return srf(data["unstructured_pos"], mesh_type="unstructured")
+
+    def _run_srf_structured(self, data):
+        model = gs.Exponential(dim=2, var=2.0, len_scale=8.0)
+        srf = gs.SRF(model, mean=1.0, seed=20220509, mode_no=512)
+        return srf(data["structured_pos"], mesh_type="structured")
+
+    def _run_srf_fourier(self, data):
+        model = gs.Gaussian(dim=2, var=2.0, len_scale=30.0)
+        srf = gs.SRF(
+            model,
+            generator="Fourier",
+            period=[100.0, 100.0],
+            mode_no=[32, 32],
+            seed=20220510,
+        )
+        return srf(data["structured_pos"], mesh_type="structured")
+
+    def _run_condsrf(self, data):
+        cond_pos, cond_val, target_pos = data["condsrf"]
+        model = gs.Exponential(dim=2, var=1.5, len_scale=12.0, nugget=0.05)
+        krige = gs.Krige(
+            model,
+            cond_pos,
+            cond_val,
+            exact=False,
+            cond_err=0.05,
+        )
+        cond_srf = gs.CondSRF(krige, seed=20220511, mode_no=512)
+        return cond_srf(
+            target_pos,
+            mesh_type="unstructured",
+            seed=20220512,
+            store=False,
+            krige_store=False,
+        )
diff --git a/benchmarks/tools/asv_speedup_summary.py b/benchmarks/tools/asv_speedup_summary.py
new file mode 100644
index 00000000..b3239d70
--- /dev/null
+++ b/benchmarks/tools/asv_speedup_summary.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+"""Print Rust-vs-Cython speedups from local ASV result files.
+
+The summary is optional. ASV itself remains the source of truth for benchmark
+storage and visualization.
+
+Usage:
+    python benchmarks/tools/asv_speedup_summary.py
+    python benchmarks/tools/asv_speedup_summary.py --results-dir .asv/results
+    python benchmarks/tools/asv_speedup_summary.py --include-legacy
+
+Speedup is calculated as:
+    cython_fallback_time / rust_core_time
+
+Values greater than 1.0 mean Rust was faster on the same machine, commit,
+environment, benchmark, case, and thread-count combination.
+"""
+
+from __future__ import annotations
+
+import argparse
+import itertools
+import json
+import math
+from pathlib import Path
+
+
+BACKENDS = ("cython_fallback", "rust_core")  # labels searched in ASV params
+THREAD_PREFIX = "threads_"  # prefix that marks a thread-count parameter
+LEGACY_BENCHMARKS = {  # short names skipped unless --include-legacy is given
+    "time_srf",
+    "peakmem_srf",
+    "time_variogram",
+    "peakmem_variogram",
+    "time_krige",
+    "peakmem_krige",
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--results-dir",
+        default=".asv/results",
+        type=Path,
+        help="Path to the ASV results directory.",
+    )
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="Include non-time benchmarks as ratios too.",
+    )
+    parser.add_argument(
+        "--include-legacy",
+        action="store_true",
+        help="Include removed BackendBenchmarks rows from older saved results.",
+    )
+    return parser.parse_args()
+
+
+def iter_result_files(results_dir):
+    for path in sorted(results_dir.glob("**/*.json")):
+        if path.name in {"benchmarks.json", "machine.json"}:
+            continue
+        yield path
+
+
+def load_json(path):
+    try:
+        with path.open(encoding="utf8") as handle:
+            return json.load(handle)
+    except json.JSONDecodeError:
+        return None
+
+
+def result_entry(raw_result, result_columns):
+    if isinstance(raw_result, dict):
+        return raw_result
+    if isinstance(raw_result, list) and result_columns:
+        return dict(zip(result_columns, raw_result))
+    return {"result": raw_result, "params": []}
+
+
+def is_number(value):  # True for int/float values that are not NaN
+    return isinstance(value, (int, float)) and not math.isnan(value)
+
+
+def flatten_values(values):
+    if isinstance(values, list):
+        for value in values:
+            yield from flatten_values(value)
+        return
+    yield values
+
+
+def backend_values(entry):  # NOTE(review): no caller within this script
+    result = entry.get("result")
+    params = entry.get("params") or []
+    if not isinstance(result, list) or not params:
+        return {}
+
+    values = {}
+    combinations = itertools.product(*params)
+    for combo, value in zip(combinations, flatten_values(result)):
+        if not is_number(value):
+            continue
+        combo_values = [str(item).strip("'\"") for item in combo]
+        for backend in BACKENDS:
+            if backend in combo_values:
+                values[backend] = float(value)  # last matching combo wins
+    return values
+
+
+def backend_rows(entry):
+    result = entry.get("result")
+    params = entry.get("params") or []
+    if not isinstance(result, list) or not params:
+        return []
+
+    rows = []
+    combinations = itertools.product(*params)
+    for combo, value in zip(combinations, flatten_values(result)):
+        if not is_number(value):
+            continue
+        combo_values = [str(item).strip("'\"") for item in combo]
+        backend = next(
+            (candidate for candidate in BACKENDS if candidate in combo_values),
+            None,
+        )
+        if backend is None:
+            continue
+        case_values = [
+            item
+            for item in combo_values
+            if item not in BACKENDS and not item.startswith(THREAD_PREFIX)
+        ]
+        threads = next(
+            (
+                item
+                for item in combo_values
+                if item.startswith(THREAD_PREFIX)
+            ),
+            "-",
+        )
+        rows.append(
+            {
+                "backend": backend,
+                "case": "/".join(case_values) if case_values else "-",
+                "threads": threads,
+                "value": float(value),
+            }
+        )
+    return rows
+
+
+def short_benchmark_name(name):
+    return name.rsplit(".", maxsplit=1)[-1]
+
+
+def collect_speedups(results_dir, include_all, include_legacy):
+    rows = []
+    for path in iter_result_files(results_dir):
+        data = load_json(path)
+        if not data:
+            continue
+        result_columns = data.get("result_columns", [])
+        commit = data.get("commit_hash", "unknown")[:8]
+        env_name = data.get("env_name", path.stem)
+        results = data.get("results", {})
+        for benchmark, raw_result in results.items():
+            benchmark_name = short_benchmark_name(benchmark)
+            if not include_legacy and benchmark_name in LEGACY_BENCHMARKS:
+                continue
+            if not include_all and ".time_" not in benchmark:
+                continue
+            by_case = {}
+            for row in backend_rows(result_entry(raw_result, result_columns)):
+                key = (row["case"], row["threads"])
+                by_case.setdefault(key, {})[row["backend"]] = row["value"]
+            for (case, threads), values in by_case.items():
+                cython = values.get("cython_fallback")
+                rust = values.get("rust_core")
+                if not is_number(cython) or not is_number(rust) or rust == 0:
+                    continue
+                rows.append(
+                    {
+                        "commit": commit,
+                        "env": env_name,
+                        "benchmark": benchmark_name,
+                        "case": case,
+                        "threads": threads,
+                        "cython": cython,
+                        "rust": rust,
+                        "speedup": cython / rust,
+                    }
+                )
+    return rows
+
+
+def print_table(rows):
+    if not rows:
+        print("No matching Rust-vs-Cython ASV results found.")
+        return
+
+    headers = [
+        "commit",
+        "env",
+        "benchmark",
+        "case",
+        "threads",
+        "cython",
+        "rust",
+        "speedup",
+    ]
+    table = [
+        [
+            row["commit"],
+            row["env"],
+            row["benchmark"],
+            row["case"],
+            row["threads"],
+            f"{row['cython']:.6g}",
+            f"{row['rust']:.6g}",
+            f"{row['speedup']:.3f}x",
+        ]
+        for row in rows
+    ]
+    widths = [
+        max(len(str(item)) for item in column)
+        for column in zip(headers, *table)
+    ]
+
+    def fmt(row):
+        return "  ".join(
+            str(item).ljust(width) for item, width in zip(row, widths)
+        )
+
+    print(fmt(headers))
+    print(fmt(["-" * width for width in widths]))
+    for row in table:
+        print(fmt(row))
+
+
+def main():
+    args = parse_args()
+    rows = collect_speedups(args.results_dir, args.all, args.include_legacy)
+    print_table(rows)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/tools/check_cython_openmp.py b/benchmarks/tools/check_cython_openmp.py
new file mode 100644
index 00000000..02fe73db
--- /dev/null
+++ b/benchmarks/tools/check_cython_openmp.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+"""Check whether GSTools-Cython detects OpenMP parallel support.
+
+This script verifies the active Python environment. Use it with the editable
+development environment or with an ASV-created environment.
+
+Examples:
+    python benchmarks/tools/check_cython_openmp.py
+    python benchmarks/tools/check_cython_openmp.py --fail-if-no-openmp
+    python benchmarks/tools/check_cython_openmp.py --verbose
+    .asv/env/<hash>/bin/python3 benchmarks/tools/check_cython_openmp.py
+"""
+
+from __future__ import annotations
+
+import argparse
+import importlib
+import sys
+
+
+MODULES = {  # report label -> importable module probed by check_module
+    "variogram": "gstools_cython.variogram",
+    "field": "gstools_cython.field",
+    "krige": "gstools_cython.krige",
+}
+EXPLICIT_THREAD_COUNTS = (1, 2, 4, 8, 16)  # counts passed to set_num_threads
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--fail-if-no-openmp",
+        action="store_true",
+        help="Exit with status 1 if OpenMP thread detection reports <= 1.",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print per-module default and explicit thread-count values.",
+    )
+    return parser.parse_args()
+
+
+def package_version(package_name):  # version string for the report header
+    try:
+        package = importlib.import_module(package_name)
+    except ModuleNotFoundError:  # other import errors still propagate
+        return "not installed"
+    return getattr(package, "__version__", "unknown")
+
+
+def check_module(label, module_name):
+    module = importlib.import_module(module_name)
+    default_threads = module.set_num_threads(None)
+    explicit = {
+        count: module.set_num_threads(count)
+        for count in EXPLICIT_THREAD_COUNTS
+    }
+    return label, default_threads, explicit
+
+
+def main():
+    args = parse_args()
+
+    print(f"python: {sys.executable}")
+    print(f"gstools: {package_version('gstools')}")
+    print(f"gstools_cython: {package_version('gstools_cython')}")
+    print(f"gstools_core: {package_version('gstools_core')}")
+    if args.verbose:
+        print(
+            "OpenMP evidence: default None should be >1. "
+            "Explicit values only prove the wrapper accepts the requested count."
+        )
+
+    default_values = []
+    for label, module_name in MODULES.items():
+        try:
+            label, default_threads, explicit = check_module(label, module_name)
+        except ModuleNotFoundError as err:
+            print(f"OpenMP check: FAIL. Missing module: {err.name}")
+            return 1
+        default_values.append(default_threads)
+        if args.verbose:
+            explicit_text = ", ".join(
+                f"{request}->{actual}" for request, actual in explicit.items()
+            )
+            print(f"{label} default None -> {default_threads}")
+            print(f"{label} explicit -> {explicit_text}")
+
+    if min(default_values) > 1:
+        print("OpenMP check: PASS")
+        return 0
+
+    print(
+        "OpenMP check: FAIL. GSTools-Cython reports one default thread. "
+        "Explicit thread values may be accepted by the wrapper, but this does "
+        "not prove that the compiled extension is using OpenMP."
+    )
+    return 1 if args.fail_if_no_openmp else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benchmarks/tools/install_macos_openmp_cython.py b/benchmarks/tools/install_macos_openmp_cython.py
new file mode 100644
index 00000000..09f0840c
--- /dev/null
+++ b/benchmarks/tools/install_macos_openmp_cython.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+"""Install GSTools-Cython with OpenMP inside a macOS ASV environment.
+
+This helper is intentionally macOS-specific. It is called from
+``asv.macos-openmp.conf.json`` after ASV has created a conda environment that
+contains ``llvm-openmp``.
+"""
+
+from __future__ import annotations
+
+import os
+import platform
+import stat
+import subprocess
+import sys
+from pathlib import Path
+
+
+def run(command, env=None, check=True):
+    print("+ " + " ".join(str(part) for part in command), flush=True)
+    return subprocess.run(command, check=check, env=env)
+
+
+def write_wrapper(path, force_cxx=False):
+    text = """#!/bin/bash
+set -e
+prefix="${GSTOOLS_OPENMP_PREFIX:-${CONDA_PREFIX:-}}"
+name="$(basename "$0")"
+if [[ "${GSTOOLS_FORCE_CXX:-0}" == "1" || "$name" == *++* ]]; then
+  real="${GSTOOLS_REAL_CXX:-/usr/bin/clang++}"
+else
+  real="${GSTOOLS_REAL_CC:-/usr/bin/clang}"
+fi
+is_compile=0
+for arg in "$@"; do
+  [[ "$arg" == "-c" ]] && is_compile=1
+done
+args=()
+for arg in "$@"; do
+  if [[ "$arg" == "-fopenmp" ]]; then
+    if [[ "$is_compile" == "1" ]]; then
+      args+=("-Xpreprocessor" "-fopenmp" "-I${prefix}/include")
+    else
+      args+=("-L${prefix}/lib" "-lomp" "-Wl,-rpath,${prefix}/lib")
+    fi
+  else
+    args+=("$arg")
+  fi
+done
+exec "$real" "${args[@]}"
+"""
+    if force_cxx:
+        text = """#!/bin/bash
+GSTOOLS_FORCE_CXX=1 exec "$(dirname "$0")/gstools-asv-clang-openmp" "$@"
+"""
+    path.write_text(text, encoding="utf8")
+    path.chmod(path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+
+def main():
+    if len(sys.argv) != 2:
+        print(
+            "Usage: install_macos_openmp_cython.py <asv-env-dir>",
+            file=sys.stderr,
+        )
+        return 2
+
+    if platform.system() != "Darwin":
+        print(
+            "This helper is macOS-specific. Use the default ASV config or "
+            "write an OpenMP setup for this platform.",
+            file=sys.stderr,
+        )
+        return 2
+
+    env_dir = Path(sys.argv[1]).resolve()
+    include_dir = env_dir / "include"
+    lib_dir = env_dir / "lib"
+    omp_header = include_dir / "omp.h"
+    omp_lib = lib_dir / "libomp.dylib"
+
+    if not omp_header.exists() or not omp_lib.exists():
+        print(
+            "llvm-openmp was not found in the ASV environment. Expected "
+            f"{omp_header} and {omp_lib}.",
+            file=sys.stderr,
+        )
+        return 2
+
+    cc_wrapper = env_dir / "bin" / "gstools-asv-clang-openmp"
+    cxx_wrapper = env_dir / "bin" / "gstools-asv-clang-openmp++"
+    write_wrapper(cc_wrapper)
+    write_wrapper(cxx_wrapper, force_cxx=True)
+
+    build_env = os.environ.copy()
+    build_env.update(
+        {
+            "GSTOOLS_BUILD_PARALLEL": "1",
+            "GSTOOLS_OPENMP_PREFIX": str(env_dir),
+            "CC": str(cc_wrapper),
+            "CXX": str(cxx_wrapper),
+            "CFLAGS": f"-I{include_dir}",
+            "LDFLAGS": f"-L{lib_dir}",
+        }
+    )
+
+    run(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "uninstall",
+            "-y",
+            "gstools-cython",
+            "gstools_cython",
+        ],
+        env=build_env,
+        check=False,
+    )
+    run(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            "--no-build-isolation",
+            "--no-cache-dir",
+            "--force-reinstall",
+            "--no-binary=gstools-cython",
+            "--no-deps",
+            "gstools-cython",
+        ],
+        env=build_env,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benchmarks/tools/profile_benchmark_workflows.py b/benchmarks/tools/profile_benchmark_workflows.py
new file mode 100644
index 00000000..2fd2fdff
--- /dev/null
+++ b/benchmarks/tools/profile_benchmark_workflows.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+"""Profile the representative GSTools benchmark workflows with cProfile.
+
+This is a quick measurement helper. ASV remains the source of truth for saved
+benchmark results, while this script helps identify the top cumulative Python
+call sites before making algorithmic changes.
+
+Usage:
+    cd /path/to/MPS-Tools/GSTools
+    ASV_ENV="$(ls -td .asv/env/* | head -n 1)"
+    "$ASV_ENV/bin/python" benchmarks/tools/profile_benchmark_workflows.py --list
+    "$ASV_ENV/bin/python" benchmarks/tools/profile_benchmark_workflows.py \
+        --case variogram-sampled
+    "$ASV_ENV/bin/python" benchmarks/tools/profile_benchmark_workflows.py \
+        --case krige-large \
+        --backend rust_core --threads threads_1 --limit 30
+"""
+
+from __future__ import annotations
+
+import argparse
+import cProfile
+from pathlib import Path
+import pstats
+import sys
+
+
+REPO_ROOT = Path(__file__).resolve().parents[2]  # two levels above tools/
+sys.path.insert(0, str(REPO_ROOT))  # lets ``from benchmarks import`` resolve
+sys.path.insert(0, str(REPO_ROOT / "src"))  # NOTE(review): src layout? confirm
+
+
+CASES = {
+    "variogram-full": (
+        "VariogramWorkflowBenchmarks",
+        "time_variogram_estimate",
+        ("full_900",),
+    ),
+    "variogram-sampled": (
+        "VariogramWorkflowBenchmarks",
+        "time_variogram_estimate",
+        ("sampled_5000_to_1500",),
+    ),
+    "variogram-extra-large": (
+        "VariogramWorkflowBenchmarks",
+        "time_variogram_estimate",
+        ("sampled_15000_to_4500",),
+    ),
+    "krige-small": (
+        "KrigingWorkflowBenchmarks",
+        "time_global_krige",
+        ("small_30x500",),
+    ),
+    "krige-large": (
+        "KrigingWorkflowBenchmarks",
+        "time_global_krige",
+        ("large_120x2000",),
+    ),
+    "krige-extra-large": (
+        "KrigingWorkflowBenchmarks",
+        "time_global_krige",
+        ("extra_large_360x6000",),
+    ),
+    "srf-unstructured": (
+        "RandomFieldWorkflowBenchmarks",
+        "time_field_generation",
+        ("srf_unstructured_randmeth",),
+    ),
+    "srf-structured": (
+        "RandomFieldWorkflowBenchmarks",
+        "time_field_generation",
+        ("srf_structured_randmeth",),
+    ),
+    "srf-fourier": (
+        "RandomFieldWorkflowBenchmarks",
+        "time_field_generation",
+        ("srf_structured_fourier",),
+    ),
+    "condsrf": (
+        "RandomFieldWorkflowBenchmarks",
+        "time_field_generation",
+        ("condsrf_unstructured",),
+    ),
+}
+
+THREAD_COUNTS = (  # fixed --threads choices; not read from the environment
+    "threads_1",
+    "threads_2",
+    "threads_4",
+    "threads_8",
+    "threads_16",
+)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--case",
+        default="all",
+        choices=["all", *CASES],
+        help="Workflow to profile. Defaults to all workflows.",
+    )
+    parser.add_argument(
+        "--repeat",
+        default=1,
+        type=int,
+        help="Number of times to run each selected workflow.",
+    )
+    parser.add_argument(
+        "--limit",
+        default=25,
+        type=int,
+        help="Number of cProfile rows to print per workflow.",
+    )
+    parser.add_argument(
+        "--sort",
+        default="cumtime",
+        choices=["cumtime", "tottime", "calls"],
+        help="pstats sort key.",
+    )
+    parser.add_argument(
+        "--backend",
+        default="rust_core",
+        choices=["cython_fallback", "rust_core"],
+        help="Backend label to force while profiling.",
+    )
+    parser.add_argument(
+        "--threads",
+        default="threads_1",
+        choices=THREAD_COUNTS,
+        help="GSTools thread count label.",
+    )
+    parser.add_argument(
+        "--list",
+        action="store_true",
+        help="List available workflow cases and exit.",
+    )
+    return parser.parse_args()
+
+
+def iter_selected(case):
+    if case == "all":
+        yield from CASES.items()
+        return
+    yield case, CASES[case]
+
+
+def load_suite_class(class_name):
+    try:
+        from benchmarks import benchmark_backends
+    except ModuleNotFoundError as err:
+        print(
+            "Could not import GSTools benchmark dependencies. Activate the "
+            "GSTools benchmark environment, run this script with an ASV env "
+            "Python from .asv/env/<env-id>/bin/python, or install the project "
+            f"dependencies first. Original error: {err}",
+            file=sys.stderr,
+        )
+        raise SystemExit(1) from err
+    return getattr(benchmark_backends, class_name)
+
+
+def run_case(
+    name,
+    class_name,
+    method_name,
+    params,
+    repeat,
+    limit,
+    sort,
+    backend,
+    threads,
+):
+    suite_cls = load_suite_class(class_name)
+    suite = suite_cls()
+    data = suite.setup_cache()
+    method = getattr(suite, method_name)
+
+    profiler = cProfile.Profile()
+    profiler.enable()
+    for _ in range(repeat):
+        method(data, *params, backend, threads)
+    profiler.disable()
+
+    print(f"\n== {name} [{backend}, {threads}] ==")
+    stats = pstats.Stats(profiler, stream=sys.stdout)
+    stats.strip_dirs().sort_stats(sort).print_stats(limit)
+
+
+def main():
+    args = parse_args()
+    if args.list:
+        for name in CASES:
+            print(name)
+        return
+
+    for name, (suite_cls, method_name, params) in iter_selected(args.case):
+        run_case(
+            name,
+            suite_cls,
+            method_name,
+            params,
+            args.repeat,
+            args.limit,
+            args.sort,
+            args.backend,
+            args.threads,
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 885bcd77..f39ea2ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,7 @@ doc = [
 ]
 plotting = ["matplotlib>=3.7", "pyvista>=0.40"]
 rust = ["gstools_core>=1.0.0"]
+benchmark = ["asv"]
 test = ["pytest-cov>=3"]
 lint = ["ruff"]