84 changes: 83 additions & 1 deletion README.md
@@ -4,7 +4,89 @@ This repository implements the LIFE extinction risk methodology as published in

## Running the code

- The methodology is explained in more detail in [method.md](method.md), but there is also a script set up to just do an entire run of the pipeline in `./scripts/run.sh`.
+ The methodology is explained in more detail in [method.md](method.md). The pipeline is implemented as a [Snakemake](https://snakemake.readthedocs.io/) workflow in `workflow/`.

### Prerequisites

The following tools must be available on `PATH`:

- `reclaimer` — downloads inputs from Zenodo ([quantifyearth/reclaimer](https://github.com/quantifyearth/reclaimer))
- `aoh-calc`, `aoh-habitat-process`, `aoh-collate-data`, `aoh-species-richness`, `aoh-endemism`, `aoh-validate-prevalence` — from the `aoh` Python package
- `gdalwarp` — part of GDAL

The following environment variables must be set:

| Variable | Purpose |
|----------|---------|
| `DATADIR` | Root directory for all input/output data |
| `DB_HOST`, `DB_NAME`, `DB_USER`, `DB_PASSWORD` | PostgreSQL credentials for IUCN species database |

For GBIF occurrence validation (optional):

| Variable | Purpose |
|----------|---------|
| `GBIF_USERNAME`, `GBIF_EMAIL`, `GBIF_PASSWORD` | GBIF account credentials |
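A minimal environment setup might look like the following. All values here are placeholders; substitute your own paths and credentials:

```bash
# Placeholder values: replace with your own.
export DATADIR="$HOME/life-data"   # root for all pipeline inputs/outputs
export DB_HOST="localhost"
export DB_NAME="iucn"
export DB_USER="life"
export DB_PASSWORD="change-me"
mkdir -p "$DATADIR"

# Optional, only needed for the GBIF occurrence validation target:
# export GBIF_USERNAME="..."
# export GBIF_EMAIL="..."
# export GBIF_PASSWORD="..."
```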

### Running the pipeline

```bash
# Full pipeline: delta P maps for all scenarios + model validation
snakemake --cores N all

# Delta P maps only, without validation
snakemake --cores N life

# Species richness and endemism maps (not included in 'all')
snakemake --cores N summaries

# Model validation only
snakemake --cores N validation

# GBIF occurrence validation (expensive — downloads gigabytes of data)
snakemake --cores N occurrence_validation
```

Other useful targets for incremental runs:

```bash
snakemake --cores N prepare # Download and warp all base layers
snakemake --cores N species_data # Extract species data from the database
snakemake --cores N aohs # Generate all AOH rasters and collated CSVs
```

### Precious layers

The habitat and elevation layers are expensive to generate and are treated as **precious**: they will only be rebuilt if their output files are explicitly deleted, even if upstream inputs or code have changed. This applies to:

- `habitat_layers/current/` — food-enhanced current habitat map at ~1.67km resolution
- `habitat_layers/{scenario}/` — per-scenario fractional habitat maps
- `habitat_layers/pnv/` — potential natural vegetation fractional maps
- `elevation-max.tif`, `elevation-min.tif` — warped elevation layers

To force a rebuild of any of these, delete the relevant `.sentinel` file (or the output file itself) before re-running.
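For example, to force the warped elevation layers to be regenerated (assuming the outputs live directly under `$DATADIR`, as in the list above):

```bash
# Fall back to a scratch directory if DATADIR is unset (for illustration only)
DATADIR="${DATADIR:-/tmp/life-data}"
mkdir -p "$DATADIR"
# Deleting the outputs (or their .sentinel files) marks them for rebuilding
rm -f "$DATADIR/elevation-max.tif" "$DATADIR/elevation-min.tif"
# ...then re-run the stage that owns them:
# snakemake --cores 4 prepare
```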

### Configuration

Pipeline parameters are in `config/config.yaml`:

- `taxa` — taxonomic classes to process (default: AMPHIBIA, AVES, MAMMALIA, REPTILIA)
- `scenarios` — habitat change scenarios to evaluate (default: arable, restore)
- `curve` — extinction curve exponent for delta P (default: `"0.25"`)
- `pixel_scale` — output raster resolution in degrees (default: `0.016666666666667`, i.e. 1 arc-minute)

### Inspecting the pipeline graph

The rule graph can be generated with:

```bash
snakemake --forceall --rulegraph 2>/dev/null | dot -Tsvg > graph.svg
```

Note that rules whose inputs are determined by the species-extraction checkpoint (`generate_aoh`, `calculate_delta_p`) will not appear in the static rule graph; this is a known Snakemake limitation with checkpoint-based expansion. To see the complete job graph once the species data has been extracted:

```bash
snakemake --forceall --dag 2>/dev/null | unflatten -f -l 5 | dot -Tsvg > dag.svg
```
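The underlying reason is that a checkpoint's outputs, and hence the jobs expanded from them, are only known at run time. A schematic Snakefile fragment illustrating the pattern (rule and file names here are illustrative, not the repository's actual rules):

```python
import os

# A checkpoint's output files are only known after it has run, so rules
# expanded from it cannot appear in the static --rulegraph.
checkpoint extract_species:
    output:
        directory("species_data")
    shell:
        "extract-species --output {output}"

def aoh_inputs(wildcards):
    # Resolved only after the checkpoint has executed
    ckpt = checkpoints.extract_species.get(**wildcards)
    species = glob_wildcards(os.path.join(ckpt.output[0], "{sid}.geojson")).sid
    return expand("aohs/{sid}.tif", sid=species)

rule aohs:
    input:
        aoh_inputs
```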

## Credits

46 changes: 46 additions & 0 deletions config/config.yaml
@@ -0,0 +1,46 @@
# LIFE Pipeline Configuration
# ===========================

# Taxonomic classes to process
taxa:
- AMPHIBIA
- AVES
- MAMMALIA
- REPTILIA

# Pixel scale
pixel_scale: 0.016666666666667

# Hyde projection data
hyde_projection: 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]'
hyde_pixel_scale: 0.08333333333333333

# Scenarios to run
scenarios:
- arable
- restore

# Z-curve value for delta P calculation
curve: "0.25"

# Projection for species data extraction
projection: "EPSG:4326"

# Zenodo configuration for downloading raw habitat
zenodo:
  jung_habitat:
    zenodo_id: 4058819
    filename: "iucn_habitatclassification_composite_lvl2_ver004.zip"
  jung_habitat_updates:
    zenodo_id: 4058819
    filename: "lvl2_changemasks_ver004.zip"
  jung_pnv:
    zenodo_id: 4038749
    filename: "pnv_lvl1_004.zip"
  elevation:
    zenodo_id: 5719984
    filename: "dem-100m-esri54017.tif"

# Optional inputs
optional_inputs:
  species_overrides: "overrides.csv"
65 changes: 28 additions & 37 deletions deltap/delta_p_scaled.py
@@ -4,66 +4,57 @@
```diff
 from pathlib import Path

 import pandas as pd
-import yirgacheffe.operators as yo
-from yirgacheffe.layers import RasterLayer
+import yirgacheffe as yg

 SCALE = 1e6

 def delta_p_scaled_area(
     input_path: Path,
     diff_area_map_path: Path,
-    totals_path: Path,
+    species_totals_path: Path,
     output_path: Path,
 ):
     os.makedirs(output_path.parent, exist_ok=True)

     per_taxa = [
-        RasterLayer.layer_from_file(os.path.join(input_path, x))
-        for x in sorted(input_path.glob("*.tif"))
+        yg.read_raster(x) for x in sorted(input_path.glob("*.tif"))
     ]
     if not per_taxa:
         sys.exit(f"Failed to find any per-taxa maps in {input_path}")

-    area_restore = RasterLayer.layer_from_file(diff_area_map_path)
-    total_counts = pd.read_csv(totals_path)
+    species_total_counts = pd.read_csv(species_totals_path)

-    area_restore_filter = yo.where(area_restore < SCALE, float('nan'), area_restore) / SCALE
+    diff_area = yg.read_raster(diff_area_map_path)
+    diff_area_rescaled = yg.where(diff_area < SCALE, float('nan'), diff_area / SCALE)

-    with RasterLayer.empty_raster_layer_like(
-        area_restore,
-        filename=output_path,
-        nodata=float('nan'),
-        bands=len(per_taxa) + 1
-    ) as result:
-        species_count = int(total_counts[total_counts.taxa=="all"]["count"].values[0])
+    # Process all species in total
+    total_species_count = int(species_total_counts[species_total_counts.taxa=="all"]["count"].values[0])
+    summed_layer = yg.sum(per_taxa)
+    all_scaled_filtered_layer = yg.where(
+        diff_area_rescaled != 0,
+        ((summed_layer / diff_area_rescaled) * -1.0) / total_species_count,
+        float('nan')
+    )

-        result._dataset.GetRasterBand(1).SetDescription("all") # pylint: disable=W0212
-        summed_layer = per_taxa[0]
-        for layer in per_taxa[1:]:
-            summed_layer = summed_layer + layer
+    bands = [all_scaled_filtered_layer]
+    labels = ["all"]

-        scaled_filtered_layer = yo.where(
-            area_restore_filter != 0,
-            ((summed_layer / area_restore_filter) * -1.0) / species_count,
-            float('nan')
-        )
-        scaled_filtered_layer.parallel_save(result, band=1)
+    # Now per taxa
+    for inlayer in per_taxa:
+        # get the taxa from the filename
+        _, name = os.path.split(inlayer.name)
+        taxa = name[:-4]
+        labels.append(taxa)

-        for idx, inlayer in enumerate(per_taxa):
-            assert inlayer.name
-            _, name = os.path.split(inlayer.name)
-            taxa = name[:-4]
-            species_count = int(total_counts[total_counts.taxa==taxa]["count"].values[0])
-            result._dataset.GetRasterBand(idx + 2).SetDescription(taxa) # pylint: disable=W0212
-            scaled_filtered_layer = yo.where(
-                area_restore_filter != 0,
-                ((inlayer / area_restore_filter) * -1.0) / species_count,
-                float('nan')
-            )
-            scaled_filtered_layer.parallel_save(result, band=idx + 2)
+        taxa_species_count = int(species_total_counts[species_total_counts.taxa==taxa]["count"].values[0])
+        scaled_filtered_layer = yg.where(
+            diff_area_rescaled != 0,
+            ((inlayer / diff_area_rescaled) * -1.0) / taxa_species_count,
+            float('nan')
+        )
+        bands.append(scaled_filtered_layer)
+
+    yg.to_geotiff(output_path, bands, labels, nodata=float('nan'))

 def main() -> None:
     parser = argparse.ArgumentParser(description="Scale final results for publication.")
```
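The per-band scaling divides each layer by a species count looked up from the totals CSV; that lookup pattern can be exercised on its own with a made-up table (the dataframe below is illustrative, not real data):

```python
import pandas as pd

# Mimics the CSV read from species_totals_path: one row per taxa plus "all"
species_total_counts = pd.DataFrame({
    "taxa": ["all", "AMPHIBIA", "AVES"],
    "count": [3000, 1000, 2000],
})

def count_for(taxa: str) -> int:
    # Same indexing pattern used in delta_p_scaled_area above
    return int(species_total_counts[species_total_counts.taxa == taxa]["count"].values[0])

print(count_for("all"))   # 3000
print(count_for("AVES"))  # 2000
```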