From b5a1e56a3ea6d9cd326d1f29fea115bf3f0a9dba Mon Sep 17 00:00:00 2001
From: Greg Partin <gpartin@gmail.com>
Date: Wed, 11 Mar 2026 07:52:03 -0700
Subject: [PATCH 1/4] Add wave equation and Klein-Gordon equation benchmark
 tasks

---
 README.md                                     |   2 +
 WAVE_BENCHMARK.md                             | 109 +++++++++
 pdebench/data_gen/configs/wave.yaml           |  29 +++
 pdebench/data_gen/gen_wave.py                 | 170 ++++++++++++++
 pdebench/data_gen/src/sim_wave.py             | 221 ++++++++++++++++++
 .../config/args/config_klein_gordon.yaml      |  30 +++
 pdebench/models/config/args/config_wave.yaml  |  30 +++
 7 files changed, 591 insertions(+)
 create mode 100644 WAVE_BENCHMARK.md
 create mode 100644 pdebench/data_gen/configs/wave.yaml
 create mode 100644 pdebench/data_gen/gen_wave.py
 create mode 100644 pdebench/data_gen/src/sim_wave.py
 create mode 100644 pdebench/models/config/args/config_klein_gordon.yaml
 create mode 100644 pdebench/models/config/args/config_wave.yaml

diff --git a/README.md b/README.md
index c354f6f..2f5c673 100644
--- a/README.md
+++ b/README.md
@@ -140,6 +140,8 @@ The data generation codes are contained in [data_gen](./pdebench/data_gen):
 - `gen_radial_dam_break.py` to generate the 2D shallow-water data.
 - `gen_ns_incomp.py` to generate the 2D incompressible inhomogeneous
   Navier-Stokes data.
+- `gen_wave.py` to generate 1D/2D wave equation and Klein-Gordon equation data.
+  See [WAVE_BENCHMARK.md](WAVE_BENCHMARK.md) for details and baseline results.
 - `plot.py` to plot the generated data.
 - `uploader.py` to upload the generated data to the data repository.
 - `.env` is the environment data to store Dataverse URL and API token to upload
diff --git a/WAVE_BENCHMARK.md b/WAVE_BENCHMARK.md
new file mode 100644
index 0000000..1d60da5
--- /dev/null
+++ b/WAVE_BENCHMARK.md
@@ -0,0 +1,109 @@
+# Wave Equation & Klein-Gordon Benchmark
+
+**Contributed by:** Greg Partin ([@gpartin](https://github.com/gpartin))
+
+New PDE benchmark tasks for PDEBench: the **wave equation** and **Klein-Gordon equation** in 1D and 2D with periodic boundary conditions.
+
+## Equations
+
+**Wave equation:**
+$$\frac{\partial^2 u}{\partial t^2} = c^2 \nabla^2 u$$
+
+**Klein-Gordon equation:**
+$$\frac{\partial^2 u}{\partial t^2} = c^2 \nabla^2 u - \chi^2 u$$
+
+Both solved on periodic domain $[0, 1]^d$ with random Fourier initial conditions.
+
+## Data Generation
+
+```bash
+# 1D wave equation, c=1.0, 1000 samples
+python -m pdebench.data_gen.gen_wave sim.c=1.0 sim.chi=0.0
+
+# Vary wave speed
+python -m pdebench.data_gen.gen_wave sim.c=0.1
+python -m pdebench.data_gen.gen_wave sim.c=0.4
+python -m pdebench.data_gen.gen_wave sim.c=2.0
+
+# Klein-Gordon with mass parameter
+python -m pdebench.data_gen.gen_wave sim.c=1.0 sim.chi=0.5
+python -m pdebench.data_gen.gen_wave sim.c=1.0 sim.chi=1.0
+python -m pdebench.data_gen.gen_wave sim.c=1.0 sim.chi=2.0
+python -m pdebench.data_gen.gen_wave sim.c=1.0 sim.chi=5.0
+
+# 2D wave equation
+python -m pdebench.data_gen.gen_wave sim.c=1.0 sim.ndim=2 sim.xdim=128
+```
+
+## Solver Details
+
+- **Method**: Leapfrog (Verlet) finite-difference, second-order in space and time
+- **Boundary conditions**: Periodic
+- **CFL**: dt = 0.5 * dx / (c * sqrt(ndim))
+- **Spatial resolution**: 1024 (1D), 128×128 (2D)
+- **Temporal resolution**: 201 snapshots over t ∈ [0, 2]
+- **Initial conditions**: Random superposition of 5 Fourier modes, normalized to unit max amplitude
+
+## HDF5 Format
+
+Output follows PDEBench Format 1A:
+- `"tensor"`: shape `(n_samples, t, x)` for 1D, `(n_samples, t, x, y)` for 2D
+- `"x-coordinate"`: spatial grid
+- `"t-coordinate"`: temporal grid
+
+## Baseline Results
+
+### 1D Wave Equation — FNO vs UNet
+
+Trained with autoregressive 41-step rollout, `initial_step=10`, spatial resolution 256 (downsampled 4×), temporal resolution 51 (downsampled 4×), 100 epochs.
+
+| Wave Speed c | FNO nRMSE | UNet nRMSE | Solver nRMSE |
+|:---:|:---:|:---:|:---:|
+| 0.1 | 0.101 | 0.537 | 1.68 × 10⁻⁴ |
+| 0.4 | 0.112 | 0.978 | 6.86 × 10⁻⁴ |
+| 1.0 | 0.099 | 0.934 | 1.71 × 10⁻³ |
+| 2.0 | 0.095 | 0.632 | 3.33 × 10⁻³ |
+
+- **FNO** achieves ~10% nRMSE, stable across wave speeds (spectral convolutions match wave dynamics)
+- **UNet** fails catastrophically (54–98% nRMSE) — spatial convolutions cannot capture global wave propagation
+- **Solver** achieves near-machine-precision (O(c² Δt²) discretization error)
+
+### Klein-Gordon Cross-Parameter Generalization
+
+FNO trained on one χ value, tested on all four. This tests whether neural surrogates can extrapolate across the mass parameter.
+
+| Train χ \ Test χ | 0.5 | 1.0 | 2.0 | 5.0 |
+|:---:|:---:|:---:|:---:|:---:|
+| **0.5** | **0.093** | 0.096 | 0.174 | 0.891 |
+| **1.0** | 0.097 | **0.094** | 0.154 | 0.877 |
+| **2.0** | 0.170 | 0.150 | **0.095** | 0.789 |
+| **5.0** | 0.783 | 0.771 | 0.707 | **0.098** |
+
+**Key findings:**
+1. **Small χ changes (≤2×)**: 1.0–1.9× degradation (FNO extrapolates acceptably)
+2. **Large χ changes (≥5×)**: 7–10× degradation (catastrophic failure)
+3. **Physical interpretation**: High χ transitions solutions from propagating to evanescent regime — a qualitative phase change that FNO cannot bridge
+
+This generalization matrix provides a systematic test of neural PDE solver robustness to parameter variation.
+
+## Why These Benchmarks Are Useful
+
+1. **Wave equation** is a fundamental hyperbolic PDE missing from the current PDEBench lineup (which focuses on diffusion, reaction-diffusion, advection, and Navier-Stokes)
+2. **Klein-Gordon** adds a parametric family dimension — the mass parameter χ smoothly interpolates between wave-like and dispersive/evanescent regimes
+3. **Cross-parameter generalization matrix** is a new benchmark format that tests a practical deployment scenario: can a model trained at one parameter value predict at another?
+4. **UNet failure** on wave equations is a clear architectural diagnostic — useful for method developers
+
+## Model Configurations
+
+- FNO: `pdebench/models/config/args/config_wave.yaml`
+- Klein-Gordon: `pdebench/models/config/args/config_klein_gordon.yaml`
+
+## Files Added
+
+```
+pdebench/data_gen/gen_wave.py                      # Data generation script
+pdebench/data_gen/src/sim_wave.py                   # Wave/KG simulator
+pdebench/data_gen/configs/wave.yaml                 # Hydra config
+pdebench/models/config/args/config_wave.yaml        # FNO/UNet training config
+pdebench/models/config/args/config_klein_gordon.yaml # KG training config
+```
diff --git a/pdebench/data_gen/configs/wave.yaml b/pdebench/data_gen/configs/wave.yaml
new file mode 100644
index 0000000..2dc6efb
--- /dev/null
+++ b/pdebench/data_gen/configs/wave.yaml
@@ -0,0 +1,29 @@
+# @package _global_
+
+defaults:
+  - _self_
+  - mode: default.yaml
+
+work_dir: ${hydra:runtime.cwd}
+data_dir: data
+upload: false
+
+# Output filename (without extension)
+output_path: 1D_Wave_c1.0
+
+name: 1d_wave
+
+sim:
+  c: 1.0
+  chi: 0.0       # 0 = wave equation, >0 = Klein-Gordon
+  t: 2.0
+  tdim: 201
+  xdim: 1024
+  ndim: 1
+  n_modes: 5
+  seed: "???"
+
+plot:
+  t_idx: 1.0
+  dim: 1
+  channel_idx: 0
diff --git a/pdebench/data_gen/gen_wave.py b/pdebench/data_gen/gen_wave.py
new file mode 100644
index 0000000..afe6474
--- /dev/null
+++ b/pdebench/data_gen/gen_wave.py
@@ -0,0 +1,170 @@
+"""
+Data generation script for the wave equation and Klein-Gordon equation.
+
+Wave equation: d2u/dt2 = c^2 * Lap(u)
+Klein-Gordon:  d2u/dt2 = c^2 * Lap(u) - chi^2 * u
+
+Uses leapfrog finite-difference integration with periodic BCs on [0,1]^d.
+Initial conditions are random Fourier superpositions.
+
+Usage:
+    python gen_wave.py                        # Default: 1D wave, c=1.0
+    python gen_wave.py sim.c=0.4              # Change wave speed
+    python gen_wave.py sim.chi=1.0            # Klein-Gordon with mass=1
+    python gen_wave.py sim.ndim=2 sim.xdim=128  # 2D wave equation
+
+Output format: HDF5 with PDEBench Format 1A
+    - "tensor": shape (n_samples, t, x) for 1D or (n_samples, t, x, y) for 2D
+    - "x-coordinate": spatial grid coordinates
+    - "t-coordinate": temporal grid coordinates
+"""
+
+from __future__ import annotations
+
+import logging
+import multiprocessing as mp
+import os
+import time
+from itertools import repeat
+from pathlib import Path
+
+import dotenv
+import h5py
+import hydra
+import numpy as np
+from hydra.utils import get_original_cwd
+from omegaconf import DictConfig, OmegaConf
+
+dotenv.load_dotenv()
+
+num_threads = "4"
+os.environ["OMP_NUM_THREADS"] = num_threads
+os.environ["MKL_NUM_THREADS"] = num_threads
+os.environ["OPENBLAS_NUM_THREADS"] = num_threads
+
+log = logging.getLogger(__name__)
+
+
+def simulator(config: DictConfig, seed: int) -> None:
+    """Generate one sample and write to HDF5."""
+    from pdebench.data_gen.src import sim_wave
+
+    sim_config = dict(config.sim)
+    sim_config["seed"] = seed
+
+    log.info(f"Starting seed {seed}")
+    start_time = time.time()
+
+    sim = sim_wave.WaveSimulator(**sim_config)
+    data_sample = sim.generate_sample()  # shape: (Nt, Nx) or (Nt, Nx, Nx)
+
+    duration = time.time() - start_time
+    log.info(f"Seed {seed} took {duration:.2f}s")
+
+    seed_str = str(seed).zfill(4)
+
+    while True:
+        try:
+            with h5py.File(str(config.output_path), "a") as f:
+                f.create_dataset(
+                    f"{seed_str}/data",
+                    data=data_sample,
+                    dtype="float32",
+                    compression="lzf",
+                )
+                f.create_dataset(
+                    f"{seed_str}/grid/x",
+                    data=sim.x.astype(np.float32),
+                    dtype="float32",
+                    compression="lzf",
+                )
+                f.create_dataset(
+                    f"{seed_str}/grid/t",
+                    data=sim.t_save.astype(np.float32),
+                    dtype="float32",
+                    compression="lzf",
+                )
+                seed_group = f[seed_str]
+                seed_group.attrs["config"] = OmegaConf.to_yaml(config)
+        except OSError:
+            time.sleep(0.1)
+            continue
+        else:
+            break
+
+
+def combine_to_tensor_format(
+    h5_path: Path,
+    output_path: Path,
+    n_samples: int,
+) -> None:
+    """
+    Convert per-seed HDF5 to PDEBench Format 1A
+    (single "tensor" dataset with batch dimension).
+    """
+    with h5py.File(str(h5_path), "r") as f_in:
+        # Get shape from first sample
+        first_key = str(0).zfill(4)
+        sample_shape = f_in[f"{first_key}/data"].shape
+
+        x_coord = np.array(f_in[f"{first_key}/grid/x"])
+        t_coord = np.array(f_in[f"{first_key}/grid/t"])
+
+        # Allocate combined tensor
+        full_shape = (n_samples, *sample_shape)
+
+        with h5py.File(str(output_path), "w") as f_out:
+            tensor = f_out.create_dataset(
+                "tensor",
+                shape=full_shape,
+                dtype="float32",
+                compression="lzf",
+            )
+            for i in range(n_samples):
+                key = str(i).zfill(4)
+                if key in f_in:
+                    tensor[i] = f_in[f"{key}/data"]
+
+            f_out.create_dataset("x-coordinate", data=x_coord)
+            f_out.create_dataset("t-coordinate", data=t_coord)
+
+    log.info(f"Combined tensor format saved to {output_path} with shape {full_shape}")
+
+
+@hydra.main(config_path="configs/", config_name="wave", version_base="1.1")
+def main(config: DictConfig):
+    """Generate wave equation dataset using Hydra config."""
+    temp_path = Path.cwd()
+    os.chdir(get_original_cwd())
+    os.chdir(temp_path)
+
+    work_path = Path(config.work_dir)
+    output_dir: Path = work_path / config.data_dir
+    output_dir.mkdir(exist_ok=True, parents=True)
+
+    chi_str = f"_chi{config.sim.chi}" if config.sim.chi > 0 else ""
+    base_name = f"{config.sim.ndim}D_Wave_c{config.sim.c}{chi_str}"
+    config.output_path = str((output_dir / base_name).with_suffix(".h5"))
+
+    num_samples = 1000
+
+    log.info(f"Generating {num_samples} samples -> {config.output_path}")
+    log.info(f"PDE: d2u/dt2 = {config.sim.c}^2 * Lap(u) - {config.sim.chi}^2 * u")
+
+    # Generate samples in parallel
+    pool = mp.Pool(mp.cpu_count())
+    seeds = list(range(num_samples))
+    pool.starmap(simulator, zip(repeat(config), seeds))
+    pool.close()
+    pool.join()
+
+    # Convert to PDEBench Format 1A ("tensor" key)
+    raw_path = Path(config.output_path)
+    tensor_path = raw_path.parent / f"{base_name}_tensor.hdf5"
+    combine_to_tensor_format(raw_path, tensor_path, num_samples)
+
+    log.info("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pdebench/data_gen/src/sim_wave.py b/pdebench/data_gen/src/sim_wave.py
new file mode 100644
index 0000000..9a76a3b
--- /dev/null
+++ b/pdebench/data_gen/src/sim_wave.py
@@ -0,0 +1,221 @@
+"""
+Simulator for the 1D/2D wave equation and Klein-Gordon equation.
+
+Wave equation:
+    d2u/dt2 = c^2 * Lap(u)
+
+Klein-Gordon equation:
+    d2u/dt2 = c^2 * Lap(u) - chi^2 * u
+
+Solved using second-order leapfrog (Verlet) finite-difference integration
+with periodic boundary conditions on domain [0, 1]^d.
+
+Initial conditions are random superpositions of Fourier modes.
+Analytical solutions are computed via Fourier decomposition for validation.
+"""
+
+from __future__ import annotations
+
+import logging
+
+import numpy as np
+
+log = logging.getLogger(__name__)
+
+
+class WaveSimulator:
+    """
+    Simulator for the wave equation in 1D or 2D.
+
+    Uses leapfrog (Verlet) time integration, second-order in both
+    space and time. Periodic boundary conditions.
+
+    :param c: wave speed
+    :param chi: mass parameter (0 = wave equation, >0 = Klein-Gordon)
+    :param t: stop time
+    :param tdim: number of time steps to save
+    :param xdim: number of spatial grid points per dimension
+    :param ndim: spatial dimensionality (1 or 2)
+    :param n_modes: number of Fourier modes in IC generation
+    :param seed: random seed for IC generation
+    """
+
+    def __init__(
+        self,
+        c: float = 1.0,
+        chi: float = 0.0,
+        t: float = 2.0,
+        tdim: int = 201,
+        xdim: int = 1024,
+        ndim: int = 1,
+        n_modes: int = 5,
+        seed: int = 0,
+    ):
+        self.c = c
+        self.chi = chi
+        self.T = t
+        self.Nt = tdim
+        self.Nx = xdim
+        self.ndim = ndim
+        self.n_modes = n_modes
+        self.seed = seed
+
+        # Spatial grid
+        self.dx = 1.0 / self.Nx
+        if ndim == 1:
+            self.x = np.linspace(0, 1, self.Nx, endpoint=False, dtype=np.float64)
+        elif ndim == 2:
+            x1d = np.linspace(0, 1, self.Nx, endpoint=False, dtype=np.float64)
+            self.x = x1d  # store 1D coordinate
+            self.X, self.Y = np.meshgrid(x1d, x1d, indexing="ij")
+        else:
+            errmsg = f"ndim must be 1 or 2, got {ndim}"
+            raise ValueError(errmsg)
+
+        # Time grid
+        self.t_save = np.linspace(0, self.T, self.Nt, dtype=np.float64)
+
+        # CFL condition: dt < dx / (c * sqrt(ndim))
+        cfl_dt = self.dx / (self.c * np.sqrt(self.ndim)) * 0.5
+        self.dt = cfl_dt
+        self.n_steps = int(np.ceil(self.T / self.dt))
+        self.dt = self.T / self.n_steps  # adjust for exact final time
+
+        log.info(
+            "WaveSimulator: c=%.2f, chi=%.2f, ndim=%d, Nx=%d, Nt=%d, dt=%.2e",
+            self.c,
+            self.chi,
+            self.ndim,
+            self.Nx,
+            self.Nt,
+            self.dt,
+        )
+
+    def _random_fourier_ic_1d(self, rng: np.random.Generator) -> np.ndarray:
+        """Generate random Fourier initial condition in 1D."""
+        u0 = np.zeros(self.Nx, dtype=np.float64)
+        for _ in range(self.n_modes):
+            k = rng.integers(1, self.Nx // 4)
+            amp = rng.uniform(0.1, 1.0)
+            phase = rng.uniform(0, 2 * np.pi)
+            u0 += amp * np.sin(2 * np.pi * k * self.x + phase)
+        # Normalize to unit max amplitude
+        u0 /= np.max(np.abs(u0)) + 1e-12
+        return u0
+
+    def _random_fourier_ic_2d(self, rng: np.random.Generator) -> np.ndarray:
+        """Generate random Fourier initial condition in 2D."""
+        u0 = np.zeros((self.Nx, self.Nx), dtype=np.float64)
+        for _ in range(self.n_modes):
+            kx = rng.integers(1, self.Nx // 8)
+            ky = rng.integers(1, self.Nx // 8)
+            amp = rng.uniform(0.1, 1.0)
+            phase = rng.uniform(0, 2 * np.pi)
+            u0 += amp * np.sin(2 * np.pi * (kx * self.X + ky * self.Y) + phase)
+        u0 /= np.max(np.abs(u0)) + 1e-12
+        return u0
+
+    def _laplacian_1d(self, u: np.ndarray) -> np.ndarray:
+        """Periodic Laplacian in 1D via finite differences."""
+        return (np.roll(u, 1) + np.roll(u, -1) - 2 * u) / self.dx**2
+
+    def _laplacian_2d(self, u: np.ndarray) -> np.ndarray:
+        """Periodic Laplacian in 2D via finite differences."""
+        return (
+            np.roll(u, 1, axis=0)
+            + np.roll(u, -1, axis=0)
+            + np.roll(u, 1, axis=1)
+            + np.roll(u, -1, axis=1)
+            - 4 * u
+        ) / self.dx**2
+
+    def generate_sample(self) -> np.ndarray:
+        """
+        Generate one sample of the wave/Klein-Gordon equation.
+
+        Returns:
+            np.ndarray: Solution array of shape (Nt, Nx) for 1D
+                        or (Nt, Nx, Nx) for 2D, dtype float32.
+        """
+        rng = np.random.default_rng(self.seed)
+
+        # Initial condition
+        if self.ndim == 1:
+            u0 = self._random_fourier_ic_1d(rng)
+            laplacian = self._laplacian_1d
+        else:
+            u0 = self._random_fourier_ic_2d(rng)
+            laplacian = self._laplacian_2d
+
+        # Leapfrog integration
+        u_prev = u0.copy()
+        u_curr = u0.copy()  # du/dt = 0 at t=0
+
+        # First half-step (Taylor expansion for du/dt=0)
+        accel = self.c**2 * laplacian(u0) - self.chi**2 * u0
+        u_curr = u0 + 0.5 * self.dt**2 * accel
+
+        # Save schedule
+        if self.ndim == 1:
+            result = np.zeros((self.Nt, self.Nx), dtype=np.float32)
+        else:
+            result = np.zeros((self.Nt, self.Nx, self.Nx), dtype=np.float32)
+
+        result[0] = u0.astype(np.float32)
+        save_idx = 1
+        save_interval = max(1, self.n_steps // (self.Nt - 1))
+
+        c2dt2 = self.c**2 * self.dt**2
+        chi2dt2 = self.chi**2 * self.dt**2
+
+        for step in range(1, self.n_steps + 1):
+            lap = laplacian(u_curr)
+            u_next = 2 * u_curr - u_prev + c2dt2 * lap - chi2dt2 * u_curr
+            u_prev = u_curr
+            u_curr = u_next
+
+            if save_idx < self.Nt and step % save_interval == 0:
+                result[save_idx] = u_curr.astype(np.float32)
+                save_idx += 1
+
+        # Fill remaining if rounding caused fewer saves
+        while save_idx < self.Nt:
+            result[save_idx] = u_curr.astype(np.float32)
+            save_idx += 1
+
+        return result
+
+
+def analytical_solution_1d(
+    x: np.ndarray,
+    t: np.ndarray,
+    u0: np.ndarray,
+    c: float,
+    chi: float = 0.0,
+) -> np.ndarray:
+    """
+    Compute analytical solution for 1D wave/KG equation via FFT.
+
+    The exact solution decomposes u0 into Fourier modes. Each mode k
+    oscillates at frequency omega_k = sqrt(c^2 * (2*pi*k)^2 + chi^2).
+
+    :param x: spatial grid (Nx,)
+    :param t: time points (Nt,)
+    :param u0: initial condition (Nx,)
+    :param c: wave speed
+    :param chi: mass parameter
+    :return: solution array (Nt, Nx)
+    """
+    Nx = len(x)
+    u0_hat = np.fft.fft(u0)
+    k = np.fft.fftfreq(Nx, d=1.0 / Nx)  # wavenumbers
+
+    omega = np.sqrt(c**2 * (2 * np.pi * k) ** 2 + chi**2 + 0j).real
+
+    result = np.zeros((len(t), Nx), dtype=np.float64)
+    for i, ti in enumerate(t):
+        # du/dt(0) = 0 => only cosine term
+        u_hat_t = u0_hat * np.cos(omega * ti)
+        result[i] = np.fft.ifft(u_hat_t).real
+
+    return result
diff --git a/pdebench/models/config/args/config_klein_gordon.yaml b/pdebench/models/config/args/config_klein_gordon.yaml
new file mode 100644
index 0000000..1118be2
--- /dev/null
+++ b/pdebench/models/config/args/config_klein_gordon.yaml
@@ -0,0 +1,30 @@
+# Klein-Gordon Equation - 1D
+# d2u/dt2 = c^2 * d2u/dx2 - chi^2 * u, periodic BCs, domain [0,1]
+# Datasets: 1D_Wave_c1.0_chi{0.5,1.0,2.0,5.0}_tensor.hdf5
+model_name: "FNO"
+if_training: True
+continue_training: False
+batch_size: 16
+unroll_step: 20
+t_train: 201
+model_update: 1
+filename: "1D_Wave_c1.0_chi1.0_tensor.hdf5"
+single_file: True
+reduced_resolution: 4
+reduced_resolution_t: 4
+reduced_batch: 1
+epochs: 100
+learning_rate: 1.e-3
+num_workers: 0
+# Unet
+in_channels: 1
+out_channels: 1
+ar_mode: True
+pushforward: True
+# FNO
+num_channels: 1
+modes: 12
+width: 20
+scheduler_step: 50
+scheduler_gamma: 0.5
+initial_step: 10
diff --git a/pdebench/models/config/args/config_wave.yaml b/pdebench/models/config/args/config_wave.yaml
new file mode 100644
index 0000000..7d4c410
--- /dev/null
+++ b/pdebench/models/config/args/config_wave.yaml
@@ -0,0 +1,30 @@
+# Wave Equation - 1D
+# d2u/dt2 = c^2 * d2u/dx2, periodic BCs, domain [0,1]
+# Datasets: 1D_Wave_c{0.1,0.4,1.0,2.0}_tensor.hdf5
+model_name: "FNO"
+if_training: True
+continue_training: False
+batch_size: 16
+unroll_step: 20
+t_train: 201
+model_update: 1
+filename: "1D_Wave_c1.0_tensor.hdf5"
+single_file: True
+reduced_resolution: 4
+reduced_resolution_t: 4
+reduced_batch: 1
+epochs: 100
+learning_rate: 1.e-3
+num_workers: 0
+# Unet
+in_channels: 1
+out_channels: 1
+ar_mode: True
+pushforward: True
+# FNO
+num_channels: 1
+modes: 12
+width: 20
+scheduler_step: 50
+scheduler_gamma: 0.5
+initial_step: 10

From 57ddc30f3a267ffdaabb9c6eed596a19f80c59ff Mon Sep 17 00:00:00 2001
From: Greg Partin <gpartin@gmail.com>
Date: Wed, 11 Mar 2026 11:14:02 -0700
Subject: [PATCH 2/4] Address review: fix save timing, add 2D y-coordinate,
 validate seeds

- Save before advance in leapfrog loop to fix off-by-one snapshot timing
- Precompute save steps from t_save for exact time alignment
- Write grid/y for 2D simulations in per-seed HDF5
- Copy y-coordinate into combined tensor format for 2D
- Raise KeyError for missing seeds instead of silent zero-fill
- Remove unused 'upload' config field from wave.yaml
---
 pdebench/data_gen/configs/wave.yaml |  1 -
 pdebench/data_gen/gen_wave.py       | 22 ++++++++++++++++++++--
 pdebench/data_gen/src/sim_wave.py   | 22 +++++++++++-----------
 3 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/pdebench/data_gen/configs/wave.yaml b/pdebench/data_gen/configs/wave.yaml
index 2dc6efb..9dd50c6 100644
--- a/pdebench/data_gen/configs/wave.yaml
+++ b/pdebench/data_gen/configs/wave.yaml
@@ -6,7 +6,6 @@ defaults:
 
 work_dir: ${hydra:runtime.cwd}
 data_dir: data
-upload: false
 
 # Output filename (without extension)
 output_path: 1D_Wave_c1.0
diff --git a/pdebench/data_gen/gen_wave.py b/pdebench/data_gen/gen_wave.py
index afe6474..70d9782 100644
--- a/pdebench/data_gen/gen_wave.py
+++ b/pdebench/data_gen/gen_wave.py
@@ -84,6 +84,13 @@ def simulator(config: DictConfig, seed: int) -> None:
                     dtype="float32",
                     compression="lzf",
                 )
+                if config.sim.ndim == 2:
+                    f.create_dataset(
+                        f"{seed_str}/grid/y",
+                        data=sim.x.astype(np.float32),
+                        dtype="float32",
+                        compression="lzf",
+                    )
                 seed_group = f[seed_str]
                 seed_group.attrs["config"] = OmegaConf.to_yaml(config)
         except OSError:
@@ -109,6 +116,9 @@ def combine_to_tensor_format(
 
         x_coord = np.array(f_in[f"{first_key}/grid/x"])
         t_coord = np.array(f_in[f"{first_key}/grid/t"])
+        y_coord = None
+        if f"{first_key}/grid/y" in f_in:
+            y_coord = np.array(f_in[f"{first_key}/grid/y"])
 
         # Allocate combined tensor
         full_shape = (n_samples, *sample_shape)
@@ -122,11 +132,19 @@ def combine_to_tensor_format(
             )
             for i in range(n_samples):
                 key = str(i).zfill(4)
-                if key in f_in:
-                    tensor[i] = f_in[f"{key}/data"]
+                if key not in f_in:
+                    msg = (
+                        f"Missing seed {key} in {h5_path}; "
+                        f"expected {n_samples} consecutive seeds "
+                        f"0000..{str(n_samples - 1).zfill(4)}"
+                    )
+                    raise KeyError(msg)
+                tensor[i] = f_in[f"{key}/data"]
 
             f_out.create_dataset("x-coordinate", data=x_coord)
             f_out.create_dataset("t-coordinate", data=t_coord)
+            if y_coord is not None:
+                f_out.create_dataset("y-coordinate", data=y_coord)
 
     log.info(f"Combined tensor format saved to {output_path} with shape {full_shape}")
 
diff --git a/pdebench/data_gen/src/sim_wave.py b/pdebench/data_gen/src/sim_wave.py
index 9a76a3b..ebd8422 100644
--- a/pdebench/data_gen/src/sim_wave.py
+++ b/pdebench/data_gen/src/sim_wave.py
@@ -162,27 +162,27 @@ def generate_sample(self) -> np.ndarray:
             result = np.zeros((self.Nt, self.Nx, self.Nx), dtype=np.float32)
 
         result[0] = u0.astype(np.float32)
-        save_idx = 1
-        save_interval = max(1, self.n_steps // (self.Nt - 1))
+
+        # Precompute which integration step maps to each t_save entry
+        save_step_to_idx = {}
+        for k in range(1, self.Nt):
+            target_step = int(round(self.t_save[k] / self.dt))
+            target_step = min(target_step, self.n_steps)
+            save_step_to_idx[target_step] = k
 
         c2dt2 = self.c**2 * self.dt**2
         chi2dt2 = self.chi**2 * self.dt**2
 
         for step in range(1, self.n_steps + 1):
+            # Save before advance: u_curr is at time step * dt
+            if step in save_step_to_idx:
+                result[save_step_to_idx[step]] = u_curr.astype(np.float32)
+
             lap = laplacian(u_curr)
             u_next = 2 * u_curr - u_prev + c2dt2 * lap - chi2dt2 * u_curr
             u_prev = u_curr
             u_curr = u_next
 
-            if save_idx < self.Nt and step % save_interval == 0:
-                result[save_idx] = u_curr.astype(np.float32)
-                save_idx += 1
-
-        # Fill remaining if rounding caused fewer saves
-        while save_idx < self.Nt:
-            result[save_idx] = u_curr.astype(np.float32)
-            save_idx += 1
-
         return result
 
 

From e703ff025d62ba072a90b7527eb99982e4f9b5b8 Mon Sep 17 00:00:00 2001
From: Greg Partin <gpartin@gmail.com>
Date: Thu, 19 Mar 2026 12:00:45 -0700
Subject: [PATCH 3/4] test: add pytest suite for wave/Klein-Gordon simulator

10 tests covering:
- 1D/2D output shape and dtype (float32)
- Finite output (no NaN/Inf)
- Invalid ndim raises ValueError
- Klein-Gordon chi>0 runs in 1D and 2D
- Leapfrog vs analytical solution nRMSE < 1% (wave and KG)
- analytical_solution_1d returns u0 at t=0

All tests pass in 0.33s.
---
 tests/test_sim_wave.py | 143 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 tests/test_sim_wave.py

diff --git a/tests/test_sim_wave.py b/tests/test_sim_wave.py
new file mode 100644
index 0000000..733f331
--- /dev/null
+++ b/tests/test_sim_wave.py
@@ -0,0 +1,143 @@
+"""Tests for the 1D/2D wave equation and Klein-Gordon simulator."""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from pdebench.data_gen.src.sim_wave import WaveSimulator, analytical_solution_1d
+
+
+# ---------------------------------------------------------------------------
+# Output shape and dtype
+# ---------------------------------------------------------------------------
+
+
+def test_wave_1d_output_shape():
+    """generate_sample() for 1D should return (Nt, Nx) float32."""
+    sim = WaveSimulator(c=1.0, chi=0.0, xdim=64, tdim=21, t=1.0, seed=0)
+    result = sim.generate_sample()
+    assert result.shape == (21, 64), f"Expected (21, 64), got {result.shape}"
+    assert result.dtype == np.float32
+
+
+def test_wave_2d_output_shape():
+    """generate_sample() for 2D should return (Nt, Nx, Nx) float32."""
+    sim = WaveSimulator(c=1.0, chi=0.0, xdim=32, tdim=11, t=0.5, ndim=2, seed=0)
+    result = sim.generate_sample()
+    assert result.shape == (11, 32, 32), f"Expected (11, 32, 32), got {result.shape}"
+    assert result.dtype == np.float32
+
+
+def test_wave_output_finite():
+    """All output values should be finite (no NaN or Inf)."""
+    sim = WaveSimulator(c=1.0, chi=0.0, xdim=64, tdim=21, t=1.0, seed=7)
+    result = sim.generate_sample()
+    assert np.isfinite(result).all(), "Output contains NaN or Inf"
+
+
+def test_invalid_ndim_raises():
+    """ndim=3 should raise ValueError."""
+    with pytest.raises(ValueError, match="ndim must be 1 or 2"):
+        WaveSimulator(ndim=3)
+
+
+# ---------------------------------------------------------------------------
+# Klein-Gordon (chi > 0)
+# ---------------------------------------------------------------------------
+
+
+def test_klein_gordon_1d_runs():
+    """Klein-Gordon (chi=2.0) should run without error and return finite values."""
+    sim = WaveSimulator(c=1.0, chi=2.0, xdim=64, tdim=11, t=0.5, seed=0)
+    result = sim.generate_sample()
+    assert result.shape == (11, 64)
+    assert np.isfinite(result).all()
+
+
+def test_klein_gordon_2d_runs():
+    """Klein-Gordon 2D should run without error."""
+    sim = WaveSimulator(c=1.0, chi=1.5, xdim=32, tdim=11, t=0.5, ndim=2, seed=0)
+    result = sim.generate_sample()
+    assert result.shape == (11, 32, 32)
+    assert np.isfinite(result).all()
+
+
+# ---------------------------------------------------------------------------
+# Accuracy: leapfrog vs analytical solution
+# ---------------------------------------------------------------------------
+
+
+def test_leapfrog_matches_analytical_single_mode():
+    """
+    Single k=1 cosine mode: leapfrog nRMSE vs analytical solution < 1%.
+
+    For u0 = cos(2*pi*x) with du/dt=0, the exact solution is
+        u(x,t) = cos(2*pi*x) * cos(2*pi*c*t).
+
+    With Nx=256 and k=1, the FD dispersion error is < 0.02%, so the
+    leapfrog and analytical solutions should agree to well within 1%.
+    """
+    sim = WaveSimulator(c=1.0, chi=0.0, xdim=256, tdim=21, t=0.5, seed=0)
+    u0 = np.cos(2 * np.pi * sim.x)
+
+    # Patch the IC generator to return our single-mode IC
+    sim._random_fourier_ic_1d = lambda rng: u0.copy()  # noqa: ARG005
+
+    numerical = sim.generate_sample()  # (21, 256), float32
+    exact = analytical_solution_1d(sim.x, sim.t_save, u0, c=1.0, chi=0.0)
+
+    u_range = exact.max() - exact.min()
+    nrmse = np.sqrt(np.mean((numerical.astype(np.float64) - exact) ** 2)) / u_range
+    assert nrmse < 0.01, f"nRMSE={nrmse:.4f} exceeds 1% tolerance"
+
+
+def test_klein_gordon_leapfrog_matches_analytical():
+    """
+    Single k=1 cosine mode with chi=2: leapfrog nRMSE < 1%.
+
+    Exact: u(x,t) = cos(2*pi*x) * cos(omega*t), omega = sqrt((2*pi)^2 + chi^2).
+    """
+    chi = 2.0
+    sim = WaveSimulator(c=1.0, chi=chi, xdim=256, tdim=21, t=0.5, seed=0)
+    u0 = np.cos(2 * np.pi * sim.x)
+
+    sim._random_fourier_ic_1d = lambda rng: u0.copy()  # noqa: ARG005
+
+    numerical = sim.generate_sample()
+    exact = analytical_solution_1d(sim.x, sim.t_save, u0, c=1.0, chi=chi)
+
+    u_range = exact.max() - exact.min()
+    nrmse = np.sqrt(np.mean((numerical.astype(np.float64) - exact) ** 2)) / u_range
+    assert nrmse < 0.01, f"KG nRMSE={nrmse:.4f} exceeds 1% tolerance"
+
+
+# ---------------------------------------------------------------------------
+# analytical_solution_1d sanity check
+# ---------------------------------------------------------------------------
+
+
+def test_analytical_solution_1d_at_t0():
+    """Analytical solution at t=0 must equal u0 exactly."""
+    Nx = 128
+    x = np.linspace(0, 1, Nx, endpoint=False)
+    u0 = np.sin(4 * np.pi * x) + 0.5 * np.cos(2 * np.pi * x)
+    t = np.array([0.0, 0.25, 0.5])
+
+    result = analytical_solution_1d(x, t, u0, c=1.0, chi=0.0)
+
+    np.testing.assert_allclose(
+        result[0], u0, atol=1e-12, err_msg="Analytical solution at t=0 != u0"
+    )
+
+
+def test_analytical_solution_kg_at_t0():
+    """Analytical solution with chi>0 at t=0 must equal u0."""
+    Nx = 64
+    x = np.linspace(0, 1, Nx, endpoint=False)
+    u0 = np.cos(2 * np.pi * x)
+    t = np.array([0.0, 0.1])
+
+    result = analytical_solution_1d(x, t, u0, c=1.0, chi=5.0)
+
+    np.testing.assert_allclose(result[0], u0, atol=1e-12)

From f2573dd262ace3012bdc32c3bb18166210850964 Mon Sep 17 00:00:00 2001
From: Greg Partin <gpartin@gmail.com>
Date: Thu, 2 Apr 2026 07:28:40 -0700
Subject: [PATCH 4/4] feat: add 2D analytical solution, num_samples config, n
 param, 4 new tests

- sim_wave.py: add analytical_solution_2d (2D FFT exact solution for wave/KG)
- sim_wave.py: add n param to WaveSimulator for API compat with other simulators
- wave.yaml: add num_samples (was hardcoded 1000 in gen_wave.py) and n: 1
- gen_wave.py: use config.num_samples instead of hardcoded 1000
- test_sim_wave.py: add seed reproducibility, 2D leapfrog vs analytical,
  2D analytical at t=0, and periodicity test (single mode returns to IC after T=1/c)

Tests: 14/14 pass
---
 pdebench/data_gen/configs/wave.yaml |  3 ++
 pdebench/data_gen/gen_wave.py       |  2 +-
 pdebench/data_gen/src/sim_wave.py   | 41 ++++++++++++++
 tests/test_sim_wave.py              | 84 ++++++++++++++++++++++++++++-
 4 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/pdebench/data_gen/configs/wave.yaml b/pdebench/data_gen/configs/wave.yaml
index 9dd50c6..4d3d4ab 100644
--- a/pdebench/data_gen/configs/wave.yaml
+++ b/pdebench/data_gen/configs/wave.yaml
@@ -12,6 +12,8 @@ output_path: 1D_Wave_c1.0
 
 name: 1d_wave
 
+num_samples: 1000
+
 sim:
   c: 1.0
   chi: 0.0       # 0 = wave equation, >0 = Klein-Gordon
@@ -20,6 +22,7 @@ sim:
   xdim: 1024
   ndim: 1
   n_modes: 5
+  n: 1
   seed: "???"
 
 plot:
diff --git a/pdebench/data_gen/gen_wave.py b/pdebench/data_gen/gen_wave.py
index 70d9782..a9bd74b 100644
--- a/pdebench/data_gen/gen_wave.py
+++ b/pdebench/data_gen/gen_wave.py
@@ -164,7 +164,7 @@ def main(config: DictConfig):
     base_name = f"{config.sim.ndim}D_Wave_c{config.sim.c}{chi_str}"
     config.output_path = str((output_dir / base_name).with_suffix(".h5"))
 
-    num_samples = 1000
+    num_samples = config.num_samples
 
     log.info(f"Generating {num_samples} samples -> {config.output_path}")
     log.info(f"PDE: d2u/dt2 = {config.sim.c}^2 * Lap(u) - {config.sim.chi}^2 * u")
diff --git a/pdebench/data_gen/src/sim_wave.py b/pdebench/data_gen/src/sim_wave.py
index ebd8422..e6028fa 100644
--- a/pdebench/data_gen/src/sim_wave.py
+++ b/pdebench/data_gen/src/sim_wave.py
@@ -37,6 +37,7 @@ class WaveSimulator:
     :param xdim: number of spatial grid points per dimension
     :param ndim: spatial dimensionality (1 or 2)
     :param n_modes: number of Fourier modes in IC generation
+    :param n: number of batches (unused; present for API compatibility)
     :param seed: random seed for IC generation
     """
 
@@ -49,6 +50,7 @@ def __init__(
         xdim: int = 1024,
         ndim: int = 1,
         n_modes: int = 5,
+        n: int = 1,  # noqa: ARG002
         seed: int = 0,
     ):
         self.c = c
@@ -219,3 +221,42 @@ def analytical_solution_1d(
         result[i] = np.fft.ifft(u_hat_t).real
 
     return result
+
+
+def analytical_solution_2d(
+    x: np.ndarray,
+    t: np.ndarray,
+    u0: np.ndarray,
+    c: float,
+    chi: float = 0.0,
+) -> np.ndarray:
+    """
+    Compute analytical solution for 2D wave/KG equation via 2D FFT.
+
+    Each Fourier mode (kx, ky) oscillates at frequency
+    omega = sqrt(c^2 * (2*pi)^2 * (kx^2 + ky^2) + chi^2).
+
+    :param x: 1D spatial grid (Nx,), same for both dimensions
+    :param t: time points (Nt,)
+    :param u0: initial condition (Nx, Nx)
+    :param c: wave speed
+    :param chi: mass parameter
+    :return: solution array (Nt, Nx, Nx)
+    """
+    Nx = u0.shape[0]
+    u0_hat = np.fft.fft2(u0)
+    kx = np.fft.fftfreq(Nx, d=1.0 / Nx)
+    ky = np.fft.fftfreq(Nx, d=1.0 / Nx)
+    KX, KY = np.meshgrid(kx, ky, indexing="ij")
+
+    omega = np.sqrt(
+        c**2 * (2 * np.pi) ** 2 * (KX**2 + KY**2) + chi**2 + 0j
+    ).real
+
+    result = np.zeros((len(t), Nx, Nx), dtype=np.float64)
+    for i, ti in enumerate(t):
+        # du/dt(0) = 0 => only cosine term
+        u_hat_t = u0_hat * np.cos(omega * ti)
+        result[i] = np.fft.ifft2(u_hat_t).real
+
+    return result
diff --git a/tests/test_sim_wave.py b/tests/test_sim_wave.py
index 733f331..e55a547 100644
--- a/tests/test_sim_wave.py
+++ b/tests/test_sim_wave.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from pdebench.data_gen.src.sim_wave import WaveSimulator, analytical_solution_1d
+from pdebench.data_gen.src.sim_wave import WaveSimulator, analytical_solution_1d, analytical_solution_2d
 
 
 # ---------------------------------------------------------------------------
@@ -141,3 +141,85 @@ def test_analytical_solution_kg_at_t0():
     result = analytical_solution_1d(x, t, u0, c=1.0, chi=5.0)
 
     np.testing.assert_allclose(result[0], u0, atol=1e-12)
+
+
+# ---------------------------------------------------------------------------
+# Seed reproducibility
+# ---------------------------------------------------------------------------
+
+
+def test_seed_reproducibility():
+    """Same seed must produce bit-identical output across two calls."""
+    sim_a = WaveSimulator(c=1.0, chi=0.0, xdim=32, tdim=11, t=0.5, seed=42)
+    sim_b = WaveSimulator(c=1.0, chi=0.0, xdim=32, tdim=11, t=0.5, seed=42)
+    np.testing.assert_array_equal(sim_a.generate_sample(), sim_b.generate_sample())
+
+
+# ---------------------------------------------------------------------------
+# 2D analytical solution
+# ---------------------------------------------------------------------------
+
+
+def test_wave_2d_matches_analytical():
+    """
+    2D leapfrog should match the 2D analytical solution to within 1% nRMSE.
+
+    IC: u0(x, y) = cos(2*pi*x) * cos(2*pi*y), du/dt=0.
+    Exact: u(x, y, t) = cos(2*pi*x) * cos(2*pi*y) * cos(2*pi*sqrt(2)*c*t).
+    """
+    Nx = 64
+    sim = WaveSimulator(c=1.0, chi=0.0, xdim=Nx, tdim=11, t=0.3, ndim=2, seed=0)
+
+    x1d = np.linspace(0, 1, Nx, endpoint=False)
+    X, Y = np.meshgrid(x1d, x1d, indexing="ij")
+    u0_2d = np.cos(2 * np.pi * X) * np.cos(2 * np.pi * Y)
+
+    sim._random_fourier_ic_2d = lambda rng: u0_2d.copy()  # noqa: ARG005
+
+    numerical = sim.generate_sample()  # (11, Nx, Nx) float32
+    exact = analytical_solution_2d(x1d, sim.t_save, u0_2d, c=1.0, chi=0.0)
+
+    u_range = exact.max() - exact.min()
+    nrmse = np.sqrt(np.mean((numerical.astype(np.float64) - exact) ** 2)) / u_range
+    assert nrmse < 0.01, f"2D nRMSE={nrmse:.4f} exceeds 1% tolerance"
+
+
+def test_analytical_solution_2d_at_t0():
+    """2D analytical solution at t=0 must equal u0 exactly."""
+    Nx = 32
+    x = np.linspace(0, 1, Nx, endpoint=False)
+    X, Y = np.meshgrid(x, x, indexing="ij")
+    u0 = np.sin(2 * np.pi * X) + 0.3 * np.cos(4 * np.pi * Y)
+    t = np.array([0.0, 0.5, 1.0])
+
+    result = analytical_solution_2d(x, t, u0, c=1.0, chi=0.0)
+
+    np.testing.assert_allclose(result[0], u0, atol=1e-12)
+
+
+# ---------------------------------------------------------------------------
+# Periodicity: single mode returns to IC after one period
+# ---------------------------------------------------------------------------
+
+
+def test_wave_periodicity_single_mode():
+    """
+    After one full period T = 1/c, a single k=1 cosine IC returns to itself.
+
+    u(x, t) = cos(2*pi*x) * cos(2*pi*c*t) has period T = 1/c.
+    The leapfrog nRMSE at t=T should be < 0.1% (O(dt^2) per step).
+    """
+    c = 1.0
+    Nx = 256
+    # tdim=3 saves t=0, t=T/2, t=T
+    sim = WaveSimulator(c=c, chi=0.0, xdim=Nx, tdim=3, t=1.0 / c, seed=0)
+    u0 = np.cos(2 * np.pi * sim.x)
+
+    sim._random_fourier_ic_1d = lambda rng: u0.copy()  # noqa: ARG005
+
+    result = sim.generate_sample()  # (3, Nx): t=0, t=T/2, t=T
+    u_final = result[-1].astype(np.float64)
+
+    u_range = u0.max() - u0.min()
+    nrmse = np.sqrt(np.mean((u_final - u0) ** 2)) / u_range
+    assert nrmse < 0.001, f"Periodicity nRMSE={nrmse:.6f} exceeds 0.1%"