From 485f921b1d138b6c486303d0adeb09b8596c4974 Mon Sep 17 00:00:00 2001 From: Richard Styron Date: Thu, 29 Jan 2026 10:46:08 -0800 Subject: [PATCH 1/3] annual N test, to be moved shortly --- .../gem/gem_test_functions.py | 60 +++++++++++ .../model_test_frameworks/gem/gem_tests.py | 58 ++++++++++ openquake/hme/reporting/reporting.py | 32 ++++++ openquake/hme/utils/plots.py | 100 ++++++++++++++++++ 4 files changed, 250 insertions(+) diff --git a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py index e41f902..ce4d98a 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py +++ b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py @@ -654,6 +654,66 @@ def get_closest_rupture(eq, rupture_df): return rupture_df.iloc[dists.argmin()] +def annual_N_eval_fn( + rup_gdf, + eq_gdf, + mag_bins, + investigation_time, +): + """ + Creates a distribution of annual earthquake counts from the catalog + and compares it to a Poisson distribution based on the mean annual model rate. + + This function counts the number of earthquakes per year in the catalog + and compares this distribution to what would be expected from a Poisson + distribution with the model's mean annual rate. + + Parameters + ---------- + rup_gdf : GeoDataFrame + Rupture geodataframe with model ruptures + eq_gdf : GeoDataFrame + Earthquake geodataframe with observed earthquakes + mag_bins : dict + Magnitude bins used in the test + investigation_time : float + Total investigation time in years + + Returns + ------- + dict + Dictionary containing: + - annual_counts: array of earthquake counts per year + - mean_annual_model_rate: mean annual rate from the model + - years: array of years in the catalog + """ + # Get the mean annual model rate + model_mfd = get_model_mfd(rup_gdf, mag_bins, t_yrs=1.0) + mean_annual_model_rate = sum(model_mfd.values()) + + # Extract years from earthquake times + eq_years = pd.DatetimeIndex(eq_gdf['time']).year + unique_years = sorted(eq_years.unique()) + + # Count earthquakes per year + annual_counts = np.array([ + (eq_years == year).sum() for year in unique_years + ]) + + results = { + "test_data": { + "annual_counts": annual_counts.tolist(), + "mean_annual_model_rate": mean_annual_model_rate, + "years": unique_years, + "n_years": len(unique_years), + "mean_annual_obs_count": annual_counts.mean(), + "std_annual_obs_count": annual_counts.std(), + } + } + + return results + + def catalog_ground_motion_eval_fn(test_config, input_data): # defining this here for now, will go in config later diff --git a/openquake/hme/model_test_frameworks/gem/gem_tests.py b/openquake/hme/model_test_frameworks/gem/gem_tests.py index 01c1895..94d0ac0 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_tests.py +++ b/openquake/hme/model_test_frameworks/gem/gem_tests.py @@ -22,6 +22,7 @@ moment_over_under_eval_fn, rupture_matching_eval_fn, catalog_ground_motion_eval_fn, + annual_N_eval_fn, ) from ..relm.relm_tests import ( @@ -473,6 +474,62 @@ def cumulative_occurrence_eval(cfg, input_data): } +def annual_N_eval(cfg, input_data): + """ + Annual N Evaluation: Compares the distribution of annual earthquake counts + from the catalog to a Poisson distribution based on the model's mean annual rate. + + This evaluation is similar to the N_test, but instead of comparing a single + observed count to a model rate, it compares the full distribution of annual + counts across all years in the catalog. 
+ + Note: This evaluation currently only works when the test_config does not have + a completeness table, as the completeness table would change the number of + events in different years. + """ + logging.info("Running GEM Annual N Evaluation") + + test_config = cfg["config"]["model_framework"]["gem"]["annual_N_eval"] + mag_bins = get_mag_bins_from_cfg(cfg) + completeness_table = cfg["input"]["seis_catalog"].get("completeness_table") + + if completeness_table is not None: + logging.warning( + "annual_N_eval does not currently support completeness tables. " + "Results may not be accurate." + ) + + prospective = test_config.get("prospective", False) + investigation_time = test_config.get( + "investigation_time", cfg["input"]["seis_catalog"].get("duration") + ) + + if prospective: + eq_gdf = input_data["pro_gdf"] + else: + eq_gdf = input_data["eq_gdf"] + + results = annual_N_eval_fn( + input_data["rupture_gdf"], + eq_gdf, + mag_bins, + investigation_time, + ) + + logging.info( + "Annual N Eval: mean annual obs count: {:.2f}".format( + results["test_data"]["mean_annual_obs_count"] + ) + ) + logging.info( + "Annual N Eval: mean annual model rate: {:.2f}".format( + results["test_data"]["mean_annual_model_rate"] + ) + ) + + return results + + def catalog_ground_motion_eval(cfg, input_data): logging.info("Running GEM catalog ground motion evaluation") @@ -502,6 +559,7 @@ def catalog_ground_motion_eval(cfg, input_data): "S_test": S_test, "N_test": N_test, "L_test": L_test, + "annual_N_eval": annual_N_eval, "rupture_matching_eval": rupture_matching_eval, "cumulative_occurrence_eval": cumulative_occurrence_eval, "catalog_ground_motion_eval": catalog_ground_motion_eval, diff --git a/openquake/hme/reporting/reporting.py b/openquake/hme/reporting/reporting.py index a8416d0..6a40d66 100644 --- a/openquake/hme/reporting/reporting.py +++ b/openquake/hme/reporting/reporting.py @@ -23,6 +23,7 @@ plot_rup_match_mag_dist, plot_rup_match_map, plot_N_test_results, + plot_annual_N_eval_results, plot_L_test_results, plot_M_test_results, plot_eqs_by_mag_time, @@ -124,6 +125,11 @@ def render_result_text( env=env, cfg=cfg, results=results, model_test_framework="gem" ) + if "annual_N_eval" in results["gem"].keys(): + render_annual_N_eval( + env=env, cfg=cfg, results=results, model_test_framework="gem" + ) + if "M_test" in results["gem"].keys(): render_M_test( env=env, cfg=cfg, results=results, model_test_framework="gem" @@ -203,6 +209,32 @@ def render_N_test( ) +def render_annual_N_eval( + env: Environment, cfg: dict, results: dict, model_test_framework="gem" +): + + annual_n_eval_plot_str = plot_annual_N_eval_results( + results[model_test_framework]["annual_N_eval"]["val"], + return_string=True, + ) + + # Try to use a template if it exists, otherwise just store the plot + try: + annual_n_eval = env.get_template("annual_n_eval.html") + results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( + annual_n_eval.render( + res=results[model_test_framework]["annual_N_eval"]["val"], + mtf=model_test_framework, + annual_n_eval_plot_str=annual_n_eval_plot_str, + ) + ) + except Exception: + # If template doesn't exist, just store the plot + results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( + "

Annual N Evaluation
" + annual_n_eval_plot_str + ) + + def render_L_test( env: Environment, cfg: dict, results: dict, model_test_framework="gem" ): diff --git a/openquake/hme/utils/plots.py b/openquake/hme/utils/plots.py index acde9f3..2043b47 100644 --- a/openquake/hme/utils/plots.py +++ b/openquake/hme/utils/plots.py @@ -62,6 +62,106 @@ def _make_stoch_mfds(mfd, iters: int, t_yrs: float = 1.0): return stoch_mfd_vals +def plot_annual_N_eval_results( + annual_N_eval_results: dict, + return_fig: bool = False, + return_string: bool = False, + save_fig: Union[bool, str] = False, +): + """ + Plot histogram of annual earthquake counts compared to Poisson distribution. + + Parameters + ---------- + annual_N_eval_results : dict + Results from annual_N_eval containing annual_counts and mean_annual_model_rate + return_fig : bool + If True, return the figure object + return_string : bool + If True, return SVG string + save_fig : Union[bool, str] + If string, save figure to this path + + Returns + ------- + fig or str + Matplotlib figure or SVG string + """ + test_data = annual_N_eval_results["test_data"] + annual_counts = np.array(test_data["annual_counts"]) + mean_annual_model_rate = test_data["mean_annual_model_rate"] + + fig, ax = plt.subplots(figsize=(10, 6)) + + # Create histogram of observed annual counts + max_count = int(np.ceil(max(annual_counts.max(), mean_annual_model_rate * 2))) + bins = np.arange(-0.5, max_count + 1.5, 1) + + ax.hist( + annual_counts, + bins=bins, + density=True, + histtype="stepfilled", + alpha=0.6, + color="C0", + label=f"Observed annual counts (n={len(annual_counts)} years)", + edgecolor="black", + linewidth=0.5, + ) + + # Plot Poisson distribution for model + x_poisson = np.arange(0, max_count + 1) + poisson_probs = poisson.pmf(x_poisson, mean_annual_model_rate) + + ax.plot( + x_poisson, + poisson_probs, + "o-", + color="C1", + label=f"Poisson (λ={mean_annual_model_rate:.2f})", + linewidth=2, + markersize=6, + ) + + # Add mean lines + ax.axvline( + annual_counts.mean(), + color="C0", + linestyle="--", + linewidth=2, + label=f"Mean observed ({annual_counts.mean():.2f})", + ) + ax.axvline( + mean_annual_model_rate, + color="C1", + linestyle="--", + linewidth=2, + label=f"Mean model ({mean_annual_model_rate:.2f})", + ) + + ax.set_xlabel("Number of Earthquakes per Year", fontsize=12) + ax.set_ylabel("Probability Density", fontsize=12) + ax.set_title("Annual Earthquake Count Distribution vs. 
Model", fontsize=14) + ax.legend(loc="best") + ax.grid(True, alpha=0.3) + + plt.tight_layout() + + if save_fig is not False: + fig.savefig(save_fig) + + if return_fig is True: + return fig + + elif return_string is True: + plt.switch_backend("svg") + fig_str = io.StringIO() + fig.savefig(fig_str, format="svg") + plt.close(fig) + fig_svg = " Date: Thu, 29 Jan 2026 11:51:01 -0800 Subject: [PATCH 2/3] added annual n-test with negative binomial statistics --- .../gem/gem_test_functions.py | 167 +++++++++++------- .../model_test_frameworks/gem/gem_tests.py | 101 ++++------- .../relm/relm_test_functions.py | 6 + openquake/hme/reporting/reporting.py | 32 ---- openquake/hme/utils/plots.py | 109 +++++++----- 5 files changed, 215 insertions(+), 200 deletions(-) diff --git a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py index ce4d98a..991bc90 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py +++ b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py @@ -27,6 +27,7 @@ ) from openquake.hme.utils.utils import _n_procs, breakpoint from openquake.hme.utils.stats import geom_mean, weighted_geom_mean +from scipy.stats import poisson, nbinom, t as t_dist def get_rupture_gdf_cell_moment(rupture_gdf, t_yrs, rup_groups=None): @@ -654,66 +655,6 @@ def get_closest_rupture(eq, rupture_df): return rupture_df.iloc[dists.argmin()] -def annual_N_eval_fn( - rup_gdf, - eq_gdf, - mag_bins, - investigation_time, -): - """ - Creates a distribution of annual earthquake counts from the catalog - and compares it to a Poisson distribution based on the mean annual model rate. - - This function counts the number of earthquakes per year in the catalog - and compares this distribution to what would be expected from a Poisson - distribution with the model's mean annual rate. - - Parameters - ---------- - rup_gdf : GeoDataFrame - Rupture geodataframe with model ruptures - eq_gdf : GeoDataFrame - Earthquake geodataframe with observed earthquakes - mag_bins : dict - Magnitude bins used in the test - investigation_time : float - Total investigation time in years - - Returns - ------- - dict - Dictionary containing: - - annual_counts: array of earthquake counts per year - - mean_annual_model_rate: mean annual rate from the model - - years: array of years in the catalog - """ - # Get the mean annual model rate - model_mfd = get_model_mfd(rup_gdf, mag_bins, t_yrs=1.0) - mean_annual_model_rate = sum(model_mfd.values()) - - # Extract years from earthquake times - eq_years = pd.DatetimeIndex(eq_gdf['time']).year - unique_years = sorted(eq_years.unique()) - - # Count earthquakes per year - annual_counts = np.array([ - (eq_years == year).sum() for year in unique_years - ]) - - results = { - "test_data": { - "annual_counts": annual_counts.tolist(), - "mean_annual_model_rate": mean_annual_model_rate, - "years": unique_years, - "n_years": len(unique_years), - "mean_annual_obs_count": annual_counts.mean(), - "std_annual_obs_count": annual_counts.std(), - } - } - - return results - - def catalog_ground_motion_eval_fn(test_config, input_data): # defining this here for now, will go in config later @@ -785,3 +726,109 @@ def catalog_ground_motion_eval_fn(test_config, input_data): logging.warning(f"can't do eq {eq.name}") return gmm_results + + +def N_test_annual( + eq_gdf: GeoDataFrame, mean_annual_model_rate: float, conf_interval: float +) -> dict: + """ + N-test using annual earthquake count distribution. 
+ + Tests if the model's mean annual rate is consistent with the observed + annual earthquake counts, accounting for temporal variability without + assuming a Poisson process. Uses a t-based confidence interval approach. + + If the data shows overdispersion (variance > mean), estimates a negative + binomial dispersion parameter for visualization and reporting. + + Parameters + ---------- + eq_gdf : GeoDataFrame + Earthquake geodataframe with observed earthquakes + mean_annual_model_rate : float + Mean annual rate from the model + conf_interval : float + Confidence interval for the test (e.g., 0.95 for 95%) + + Returns + ------- + dict + Test results including annual counts, dispersion statistics, and + confidence intervals + """ + # Extract years from earthquake times + eq_years = pd.DatetimeIndex(eq_gdf['time']).year + unique_years = sorted(eq_years.unique()) + n_years = len(unique_years) + + # Count earthquakes per year + annual_counts = np.array([ + (eq_years == year).sum() for year in unique_years + ]) + + # Mean and standard deviation of annual counts + mean_annual_obs = annual_counts.mean() + std_annual_obs = annual_counts.std(ddof=1) + + # Calculate variance for dispersion estimation + var_annual_obs = annual_counts.var(ddof=1) + + # Estimate dispersion parameter r for negative binomial + # r = mu^2 / (var - mu) + # If var <= mu, data is not overdispersed (Poisson-like or underdispersed) + if var_annual_obs > mean_annual_obs: + r_dispersion = mean_annual_obs**2 / (var_annual_obs - mean_annual_obs) + is_overdispersed = True + logging.info( + f"N-Test (annual): Observed overdispersion - " + f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}, r={r_dispersion:.2f}" + ) + else: + r_dispersion = None + is_overdispersed = False + logging.info( + f"N-Test (annual): No overdispersion detected - " + f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}" + ) + + # Test if model mean is consistent with observed data + # Use t-distribution confidence interval (doesn't assume Poisson) + t_crit = t_dist.ppf((1 + conf_interval) / 2, n_years - 1) + margin_error = t_crit * (std_annual_obs / np.sqrt(n_years)) + + conf_min = mean_annual_obs - margin_error + conf_max = mean_annual_obs + margin_error + + # Test passes if model mean is within CI of observed mean + test_pass = conf_min <= mean_annual_model_rate <= conf_max + + logging.info( + f"N-Test (annual): Model rate {mean_annual_model_rate:.2f}, " + f"Observed mean {mean_annual_obs:.2f} ± {margin_error:.2f} " + f"({conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}])" + ) + + test_res = "Pass" if test_pass else "Fail" + logging.info(f"N-Test (annual): {test_res}") + + test_result = { + "conf_interval_frac": conf_interval, + "conf_interval": (conf_min, conf_max), + "n_pred_earthquakes": mean_annual_model_rate, # annual rate + "n_obs_earthquakes": mean_annual_obs, # mean annual count + "test_res": test_res, + "test_pass": bool(test_pass), + # Temporal variability statistics + "is_overdispersed": is_overdispersed, + "r_dispersion": float(r_dispersion) if r_dispersion is not None else None, + "variance_annual_obs": float(var_annual_obs), + # Additional data for annual distribution plotting + "annual_counts": annual_counts.tolist(), + "years": unique_years, + "n_years": n_years, + "mean_annual_obs_count": float(mean_annual_obs), + "std_annual_obs_count": float(std_annual_obs), + "mean_annual_model_rate": mean_annual_model_rate, + } + + return test_result diff --git a/openquake/hme/model_test_frameworks/gem/gem_tests.py 
b/openquake/hme/model_test_frameworks/gem/gem_tests.py index 94d0ac0..0648c1f 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_tests.py +++ b/openquake/hme/model_test_frameworks/gem/gem_tests.py @@ -22,7 +22,7 @@ moment_over_under_eval_fn, rupture_matching_eval_fn, catalog_ground_motion_eval_fn, - annual_N_eval_fn, + N_test_annual, ) from ..relm.relm_tests import ( @@ -209,16 +209,50 @@ def L_test( def N_test(cfg: dict, input_data: dict) -> dict: logging.info("Running N-Test") - + test_config = cfg["config"]["model_framework"]["gem"]["N_test"] completeness_table = cfg["input"]["seis_catalog"].get("completeness_table") test_config["mag_bins"] = get_mag_bins_from_cfg(cfg) prospective = test_config.get("prospective", False) + prob_model = test_config["prob_model"] + + # Handle annual prob_model separately (GEM-specific implementation) + if prob_model == "annual": + if prospective: + eq_gdf = input_data["pro_gdf"] + else: + eq_gdf = input_data["eq_gdf"] + + if completeness_table is not None: + logging.warning( + "N-Test with prob_model='annual' does not support completeness tables. " + "Results may not be accurate." + ) + + # Get mean annual rate from model + mag_bins = get_mag_bins_from_cfg(cfg) + model_mfd = get_model_mfd(input_data["rupture_gdf"], mag_bins, t_yrs=1.0) + mean_annual_rate = sum(model_mfd.values()) - if ( - test_config["prob_model"] in ["poisson", "poisson_cum"] - ) and not prospective: + conf_interval = test_config.get("conf_interval", 0.95) + test_results = N_test_annual(eq_gdf, mean_annual_rate, conf_interval) + + # Add metadata for plotting/reporting + test_results["M_min"] = min(mag_bins.keys()) + test_results["prob_model"] = "annual" + + logging.info( + "N-Test number obs eqs: {}".format(test_results["n_obs_earthquakes"]) + ) + logging.info( + "N-Test number pred eqs: {}".format(test_results["n_pred_earthquakes"]) + ) + logging.info("N-Test {}".format(test_results["test_pass"])) + return test_results + + # For other prob_models, use RELM implementation + if prob_model in ["poisson", "poisson_cum"] and not prospective: if completeness_table is not None: test_config["completeness_table"] = completeness_table test_config["mag_bins"] = get_mag_bins_from_cfg(cfg) @@ -474,62 +508,6 @@ def cumulative_occurrence_eval(cfg, input_data): } -def annual_N_eval(cfg, input_data): - """ - Annual N Evaluation: Compares the distribution of annual earthquake counts - from the catalog to a Poisson distribution based on the model's mean annual rate. - - This evaluation is similar to the N_test, but instead of comparing a single - observed count to a model rate, it compares the full distribution of annual - counts across all years in the catalog. - - Note: This evaluation currently only works when the test_config does not have - a completeness table, as the completeness table would change the number of - events in different years. - """ - logging.info("Running GEM Annual N Evaluation") - - test_config = cfg["config"]["model_framework"]["gem"]["annual_N_eval"] - mag_bins = get_mag_bins_from_cfg(cfg) - completeness_table = cfg["input"]["seis_catalog"].get("completeness_table") - - if completeness_table is not None: - logging.warning( - "annual_N_eval does not currently support completeness tables. " - "Results may not be accurate." 
- ) - - prospective = test_config.get("prospective", False) - investigation_time = test_config.get( - "investigation_time", cfg["input"]["seis_catalog"].get("duration") - ) - - if prospective: - eq_gdf = input_data["pro_gdf"] - else: - eq_gdf = input_data["eq_gdf"] - - results = annual_N_eval_fn( - input_data["rupture_gdf"], - eq_gdf, - mag_bins, - investigation_time, - ) - - logging.info( - "Annual N Eval: mean annual obs count: {:.2f}".format( - results["test_data"]["mean_annual_obs_count"] - ) - ) - logging.info( - "Annual N Eval: mean annual model rate: {:.2f}".format( - results["test_data"]["mean_annual_model_rate"] - ) - ) - - return results - - def catalog_ground_motion_eval(cfg, input_data): logging.info("Running GEM catalog ground motion evaluation") @@ -559,7 +537,6 @@ def catalog_ground_motion_eval(cfg, input_data): "S_test": S_test, "N_test": N_test, "L_test": L_test, - "annual_N_eval": annual_N_eval, "rupture_matching_eval": rupture_matching_eval, "cumulative_occurrence_eval": cumulative_occurrence_eval, "catalog_ground_motion_eval": catalog_ground_motion_eval, diff --git a/openquake/hme/model_test_frameworks/relm/relm_test_functions.py b/openquake/hme/model_test_frameworks/relm/relm_test_functions.py index c0f397d..2db09c1 100644 --- a/openquake/hme/model_test_frameworks/relm/relm_test_functions.py +++ b/openquake/hme/model_test_frameworks/relm/relm_test_functions.py @@ -695,6 +695,12 @@ def n_test_function(rup_gdf, eq_gdf, test_config: dict): ] test_result = N_test_empirical(n_obs, n_eq_samples, conf_interval) + elif test_config["prob_model"] == "annual": + raise ValueError( + "prob_model='annual' is not supported in the RELM framework. " + "Use the GEM framework instead (model_framework: gem: N_test:)." + ) + elif test_config["prob_model"] == "neg_binom": raise NotImplementedError("can't subdivide earthquakes yet") n_eqs_in_subs = subdivide_observed_eqs( diff --git a/openquake/hme/reporting/reporting.py b/openquake/hme/reporting/reporting.py index 6a40d66..a8416d0 100644 --- a/openquake/hme/reporting/reporting.py +++ b/openquake/hme/reporting/reporting.py @@ -23,7 +23,6 @@ plot_rup_match_mag_dist, plot_rup_match_map, plot_N_test_results, - plot_annual_N_eval_results, plot_L_test_results, plot_M_test_results, plot_eqs_by_mag_time, @@ -125,11 +124,6 @@ def render_result_text( env=env, cfg=cfg, results=results, model_test_framework="gem" ) - if "annual_N_eval" in results["gem"].keys(): - render_annual_N_eval( - env=env, cfg=cfg, results=results, model_test_framework="gem" - ) - if "M_test" in results["gem"].keys(): render_M_test( env=env, cfg=cfg, results=results, model_test_framework="gem" @@ -209,32 +203,6 @@ def render_N_test( ) -def render_annual_N_eval( - env: Environment, cfg: dict, results: dict, model_test_framework="gem" -): - - annual_n_eval_plot_str = plot_annual_N_eval_results( - results[model_test_framework]["annual_N_eval"]["val"], - return_string=True, - ) - - # Try to use a template if it exists, otherwise just store the plot - try: - annual_n_eval = env.get_template("annual_n_eval.html") - results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( - annual_n_eval.render( - res=results[model_test_framework]["annual_N_eval"]["val"], - mtf=model_test_framework, - annual_n_eval_plot_str=annual_n_eval_plot_str, - ) - ) - except Exception: - # If template doesn't exist, just store the plot - results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( - "

Annual N Evaluation
" + annual_n_eval_plot_str - ) - - def render_L_test( env: Environment, cfg: dict, results: dict, model_test_framework="gem" ): diff --git a/openquake/hme/utils/plots.py b/openquake/hme/utils/plots.py index 2043b47..00ce3f2 100644 --- a/openquake/hme/utils/plots.py +++ b/openquake/hme/utils/plots.py @@ -62,34 +62,29 @@ def _make_stoch_mfds(mfd, iters: int, t_yrs: float = 1.0): return stoch_mfd_vals -def plot_annual_N_eval_results( - annual_N_eval_results: dict, - return_fig: bool = False, - return_string: bool = False, - save_fig: Union[bool, str] = False, -): +def plot_N_test_annual(N_test_results: dict): """ - Plot histogram of annual earthquake counts compared to Poisson distribution. + Plot histogram of annual earthquake counts with model distribution. + + Shows either Poisson (if no overdispersion) or Negative Binomial + (if overdispersion detected) distribution for comparison. Parameters ---------- - annual_N_eval_results : dict - Results from annual_N_eval containing annual_counts and mean_annual_model_rate - return_fig : bool - If True, return the figure object - return_string : bool - If True, return SVG string - save_fig : Union[bool, str] - If string, save figure to this path + N_test_results : dict + N-test results with prob_model='annual' containing annual_counts and + mean_annual_model_rate Returns ------- - fig or str - Matplotlib figure or SVG string + fig + Matplotlib figure """ - test_data = annual_N_eval_results["test_data"] - annual_counts = np.array(test_data["annual_counts"]) - mean_annual_model_rate = test_data["mean_annual_model_rate"] + annual_counts = np.array(N_test_results["annual_counts"]) + mean_annual_model_rate = N_test_results["mean_annual_model_rate"] + is_overdispersed = N_test_results.get("is_overdispersed", False) + r_dispersion = N_test_results.get("r_dispersion") + conf_interval = N_test_results.get("conf_interval", (None, None)) fig, ax = plt.subplots(figsize=(10, 6)) @@ -109,19 +104,40 @@ def plot_annual_N_eval_results( linewidth=0.5, ) - # Plot Poisson distribution for model - x_poisson = np.arange(0, max_count + 1) - poisson_probs = poisson.pmf(x_poisson, mean_annual_model_rate) + # Plot model distribution - use negative binomial if overdispersed, otherwise Poisson + x_vals = np.arange(0, max_count + 1) - ax.plot( - x_poisson, - poisson_probs, - "o-", - color="C1", - label=f"Poisson (λ={mean_annual_model_rate:.2f})", - linewidth=2, - markersize=6, - ) + if is_overdispersed and r_dispersion is not None: + # Negative binomial distribution + # Convert (mu, r) to scipy's (n, p) parameterization + # n = r, p = r / (r + mu) + from scipy.stats import nbinom + n_param = r_dispersion + p_param = r_dispersion / (r_dispersion + mean_annual_model_rate) + model_probs = nbinom.pmf(x_vals, n_param, p_param) + + ax.plot( + x_vals, + model_probs, + "o-", + color="C1", + label=f"Negative Binomial (μ={mean_annual_model_rate:.2f}, r={r_dispersion:.2f})", + linewidth=2, + markersize=6, + ) + else: + # Poisson distribution + model_probs = poisson.pmf(x_vals, mean_annual_model_rate) + + ax.plot( + x_vals, + model_probs, + "o-", + color="C1", + label=f"Poisson (λ={mean_annual_model_rate:.2f})", + linewidth=2, + markersize=6, + ) # Add mean lines ax.axvline( @@ -139,27 +155,25 @@ def plot_annual_N_eval_results( label=f"Mean model ({mean_annual_model_rate:.2f})", ) + # Add confidence interval shading if available + if conf_interval[0] is not None and conf_interval[1] is not None: + ax.axvspan( + conf_interval[0], + conf_interval[1], + alpha=0.2, + color="C0", + 
label=f"Obs. CI ({N_test_results.get('conf_interval_frac', 0.95)*100:.0f}%)", + ) + ax.set_xlabel("Number of Earthquakes per Year", fontsize=12) ax.set_ylabel("Probability Density", fontsize=12) - ax.set_title("Annual Earthquake Count Distribution vs. Model", fontsize=14) + ax.set_title("Annual Earthquake Count Distribution", fontsize=14) ax.legend(loc="best") ax.grid(True, alpha=0.3) plt.tight_layout() - if save_fig is not False: - fig.savefig(save_fig) - - if return_fig is True: - return fig - - elif return_string is True: - plt.switch_backend("svg") - fig_str = io.StringIO() - fig.savefig(fig_str, format="svg") - plt.close(fig) - fig_svg = " Date: Thu, 29 Jan 2026 13:10:06 -0800 Subject: [PATCH 3/3] ecdf option for annual N test --- .../gem/gem_test_functions.py | 75 ++++++++++++++----- .../model_test_frameworks/gem/gem_tests.py | 3 +- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py index 991bc90..60f0fd4 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py +++ b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py @@ -729,14 +729,17 @@ def catalog_ground_motion_eval_fn(test_config, input_data): def N_test_annual( - eq_gdf: GeoDataFrame, mean_annual_model_rate: float, conf_interval: float + eq_gdf: GeoDataFrame, + mean_annual_model_rate: float, + conf_interval: float, + test_method: str = "ecdf" ) -> dict: """ N-test using annual earthquake count distribution. Tests if the model's mean annual rate is consistent with the observed annual earthquake counts, accounting for temporal variability without - assuming a Poisson process. Uses a t-based confidence interval approach. + assuming a Poisson process. If the data shows overdispersion (variance > mean), estimates a negative binomial dispersion parameter for visualization and reporting. 
@@ -749,6 +752,10 @@ def N_test_annual( Mean annual rate from the model conf_interval : float Confidence interval for the test (e.g., 0.95 for 95%) + test_method : str, optional + Method for calculating confidence intervals: + - "ecdf": Use empirical percentiles from observed distribution (default) + - "t_distribution": Use t-distribution confidence interval Returns ------- @@ -780,36 +787,58 @@ def N_test_annual( r_dispersion = mean_annual_obs**2 / (var_annual_obs - mean_annual_obs) is_overdispersed = True logging.info( - f"N-Test (annual): Observed overdispersion - " + f"N-Test (annual, {test_method}): Observed overdispersion - " f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}, r={r_dispersion:.2f}" ) else: r_dispersion = None is_overdispersed = False logging.info( - f"N-Test (annual): No overdispersion detected - " + f"N-Test (annual, {test_method}): No overdispersion detected - " f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}" ) - # Test if model mean is consistent with observed data - # Use t-distribution confidence interval (doesn't assume Poisson) - t_crit = t_dist.ppf((1 + conf_interval) / 2, n_years - 1) - margin_error = t_crit * (std_annual_obs / np.sqrt(n_years)) + # Calculate confidence interval based on test_method + if test_method == "ecdf": + # Use empirical percentiles from observed distribution + alpha = 1 - conf_interval + lower_percentile = (alpha / 2) * 100 + upper_percentile = (1 - alpha / 2) * 100 - conf_min = mean_annual_obs - margin_error - conf_max = mean_annual_obs + margin_error + conf_min = np.percentile(annual_counts, lower_percentile) + conf_max = np.percentile(annual_counts, upper_percentile) - # Test passes if model mean is within CI of observed mean - test_pass = conf_min <= mean_annual_model_rate <= conf_max + # Calculate what percentile the model rate corresponds to + model_percentile = (annual_counts < mean_annual_model_rate).sum() / n_years * 100 - logging.info( - f"N-Test (annual): Model rate {mean_annual_model_rate:.2f}, " - f"Observed mean {mean_annual_obs:.2f} ± {margin_error:.2f} " - f"({conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}])" - ) + logging.info( + f"N-Test (annual, ECDF): Model rate {mean_annual_model_rate:.2f}, " + f"Observed {conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}] " + f"(from {lower_percentile:.1f}th to {upper_percentile:.1f}th percentile)" + ) + logging.info( + f"N-Test (annual, ECDF): Model rate at {model_percentile:.1f}th percentile " + f"of observed distribution" + ) + else: + # Use t-distribution confidence interval (doesn't assume Poisson) + t_crit = t_dist.ppf((1 + conf_interval) / 2, n_years - 1) + margin_error = t_crit * (std_annual_obs / np.sqrt(n_years)) + + conf_min = mean_annual_obs - margin_error + conf_max = mean_annual_obs + margin_error + + logging.info( + f"N-Test (annual, t-distribution): Model rate {mean_annual_model_rate:.2f}, " + f"Observed mean {mean_annual_obs:.2f} ± {margin_error:.2f} " + f"({conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}])" + ) + + # Test passes if model mean is within CI + test_pass = conf_min <= mean_annual_model_rate <= conf_max test_res = "Pass" if test_pass else "Fail" - logging.info(f"N-Test (annual): {test_res}") + logging.info(f"N-Test (annual, {test_method}): {test_res}") test_result = { "conf_interval_frac": conf_interval, @@ -818,6 +847,8 @@ def N_test_annual( "n_obs_earthquakes": mean_annual_obs, # mean annual count "test_res": test_res, "test_pass": bool(test_pass), + # Test method + "test_method": test_method, # 
Temporal variability statistics "is_overdispersed": is_overdispersed, "r_dispersion": float(r_dispersion) if r_dispersion is not None else None, @@ -831,4 +862,12 @@ def N_test_annual( "mean_annual_model_rate": mean_annual_model_rate, } + # Add ECDF-specific fields if using ECDF method + if test_method == "ecdf": + test_result.update({ + "model_percentile": float(model_percentile), + "lower_percentile": float(lower_percentile), + "upper_percentile": float(upper_percentile), + }) + return test_result diff --git a/openquake/hme/model_test_frameworks/gem/gem_tests.py b/openquake/hme/model_test_frameworks/gem/gem_tests.py index 0648c1f..7a165e0 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_tests.py +++ b/openquake/hme/model_test_frameworks/gem/gem_tests.py @@ -236,7 +236,8 @@ def N_test(cfg: dict, input_data: dict) -> dict: mean_annual_rate = sum(model_mfd.values()) conf_interval = test_config.get("conf_interval", 0.95) - test_results = N_test_annual(eq_gdf, mean_annual_rate, conf_interval) + test_method = test_config.get("test_method", "ecdf") + test_results = N_test_annual(eq_gdf, mean_annual_rate, conf_interval, test_method) # Add metadata for plotting/reporting test_results["M_min"] = min(mag_bins.keys())
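
Reviewer note appended after the series: a minimal standalone sketch of the statistics these patches implement, for reference while reviewing. It shows the method-of-moments negative binomial fit that N_test_annual applies when the annual counts are overdispersed, and the ECDF percentile interval behind the default test_method="ecdf" from patch 3. The annual counts, model rate, and confidence level below are made-up illustration values, not taken from any model or catalog; only the (mu, r) to (n, p) conversion and the scipy/numpy calls mirror the patch code.

# Illustration only: synthetic annual counts and a hypothetical model rate.
import numpy as np
from scipy.stats import nbinom, poisson

annual_counts = np.array([3, 1, 4, 2, 6, 2, 3, 5, 1, 4])  # hypothetical counts per year
mean_annual_model_rate = 2.8                               # hypothetical model rate (eqs/yr)

mu = annual_counts.mean()
var = annual_counts.var(ddof=1)

# Method-of-moments dispersion from var = mu + mu**2 / r, defined only when var > mu.
if var > mu:
    r = mu**2 / (var - mu)
    # scipy's nbinom takes (n, p); with n = r and p = r / (r + rate), the mean equals the
    # rate and the variance equals rate + rate**2 / r, as used in plot_N_test_annual.
    p = r / (r + mean_annual_model_rate)
    model_pmf = nbinom.pmf(np.arange(0, 11), r, p)
else:
    model_pmf = poisson.pmf(np.arange(0, 11), mean_annual_model_rate)

# ECDF interval (default test_method="ecdf"): empirical percentiles of the observed
# annual counts; the test passes if the model's mean annual rate falls inside.
conf_interval = 0.95
alpha = 1.0 - conf_interval
conf_min, conf_max = np.percentile(annual_counts, [100 * alpha / 2, 100 * (1 - alpha / 2)])
test_pass = conf_min <= mean_annual_model_rate <= conf_max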