From 485f921b1d138b6c486303d0adeb09b8596c4974 Mon Sep 17 00:00:00 2001 From: Richard Styron Date: Thu, 29 Jan 2026 10:46:08 -0800 Subject: [PATCH 1/3] annual N test, to be moved shortly --- .../gem/gem_test_functions.py | 60 +++++++++++ .../model_test_frameworks/gem/gem_tests.py | 58 ++++++++++ openquake/hme/reporting/reporting.py | 32 ++++++ openquake/hme/utils/plots.py | 100 ++++++++++++++++++ 4 files changed, 250 insertions(+) diff --git a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py index e41f902..ce4d98a 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py +++ b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py @@ -654,6 +654,66 @@ def get_closest_rupture(eq, rupture_df): return rupture_df.iloc[dists.argmin()] +def annual_N_eval_fn( + rup_gdf, + eq_gdf, + mag_bins, + investigation_time, +): + """ + Creates a distribution of annual earthquake counts from the catalog + and compares it to a Poisson distribution based on the mean annual model rate. + + This function counts the number of earthquakes per year in the catalog + and compares this distribution to what would be expected from a Poisson + distribution with the model's mean annual rate. + + Parameters + ---------- + rup_gdf : GeoDataFrame + Rupture geodataframe with model ruptures + eq_gdf : GeoDataFrame + Earthquake geodataframe with observed earthquakes + mag_bins : dict + Magnitude bins used in the test + investigation_time : float + Total investigation time in years + + Returns + ------- + dict + Dictionary containing: + - annual_counts: array of earthquake counts per year + - mean_annual_model_rate: mean annual rate from the model + - years: array of years in the catalog + """ + # Get the mean annual model rate + model_mfd = get_model_mfd(rup_gdf, mag_bins, t_yrs=1.0) + mean_annual_model_rate = sum(model_mfd.values()) + + # Extract years from earthquake times + eq_years = pd.DatetimeIndex(eq_gdf['time']).year + unique_years = sorted(eq_years.unique()) + + # Count earthquakes per year + annual_counts = np.array([ + (eq_years == year).sum() for year in unique_years + ]) + + results = { + "test_data": { + "annual_counts": annual_counts.tolist(), + "mean_annual_model_rate": mean_annual_model_rate, + "years": unique_years, + "n_years": len(unique_years), + "mean_annual_obs_count": annual_counts.mean(), + "std_annual_obs_count": annual_counts.std(), + } + } + + return results + + def catalog_ground_motion_eval_fn(test_config, input_data): # defining this here for now, will go in config later diff --git a/openquake/hme/model_test_frameworks/gem/gem_tests.py b/openquake/hme/model_test_frameworks/gem/gem_tests.py index 01c1895..94d0ac0 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_tests.py +++ b/openquake/hme/model_test_frameworks/gem/gem_tests.py @@ -22,6 +22,7 @@ moment_over_under_eval_fn, rupture_matching_eval_fn, catalog_ground_motion_eval_fn, + annual_N_eval_fn, ) from ..relm.relm_tests import ( @@ -473,6 +474,62 @@ def cumulative_occurrence_eval(cfg, input_data): } +def annual_N_eval(cfg, input_data): + """ + Annual N Evaluation: Compares the distribution of annual earthquake counts + from the catalog to a Poisson distribution based on the model's mean annual rate. + + This evaluation is similar to the N_test, but instead of comparing a single + observed count to a model rate, it compares the full distribution of annual + counts across all years in the catalog. 
+ + Note: This evaluation currently only works when the test_config does not have + a completeness table, as the completeness table would change the number of + events in different years. + """ + logging.info("Running GEM Annual N Evaluation") + + test_config = cfg["config"]["model_framework"]["gem"]["annual_N_eval"] + mag_bins = get_mag_bins_from_cfg(cfg) + completeness_table = cfg["input"]["seis_catalog"].get("completeness_table") + + if completeness_table is not None: + logging.warning( + "annual_N_eval does not currently support completeness tables. " + "Results may not be accurate." + ) + + prospective = test_config.get("prospective", False) + investigation_time = test_config.get( + "investigation_time", cfg["input"]["seis_catalog"].get("duration") + ) + + if prospective: + eq_gdf = input_data["pro_gdf"] + else: + eq_gdf = input_data["eq_gdf"] + + results = annual_N_eval_fn( + input_data["rupture_gdf"], + eq_gdf, + mag_bins, + investigation_time, + ) + + logging.info( + "Annual N Eval: mean annual obs count: {:.2f}".format( + results["test_data"]["mean_annual_obs_count"] + ) + ) + logging.info( + "Annual N Eval: mean annual model rate: {:.2f}".format( + results["test_data"]["mean_annual_model_rate"] + ) + ) + + return results + + def catalog_ground_motion_eval(cfg, input_data): logging.info("Running GEM catalog ground motion evaluation") @@ -502,6 +559,7 @@ def catalog_ground_motion_eval(cfg, input_data): "S_test": S_test, "N_test": N_test, "L_test": L_test, + "annual_N_eval": annual_N_eval, "rupture_matching_eval": rupture_matching_eval, "cumulative_occurrence_eval": cumulative_occurrence_eval, "catalog_ground_motion_eval": catalog_ground_motion_eval, diff --git a/openquake/hme/reporting/reporting.py b/openquake/hme/reporting/reporting.py index a8416d0..6a40d66 100644 --- a/openquake/hme/reporting/reporting.py +++ b/openquake/hme/reporting/reporting.py @@ -23,6 +23,7 @@ plot_rup_match_mag_dist, plot_rup_match_map, plot_N_test_results, + plot_annual_N_eval_results, plot_L_test_results, plot_M_test_results, plot_eqs_by_mag_time, @@ -124,6 +125,11 @@ def render_result_text( env=env, cfg=cfg, results=results, model_test_framework="gem" ) + if "annual_N_eval" in results["gem"].keys(): + render_annual_N_eval( + env=env, cfg=cfg, results=results, model_test_framework="gem" + ) + if "M_test" in results["gem"].keys(): render_M_test( env=env, cfg=cfg, results=results, model_test_framework="gem" @@ -203,6 +209,32 @@ def render_N_test( ) +def render_annual_N_eval( + env: Environment, cfg: dict, results: dict, model_test_framework="gem" +): + + annual_n_eval_plot_str = plot_annual_N_eval_results( + results[model_test_framework]["annual_N_eval"]["val"], + return_string=True, + ) + + # Try to use a template if it exists, otherwise just store the plot + try: + annual_n_eval = env.get_template("annual_n_eval.html") + results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( + annual_n_eval.render( + res=results[model_test_framework]["annual_N_eval"]["val"], + mtf=model_test_framework, + annual_n_eval_plot_str=annual_n_eval_plot_str, + ) + ) + except Exception: + # If template doesn't exist, just store the plot + results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( + "

Annual N Evaluation
" + annual_n_eval_plot_str + ) + + def render_L_test( env: Environment, cfg: dict, results: dict, model_test_framework="gem" ): diff --git a/openquake/hme/utils/plots.py b/openquake/hme/utils/plots.py index acde9f3..2043b47 100644 --- a/openquake/hme/utils/plots.py +++ b/openquake/hme/utils/plots.py @@ -62,6 +62,106 @@ def _make_stoch_mfds(mfd, iters: int, t_yrs: float = 1.0): return stoch_mfd_vals +def plot_annual_N_eval_results( + annual_N_eval_results: dict, + return_fig: bool = False, + return_string: bool = False, + save_fig: Union[bool, str] = False, +): + """ + Plot histogram of annual earthquake counts compared to Poisson distribution. + + Parameters + ---------- + annual_N_eval_results : dict + Results from annual_N_eval containing annual_counts and mean_annual_model_rate + return_fig : bool + If True, return the figure object + return_string : bool + If True, return SVG string + save_fig : Union[bool, str] + If string, save figure to this path + + Returns + ------- + fig or str + Matplotlib figure or SVG string + """ + test_data = annual_N_eval_results["test_data"] + annual_counts = np.array(test_data["annual_counts"]) + mean_annual_model_rate = test_data["mean_annual_model_rate"] + + fig, ax = plt.subplots(figsize=(10, 6)) + + # Create histogram of observed annual counts + max_count = int(np.ceil(max(annual_counts.max(), mean_annual_model_rate * 2))) + bins = np.arange(-0.5, max_count + 1.5, 1) + + ax.hist( + annual_counts, + bins=bins, + density=True, + histtype="stepfilled", + alpha=0.6, + color="C0", + label=f"Observed annual counts (n={len(annual_counts)} years)", + edgecolor="black", + linewidth=0.5, + ) + + # Plot Poisson distribution for model + x_poisson = np.arange(0, max_count + 1) + poisson_probs = poisson.pmf(x_poisson, mean_annual_model_rate) + + ax.plot( + x_poisson, + poisson_probs, + "o-", + color="C1", + label=f"Poisson (λ={mean_annual_model_rate:.2f})", + linewidth=2, + markersize=6, + ) + + # Add mean lines + ax.axvline( + annual_counts.mean(), + color="C0", + linestyle="--", + linewidth=2, + label=f"Mean observed ({annual_counts.mean():.2f})", + ) + ax.axvline( + mean_annual_model_rate, + color="C1", + linestyle="--", + linewidth=2, + label=f"Mean model ({mean_annual_model_rate:.2f})", + ) + + ax.set_xlabel("Number of Earthquakes per Year", fontsize=12) + ax.set_ylabel("Probability Density", fontsize=12) + ax.set_title("Annual Earthquake Count Distribution vs. 
Model", fontsize=14) + ax.legend(loc="best") + ax.grid(True, alpha=0.3) + + plt.tight_layout() + + if save_fig is not False: + fig.savefig(save_fig) + + if return_fig is True: + return fig + + elif return_string is True: + plt.switch_backend("svg") + fig_str = io.StringIO() + fig.savefig(fig_str, format="svg") + plt.close(fig) + fig_svg = " Date: Thu, 29 Jan 2026 11:51:01 -0800 Subject: [PATCH 2/3] added annual n-test with negative binomial statistics --- .../gem/gem_test_functions.py | 167 +++++++++++------- .../model_test_frameworks/gem/gem_tests.py | 101 ++++------- .../relm/relm_test_functions.py | 6 + openquake/hme/reporting/reporting.py | 32 ---- openquake/hme/utils/plots.py | 109 +++++++----- 5 files changed, 215 insertions(+), 200 deletions(-) diff --git a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py index ce4d98a..991bc90 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py +++ b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py @@ -27,6 +27,7 @@ ) from openquake.hme.utils.utils import _n_procs, breakpoint from openquake.hme.utils.stats import geom_mean, weighted_geom_mean +from scipy.stats import poisson, nbinom, t as t_dist def get_rupture_gdf_cell_moment(rupture_gdf, t_yrs, rup_groups=None): @@ -654,66 +655,6 @@ def get_closest_rupture(eq, rupture_df): return rupture_df.iloc[dists.argmin()] -def annual_N_eval_fn( - rup_gdf, - eq_gdf, - mag_bins, - investigation_time, -): - """ - Creates a distribution of annual earthquake counts from the catalog - and compares it to a Poisson distribution based on the mean annual model rate. - - This function counts the number of earthquakes per year in the catalog - and compares this distribution to what would be expected from a Poisson - distribution with the model's mean annual rate. - - Parameters - ---------- - rup_gdf : GeoDataFrame - Rupture geodataframe with model ruptures - eq_gdf : GeoDataFrame - Earthquake geodataframe with observed earthquakes - mag_bins : dict - Magnitude bins used in the test - investigation_time : float - Total investigation time in years - - Returns - ------- - dict - Dictionary containing: - - annual_counts: array of earthquake counts per year - - mean_annual_model_rate: mean annual rate from the model - - years: array of years in the catalog - """ - # Get the mean annual model rate - model_mfd = get_model_mfd(rup_gdf, mag_bins, t_yrs=1.0) - mean_annual_model_rate = sum(model_mfd.values()) - - # Extract years from earthquake times - eq_years = pd.DatetimeIndex(eq_gdf['time']).year - unique_years = sorted(eq_years.unique()) - - # Count earthquakes per year - annual_counts = np.array([ - (eq_years == year).sum() for year in unique_years - ]) - - results = { - "test_data": { - "annual_counts": annual_counts.tolist(), - "mean_annual_model_rate": mean_annual_model_rate, - "years": unique_years, - "n_years": len(unique_years), - "mean_annual_obs_count": annual_counts.mean(), - "std_annual_obs_count": annual_counts.std(), - } - } - - return results - - def catalog_ground_motion_eval_fn(test_config, input_data): # defining this here for now, will go in config later @@ -785,3 +726,109 @@ def catalog_ground_motion_eval_fn(test_config, input_data): logging.warning(f"can't do eq {eq.name}") return gmm_results + + +def N_test_annual( + eq_gdf: GeoDataFrame, mean_annual_model_rate: float, conf_interval: float +) -> dict: + """ + N-test using annual earthquake count distribution. 
+ + Tests if the model's mean annual rate is consistent with the observed + annual earthquake counts, accounting for temporal variability without + assuming a Poisson process. Uses a t-based confidence interval approach. + + If the data shows overdispersion (variance > mean), estimates a negative + binomial dispersion parameter for visualization and reporting. + + Parameters + ---------- + eq_gdf : GeoDataFrame + Earthquake geodataframe with observed earthquakes + mean_annual_model_rate : float + Mean annual rate from the model + conf_interval : float + Confidence interval for the test (e.g., 0.95 for 95%) + + Returns + ------- + dict + Test results including annual counts, dispersion statistics, and + confidence intervals + """ + # Extract years from earthquake times + eq_years = pd.DatetimeIndex(eq_gdf['time']).year + unique_years = sorted(eq_years.unique()) + n_years = len(unique_years) + + # Count earthquakes per year + annual_counts = np.array([ + (eq_years == year).sum() for year in unique_years + ]) + + # Mean and standard deviation of annual counts + mean_annual_obs = annual_counts.mean() + std_annual_obs = annual_counts.std(ddof=1) + + # Calculate variance for dispersion estimation + var_annual_obs = annual_counts.var(ddof=1) + + # Estimate dispersion parameter r for negative binomial + # r = mu^2 / (var - mu) + # If var <= mu, data is not overdispersed (Poisson-like or underdispersed) + if var_annual_obs > mean_annual_obs: + r_dispersion = mean_annual_obs**2 / (var_annual_obs - mean_annual_obs) + is_overdispersed = True + logging.info( + f"N-Test (annual): Observed overdispersion - " + f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}, r={r_dispersion:.2f}" + ) + else: + r_dispersion = None + is_overdispersed = False + logging.info( + f"N-Test (annual): No overdispersion detected - " + f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}" + ) + + # Test if model mean is consistent with observed data + # Use t-distribution confidence interval (doesn't assume Poisson) + t_crit = t_dist.ppf((1 + conf_interval) / 2, n_years - 1) + margin_error = t_crit * (std_annual_obs / np.sqrt(n_years)) + + conf_min = mean_annual_obs - margin_error + conf_max = mean_annual_obs + margin_error + + # Test passes if model mean is within CI of observed mean + test_pass = conf_min <= mean_annual_model_rate <= conf_max + + logging.info( + f"N-Test (annual): Model rate {mean_annual_model_rate:.2f}, " + f"Observed mean {mean_annual_obs:.2f} ± {margin_error:.2f} " + f"({conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}])" + ) + + test_res = "Pass" if test_pass else "Fail" + logging.info(f"N-Test (annual): {test_res}") + + test_result = { + "conf_interval_frac": conf_interval, + "conf_interval": (conf_min, conf_max), + "n_pred_earthquakes": mean_annual_model_rate, # annual rate + "n_obs_earthquakes": mean_annual_obs, # mean annual count + "test_res": test_res, + "test_pass": bool(test_pass), + # Temporal variability statistics + "is_overdispersed": is_overdispersed, + "r_dispersion": float(r_dispersion) if r_dispersion is not None else None, + "variance_annual_obs": float(var_annual_obs), + # Additional data for annual distribution plotting + "annual_counts": annual_counts.tolist(), + "years": unique_years, + "n_years": n_years, + "mean_annual_obs_count": float(mean_annual_obs), + "std_annual_obs_count": float(std_annual_obs), + "mean_annual_model_rate": mean_annual_model_rate, + } + + return test_result diff --git a/openquake/hme/model_test_frameworks/gem/gem_tests.py 
b/openquake/hme/model_test_frameworks/gem/gem_tests.py index 94d0ac0..0648c1f 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_tests.py +++ b/openquake/hme/model_test_frameworks/gem/gem_tests.py @@ -22,7 +22,7 @@ moment_over_under_eval_fn, rupture_matching_eval_fn, catalog_ground_motion_eval_fn, - annual_N_eval_fn, + N_test_annual, ) from ..relm.relm_tests import ( @@ -209,16 +209,50 @@ def L_test( def N_test(cfg: dict, input_data: dict) -> dict: logging.info("Running N-Test") - + test_config = cfg["config"]["model_framework"]["gem"]["N_test"] completeness_table = cfg["input"]["seis_catalog"].get("completeness_table") test_config["mag_bins"] = get_mag_bins_from_cfg(cfg) prospective = test_config.get("prospective", False) + prob_model = test_config["prob_model"] + + # Handle annual prob_model separately (GEM-specific implementation) + if prob_model == "annual": + if prospective: + eq_gdf = input_data["pro_gdf"] + else: + eq_gdf = input_data["eq_gdf"] + + if completeness_table is not None: + logging.warning( + "N-Test with prob_model='annual' does not support completeness tables. " + "Results may not be accurate." + ) + + # Get mean annual rate from model + mag_bins = get_mag_bins_from_cfg(cfg) + model_mfd = get_model_mfd(input_data["rupture_gdf"], mag_bins, t_yrs=1.0) + mean_annual_rate = sum(model_mfd.values()) - if ( - test_config["prob_model"] in ["poisson", "poisson_cum"] - ) and not prospective: + conf_interval = test_config.get("conf_interval", 0.95) + test_results = N_test_annual(eq_gdf, mean_annual_rate, conf_interval) + + # Add metadata for plotting/reporting + test_results["M_min"] = min(mag_bins.keys()) + test_results["prob_model"] = "annual" + + logging.info( + "N-Test number obs eqs: {}".format(test_results["n_obs_earthquakes"]) + ) + logging.info( + "N-Test number pred eqs: {}".format(test_results["n_pred_earthquakes"]) + ) + logging.info("N-Test {}".format(test_results["test_pass"])) + return test_results + + # For other prob_models, use RELM implementation + if prob_model in ["poisson", "poisson_cum"] and not prospective: if completeness_table is not None: test_config["completeness_table"] = completeness_table test_config["mag_bins"] = get_mag_bins_from_cfg(cfg) @@ -474,62 +508,6 @@ def cumulative_occurrence_eval(cfg, input_data): } -def annual_N_eval(cfg, input_data): - """ - Annual N Evaluation: Compares the distribution of annual earthquake counts - from the catalog to a Poisson distribution based on the model's mean annual rate. - - This evaluation is similar to the N_test, but instead of comparing a single - observed count to a model rate, it compares the full distribution of annual - counts across all years in the catalog. - - Note: This evaluation currently only works when the test_config does not have - a completeness table, as the completeness table would change the number of - events in different years. - """ - logging.info("Running GEM Annual N Evaluation") - - test_config = cfg["config"]["model_framework"]["gem"]["annual_N_eval"] - mag_bins = get_mag_bins_from_cfg(cfg) - completeness_table = cfg["input"]["seis_catalog"].get("completeness_table") - - if completeness_table is not None: - logging.warning( - "annual_N_eval does not currently support completeness tables. " - "Results may not be accurate." 
- ) - - prospective = test_config.get("prospective", False) - investigation_time = test_config.get( - "investigation_time", cfg["input"]["seis_catalog"].get("duration") - ) - - if prospective: - eq_gdf = input_data["pro_gdf"] - else: - eq_gdf = input_data["eq_gdf"] - - results = annual_N_eval_fn( - input_data["rupture_gdf"], - eq_gdf, - mag_bins, - investigation_time, - ) - - logging.info( - "Annual N Eval: mean annual obs count: {:.2f}".format( - results["test_data"]["mean_annual_obs_count"] - ) - ) - logging.info( - "Annual N Eval: mean annual model rate: {:.2f}".format( - results["test_data"]["mean_annual_model_rate"] - ) - ) - - return results - - def catalog_ground_motion_eval(cfg, input_data): logging.info("Running GEM catalog ground motion evaluation") @@ -559,7 +537,6 @@ def catalog_ground_motion_eval(cfg, input_data): "S_test": S_test, "N_test": N_test, "L_test": L_test, - "annual_N_eval": annual_N_eval, "rupture_matching_eval": rupture_matching_eval, "cumulative_occurrence_eval": cumulative_occurrence_eval, "catalog_ground_motion_eval": catalog_ground_motion_eval, diff --git a/openquake/hme/model_test_frameworks/relm/relm_test_functions.py b/openquake/hme/model_test_frameworks/relm/relm_test_functions.py index c0f397d..2db09c1 100644 --- a/openquake/hme/model_test_frameworks/relm/relm_test_functions.py +++ b/openquake/hme/model_test_frameworks/relm/relm_test_functions.py @@ -695,6 +695,12 @@ def n_test_function(rup_gdf, eq_gdf, test_config: dict): ] test_result = N_test_empirical(n_obs, n_eq_samples, conf_interval) + elif test_config["prob_model"] == "annual": + raise ValueError( + "prob_model='annual' is not supported in the RELM framework. " + "Use the GEM framework instead (model_framework: gem: N_test:)." + ) + elif test_config["prob_model"] == "neg_binom": raise NotImplementedError("can't subdivide earthquakes yet") n_eqs_in_subs = subdivide_observed_eqs( diff --git a/openquake/hme/reporting/reporting.py b/openquake/hme/reporting/reporting.py index 6a40d66..a8416d0 100644 --- a/openquake/hme/reporting/reporting.py +++ b/openquake/hme/reporting/reporting.py @@ -23,7 +23,6 @@ plot_rup_match_mag_dist, plot_rup_match_map, plot_N_test_results, - plot_annual_N_eval_results, plot_L_test_results, plot_M_test_results, plot_eqs_by_mag_time, @@ -125,11 +124,6 @@ def render_result_text( env=env, cfg=cfg, results=results, model_test_framework="gem" ) - if "annual_N_eval" in results["gem"].keys(): - render_annual_N_eval( - env=env, cfg=cfg, results=results, model_test_framework="gem" - ) - if "M_test" in results["gem"].keys(): render_M_test( env=env, cfg=cfg, results=results, model_test_framework="gem" @@ -209,32 +203,6 @@ def render_N_test( ) -def render_annual_N_eval( - env: Environment, cfg: dict, results: dict, model_test_framework="gem" -): - - annual_n_eval_plot_str = plot_annual_N_eval_results( - results[model_test_framework]["annual_N_eval"]["val"], - return_string=True, - ) - - # Try to use a template if it exists, otherwise just store the plot - try: - annual_n_eval = env.get_template("annual_n_eval.html") - results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( - annual_n_eval.render( - res=results[model_test_framework]["annual_N_eval"]["val"], - mtf=model_test_framework, - annual_n_eval_plot_str=annual_n_eval_plot_str, - ) - ) - except Exception: - # If template doesn't exist, just store the plot - results[model_test_framework]["annual_N_eval"]["rendered_text"] = ( - "

Annual N Evaluation
" + annual_n_eval_plot_str - ) - - def render_L_test( env: Environment, cfg: dict, results: dict, model_test_framework="gem" ): diff --git a/openquake/hme/utils/plots.py b/openquake/hme/utils/plots.py index 2043b47..00ce3f2 100644 --- a/openquake/hme/utils/plots.py +++ b/openquake/hme/utils/plots.py @@ -62,34 +62,29 @@ def _make_stoch_mfds(mfd, iters: int, t_yrs: float = 1.0): return stoch_mfd_vals -def plot_annual_N_eval_results( - annual_N_eval_results: dict, - return_fig: bool = False, - return_string: bool = False, - save_fig: Union[bool, str] = False, -): +def plot_N_test_annual(N_test_results: dict): """ - Plot histogram of annual earthquake counts compared to Poisson distribution. + Plot histogram of annual earthquake counts with model distribution. + + Shows either Poisson (if no overdispersion) or Negative Binomial + (if overdispersion detected) distribution for comparison. Parameters ---------- - annual_N_eval_results : dict - Results from annual_N_eval containing annual_counts and mean_annual_model_rate - return_fig : bool - If True, return the figure object - return_string : bool - If True, return SVG string - save_fig : Union[bool, str] - If string, save figure to this path + N_test_results : dict + N-test results with prob_model='annual' containing annual_counts and + mean_annual_model_rate Returns ------- - fig or str - Matplotlib figure or SVG string + fig + Matplotlib figure """ - test_data = annual_N_eval_results["test_data"] - annual_counts = np.array(test_data["annual_counts"]) - mean_annual_model_rate = test_data["mean_annual_model_rate"] + annual_counts = np.array(N_test_results["annual_counts"]) + mean_annual_model_rate = N_test_results["mean_annual_model_rate"] + is_overdispersed = N_test_results.get("is_overdispersed", False) + r_dispersion = N_test_results.get("r_dispersion") + conf_interval = N_test_results.get("conf_interval", (None, None)) fig, ax = plt.subplots(figsize=(10, 6)) @@ -109,19 +104,40 @@ def plot_annual_N_eval_results( linewidth=0.5, ) - # Plot Poisson distribution for model - x_poisson = np.arange(0, max_count + 1) - poisson_probs = poisson.pmf(x_poisson, mean_annual_model_rate) + # Plot model distribution - use negative binomial if overdispersed, otherwise Poisson + x_vals = np.arange(0, max_count + 1) - ax.plot( - x_poisson, - poisson_probs, - "o-", - color="C1", - label=f"Poisson (λ={mean_annual_model_rate:.2f})", - linewidth=2, - markersize=6, - ) + if is_overdispersed and r_dispersion is not None: + # Negative binomial distribution + # Convert (mu, r) to scipy's (n, p) parameterization + # n = r, p = r / (r + mu) + from scipy.stats import nbinom + n_param = r_dispersion + p_param = r_dispersion / (r_dispersion + mean_annual_model_rate) + model_probs = nbinom.pmf(x_vals, n_param, p_param) + + ax.plot( + x_vals, + model_probs, + "o-", + color="C1", + label=f"Negative Binomial (μ={mean_annual_model_rate:.2f}, r={r_dispersion:.2f})", + linewidth=2, + markersize=6, + ) + else: + # Poisson distribution + model_probs = poisson.pmf(x_vals, mean_annual_model_rate) + + ax.plot( + x_vals, + model_probs, + "o-", + color="C1", + label=f"Poisson (λ={mean_annual_model_rate:.2f})", + linewidth=2, + markersize=6, + ) # Add mean lines ax.axvline( @@ -139,27 +155,25 @@ def plot_annual_N_eval_results( label=f"Mean model ({mean_annual_model_rate:.2f})", ) + # Add confidence interval shading if available + if conf_interval[0] is not None and conf_interval[1] is not None: + ax.axvspan( + conf_interval[0], + conf_interval[1], + alpha=0.2, + color="C0", + 
label=f"Obs. CI ({N_test_results.get('conf_interval_frac', 0.95)*100:.0f}%)", + ) + ax.set_xlabel("Number of Earthquakes per Year", fontsize=12) ax.set_ylabel("Probability Density", fontsize=12) - ax.set_title("Annual Earthquake Count Distribution vs. Model", fontsize=14) + ax.set_title("Annual Earthquake Count Distribution", fontsize=14) ax.legend(loc="best") ax.grid(True, alpha=0.3) plt.tight_layout() - if save_fig is not False: - fig.savefig(save_fig) - - if return_fig is True: - return fig - - elif return_string is True: - plt.switch_backend("svg") - fig_str = io.StringIO() - fig.savefig(fig_str, format="svg") - plt.close(fig) - fig_svg = " Date: Thu, 29 Jan 2026 13:10:06 -0800 Subject: [PATCH 3/3] ecdf option for annual N test --- .../gem/gem_test_functions.py | 75 ++++++++++++++----- .../model_test_frameworks/gem/gem_tests.py | 3 +- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py index 991bc90..60f0fd4 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_test_functions.py +++ b/openquake/hme/model_test_frameworks/gem/gem_test_functions.py @@ -729,14 +729,17 @@ def catalog_ground_motion_eval_fn(test_config, input_data): def N_test_annual( - eq_gdf: GeoDataFrame, mean_annual_model_rate: float, conf_interval: float + eq_gdf: GeoDataFrame, + mean_annual_model_rate: float, + conf_interval: float, + test_method: str = "ecdf" ) -> dict: """ N-test using annual earthquake count distribution. Tests if the model's mean annual rate is consistent with the observed annual earthquake counts, accounting for temporal variability without - assuming a Poisson process. Uses a t-based confidence interval approach. + assuming a Poisson process. If the data shows overdispersion (variance > mean), estimates a negative binomial dispersion parameter for visualization and reporting. 
@@ -749,6 +752,10 @@ def N_test_annual( Mean annual rate from the model conf_interval : float Confidence interval for the test (e.g., 0.95 for 95%) + test_method : str, optional + Method for calculating confidence intervals: + - "ecdf": Use empirical percentiles from observed distribution (default) + - "t_distribution": Use t-distribution confidence interval Returns ------- @@ -780,36 +787,58 @@ def N_test_annual( r_dispersion = mean_annual_obs**2 / (var_annual_obs - mean_annual_obs) is_overdispersed = True logging.info( - f"N-Test (annual): Observed overdispersion - " + f"N-Test (annual, {test_method}): Observed overdispersion - " f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}, r={r_dispersion:.2f}" ) else: r_dispersion = None is_overdispersed = False logging.info( - f"N-Test (annual): No overdispersion detected - " + f"N-Test (annual, {test_method}): No overdispersion detected - " f"mean={mean_annual_obs:.2f}, var={var_annual_obs:.2f}" ) - # Test if model mean is consistent with observed data - # Use t-distribution confidence interval (doesn't assume Poisson) - t_crit = t_dist.ppf((1 + conf_interval) / 2, n_years - 1) - margin_error = t_crit * (std_annual_obs / np.sqrt(n_years)) + # Calculate confidence interval based on test_method + if test_method == "ecdf": + # Use empirical percentiles from observed distribution + alpha = 1 - conf_interval + lower_percentile = (alpha / 2) * 100 + upper_percentile = (1 - alpha / 2) * 100 - conf_min = mean_annual_obs - margin_error - conf_max = mean_annual_obs + margin_error + conf_min = np.percentile(annual_counts, lower_percentile) + conf_max = np.percentile(annual_counts, upper_percentile) - # Test passes if model mean is within CI of observed mean - test_pass = conf_min <= mean_annual_model_rate <= conf_max + # Calculate what percentile the model rate corresponds to + model_percentile = (annual_counts < mean_annual_model_rate).sum() / n_years * 100 - logging.info( - f"N-Test (annual): Model rate {mean_annual_model_rate:.2f}, " - f"Observed mean {mean_annual_obs:.2f} ± {margin_error:.2f} " - f"({conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}])" - ) + logging.info( + f"N-Test (annual, ECDF): Model rate {mean_annual_model_rate:.2f}, " + f"Observed {conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}] " + f"(from {lower_percentile:.1f}th to {upper_percentile:.1f}th percentile)" + ) + logging.info( + f"N-Test (annual, ECDF): Model rate at {model_percentile:.1f}th percentile " + f"of observed distribution" + ) + else: + # Use t-distribution confidence interval (doesn't assume Poisson) + t_crit = t_dist.ppf((1 + conf_interval) / 2, n_years - 1) + margin_error = t_crit * (std_annual_obs / np.sqrt(n_years)) + + conf_min = mean_annual_obs - margin_error + conf_max = mean_annual_obs + margin_error + + logging.info( + f"N-Test (annual, t-distribution): Model rate {mean_annual_model_rate:.2f}, " + f"Observed mean {mean_annual_obs:.2f} ± {margin_error:.2f} " + f"({conf_interval*100:.0f}% CI: [{conf_min:.2f}, {conf_max:.2f}])" + ) + + # Test passes if model mean is within CI + test_pass = conf_min <= mean_annual_model_rate <= conf_max test_res = "Pass" if test_pass else "Fail" - logging.info(f"N-Test (annual): {test_res}") + logging.info(f"N-Test (annual, {test_method}): {test_res}") test_result = { "conf_interval_frac": conf_interval, @@ -818,6 +847,8 @@ def N_test_annual( "n_obs_earthquakes": mean_annual_obs, # mean annual count "test_res": test_res, "test_pass": bool(test_pass), + # Test method + "test_method": test_method, # 
Temporal variability statistics "is_overdispersed": is_overdispersed, "r_dispersion": float(r_dispersion) if r_dispersion is not None else None, @@ -831,4 +862,12 @@ def N_test_annual( "mean_annual_model_rate": mean_annual_model_rate, } + # Add ECDF-specific fields if using ECDF method + if test_method == "ecdf": + test_result.update({ + "model_percentile": float(model_percentile), + "lower_percentile": float(lower_percentile), + "upper_percentile": float(upper_percentile), + }) + return test_result diff --git a/openquake/hme/model_test_frameworks/gem/gem_tests.py b/openquake/hme/model_test_frameworks/gem/gem_tests.py index 0648c1f..7a165e0 100644 --- a/openquake/hme/model_test_frameworks/gem/gem_tests.py +++ b/openquake/hme/model_test_frameworks/gem/gem_tests.py @@ -236,7 +236,8 @@ def N_test(cfg: dict, input_data: dict) -> dict: mean_annual_rate = sum(model_mfd.values()) conf_interval = test_config.get("conf_interval", 0.95) - test_results = N_test_annual(eq_gdf, mean_annual_rate, conf_interval) + test_method = test_config.get("test_method", "ecdf") + test_results = N_test_annual(eq_gdf, mean_annual_rate, conf_interval, test_method) # Add metadata for plotting/reporting test_results["M_min"] = min(mag_bins.keys())
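
Reviewer note appended after the series: a minimal standalone sketch of the statistics these patches implement, for reference while reviewing. It shows the method-of-moments negative binomial fit that N_test_annual applies when the annual counts are overdispersed, and the ECDF percentile interval behind the default test_method="ecdf" from patch 3. The annual counts, model rate, and confidence level below are made-up illustration values, not taken from any model or catalog; only the (mu, r) to (n, p) conversion and the scipy/numpy calls mirror the patch code.

# Illustration only: synthetic annual counts and a hypothetical model rate.
import numpy as np
from scipy.stats import nbinom, poisson

annual_counts = np.array([3, 1, 4, 2, 6, 2, 3, 5, 1, 4])  # hypothetical counts per year
mean_annual_model_rate = 2.8                               # hypothetical model rate (eqs/yr)

mu = annual_counts.mean()
var = annual_counts.var(ddof=1)

# Method-of-moments dispersion from var = mu + mu**2 / r, defined only when var > mu.
if var > mu:
    r = mu**2 / (var - mu)
    # scipy's nbinom takes (n, p); with n = r and p = r / (r + rate), the mean equals the
    # rate and the variance equals rate + rate**2 / r, as used in plot_N_test_annual.
    p = r / (r + mean_annual_model_rate)
    model_pmf = nbinom.pmf(np.arange(0, 11), r, p)
else:
    model_pmf = poisson.pmf(np.arange(0, 11), mean_annual_model_rate)

# ECDF interval (default test_method="ecdf"): empirical percentiles of the observed
# annual counts; the test passes if the model's mean annual rate falls inside.
conf_interval = 0.95
alpha = 1.0 - conf_interval
conf_min, conf_max = np.percentile(annual_counts, [100 * alpha / 2, 100 * (1 - alpha / 2)])
test_pass = conf_min <= mean_annual_model_rate <= conf_max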