diff --git a/NAMESPACE b/NAMESPACE index 9d913e9..2267f61 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,9 @@ export(flow_frequency_sampler) export(flow_frequency_sampler_expected) export(hydrograph_setup) export(mod_puls_routing) +export(pmf_stage_lognormal) export(qp3) +export(rejection_sampling_stage) export(rfa_simulate) export(scale_hydrograph) export(stage2aep) @@ -16,7 +18,9 @@ importFrom(stats,approx) importFrom(stats,approxfun) importFrom(stats,pnorm) importFrom(stats,qgamma) +importFrom(stats,qlnorm) importFrom(stats,qnorm) importFrom(stats,quantile) +importFrom(stats,rlnorm) importFrom(stats,runif) importFrom(utils,write.csv) diff --git a/R/imports.R b/R/imports.R index 20e0e19..168d544 100644 --- a/R/imports.R +++ b/R/imports.R @@ -1,3 +1,3 @@ -#' @importFrom stats approx approxfun pnorm qgamma qnorm quantile runif +#' @importFrom stats approx approxfun pnorm qgamma qnorm quantile runif qlnorm rlnorm #' @importFrom utils write.csv NULL diff --git a/R/rejection_sampling.R b/R/rejection_sampling.R new file mode 100644 index 0000000..8bce429 --- /dev/null +++ b/R/rejection_sampling.R @@ -0,0 +1,201 @@ +#' Thin rejection sampling output for plotting +#' +#' Thins a large sample of sorted z-scores and stages for plotting efficiency. +#' Retains 5,000 evenly-spaced points across the full range plus the last +#' 10,000 points for dense tail coverage. +#' +#' @param n Integer. Total number of samples. +#' @param z_sorted Numeric vector of sorted z-scores (ascending), length \code{n}. +#' @param stage_sorted Numeric vector of sorted stage values (ascending), length \code{n}. +#' +#' @return A data frame with columns \code{z_aep} and \code{stage}. +#' +#' @keywords internal +thin_samples <- function(n, z_sorted, stage_sorted) { + thin_idx <- unique(c( + round(seq(1, n, length.out = 5000)), + (n - 10000):n + )) + thin_idx <- sort(unique(thin_idx)) + thin_idx <- thin_idx[thin_idx >= 1 & thin_idx <= n] + data.frame( + z_aep = z_sorted[thin_idx], + stage = stage_sorted[thin_idx] + ) +} + +#' Parameterize a Three-Parameter Lognormal PMF for Stage +#' +#' Computes the parameters of a three-parameter lognormal distribution for use +#' as a probabilistic maximum stage (PMF) in rejection sampling. The shift +#' parameter defines the lower bound of the distribution. +#' +#' @param pmf_shift Numeric. Lower bound (shift) of the lognormal distribution. +#' Must be greater than \code{pmf_mean}. +#' @param pmf_mean Numeric. Mean of the PMF stage distribution. Must be less +#' than \code{pmf_shift}. +#' @param pmf_sigma Numeric. Standard deviation on the log scale. Defaults to +#' \code{0.5}. +#' +#' @return A named list with the following elements: +#' \describe{ +#' \item{pmf_shift}{Lower bound of the distribution.} +#' \item{pmf_mean}{Mean of the distribution.} +#' \item{pmf_sigma}{Standard deviation on the log scale.} +#' \item{pmf_mu}{Derived location parameter on the log scale.} +#' \item{pmf_p05}{5th percentile of the PMF stage distribution.} +#' \item{pmf_p95}{95th percentile of the PMF stage distribution.} +#' } +#' +#' @examples +#' pmf <- pmf_stage_lognormal(pmf_shift = 1200, pmf_mean = 1150, pmf_sigma = 0.5) +#' pmf$pmf_p05 +#' pmf$pmf_p95 +#' +#' @export +pmf_stage_lognormal <- function(pmf_shift, pmf_mean, pmf_sigma = 0.5){ + if (pmf_mean >= pmf_shift) { + cli::cli_abort(c( + "{.arg pmf_mean} must be less than {.arg pmf_shift}.", + "x" = "{.arg pmf_mean} is {.val {pmf_mean}} but {.arg pmf_shift} is {.val {pmf_shift}}." + )) + } + + # Derive mu ==== + pmf_mu <- log(pmf_mean - pmf_shift) - (pmf_sigma^2 / 2) + + # Summary stats + # pmf_median <- pmf_shift + exp(pmf_mu) + # pmf_sd <- sqrt((exp(pmf_sigma^2) - 1) * exp(2 * pmf_mu + pmf_sigma^2)) + pmf_p05 <- pmf_shift + qlnorm(0.05, pmf_mu, pmf_sigma) + pmf_p95 <- pmf_shift + qlnorm(0.95, pmf_mu, pmf_sigma) + + # create pmf_stage_LN as a list + pmf_stage_LN <- list(pmf_shift = pmf_shift, + pmf_mean = pmf_mean, + pmf_sigma = pmf_sigma, + pmf_mu = pmf_mu, + pmf_p05 = pmf_p05, + pmf_p95 = pmf_p95) + + return(pmf_stage_LN) +} + +#' Rejection Sampling of Stage Bounded by a Probabilistic Maximum Stage +#' +#' Draws \code{n_samples} stage values from a stage-frequency curve, rejecting +#' any draws that exceed a probabilistic maximum stage (PMF) sampled from a +#' three-parameter lognormal distribution. Accepted samples are ranked and +#' assigned Weibull plotting positions for use in frequency analysis. +#' +#' @param pmf_stage_LN A named list returned by \code{\link{pmf_stage_lognormal}} +#' containing the lognormal PMF parameters. +#' @param stage_freq_df A data frame with AEP in column 1 and stage in column 2. +#' Mutually exclusive with \code{aep} and \code{stage}. +#' @param aep Numeric vector of annual exceedance probabilities. Must be +#' supplied with \code{stage}. Mutually exclusive with \code{stage_freq_df}. +#' @param stage Numeric vector of stages corresponding to \code{aep}. Must be +#' supplied with \code{aep}. Mutually exclusive with \code{stage_freq_df}. +#' @param n_samples Integer. Number of samples to draw. Defaults to \code{1e7}. +#' +#' @return A data frame of thinned accepted samples with columns for z-score +#' and stage, suitable for plotting a probabilistically bounded stage-frequency +#' curve. Output is produced by \code{thin_samples()}. +#' +#' @seealso \code{\link{pmf_stage_lognormal}} +#' +#' @examples +#' \dontrun{ +#' pmf <- pmf_stage_lognormal(pmf_shift = 1200, pmf_mean = 1150, pmf_sigma = 0.5) +#' +#' result <- rejection_sampling_stage( +#' pmf_stage_LN = pmf, +#' stage_freq_df = my_stage_freq_df, +#' n_samples = 1e7 +#' ) +#' } +#' +#' @export +rejection_sampling_stage <- function(pmf_stage_LN, stage_freq_df = NULL, aep = NULL, stage = NULL, n_samples = 1E7){ + # Resolve inputs + if (!is.null(stage_freq_df)) { + if (!is.null(aep) || !is.null(stage)) { + cli::cli_abort("Provide either {.arg stage_freq_df} or {.arg aep}/{.arg stage}, not both.") + } + aep <- stage_freq_df[[1]] + stage <- stage_freq_df[[2]] + + } else if (!is.null(aep) && !is.null(stage)) { + # use as-is + + } else { + cli::cli_abort(c( + "Must provide either {.arg stage_freq_df} or both {.arg aep} and {.arg stage}.", + "i" = "Supply a data frame via {.arg stage_freq_df}, or pass numeric vectors to {.arg aep} and {.arg stage} directly." + )) + } + + # PMF Log-Normal Parameters ================================================== + pmf_shift <- pmf_stage_LN[["pmf_shift"]] + pmf_mu <- pmf_stage_LN[["pmf_mu"]] + pmf_sigma <- pmf_stage_LN[["pmf_sigma"]] + + # Transform to z-space for interpolation function ============================ + z_aep <- qnorm(1-aep) + zaep_stage <- approxfun(z_aep, stage, rule = 2) + + # First sample rejection ===================================================== + # Sample AEPs + aep_draws <- runif(n_samples, min = 1e-9, max = 1 - 1e-9) + + # Return Stage corresponding to sampled AEP (as z-value) + stage_draws <- zaep_stage(qnorm(1 - aep_draws)) + + # Randomly Sample PMF Stage from log-normal above + pmf_draws <- pmf_shift + rlnorm(n_samples, pmf_mu, pmf_sigma) + + # Reject (aep,stage) where the stage exceeds sampled PMF Stage + reject_idx <- which(stage_draws > pmf_draws) + + # Rejection Iterations ======================================================= + iter <- 0 + while (length(reject_idx) > 0) { + # Number of rejects + n_rej <- length(reject_idx) + # New AEP sample + aep_new <- runif(n_rej, min = 1e-9, max = 1 - 1e-9) + # Corresponding stage sample + stage_new <- zaep_stage(qnorm(1 - aep_new)) + # New PMF Stage sample + pmf_new <- pmf_shift + rlnorm(n_rej, pmf_mu, pmf_sigma) + # Replace rejections with resampled aep,stage + stage_draws[reject_idx] <- stage_new + pmf_draws[reject_idx] <- pmf_new + # Next rejection index + reject_idx <- reject_idx[stage_new > pmf_new] + # Count iterations + iter <- iter + 1 + if (iter >= 1E3) { + cli::cli_warn(c( + "Rejection sampling did not converge after {iter} iterations.", + "i" = "{length(reject_idx)} sample{?s} remain unresolved.", + "i" = "Check that {.arg pmf_stage_LN} is consistent with the provided stage-frequency curve." + )) + break + } + } + + # Assign Weibull to the accepted samples ===================================== + # Sort Stages + stage_sorted <- sort(stage_draws) + + # Assign Weibull pp + weibull_aep <- 1 - seq_len(n_samples) / (n_samples + 1) + + # Convert to z_aep + z_sorted <- qnorm(1 - weibull_aep) + + # Thin for plotting purposes + probabilistic_bounded <- thin_samples(n_samples, z_sorted, stage_sorted) + return(probabilistic_bounded) +} diff --git a/R/utils.R b/R/utils.R index f680e4d..d6a4055 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,26 +8,3 @@ power_function <- function(x) { 10^(1:x) } -#' Thin samples -#' Thin a large sample of sorted z-scores and stages for efficiency. -#' Retains 5,000 evenly-spaced points across the full range plus the last -#' 10,000 points for dense coverage at the extreme tail. -#' -#' @param n Total number of samples -#' @param z_sorted Sorted z-scores (ascending), length n -#' @param stage_sorted Sorted stage values (ascending), length n -#' @return A tibble with columns z_aep and stage -thin_samples <- function(n, z_sorted, stage_sorted) { - - thin_idx <- unique(c( - round(seq(1, n, length.out = 5000)), - (n - 10000):n - )) - thin_idx <- sort(unique(thin_idx)) - thin_idx <- thin_idx[thin_idx >= 1 & thin_idx <= n] - - data.frame( - z_aep = z_sorted[thin_idx], - stage = stage_sorted[thin_idx] - ) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index f98ffb8..44b8823 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -40,6 +40,8 @@ reference: - interpolate_stage_matrix - power_function - theme_rfar_conceptual + - pmf_stage_lognormal + - rejection_sampling_stage - title: "Datasets" desc: "Example datasets included with rfaR" contents: diff --git a/man/pmf_stage_lognormal.Rd b/man/pmf_stage_lognormal.Rd new file mode 100644 index 0000000..8b1e269 --- /dev/null +++ b/man/pmf_stage_lognormal.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rejection_sampling.R +\name{pmf_stage_lognormal} +\alias{pmf_stage_lognormal} +\title{Parameterize a Three-Parameter Lognormal PMF for Stage} +\usage{ +pmf_stage_lognormal(pmf_shift, pmf_mean, pmf_sigma = 0.5) +} +\arguments{ +\item{pmf_shift}{Numeric. Lower bound (shift) of the lognormal distribution. +Must be greater than \code{pmf_mean}.} + +\item{pmf_mean}{Numeric. Mean of the PMF stage distribution. Must be less +than \code{pmf_shift}.} + +\item{pmf_sigma}{Numeric. Standard deviation on the log scale. Defaults to +\code{0.5}.} +} +\value{ +A named list with the following elements: +\describe{ +\item{pmf_shift}{Lower bound of the distribution.} +\item{pmf_mean}{Mean of the distribution.} +\item{pmf_sigma}{Standard deviation on the log scale.} +\item{pmf_mu}{Derived location parameter on the log scale.} +\item{pmf_p05}{5th percentile of the PMF stage distribution.} +\item{pmf_p95}{95th percentile of the PMF stage distribution.} +} +} +\description{ +Computes the parameters of a three-parameter lognormal distribution for use +as a probabilistic maximum stage (PMF) in rejection sampling. The shift +parameter defines the lower bound of the distribution. +} +\examples{ +pmf <- pmf_stage_lognormal(pmf_shift = 1200, pmf_mean = 1150, pmf_sigma = 0.5) +pmf$pmf_p05 +pmf$pmf_p95 + +} diff --git a/man/power_function.Rd b/man/power_function.Rd index 7fa3f72..f02b149 100644 --- a/man/power_function.Rd +++ b/man/power_function.Rd @@ -7,7 +7,7 @@ power_function(x) } \arguments{ -\item{x}{Highest power of 10 to us} +\item{x}{Highest power of 10 to use} } \value{ Vector of 10 raised sequentially from the 1st to the nth power. diff --git a/man/rejection_sampling_stage.Rd b/man/rejection_sampling_stage.Rd new file mode 100644 index 0000000..a2f5ade --- /dev/null +++ b/man/rejection_sampling_stage.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rejection_sampling.R +\name{rejection_sampling_stage} +\alias{rejection_sampling_stage} +\title{Rejection Sampling of Stage Bounded by a Probabilistic Maximum Stage} +\usage{ +rejection_sampling_stage( + pmf_stage_LN, + stage_freq_df = NULL, + aep = NULL, + stage = NULL, + n_samples = 1e+07 +) +} +\arguments{ +\item{pmf_stage_LN}{A named list returned by \code{\link{pmf_stage_lognormal}} +containing the lognormal PMF parameters.} + +\item{stage_freq_df}{A data frame with AEP in column 1 and stage in column 2. +Mutually exclusive with \code{aep} and \code{stage}.} + +\item{aep}{Numeric vector of annual exceedance probabilities. Must be +supplied with \code{stage}. Mutually exclusive with \code{stage_freq_df}.} + +\item{stage}{Numeric vector of stages corresponding to \code{aep}. Must be +supplied with \code{aep}. Mutually exclusive with \code{stage_freq_df}.} + +\item{n_samples}{Integer. Number of samples to draw. Defaults to \code{1e7}.} +} +\value{ +A data frame of thinned accepted samples with columns for z-score +and stage, suitable for plotting a probabilistically bounded stage-frequency +curve. Output is produced by \code{thin_samples()}. +} +\description{ +Draws \code{n_samples} stage values from a stage-frequency curve, rejecting +any draws that exceed a probabilistic maximum stage (PMF) sampled from a +three-parameter lognormal distribution. Accepted samples are ranked and +assigned Weibull plotting positions for use in frequency analysis. +} +\examples{ +\dontrun{ +pmf <- pmf_stage_lognormal(pmf_shift = 1200, pmf_mean = 1150, pmf_sigma = 0.5) + +result <- rejection_sampling_stage( + pmf_stage_LN = pmf, + stage_freq_df = my_stage_freq_df, + n_samples = 1e7 +) +} + +} +\seealso{ +\code{\link{pmf_stage_lognormal}} +} diff --git a/man/rfaR-package.Rd b/man/rfaR-package.Rd index a35e55b..1d24d8d 100644 --- a/man/rfaR-package.Rd +++ b/man/rfaR-package.Rd @@ -12,7 +12,7 @@ An R implementation of the U.S. Army Corps of Engineers (USACE) Risk Management Useful links: \itemize{ \item \url{https://github.com/USACE-RMC/rfaR} - \item \url{https://ideal-broccoli-1q9y47z.pages.github.io/} + \item \url{https://usace-rmc.github.io/rfaR/} \item Report bugs at \url{https://github.com/USACE-RMC/rfaR/issues} } @@ -31,6 +31,9 @@ Other contributors: \item Allen Avance \email{allen.avance@usace.army.mil} (P.E., USACE RMC) [contributor] \item Samantha Hartke \email{samantha.h.hartke@usace.army.mil} (PhD, USACE RMC) [contributor] \item Julian Gonzalez \email{julian.t.gonzalez@usace.army.mil} (USACE RMC) [contributor] + \item Sadie Niblett \email{sadie.s.niblett@usace.army.mil} (USACE RMC) [reviewer] + \item Reuben Sasaki \email{reuben.a.sasaki@usace.army.mil} (P.E.,USACE RMC) [reviewer] + \item Bryan Robinson \email{bryan.j.robinson@usace.army.mil} (P.E.,USACE RMC) [reviewer] } } diff --git a/man/thin_samples.Rd b/man/thin_samples.Rd new file mode 100644 index 0000000..9229153 --- /dev/null +++ b/man/thin_samples.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rejection_sampling.R +\name{thin_samples} +\alias{thin_samples} +\title{Thin rejection sampling output for plotting} +\usage{ +thin_samples(n, z_sorted, stage_sorted) +} +\arguments{ +\item{n}{Integer. Total number of samples.} + +\item{z_sorted}{Numeric vector of sorted z-scores (ascending), length \code{n}.} + +\item{stage_sorted}{Numeric vector of sorted stage values (ascending), length \code{n}.} +} +\value{ +A data frame with columns \code{z_aep} and \code{stage}. +} +\description{ +Thins a large sample of sorted z-scores and stages for plotting efficiency. +Retains 5,000 evenly-spaced points across the full range plus the last +10,000 points for dense tail coverage. +} +\keyword{internal}