Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f5af3bf
Refactor outlier imputation notebooks to utilize a shared bootstrap c…
claude-marie Mar 24, 2026
eb07fe0
Refactor outlier imputation notebooks to leverage a shared bootstrap …
claude-marie Mar 24, 2026
068efb9
Refactor Magic Glasses notebooks to utilize a shared bootstrap contex…
claude-marie Mar 24, 2026
a2cb550
Enhance SNT utility functions for configuration loading and data proc…
claude-marie Mar 24, 2026
b4bd49a
Refactor outlier imputation and reporting notebooks to utilize new sh…
claude-marie Mar 25, 2026
bb1df37
Merge branch 'SNT25-381' of https://github.com/BLSQ/snt_development i…
claude-marie Mar 25, 2026
9b838fa
Refactor IQR outliers reporting notebook and utility functions to enh…
claude-marie Mar 25, 2026
776887d
fix for outliers pipelines
claude-marie Mar 26, 2026
b882af1
fix names and deleted old files
claude-marie Mar 26, 2026
f14ac32
Update helper file paths in outlier imputation and reporting notebook…
claude-marie Mar 26, 2026
1dbb87f
fix
claude-marie Mar 26, 2026
b5f9e3d
still bugged
claude-marie Mar 26, 2026
615fab0
fix
claude-marie Mar 27, 2026
a4d5219
last fix
claude-marie Mar 27, 2026
e3404c8
Refactor magic glasses input preparation and outlier detection functi…
claude-marie Mar 27, 2026
e0ee49a
generic functions in snt_utils
claude-marie Mar 30, 2026
489d4b6
milestone, fixing giulia nb
claude-marie Mar 30, 2026
b525673
main version of incidence
claude-marie Mar 30, 2026
9b61636
should be it
claude-marie Mar 31, 2026
b3eab9f
milestone, everything is working
claude-marie Mar 31, 2026
eca709c
final
claude-marie Mar 31, 2026
e8a72fb
little fix
claude-marie Mar 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions code/snt_utils.r
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,114 @@ install_and_load <- function(packages) {
print(loaded_packages)
}

# Load SNT configuration file with a consistent error message
load_snt_config <- function(config_path, config_file_name = "SNT_config.json") {
  #' Load the SNT configuration JSON file.
  #'
  #' @param config_path Directory (or base URL) that holds the configuration file.
  #' @param config_file_name Name of the configuration file inside `config_path`.
  #' @return The parsed configuration, as returned by `jsonlite::fromJSON()`.
  #' Stops with a message starting with
  #' "[ERROR] Error while loading configuration " when the file is missing
  #' or cannot be parsed.
  config_file <- file.path(config_path, config_file_name)

  # jsonlite::fromJSON() treats a string that is neither a URL nor an existing
  # file as literal JSON text, so a missing file would otherwise surface as a
  # confusing parse error. Report the real cause instead.
  is_url <- length(config_file) == 1L && grepl("^https?://", config_file)
  is_existing_file <- length(config_file) == 1L && file.exists(config_file)
  if (!is_url && !is_existing_file) {
    stop(
      paste0(
        "[ERROR] Error while loading configuration (file not found): ",
        paste(config_file, collapse = ", ")
      ),
      call. = FALSE
    )
  }

  config_json <- tryCatch(
    jsonlite::fromJSON(config_file),
    error = function(e) {
      msg <- paste0("[ERROR] Error while loading configuration ", conditionMessage(e))
      stop(msg, call. = FALSE)
    }
  )
  return(config_json)
}

# Validate that required keys exist in a config section
validate_required_config_keys <- function(config_json, keys, section = "SNT_CONFIG") {
  #' Check that a configuration section exists and holds every required key.
  #'
  #' @param config_json Parsed configuration, indexed by section name with `[[`.
  #' @param keys Character vector of names that must be present in the section.
  #' @param section Name of the section to inspect.
  #' @return `TRUE`, invisibly, when the section and all keys are present.
  #' Stops with an "[ERROR] ..." message otherwise.
  section_content <- config_json[[section]]
  if (is.null(section_content)) {
    stop(paste0("[ERROR] Missing configuration section: ", section))
  }

  # A key is absent when it has no match among the section's names.
  is_absent <- match(keys, names(section_content), nomatch = 0L) == 0L
  if (any(is_absent)) {
    absent_keys <- paste(keys[is_absent], collapse = ", ")
    stop(paste0("[ERROR] Missing configuration input(s): ", absent_keys))
  }

  invisible(TRUE)
}

# Generic helper to load a country-specific dataset file
load_country_file_from_dataset <- function(dataset_id, country_code, suffix, label = NULL) {
  #' Load the file named `<country_code><suffix>` from a dataset.
  #'
  #' The file is fetched with `get_latest_dataset_file_in_memory()`; a
  #' confirmation is logged with `log_msg()` on success.
  #'
  #' @param dataset_id Dataset identifier passed to the loader.
  #' @param country_code Prefix of the file name.
  #' @param suffix Remainder of the file name, appended to `country_code`.
  #' @param label Optional name used in the error message; the file name is
  #'   used when `NULL`.
  #' @return Whatever `get_latest_dataset_file_in_memory()` returns.
  file_name <- paste0(country_code, suffix)

  # Re-raise any loading failure with the dataset and file spelled out.
  rethrow_with_context <- function(e) {
    target_label <- label
    if (is.null(target_label)) {
      target_label <- file_name
    }
    stop(paste0(
      "[ERROR] Error while loading ",
      target_label,
      " (dataset: ",
      dataset_id,
      ", file: ",
      file_name,
      "): ",
      conditionMessage(e)
    ))
  }

  output_data <- tryCatch(
    get_latest_dataset_file_in_memory(dataset_id, file_name),
    error = rethrow_with_context
  )

  success_note <- paste0("Loaded file `", file_name, "` from dataset `", dataset_id, "`.")
  log_msg(success_note)
  return(output_data)
}

# Ensure YEAR and MONTH are stored as integers when present
normalize_year_month_types <- function(input_df, year_col = "YEAR", month_col = "MONTH") {
  #' Store the year and month columns as integers when they are present.
  #'
  #' @param input_df Data frame to normalize.
  #' @param year_col Name of the year column.
  #' @param month_col Name of the month column.
  #' @return A copy of `input_df` in which `year_col` and `month_col`, if they
  #'   exist, are integer vectors. Columns that are absent are left alone.
  to_integer <- function(values) {
    # as.integer() on a factor returns the internal level codes (1, 2, ...),
    # not the labels, so go through the character labels first.
    if (is.factor(values)) {
      values <- as.character(values)
    }
    as.integer(values)
  }

  output_df <- input_df
  for (col_name in unique(c(year_col, month_col))) {
    if (col_name %in% names(output_df)) {
      output_df[[col_name]] <- to_integer(output_df[[col_name]])
    }
  }
  return(output_df)
}

# Standard routine preparation: select, pivot longer, optional deduplication
prepare_routine_long <- function(routine_df, fixed_cols, indicators, deduplicate = TRUE) {
  #' Reshape routine data to long format, one row per indicator value.
  #'
  #' @param routine_df Wide data frame holding one column per indicator.
  #' @param fixed_cols Identifier columns to keep; names absent from
  #'   `routine_df` are ignored.
  #' @param indicators Indicator columns to pivot; all must exist.
  #' @param deduplicate When `TRUE`, keep only the first row for each
  #'   combination of the key columns that are present.
  #' @return A long data frame with `INDICATOR` and `VALUE` columns.
  available_cols <- names(routine_df)

  # Every requested indicator must exist before any reshaping happens.
  absent_indicators <- setdiff(indicators, available_cols)
  if (length(absent_indicators) > 0) {
    stop(paste0("[ERROR] Missing indicator column(s): ", paste(absent_indicators, collapse = ", ")))
  }

  keep_cols <- intersect(c(fixed_cols, indicators), available_cols)
  selected_df <- dplyr::select(routine_df, dplyr::all_of(keep_cols))
  routine_long <- tidyr::pivot_longer(
    selected_df,
    cols = dplyr::all_of(indicators),
    names_to = "INDICATOR",
    values_to = "VALUE"
  )

  if (!deduplicate) {
    return(routine_long)
  }

  # Deduplicate on whichever of the standard key columns survived selection.
  candidate_keys <- c("ADM1_ID", "ADM2_ID", "OU_ID", "PERIOD", "YEAR", "MONTH", "INDICATOR")
  dedup_keys <- intersect(candidate_keys, names(routine_long))
  routine_long <- dplyr::distinct(
    routine_long,
    dplyr::across(dplyr::all_of(dedup_keys)),
    .keep_all = TRUE
  )

  return(routine_long)
}

# Build a standardized output path under /data and create it if needed
standard_output_path <- function(data_root_path, domain, subdomain = NULL, create_dir = TRUE) {
  #' Build `<data_root_path>/<domain>[/<subdomain>]` and optionally create it.
  #'
  #' @param data_root_path Root folder of the output tree.
  #' @param domain First-level folder under the root.
  #' @param subdomain Optional second-level folder; `NULL` or "" means none.
  #' @param create_dir When `TRUE`, create the folder (recursively) if it does
  #'   not exist yet.
  #' @return The target path as a string.
  path_parts <- list(data_root_path, domain)

  # Only append the subdomain when one was actually supplied.
  has_subdomain <- !(is.null(subdomain) || nchar(subdomain) == 0)
  if (has_subdomain) {
    path_parts <- c(path_parts, list(subdomain))
  }
  target_path <- do.call(file.path, path_parts)

  needs_creation <- create_dir && !dir.exists(target_path)
  if (needs_creation) {
    dir.create(target_path, recursive = TRUE, showWarnings = FALSE)
  }

  return(target_path)
}

# Helper to safely extract values from parameters (allows to specify the type)
get_param <- function(params_list, target_param, default, cast_method = identity) {
#' Safely retrieve a parameter if it exists in the input, using a default fallback if it doesn't exist in the input
Expand Down Expand Up @@ -124,6 +232,11 @@ get_latest_dataset_file_in_memory <- function(dataset, filename) {
}


# helper function for OpenHEXA logging
printdim <- function(df, name = deparse(substitute(df))) {
  #' Print the number of rows and columns of `df`, labelled with `name`.
  #'
  #' @param df Object whose `nrow()` and `ncol()` are reported.
  #' @param name Label shown in the message; defaults to the expression that
  #'   was passed as `df`.
  label <- name
  n_rows <- nrow(df)
  n_cols <- ncol(df)
  cat("Dimensions of", label, ":", n_rows, "rows x", n_cols, "columns\n\n")
}

# helper function for OpenHEXA logging
log_msg <- function(msg , level="info") {
print(msg)
Expand Down
Loading