diff --git a/R/helpers_regression.R b/R/helpers_regression.R
index 52a09c8..365881f 100644
--- a/R/helpers_regression.R
+++ b/R/helpers_regression.R
@@ -2,7 +2,8 @@
 #'
 #' @description This function determines the order of sibling pairs based on an outcome variable.
 #' The function checks which of the two kinship pairs has more of a specified outcome variable.
-#' It adds a new column named `order` to the dataset, indicating which sibling (identified as "s1" or "s2") has more of the outcome.
+#' It adds a new column named `order` to the dataset, indicating which sibling
+#' (identified as "s1" or "s2") has more of the outcome.
 #' If the two siblings have the same amount of the outcome, it randomly assigns one as having more.
 #
 #' @inheritParams discord_data
@@ -16,7 +17,7 @@ check_sibling_order <- function(..., fast = FALSE) {
   if (fast == TRUE) {
     check_sibling_order_fast(...)
   } else {
-    check_sibling_order_ram_optimized(...)
+    check_sibling_order_ram(...)
   }
 }
 
@@ -24,7 +25,8 @@ check_sibling_order <- function(..., fast = FALSE) {
 #'
 #' @description This function determines the order of sibling pairs based on an outcome variable.
 #' The function checks which of the two kinship pairs has more of a specified outcome variable.
-#' It adds a new column named `order` to the dataset, indicating which sibling (identified as "s1" or "s2") has more of the outcome.
+#' It adds a new column named `order` to the dataset, indicating which sibling
+#' (identified as "s1" or "s2") has more of the outcome.
 #' If the two siblings have the same amount of the outcome, it randomly assigns one as having more.
 #'
 #' @inheritParams discord_data
@@ -36,7 +38,7 @@ check_sibling_order <- function(..., fast = FALSE) {
 #'  neither) has more of the outcome.
 #' @keywords internal
 
-check_sibling_order_ram_optimized <- function(data, outcome, pair_identifiers, row) {
+check_sibling_order_ram <- function(data, outcome, pair_identifiers, row) {
   # Select the row of interest from the data frame
   data <- data[row, ]
 
@@ -46,7 +48,11 @@ check_sibling_order_ram_optimized <- function(data, outcome, pair_identifiers, r
 
   # Check if either sibling has missing (NA) outcome data
   if (is.na(outcome1) || is.na(outcome2)) {
-    stop(paste0("There are missing data, encoded as `NA`, for at least one kinship pair in the '", outcome, "' variable and data cannot be prepped properly.\n Please remove or impute missing data."))
+    stop(paste0(
+      "There are missing data, encoded as `NA`, for at least one kinship pair in the '",
+      outcome, "' variable and data cannot be prepped properly.\n",
+      " Please remove or impute missing data."
+    ))
   }
   # Determine sibling order
   if (outcome1 > outcome2) {
@@ -63,7 +69,7 @@ check_sibling_order_ram_optimized <- function(data, outcome, pair_identifiers, r
     }
   }
 
-  return(data)
+  data
 }
 
 check_sibling_order_fast <- function(data, outcome, pair_identifiers) {
@@ -75,7 +81,11 @@ check_sibling_order_fast <- function(data, outcome, pair_identifiers) {
 
   # Check for missing outcome data
   if (any(is.na(outcome1) | is.na(outcome2))) {
-    stop(paste0("There are missing data, encoded as `NA`, for at least one kinship pair in the '", outcome, "' variable and data cannot be prepped properly.\n Please remove or impute missing data."))
+    stop(paste0(
+      "There are missing data, encoded as `NA`, for at least one kinship pair in the '",
+      outcome, "' variable and data cannot be prepped properly.\n",
+      " Please remove or impute missing data."
+    ))
   }
 
   order <- ifelse(outcome1 > outcome2, "s1",
@@ -90,14 +100,17 @@ check_sibling_order_fast <- function(data, outcome, pair_identifiers) {
   }
 
   data$order <- order
-  return(data)
+  data
 }
 
 
 #' @title Make Mean Differences
 #'
-#' @description This function calculates differences and means of a given variable for each kinship pair. The order of subtraction and the variables' names in the output dataframe depend on the order column set by check_sibling_order().
-#' If the demographics parameter is set to "race", "sex", or "both", it also prepares demographic information accordingly,
+#' @description This function calculates differences and means of a given variable for each
+#' kinship pair. The order of subtraction and the variables' names in the output dataframe
+#' depend on the order column set by check_sibling_order().
+#' If the demographics parameter is set to "race", "sex", or "both", it also prepares
+#' demographic information accordingly,
 #' swapping the order of demographics as per the order column.
 #' @inheritParams discord_data
 #' @inheritParams check_sibling_order
@@ -177,7 +190,7 @@ make_mean_diffs_ram_optimized <- function(data, id, sex, race, demographics,
   )
 
 
-  return(output)
+  output
 }
 
 
@@ -270,20 +283,26 @@ recode_demographics <- function(demographics, data, raceS1, raceS2,
     if (demographics == "both" || demographics == "race") {
       race_1_name <- paste0(race, "_1")
       race_2_name <- paste0(race, "_2")
-      output_demographics[[paste0(race, "_binarymatch")]] <- ifelse(output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
+      output_demographics[[paste0(race, "_binarymatch")]] <- ifelse(
+        output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
         1, 0
       )
-      output_demographics[[paste0(race, "_multimatch")]] <- ifelse(output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
+      output_demographics[[paste0(race, "_multimatch")]] <- ifelse(
+        output_demographics[[race_1_name]] == output_demographics[[race_2_name]],
         as.character(output_demographics[[race_2_name]]), "mixed"
       )
     }
     if (demographics == "both" || demographics == "sex") {
       sex_1_name <- paste0(sex, "_1")
       sex_2_name <- paste0(sex, "_2")
-      output_demographics[[paste0(sex, "_binarymatch")]] <- ifelse(output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]],
+      output_demographics[[paste0(sex, "_binarymatch")]] <- ifelse(
+        output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]],
         1, 0
       )
-      output_demographics[[paste0(sex, "_multimatch")]] <- ifelse(output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]], as.character(output_demographics[[sex_2_name]]), "mixed")
+      output_demographics[[paste0(sex, "_multimatch")]] <- ifelse(
+        output_demographics[[sex_1_name]] == output_demographics[[sex_2_name]],
+        as.character(output_demographics[[sex_2_name]]), "mixed"
+      )
     }
   }
 
@@ -291,7 +310,7 @@ recode_demographics <- function(demographics, data, raceS1, raceS2,
     output <- base::cbind(output, output_demographics)
   }
 
-  return(output)
+  output
 }
 
 
@@ -346,12 +365,13 @@ make_mean_diffs_fast <- function(data, id, sex, race, demographics,
     )
     diff_list[[var]] <- tmp
   }
-  return(diff_list)
+  diff_list
 }
 
 #' @title Check Discord Errors
 #'
-#' @description This function checks for common errors in the provided data, including the correct specification of identifiers (ID, sex, race) and their existence in the data.
+#' @description This function checks for common errors in the provided data, including
+#' the correct specification of identifiers (ID, sex, race) and their existence in the data.
 #'
 #
 #' @param data The data to perform a discord regression on.
@@ -370,15 +390,26 @@ check_discord_errors <- function(data, id, sex, race, pair_identifiers) {
   }
 
   if (!base::is.null(sex) && base::sum(base::grepl(sex, base::names(data))) == 0) {
-    stop(paste0("The kinship pair sex identifier \"", sex, "\" is not appropriately defined. Please check that you have the correct column name."))
+    stop(paste0(
+      "The kinship pair sex identifier \"", sex,
+      "\" is not appropriately defined. Please check that you have the correct column name."
+    ))
   }
   if (!base::is.null(race) && base::sum(base::grepl(race, base::names(data))) == 0) {
-    stop(paste0("The kinship pair race identifier \"", race, "\" is not appropriately defined. Please check that you have the correct column name."))
+    stop(paste0(
+      "The kinship pair race identifier \"", race,
+      "\" is not appropriately defined. Please check that you have the correct column name."
+    ))
   }
-  if (base::sum(base::grepl(pair_identifiers[1], base::names(data))) == 0 | base::sum(base::grepl(pair_identifiers[2], base::names(data))) == 0) {
-    stop(paste0("Please check that the kinship pair identifiers \"", pair_identifiers[1], "\" and \"", pair_identifiers[2], "\" are valid, i.e. ensure that you have the correct labels for each kin."))
+  if (base::sum(base::grepl(pair_identifiers[1], base::names(data))) == 0 ||
+    base::sum(base::grepl(pair_identifiers[2], base::names(data))) == 0) {
+    stop(paste0(
+      "Please check that the kinship pair identifiers \"", pair_identifiers[1],
+      "\" and \"", pair_identifiers[2],
+      "\" are valid, i.e. ensure that you have the correct labels for each kin."
+    ))
   }
-  if (!base::is.null(sex) & !base::is.null(race) && sex == race) {
+  if (!base::is.null(sex) && !base::is.null(race) && sex == race) {
     stop("Please check that your sex and race variables are not equal.")
   }
 }
@@ -401,11 +432,11 @@ valid_ids <- function(data, id) {
       dwarn("Specified id column does not contain unique values for each kin-pair.
 Adding row-wise ID for restructuring data into paired format for analysis.
 For more details, see <https://github.com/R-Computing-Lab/discord/issues/6>.")
-      return(FALSE)
+      FALSE
     } else if (id_length == nrow(data)) {
-      return(TRUE)
+      TRUE
     }
   } else if (is.null(id)) {
-    return(FALSE)
+    FALSE
   }
 }
diff --git a/man/check_discord_errors.Rd b/man/check_discord_errors.Rd
index 626b7c3..8ea12ec 100644
--- a/man/check_discord_errors.Rd
+++ b/man/check_discord_errors.Rd
@@ -21,5 +21,6 @@ check_discord_errors(data, id, sex, race, pair_identifiers)
 An error message if one of the conditions are met.
 }
 \description{
-This function checks for common errors in the provided data, including the correct specification of identifiers (ID, sex, race) and their existence in the data.
+This function checks for common errors in the provided data, including
+the correct specification of identifiers (ID, sex, race) and their existence in the data.
 }
diff --git a/man/check_sibling_order.Rd b/man/check_sibling_order.Rd
index fe1688a..94f8246 100644
--- a/man/check_sibling_order.Rd
+++ b/man/check_sibling_order.Rd
@@ -18,6 +18,7 @@ A one-row data frame with a new column order indicating which familial member (1
 \description{
 This function determines the order of sibling pairs based on an outcome variable.
 The function checks which of the two kinship pairs has more of a specified outcome variable.
-It adds a new column named `order` to the dataset, indicating which sibling (identified as "s1" or "s2") has more of the outcome.
+It adds a new column named `order` to the dataset, indicating which sibling
+(identified as "s1" or "s2") has more of the outcome.
 If the two siblings have the same amount of the outcome, it randomly assigns one as having more.
 }
diff --git a/man/check_sibling_order_ram_optimized.Rd b/man/check_sibling_order_ram.Rd
similarity index 81%
rename from man/check_sibling_order_ram_optimized.Rd
rename to man/check_sibling_order_ram.Rd
index 68268d9..3a6058a 100644
--- a/man/check_sibling_order_ram_optimized.Rd
+++ b/man/check_sibling_order_ram.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/helpers_regression.R
-\name{check_sibling_order_ram_optimized}
-\alias{check_sibling_order_ram_optimized}
+\name{check_sibling_order_ram}
+\alias{check_sibling_order_ram}
 \title{Check Sibling Order RAM Optimized}
 \usage{
-check_sibling_order_ram_optimized(data, outcome, pair_identifiers, row)
+check_sibling_order_ram(data, outcome, pair_identifiers, row)
 }
 \arguments{
 \item{data}{The data set with kinship pairs}
@@ -24,7 +24,8 @@ A one-row data frame with a new column order indicating which familial member (1
 \description{
 This function determines the order of sibling pairs based on an outcome variable.
 The function checks which of the two kinship pairs has more of a specified outcome variable.
-It adds a new column named `order` to the dataset, indicating which sibling (identified as "s1" or "s2") has more of the outcome.
+It adds a new column named `order` to the dataset, indicating which sibling
+(identified as "s1" or "s2") has more of the outcome.
 If the two siblings have the same amount of the outcome, it randomly assigns one as having more.
 }
 \keyword{internal}
diff --git a/man/make_mean_diffs.Rd b/man/make_mean_diffs.Rd
index a6ac8cf..bb8b034 100644
--- a/man/make_mean_diffs.Rd
+++ b/man/make_mean_diffs.Rd
@@ -12,7 +12,10 @@ make_mean_diffs(..., fast = FALSE)
 \item{fast}{Logical. If TRUE, uses a faster method for data processing.}
 }
 \description{
-This function calculates differences and means of a given variable for each kinship pair. The order of subtraction and the variables' names in the output dataframe depend on the order column set by check_sibling_order().
-If the demographics parameter is set to "race", "sex", or "both", it also prepares demographic information accordingly,
+This function calculates differences and means of a given variable for each
+kinship pair. The order of subtraction and the variables' names in the output dataframe
+depend on the order column set by check_sibling_order().
+If the demographics parameter is set to "race", "sex", or "both", it also prepares
+demographic information accordingly,
 swapping the order of demographics as per the order column.
 }
diff --git a/tests/testthat/test-helpers_regression.R b/tests/testthat/test-helpers_regression.R
new file mode 100644
index 0000000..984b696
--- /dev/null
+++ b/tests/testthat/test-helpers_regression.R
@@ -0,0 +1,169 @@
+# Tests for internal helper functions in helpers_regression.R
+# These functions are exercised via the public discord_data() and
+# discord_regression() APIs.
+
+# Minimal pair data used across several tests below
+pair_data <- data.frame(
+  id = 1:3,
+  y_1 = c(5, 3, 4),
+  y_2 = c(3, 5, 6)
+)
+
+# --- check_discord_errors (via discord_regression) ---
+
+test_that("check_discord_errors stops on invalid id column", {
+  expect_error(
+    discord_regression(
+      data = pair_data,
+      outcome = "y",
+      predictors = NULL,
+      id = "bad_id",
+      sex = NULL,
+      race = NULL,
+      pair_identifiers = c("_1", "_2")
+    ),
+    regexp = "bad_id"
+  )
+})
+
+test_that("check_discord_errors stops on invalid sex column", {
+  expect_error(
+    discord_regression(
+      data = pair_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = "bad_sex",
+      race = NULL,
+      pair_identifiers = c("_1", "_2")
+    ),
+    regexp = "bad_sex"
+  )
+})
+
+test_that("check_discord_errors stops on invalid race column", {
+  expect_error(
+    discord_regression(
+      data = pair_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = NULL,
+      race = "bad_race",
+      pair_identifiers = c("_1", "_2")
+    ),
+    regexp = "bad_race"
+  )
+})
+
+test_that("check_discord_errors stops when first pair identifier is missing", {
+  expect_error(
+    discord_regression(
+      data = pair_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = NULL,
+      race = NULL,
+      pair_identifiers = c("_bad1", "_2")
+    ),
+    regexp = "_bad1"
+  )
+})
+
+test_that("check_discord_errors stops when second pair identifier is missing", {
+  expect_error(
+    discord_regression(
+      data = pair_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = NULL,
+      race = NULL,
+      pair_identifiers = c("_1", "_bad2")
+    ),
+    regexp = "_bad2"
+  )
+})
+
+test_that("check_discord_errors stops when sex and race columns are equal", {
+  # sex_1 / sex_2 columns exist so the earlier existence checks pass
+  sex_data <- data.frame(
+    sex_1 = c(1, 0, 1),
+    sex_2 = c(0, 1, 1),
+    y_1 = c(5, 3, 4),
+    y_2 = c(3, 5, 6)
+  )
+  expect_error(
+    discord_regression(
+      data = sex_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = "sex",
+      race = "sex",
+      pair_identifiers = c("_1", "_2")
+    ),
+    regexp = "sex and race"
+  )
+})
+
+# --- valid_ids (via discord_data) ---
+
+test_that("valid_ids warns and proceeds when id column has duplicate values", {
+  dup_data <- data.frame(
+    id = c(1, 1, 2),
+    y_1 = c(5, 3, 4),
+    y_2 = c(3, 5, 6)
+  )
+  expect_warning(
+    discord_data(
+      data = dup_data,
+      outcome = "y",
+      predictors = NULL,
+      id = "id",
+      sex = NULL,
+      race = NULL,
+      pair_identifiers = c("_1", "_2"),
+      demographics = "none"
+    ),
+    regexp = "unique"
+  )
+})
+
+# --- missing outcome data (check_sibling_order_ram and check_sibling_order_fast) ---
+
+test_that("check_sibling_order_ram stops when outcome has NA (fast = FALSE)", {
+  na_data <- data.frame(
+    y_1 = c(NA, 3, 4),
+    y_2 = c(3, 5, 6)
+  )
+  expect_error(
+    discord_data(
+      data = na_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = NULL,
+      race = NULL,
+      pair_identifiers = c("_1", "_2"),
+      demographics = "none",
+      fast = FALSE
+    ),
+    regexp = "missing data"
+  )
+})
+
+test_that("check_sibling_order_fast stops when outcome has NA (fast = TRUE)", {
+  na_data <- data.frame(
+    y_1 = c(NA, 3, 4),
+    y_2 = c(3, 5, 6)
+  )
+  expect_error(
+    discord_data(
+      data = na_data,
+      outcome = "y",
+      predictors = NULL,
+      sex = NULL,
+      race = NULL,
+      pair_identifiers = c("_1", "_2"),
+      demographics = "none",
+      fast = TRUE
+    ),
+    regexp = "missing data"
+  )
+})