From 6b4c2d20b96a59ab6dbf49132a179e9a93d638b6 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Thu, 11 Jun 2026 21:46:50 -0400 Subject: [PATCH 1/9] Add measures parameter to control columns displayed in frequency tables. --- R/data_tabulate.R | 17 +++++++++++++++-- man/data_tabulate.Rd | 6 ++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 0a5c72972..9e802cc26 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -39,6 +39,10 @@ #' printed as markdown or HTML table, depending on the environment. See #' [`insight::export_table()`] for details. #' @param verbose Toggle warnings and messages. +#' @param measures Optional character vector, indicating the types of +#' percents to be included. Only applies to frequencies, i.e. when `by` is +#' `NULL`. Can be `"raw"` (includes `NA` values), `"valid"` (excludes `NA` values) +#' or `"cumulative"` (excludes `NA` vlues). #' @param ... not used. #' @inheritParams extract_column_names #' @@ -154,6 +158,7 @@ data_tabulate.default <- function( proportions = NULL, name = NULL, verbose = TRUE, + measures = c("raw", "valid", "cumulative"), ... ) { # save label attribute, before it gets lost... @@ -250,7 +255,9 @@ data_tabulate.default <- function( out$N <- round(out$N) } - out$`Raw %` <- 100 * out$N / sum(out$N) + if ("raw" %in% measures){ + out$`Raw %` <- 100 * out$N / sum(out$N) + } # if we have missing values, we add a row with NA if (remove_na) { out$`Valid %` <- 100 * out$N / sum(out$N) @@ -259,8 +266,14 @@ data_tabulate.default <- function( out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA) valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) } - out$`Cumulative %` <- cumsum(out$`Valid %`) + if ("cumulative" %in% measures) { + out$`Cumulative %` <- cumsum(out$`Valid %`) + } + + if (!"valid" %in% measures) { + out$`Valid %` <- NULL + } # add information about variable/group names if (!is.null(obj_name)) { if (is.null(group_variable)) { diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 87656e17f..720b3b027 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -19,6 +19,7 @@ data_tabulate(x, ...) proportions = NULL, name = NULL, verbose = TRUE, + measures = c("raw", "valid", "cumulative"), ... ) @@ -74,6 +75,11 @@ for printing.} \item{verbose}{Toggle warnings and messages.} +\item{measures}{Optional character vector, indicating the types of +percents to be included. Only applies to frequencies, i.e. when \code{by} is +\code{NULL}. Can be \code{"raw"} (includes \code{NA} values), \code{"valid"} (excludes \code{NA} values) +or \code{"cumulative"} (excludes \code{NA} vlues).} + \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ From e14cd92de046ef7b5f1d496e36c36f1719984b4e Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 12 Jun 2026 16:27:42 -0400 Subject: [PATCH 2/9] Add an attribute `by` when a by parameter is present. --- R/data_tabulate.R | 16 ++++++++++++++++ R/data_xtabulate.R | 3 +++ 2 files changed, 19 insertions(+) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 9e802cc26..391a0478c 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -188,6 +188,10 @@ data_tabulate.default <- function( # we go into another function for crosstables here... if (!is.null(by)) { + by_name <- tryCatch( + insight::safe_deparse(substitute(by)), + error = function(e) NULL + ) by <- .validate_by(by, x) return(.crosstable( x, @@ -196,6 +200,7 @@ data_tabulate.default <- function( remove_na = remove_na, proportions = proportions, obj_name = obj_name, + by_name = by_name, group_variable = group_variable )) } @@ -335,6 +340,10 @@ data_tabulate.data.frame <- function( verbose = verbose ) + by_name <- tryCatch( + insight::safe_deparse(substitute(by)), + error = function(e) NULL + ) # validate "by" by <- .validate_by(by, x) # validate "weights" @@ -358,6 +367,7 @@ data_tabulate.data.frame <- function( class(out) <- c("datawizard_tables", "list") } else { class(out) <- c("datawizard_crosstabs", "list") + attr(out, "by") <- gsub('\\"', "", by_name) } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) @@ -396,6 +406,11 @@ data_tabulate.grouped_df <- function( verbose = verbose ) + by_name <- tryCatch( + insight::safe_deparse(substitute(by)), + error = function(e) NULL + ) + x <- as.data.frame(x) out <- list() @@ -429,6 +444,7 @@ data_tabulate.grouped_df <- function( class(out) <- c("datawizard_tables", "list") } else { class(out) <- c("datawizard_crosstabs", "list") + attr(out, "by") <- gsub('\\"', "", by_name) } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index f29387b36..530bb94c1 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -7,6 +7,7 @@ remove_na = FALSE, proportions = NULL, obj_name = NULL, + by_name = NULL, group_variable = NULL ) { if (!is.null(proportions)) { @@ -89,6 +90,8 @@ attr(out, "weights") <- weights attr(out, "proportions") <- proportions attr(out, "varname") <- obj_name + attr(out, "by") <- by_name + attr(out, "grouped_df") <- !is.null(group_variable) attr(out, "prop_table") <- .prop_table(out) From eb3fb06ed11d3bd41e85f68bd31867a1406d1a86 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 12 Jun 2026 16:32:59 -0400 Subject: [PATCH 3/9] Git branches fun. --- R/data_tabulate.R | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 391a0478c..babf996e8 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -39,10 +39,6 @@ #' printed as markdown or HTML table, depending on the environment. See #' [`insight::export_table()`] for details. #' @param verbose Toggle warnings and messages. -#' @param measures Optional character vector, indicating the types of -#' percents to be included. Only applies to frequencies, i.e. when `by` is -#' `NULL`. Can be `"raw"` (includes `NA` values), `"valid"` (excludes `NA` values) -#' or `"cumulative"` (excludes `NA` vlues). #' @param ... not used. #' @inheritParams extract_column_names #' @@ -158,7 +154,6 @@ data_tabulate.default <- function( proportions = NULL, name = NULL, verbose = TRUE, - measures = c("raw", "valid", "cumulative"), ... ) { # save label attribute, before it gets lost... @@ -260,9 +255,9 @@ data_tabulate.default <- function( out$N <- round(out$N) } - if ("raw" %in% measures){ - out$`Raw %` <- 100 * out$N / sum(out$N) - } + + out$`Raw %` <- 100 * out$N / sum(out$N) + # if we have missing values, we add a row with NA if (remove_na) { out$`Valid %` <- 100 * out$N / sum(out$N) @@ -272,13 +267,8 @@ data_tabulate.default <- function( valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) } - if ("cumulative" %in% measures) { - out$`Cumulative %` <- cumsum(out$`Valid %`) - } + out$`Cumulative %` <- cumsum(out$`Valid %`) - if (!"valid" %in% measures) { - out$`Valid %` <- NULL - } # add information about variable/group names if (!is.null(obj_name)) { if (is.null(group_variable)) { From b1dc5b2f562ca26148b1957c4dcd13db5a0d884b Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 12 Jun 2026 16:34:47 -0400 Subject: [PATCH 4/9] Remove added blank lines. --- R/data_tabulate.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index babf996e8..02ef1253e 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -266,9 +266,7 @@ data_tabulate.default <- function( out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA) valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) } - out$`Cumulative %` <- cumsum(out$`Valid %`) - # add information about variable/group names if (!is.null(obj_name)) { if (is.null(group_variable)) { From fd62f3c30f254f34b8d68ff213a1bbf1ebd5af88 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 12 Jun 2026 16:37:09 -0400 Subject: [PATCH 5/9] More blank line fixing --- R/data_tabulate.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 02ef1253e..3eca7d007 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -255,9 +255,7 @@ data_tabulate.default <- function( out$N <- round(out$N) } - out$`Raw %` <- 100 * out$N / sum(out$N) - # if we have missing values, we add a row with NA if (remove_na) { out$`Valid %` <- 100 * out$N / sum(out$N) @@ -267,7 +265,8 @@ data_tabulate.default <- function( valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) } out$`Cumulative %` <- cumsum(out$`Valid %`) - # add information about variable/group names + + # add information about variable/group names if (!is.null(obj_name)) { if (is.null(group_variable)) { var_info <- data.frame(Variable = obj_name, stringsAsFactors = FALSE) From 2973688bac2c69f51035718295a2947ae4f35627 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 12 Jun 2026 16:39:49 -0400 Subject: [PATCH 6/9] And fix the man page. --- man/data_tabulate.Rd | 6 ------ 1 file changed, 6 deletions(-) diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 720b3b027..87656e17f 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -19,7 +19,6 @@ data_tabulate(x, ...) proportions = NULL, name = NULL, verbose = TRUE, - measures = c("raw", "valid", "cumulative"), ... ) @@ -75,11 +74,6 @@ for printing.} \item{verbose}{Toggle warnings and messages.} -\item{measures}{Optional character vector, indicating the types of -percents to be included. Only applies to frequencies, i.e. when \code{by} is -\code{NULL}. Can be \code{"raw"} (includes \code{NA} values), \code{"valid"} (excludes \code{NA} values) -or \code{"cumulative"} (excludes \code{NA} vlues).} - \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ From 3e9d2fbef102fea886c32937d4c4cd54f89d738f Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sat, 13 Jun 2026 21:06:18 -0400 Subject: [PATCH 7/9] news update --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 86c4e6d4c..ad462937a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ CHANGES * `data_read()` now also reads zip-files from URLs (#682). +* `data_tabluate()` now returns an attribute "by" with the + `by` variable name when the `by` parameter is used. + # datawizard 1.3.1 CHANGES From 11a00a962b48ba4b10358268d8b8e370de6c04e5 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sat, 13 Jun 2026 21:25:38 -0400 Subject: [PATCH 8/9] Fix lints. --- R/data_tabulate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 3eca7d007..40439d8e7 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -266,7 +266,7 @@ data_tabulate.default <- function( } out$`Cumulative %` <- cumsum(out$`Valid %`) - # add information about variable/group names + # add information about variable/group names if (!is.null(obj_name)) { if (is.null(group_variable)) { var_info <- data.frame(Variable = obj_name, stringsAsFactors = FALSE) @@ -354,7 +354,7 @@ data_tabulate.data.frame <- function( class(out) <- c("datawizard_tables", "list") } else { class(out) <- c("datawizard_crosstabs", "list") - attr(out, "by") <- gsub('\\"', "", by_name) + attr(out, "by") <- gsub('\\"', "", by_name, fixed = TRUE) } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) @@ -431,7 +431,7 @@ data_tabulate.grouped_df <- function( class(out) <- c("datawizard_tables", "list") } else { class(out) <- c("datawizard_crosstabs", "list") - attr(out, "by") <- gsub('\\"', "", by_name) + attr(out, "by") <- gsub('\\"', "", by_name, fixed = TRUE) } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) From d251b39bb90d04114f49bd72bb5e2fc57b26f131 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Mon, 15 Jun 2026 15:25:53 -0400 Subject: [PATCH 9/9] Add tests, and, as a consequence, make some changes. --- NEWS.md | 4 +- R/data_tabulate.R | 60 ++++++++++++++++-------- R/data_xtabulate.R | 11 ++++- tests/testthat/test-data_tabulate.R | 71 +++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 24 deletions(-) diff --git a/NEWS.md b/NEWS.md index ad462937a..9154239ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,8 +4,8 @@ CHANGES * `data_read()` now also reads zip-files from URLs (#682). -* `data_tabluate()` now returns an attribute "by" with the - `by` variable name when the `by` parameter is used. +* `data_tabulate()` now returns an attribute "by" with the + `by` variable name when the `by` parameter is used (#690 @elinw). # datawizard 1.3.1 diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 40439d8e7..fdd15da2f 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -159,6 +159,14 @@ data_tabulate.default <- function( # save label attribute, before it gets lost... var_label <- attr(x, "label", exact = TRUE) + # save by attribute + if (!is.null(by)) { + by_name <- tryCatch( + insight::safe_deparse(substitute(by)), + error = function(e) NULL + ) + } + # save and fix variable name, check for grouping variable obj_name <- tryCatch( insight::safe_deparse(substitute(x)), @@ -183,11 +191,10 @@ data_tabulate.default <- function( # we go into another function for crosstables here... if (!is.null(by)) { - by_name <- tryCatch( - insight::safe_deparse(substitute(by)), - error = function(e) NULL - ) + # don't lose that name of the by variable + attr(x, "by") <- by_name by <- .validate_by(by, x) + return(.crosstable( x, by = by, @@ -195,11 +202,10 @@ data_tabulate.default <- function( remove_na = remove_na, proportions = proportions, obj_name = obj_name, - by_name = by_name, group_variable = group_variable - )) + ) + ) } - # frequency table if (is.null(weights)) { if (remove_na) { @@ -293,6 +299,11 @@ data_tabulate.default <- function( attr(out, "total_n") <- sum(out$N, na.rm = TRUE) attr(out, "valid_n") <- valid_n + if (!is.null(by)) { + attr(out, "by") <- by_name + } else { + attr(out, "by") <- NULL + } class(out) <- c("datawizard_table", "data.frame") @@ -317,6 +328,13 @@ data_tabulate.data.frame <- function( verbose = TRUE, ... ) { + if (!is.null(by)) { + by_name <- tryCatch( + insight::safe_deparse(substitute(by)), + error = function(e) NULL + ) + by_name <- gsub('\\"', "", by_name) + } # evaluate arguments select <- .select_nse( select, @@ -327,10 +345,10 @@ data_tabulate.data.frame <- function( verbose = verbose ) - by_name <- tryCatch( - insight::safe_deparse(substitute(by)), - error = function(e) NULL - ) + if (!is.null(by)) { + attr(x, "by") <- by_name + } + # validate "by" by <- .validate_by(by, x) # validate "weights" @@ -349,13 +367,13 @@ data_tabulate.data.frame <- function( ... ) }) - if (is.null(by)) { class(out) <- c("datawizard_tables", "list") } else { + out <- lapply(out, structure, by = by_name) class(out) <- c("datawizard_crosstabs", "list") - attr(out, "by") <- gsub('\\"', "", by_name, fixed = TRUE) } + attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) @@ -382,7 +400,14 @@ data_tabulate.grouped_df <- function( grps <- attr(x, "groups", exact = TRUE) group_variables <- data_remove(grps, ".rows") grps <- grps[[".rows"]] - + # save the by variable name + if (!is.null(by)) { + by_name <- tryCatch( + insight::safe_deparse(substitute(by)), + error = function(e) NULL + ) + by_name <- gsub('\\"', "", by_name) + } # evaluate arguments select <- .select_nse( select, @@ -393,11 +418,6 @@ data_tabulate.grouped_df <- function( verbose = verbose ) - by_name <- tryCatch( - insight::safe_deparse(substitute(by)), - error = function(e) NULL - ) - x <- as.data.frame(x) out <- list() @@ -430,8 +450,8 @@ data_tabulate.grouped_df <- function( if (is.null(by)) { class(out) <- c("datawizard_tables", "list") } else { + out <- lapply(out, structure, by = by_name) class(out) <- c("datawizard_crosstabs", "list") - attr(out, "by") <- gsub('\\"', "", by_name, fixed = TRUE) } attr(out, "collapse") <- isTRUE(collapse) attr(out, "is_weighted") <- !is.null(weights) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 530bb94c1..4ee539a4a 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -7,12 +7,16 @@ remove_na = FALSE, proportions = NULL, obj_name = NULL, - by_name = NULL, group_variable = NULL ) { + if (!is.null(proportions)) { proportions <- match.arg(proportions, c("row", "column", "full")) } + if (!is.null(attr(x, "by"))) { + by_name <- attr(x, "by") + } + # frequency table if (is.null(weights)) { # we have a `.default` and a `.data.frame` method for `data_tabulate()`. @@ -90,7 +94,10 @@ attr(out, "weights") <- weights attr(out, "proportions") <- proportions attr(out, "varname") <- obj_name - attr(out, "by") <- by_name + + if (!is.null(by_name)) { + attr(out, "by") <- by_name + } attr(out, "grouped_df") <- !is.null(group_variable) attr(out, "prop_table") <- .prop_table(out) diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index 5ac4bad63..4945b5230 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -181,7 +181,78 @@ test_that("data_tabulate data.frame", { ) }) +test_that("data_tabulate data.frame by", { + data(efc, package = "datawizard") + x <- data_tabulate.data.frame(efc, "c172code", by = "e16sex") + expect_s3_class(x, c("datawizard_crosstab", "list")) + expect_length(x, 1L) + expect_identical( + attributes(x[[1]]), + list( + names = c( + "c172code", + "male", + "female", + "NA" + ), + row.names = 1:4, + class = c("datawizard_crosstab", "data.frame"), + total_n = 100L, + varname = "c172code", + by = "e16sex", + grouped_df = FALSE + ) + ) +}) +test_that("data_tabulate default by", { + data(efc, package = "datawizard") + x <- data_tabulate(efc$c172code, by = efc$e16sex) + expect_s3_class(x, c("datawizard_crosstab", "data.frame")) + expect_length(x, 4L) + expect_identical( + attributes(x), + list( + names = c( + "efc$c172code", + "male", + "female", + "NA" + ), + row.names = 1:4, + class = c("datawizard_crosstab", "data.frame"), + total_n = 100L, + varname = "efc$c172code", + by = "efc$e16sex", + grouped_df = FALSE + ) + ) +}) +test_that("data_tabulate grouped data.frame by", { + skip_if_not_installed("poorman") + data(efc, package = "datawizard") + x <- data_tabulate(poorman::group_by(efc, e16sex), "c172code", by = "e42dep") + expect_identical( + attributes(x[[1]]), + list( + names = c( + "c172code", + "Group", + "1", + "2", + "3", + "4", + "NA" + ), + class = c("datawizard_crosstab", "data.frame"), + row.names = 1:4, + total_n = 46L, + varname = "c172code", + by = "e42dep", + grouped_df = TRUE + ) + ) +}) test_that("data_tabulate unsupported class", { data(mtcars) expect_warning(