easystats · elinw · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,9 @@ CHANGES
 
 * `data_read()` now also reads zip-files from URLs (#682).
 
+* `data_tabulate()` now returns an attribute "by" with the
+  `by` variable name when the `by` parameter is used (#690 @elinw).
+
 # datawizard 1.3.1
 
 CHANGES

diff --git a/R/data_tabulate.R b/R/data_tabulate.R
@@ -159,6 +159,14 @@
   # save label attribute, before it gets lost...
   var_label <- attr(x, "label", exact = TRUE)
 
+  # save by attribute
+  if (!is.null(by)) {
+    by_name <- tryCatch(
+      insight::safe_deparse(substitute(by)),
+      error = function(e) NULL
+    )
+  }
+
   # save and fix variable name, check for grouping variable
   obj_name <- tryCatch(
     insight::safe_deparse(substitute(x)),
@@ -183,7 +191,10 @@
 
   # we go into another function for crosstables here...
   if (!is.null(by)) {
+    # don't lose that name of the by variable
+    attr(x, "by") <- by_name
     by <- .validate_by(by, x)
+
     return(.crosstable(
       x,
       by = by,
@@ -192,9 +203,9 @@
       proportions = proportions,
       obj_name = obj_name,
       group_variable = group_variable
-    ))
+    )
+    )
   }
-
   # frequency table
   if (is.null(weights)) {
     if (remove_na) {
@@ -288,6 +299,11 @@
 
   attr(out, "total_n") <- sum(out$N, na.rm = TRUE)
   attr(out, "valid_n") <- valid_n
+  if (!is.null(by)) {
+    attr(out, "by") <- by_name
+  } else {
+    attr(out, "by") <- NULL
+  }
 
   class(out) <- c("datawizard_table", "data.frame")
 
@@ -312,6 +328,13 @@
   verbose = TRUE,
   ...
 ) {
+  if (!is.null(by)) {
+    by_name <- tryCatch(
+      insight::safe_deparse(substitute(by)),
+      error = function(e) NULL
+    )
+    by_name <- gsub('\\"', "", by_name)
+  }
   # evaluate arguments
   select <- .select_nse(
     select,
@@ -322,6 +345,10 @@
     verbose = verbose
   )
 
+  if (!is.null(by)) {
+    attr(x, "by") <- by_name
+  }
+
   # validate "by"
   by <- .validate_by(by, x)
   # validate "weights"
@@ -340,12 +367,13 @@
       ...
     )
   })
-
   if (is.null(by)) {
     class(out) <- c("datawizard_tables", "list")
   } else {
+    out <- lapply(out, structure, by = by_name)
     class(out) <- c("datawizard_crosstabs", "list")
   }
+
   attr(out, "collapse") <- isTRUE(collapse)
   attr(out, "is_weighted") <- !is.null(weights)
 
@@ -372,7 +400,14 @@
   grps <- attr(x, "groups", exact = TRUE)
   group_variables <- data_remove(grps, ".rows")
   grps <- grps[[".rows"]]
-
+  # save the by variable name
+  if (!is.null(by)) {
+    by_name <- tryCatch(
+      insight::safe_deparse(substitute(by)),
+      error = function(e) NULL
+    )
+    by_name <- gsub('\\"', "", by_name)
+  }
   # evaluate arguments
   select <- .select_nse(
     select,
@@ -415,6 +450,7 @@
   if (is.null(by)) {
     class(out) <- c("datawizard_tables", "list")
   } else {
+    out <- lapply(out, structure, by = by_name)
     class(out) <- c("datawizard_crosstabs", "list")
   }
   attr(out, "collapse") <- isTRUE(collapse)

diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R
@@ -9,9 +9,14 @@
   obj_name = NULL,
   group_variable = NULL
 ) {
+
   if (!is.null(proportions)) {
     proportions <- match.arg(proportions, c("row", "column", "full"))
   }
+  if (!is.null(attr(x, "by"))) {
+    by_name <- attr(x, "by")
+  }
+
   # frequency table
   if (is.null(weights)) {
     # we have a `.default` and a `.data.frame` method for `data_tabulate()`.
@@ -89,6 +94,11 @@
   attr(out, "weights") <- weights
   attr(out, "proportions") <- proportions
   attr(out, "varname") <- obj_name
+
+  if (!is.null(by_name)) {
+    attr(out, "by") <- by_name
+  }
+
   attr(out, "grouped_df") <- !is.null(group_variable)
   attr(out, "prop_table") <- .prop_table(out)
 

diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R
@@ -181,7 +181,78 @@ test_that("data_tabulate data.frame", {
   )
 })
 
+test_that("data_tabulate data.frame by", {
+  data(efc, package = "datawizard")
+  x <- data_tabulate.data.frame(efc, "c172code", by = "e16sex")
+  expect_s3_class(x, c("datawizard_crosstab", "list"))
+  expect_length(x, 1L)
+  expect_identical(
+    attributes(x[[1]]),
+    list(
+      names = c(
+        "c172code",
+        "male",
+        "female",
+        "NA"
+      ),
+      row.names = 1:4,
+      class = c("datawizard_crosstab", "data.frame"),
+      total_n = 100L,
+      varname = "c172code",
+      by = "e16sex",
+      grouped_df = FALSE
+    )
+  )
+})
 
+test_that("data_tabulate default by", {
+  data(efc, package = "datawizard")
+  x <- data_tabulate(efc$c172code, by = efc$e16sex)
+  expect_s3_class(x, c("datawizard_crosstab", "data.frame"))
+  expect_length(x, 4L)
+  expect_identical(
+    attributes(x),
+    list(
+      names = c(
+        "efc$c172code",
+        "male",
+        "female",
+        "NA"
+      ),
+      row.names = 1:4,
+      class = c("datawizard_crosstab", "data.frame"),
+      total_n = 100L,
+      varname = "efc$c172code",
+      by = "efc$e16sex",
+      grouped_df = FALSE
+    )
+  )
+})
+test_that("data_tabulate grouped data.frame by", {
+  skip_if_not_installed("poorman")
+  data(efc, package = "datawizard")
+  x <- data_tabulate(poorman::group_by(efc, e16sex), "c172code", by = "e42dep")
+  expect_identical(
+    attributes(x[[1]]),
+    list(
+      names = c(
+        "c172code",
+        "Group",
+        "1",
+        "2",
+        "3",
+        "4",
+        "NA"
+      ),
+      class = c("datawizard_crosstab", "data.frame"),
+      row.names = 1:4,
+      total_n = 46L,
+      varname = "c172code",
+      by = "e42dep",
+      grouped_df = TRUE
+    )
+  )
+})
 test_that("data_tabulate unsupported class", {
   data(mtcars)
   expect_warning(