From 47fade66711e48c955569bf55ab19ed502d0f257 Mon Sep 17 00:00:00 2001 From: Mahjabin Oyshi Date: Thu, 26 Feb 2026 20:13:35 +0600 Subject: [PATCH 1/2] Document how j results are combined with by via rbindlist (issue #7643) --- man/data.table.Rd | 11 ++++++++++- tests/by-rbindlist-use-names-false.R | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/by-rbindlist-use-names-false.R diff --git a/man/data.table.Rd b/man/data.table.Rd index cfdcb2706..aa9b7ce02 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -113,7 +113,16 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \emph{Advanced:} When \code{i} is a \code{list} (or \code{data.frame} or \code{data.table}), \code{DT[i, j, by=.EACHI]} evaluates \code{j} for the groups in \code{DT} that each row in \code{i} joins to. That is, you can join (in \code{i}) and aggregate (in \code{j}) simultaneously. We call this \emph{grouping by each i}. See \href{https://stackoverflow.com/a/27004566/559784}{this StackOverflow answer} for a more detailed explanation until we \href{https://github.com/Rdatatable/data.table/issues/944}{roll out vignettes}. - \emph{Advanced:} In the \code{X[Y, j]} form of grouping, the \code{j} expression sees variables in \code{X} first, then \code{Y}. We call this \emph{join inherited scope}. If the variable is not in \code{X} or \code{Y} then the calling frame is searched, its calling frame, and so on in the usual way up to and including the global environment.} + \emph{Advanced:} In the \code{X[Y, j]} form of grouping, the \code{j} expression sees variables in \code{X} first, then \code{Y}. We call this \emph{join inherited scope}. If the variable is not in \code{X} or \code{Y} then the calling frame is searched, its calling frame, and so on in the usual way up to and including the global environment. + + When \code{by} is supplied, \code{j} is evaluated once per group and + the per-group results are then combined row-wise. Internally this + combination uses \code{rbindlist} with \code{use.names = FALSE}, so + columns are matched by position and the column names from the first + group are retained. If different groups return columns with different + names in the same positions, the names from later groups may be ignored + and the resulting column names can depend on the order of groups. +} \item{keyby}{ Same as \code{by}, but with an additional \code{setkey()} run on the \code{by} columns of the result, for convenience. It is common practice to use \code{keyby=} routinely when you wish the result to be sorted. May also be \code{TRUE} or \code{FALSE} when \code{by} is provided as an alternative way to accomplish the same operation.} diff --git a/tests/by-rbindlist-use-names-false.R b/tests/by-rbindlist-use-names-false.R new file mode 100644 index 000000000..956f413d7 --- /dev/null +++ b/tests/by-rbindlist-use-names-false.R @@ -0,0 +1,16 @@ +require(data.table) + +DT1 <- data.table(group = 1:2)[ + , data.table(x = 1, y = 2)[, group, with = FALSE] + , by = group +] + +DT2 <- data.table(group = 2:1)[ + , data.table(x = 1, y = 2)[, group, with = FALSE] + , by = group +] + +stopifnot( + names(DT1)[2L] == "x", + names(DT2)[2L] == "y" +) From bcbc6558e5aa0efb0e80ca73681ccb5a79fd11df Mon Sep 17 00:00:00 2001 From: Mahjabin Oyshi Date: Fri, 27 Feb 2026 10:14:22 +0600 Subject: [PATCH 2/2] Shorten by doc and remove test per review on #7651 --- man/data.table.Rd | 10 +++------- tests/by-rbindlist-use-names-false.R | 16 ---------------- 2 files changed, 3 insertions(+), 23 deletions(-) delete mode 100644 tests/by-rbindlist-use-names-false.R diff --git a/man/data.table.Rd b/man/data.table.Rd index aa9b7ce02..4d259d45a 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -115,13 +115,9 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \emph{Advanced:} In the \code{X[Y, j]} form of grouping, the \code{j} expression sees variables in \code{X} first, then \code{Y}. We call this \emph{join inherited scope}. If the variable is not in \code{X} or \code{Y} then the calling frame is searched, its calling frame, and so on in the usual way up to and including the global environment. - When \code{by} is supplied, \code{j} is evaluated once per group and - the per-group results are then combined row-wise. Internally this - combination uses \code{rbindlist} with \code{use.names = FALSE}, so - columns are matched by position and the column names from the first - group are retained. If different groups return columns with different - names in the same positions, the names from later groups may be ignored - and the resulting column names can depend on the order of groups. + When \code{by} is supplied, per-group \code{j} results are combined using + \code{\link{rbindlist}}; see its help for details on how columns and names + are handled. } \item{keyby}{ Same as \code{by}, but with an additional \code{setkey()} run on the \code{by} columns of the result, for convenience. It is common practice to use \code{keyby=} routinely when you wish the result to be sorted. May also be \code{TRUE} or \code{FALSE} when \code{by} is provided as an alternative way to accomplish the same operation.} diff --git a/tests/by-rbindlist-use-names-false.R b/tests/by-rbindlist-use-names-false.R deleted file mode 100644 index 956f413d7..000000000 --- a/tests/by-rbindlist-use-names-false.R +++ /dev/null @@ -1,16 +0,0 @@ -require(data.table) - -DT1 <- data.table(group = 1:2)[ - , data.table(x = 1, y = 2)[, group, with = FALSE] - , by = group -] - -DT2 <- data.table(group = 2:1)[ - , data.table(x = 1, y = 2)[, group, with = FALSE] - , by = group -] - -stopifnot( - names(DT1)[2L] == "x", - names(DT2)[2L] == "y" -)