Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

4. `dcast()` and `melt()` "just work" when passed a data.frame, not just data.tables, with no need for coercion, [#7614](https://github.com/Rdatatable/data.table/issues/7614). Thanks @MichaelChirico for the suggestion and @manmita for the PR. Note that to avoid potential conflicts with {reshape2}'s data.frame methods, we do the dispatch to the data.table method manually.

5. `setnames()` now supports a global option `datatable.unique.names` to control the creation of duplicate column names. Users can choose between `"off"` (default), `"warn"`, `"error"`, or `"rename"`. This addresses long-standing ambiguity issues when duplicate names were created silently, [#4044](https://github.com/Rdatatable/data.table/issues/4044). Thanks to @venom1204 for the PR.

### BUG FIXES

1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
Expand Down
8 changes: 6 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2943,13 +2943,17 @@ setnames = function(x,old,new,skip_absent=FALSE) {
if (!length(new)) return(invisible(x)) # no changes
if (length(i) != length(new)) internal_error("length(i)!=length(new)") # nocov
}
# update the key if the column name being change is in the key

full_names = names(x)
full_names[i] = new
full_names = process_name_policy(full_names)
new = full_names[i]

m = chmatch(names(x)[i], key(x))
w = which(!is.na(m))
if (length(w))
.Call(Csetcharvec, attr(x, "sorted", exact=TRUE), m[w], new[w])

# update secondary keys
idx = attr(x, "index", exact=TRUE)
for (k in names(attributes(idx))) {
tt = strsplit(k,split="__")[[1L]][-1L]
Expand Down
3 changes: 2 additions & 1 deletion R/onLoad.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@
datatable.auto.index=TRUE, # DT[col=="val"] to auto add index so 2nd time faster
datatable.use.index=TRUE, # global switch to address #1422
datatable.prettyprint.char=NULL, # FR #1091
datatable.old.matrix.autoname=FALSE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
datatable.old.matrix.autoname=FALSE, # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
datatable.unique.names = "off"
)
opts = opts[!names(opts) %chin% names(options())]
options(opts)
Expand Down
26 changes: 26 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,32 @@
table_name, brackify(duplicate_names), domain=NA)
}

# Apply the 'datatable.unique.names' option to a candidate vector of column names.
#
# names_vec: character vector holding the full set of column names as they would
#   be after the pending change.
#
# Returns the names to use:
#   * "off" (the default, also used when the option is NULL): names_vec unchanged.
#   * "warn": names_vec unchanged, after a warning if it contains duplicates.
#   * "error": stops if names_vec contains duplicates, otherwise names_vec unchanged.
#   * "rename": make.unique(names_vec) if it contains duplicates, otherwise unchanged.
# Any other setting (wrong type, NA, not length 1, unknown string) raises a warning
# and is treated as "off".
process_name_policy = function(names_vec) {
  policy = getOption("datatable.unique.names", "off")
  if (is.null(policy)) return(names_vec)

  # Validate before 'policy' is used as an if()/switch() condition: an NA, empty or
  # length>1 setting would otherwise raise an unrelated error instead of falling back.
  allowed = c("off", "warn", "error", "rename")
  valid = is.character(policy) && length(policy) == 1L && !is.na(policy) && policy %in% allowed
  if (!valid) {
    warningf("Invalid value for 'datatable.unique.names': [%s]. Falling back to 'off'. Allowed values are: 'off', 'warn', 'error', 'rename'.", paste(as.character(policy), collapse=", "))
    return(names_vec)
  }

  if (policy == "off" || !anyDuplicated(names_vec)) return(names_vec)
  if (policy == "rename") return(make.unique(names_vec))

  dups = unique(names_vec[duplicated(names_vec)])
  # The column names are passed as an argument to a literal format rather than being
  # pre-formatted and reused as the format itself, so a name containing '%' is safe.
  switch(policy,
    warn = warningf("Duplicate column names created: %s. This may cause ambiguity.", brackify(dups)),
    error = stopf("Duplicate column names created: %s. This may cause ambiguity.", brackify(dups))
  )
  names_vec
}

duplicated_values = function(x) {
# fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case
if (!anyDuplicated(x)) return(vector(typeof(x)))
Expand Down
11 changes: 11 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -21520,3 +21520,14 @@ test(2365.1, melt(df_melt, id.vars=1:2), melt(dt_melt, id.vars=1:2))
df_dcast = data.frame(a = c("x", "y"), b = 1:2, v = 3:4)
dt_dcast = data.table(a = c("x", "y"), b = 1:2, v = 3:4)
test(2365.2, dcast(df_dcast, a ~ b, value.var = "v"), dcast(dt_dcast, a ~ b, value.var = "v"))

# #4044: setnames() applies the datatable.unique.names policy when a rename would
# produce a duplicate column name. The option is supplied through test()'s options=
# argument so it applies only to that test and does not leak into later tests.
DT = as.data.table(iris)
dup_names = c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species")
# "off": duplicates are created silently
test(2366.1, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), dup_names, options=list(datatable.unique.names="off"))
# "warn": duplicates are still created, with a warning
test(2366.2, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), dup_names, warning="Duplicate column names created", options=list(datatable.unique.names="warn"))
# "error": the rename is refused
test(2366.3, setnames(copy(DT), "Petal.Length", "Sepal.Length"), error="Duplicate column names created", options=list(datatable.unique.names="error"))
# "rename": the new name is made unique via make.unique()
test(2366.4, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), c("Sepal.Length", "Sepal.Width", "Sepal.Length.1", "Petal.Width", "Species"), options=list(datatable.unique.names="rename"))
6 changes: 6 additions & 0 deletions man/data.table-options.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@
\item{\code{datatable.enlist}}{Experimental feature. Default is \code{NULL}. If set to a function
(e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
be "enlisted" into columns in the result.}
\item{\code{datatable.unique.names}}{A character string, default \code{"off"}.
Controls the behavior when operations (like \code{setnames}) would result in
duplicate column names. Can be \code{"off"} (silently allow duplicates),
\code{"warn"} (issue a warning), \code{"error"} (halt with an error),
or \code{"rename"} (automatically fix duplicates using \code{make.unique}).
Invalid values will trigger a warning and fall back to \code{"off"}.}
}
}

Expand Down
Loading