diff --git a/NEWS.md b/NEWS.md index 64175cc68b..e86960c8d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,8 @@ 5. `cube()` will now produce expected order of results, [#3179](https://github.com/Rdatatable/data.table/issues/3179). Thanks to @Henrik-P for reporting. +6. `groupingsets()` groups by empty column set and constant value in `j`, [#3173](https://github.com/Rdatatable/data.table/issues/3173). + #### NOTES 1. When data.table first loads it now checks the DLL's MD5. This is to detect installation issues on Windows when you upgrade and i) the DLL is in use by another R session and ii) the CRAN source version > CRAN binary binary which happens just after a new release (R prompts users to install from source until the CRAN binary is available). This situation can lead to a state where the package's new R code calls old C code in the old DLL; [R#17478](https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478), [#3056](https://github.com/Rdatatable/data.table/issues/3056). This broken state can persist until, hopefully, you experience a strange error caused by the mismatch. Otherwise, wrong results may occur silently. This situation applies to any R package with compiled code not just data.table, is Windows-only, and is long-standing. It has only recently been understood as it typically only occurs during the few days after each new release until binaries are available on CRAN. Thanks to Gabor Csardi for the suggestion to use `tools::checkMD5sums()`. diff --git a/R/groupingsets.R b/R/groupingsets.R index 498ded2b72..64ec72771a 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -76,7 +76,10 @@ groupingsets.data.table <- function(x, j, by, sets, .SDcols, id = FALSE, jj, ... empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by] } else { empty = if (length(.SDcols)) x[0L, eval(jj), .SDcols=.SDcols] else x[0L, eval(jj)] - if (!is.data.table(empty)) empty = setDT(list(empty)) # improve after #648, see comment in aggr.set + if (!is.data.table(empty)) { + if (length(empty)>0) empty = empty[0L] # fix for #3173 when no grouping and j constant + empty = setDT(list(empty)) # improve after #648, see comment in aggregate.set + } } if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field stop("When using `id=TRUE` the 'j' expression must not evaluate to column named 'grouping'.") diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ab6945c3fb..c20448a646 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12428,7 +12428,29 @@ for (i in 1:4) { test(1959.5, fread("A\n\nB\n\nC\n1\n", skip=2), data.table(B=c("", "C", "1"))) test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=1L, C=2L)) -# order of cube results consistent to postgresql docs #3179 +# empty set with constant j, #3173 +DT = data.table( + color = c("yellow", "red", "green", "red", "green", "red", + "yellow", "yellow", "green", "green", "green", "yellow", + "red", "yellow", "red", "green", "yellow", "red", "yellow", + "red", "green", "yellow", "green", "green"), + year = structure(c(15340, 15340, 14975, 15706, 15706, 15340, + 16436, 15340, 15340, 14975, 16436, 15706, + 16436, 15340, 14975, 14975, 16071, 15340, + 15706, 16071, 15706, 15340, 16436, 16071), class = "Date"), + status = structure(c(4L, 3L, 4L, 3L, 2L, 1L, 3L, 4L, 4L, 3L, 4L, 4L, + 4L, 4L, 1L, 3L, 3L, 2L, 1L, 2L, 3L, 4L, 2L, 4L), + .Label = c("active", "archived", "inactive", "removed"), + class = "factor"), + amount = c(1L, 4L, 2L, 3L, 1L, 5L, 1L, 1L, 4L, 2L, 3L, 1L, + 5L, 4L, 2L, 2L, 4L, 3L, 3L, 2L, 4L, 4L, 1L, 2L), + value = c(2.5, 2, 3, 3, 2.5, 3.5, 2.5, 3.5, 3, 2.5, 3.5, 2.5, 2, + 2.5, 3, 3, 3, 3, 3, 3, 2, 2.5, 3, 3) +) +ans = groupingsets(DT[ , .(amount, value)], j = 5, by = character(0L), sets = list(character()), id=TRUE) +test(1960, ans, data.table(grouping=0L, V1=5)) + +# order of cube results consistent to postgresql docs, #3179 set.seed(1) d <- data.table(a = rep(1:2, each = 8), b = rep(1:2, each = 4), @@ -12444,7 +12466,7 @@ gs = groupingsets(d, j = sum(val), by = c("a", "b", "c"), c( "b" ), c( "c"), character()), id=TRUE) -test(1963, cb, gs) +test(1961, cb, gs) ###################################