From ed8194c421d98522b7f7b3150bcdb4ec396a535c Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sat, 1 Dec 2018 09:57:40 +0530 Subject: [PATCH] groupingsets edge case for const j fix, closes #3173 --- NEWS.md | 2 ++ R/groupingsets.R | 5 ++++- inst/tests/tests.Rraw | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b7d30da3af..ee2c0ba7a4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,8 @@ 4. `fread(..., skip=)` now skips non-standard `\r` and `\n\r` line endings properly again, [#3006](https://github.com/Rdatatable/data.table/issues/3006). Standard line endings (`\n` Linux/Mac and `\r\n` Windows) were skipped ok. Thanks to @brattono and @tbrycekelly for providing reproducible examples, and @st-pasha for fixing. +5. `groupingsets` edge case of grouping by empty column set and using constant value in `j` now properly handled. Closes [#3173](https://github.com/Rdatatable/data.table/issues/3173). + #### NOTES 1. When data.table first loads it now checks the DLL's MD5. This is to detect installation issues on Windows when you upgrade and i) the DLL is in use by another R session and ii) the CRAN source version > CRAN binary binary which happens just after a new release (R prompts users to install from source until the CRAN binary is available). This situation can lead to a state where the package's new R code calls old C code in the old DLL; [R#17478](https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478), [#3056](https://github.com/Rdatatable/data.table/issues/3056). This broken state can persist until, hopefully, you experience a strange error caused by the mismatch. Otherwise, wrong results may occur silently. This situation applies to any R package with compiled code not just data.table, is Windows-only, and is long-standing. It has only recently been understood as it typically only occurs during the few days after each new release until binaries are available on CRAN. Thanks to Gabor Csardi for the suggestion to use `tools::checkMD5sums()`. diff --git a/R/groupingsets.R b/R/groupingsets.R index 6c40d416e0..113b1acbeb 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -76,7 +76,10 @@ groupingsets.data.table <- function(x, j, by, sets, .SDcols, id = FALSE, jj, ... empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by] } else { empty = if (length(.SDcols)) x[0L, eval(jj), .SDcols=.SDcols] else x[0L, eval(jj)] - if (!is.data.table(empty)) empty = setDT(list(empty)) # improve after #648, see comment in aggr.set + if (!is.data.table(empty)) { + if (length(empty)>0) empty = empty[0L] # fix for #3173 when no grouping and j constant + empty = setDT(list(empty)) # improve after #648, see comment in aggregate.set + } } if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field stop("When using `id=TRUE` the 'j' expression must not evaluate to column named 'grouping'.") diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 87fca343cf..5ceed77d20 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12419,6 +12419,28 @@ for (i in 1:4) { test(1959.5, fread("A\n\nB\n\nC\n1\n", skip=2), data.table(B=c("", "C", "1"))) test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=1L, C=2L)) +# edge case in groupingsets #3173 +DT = data.table( + color = c("yellow", "red", "green", "red", "green", "red", + "yellow", "yellow", "green", "green", "green", "yellow", + "red", "yellow", "red", "green", "yellow", "red", "yellow", + "red", "green", "yellow", "green", "green"), + year = structure(c(15340, 15340, 14975, 15706, 15706, 15340, + 16436, 15340, 15340, 14975, 16436, 15706, + 16436, 15340, 14975, 14975, 16071, 15340, + 15706, 16071, 15706, 15340, 16436, 16071), class = "Date"), + status = structure(c(4L, 3L, 4L, 3L, 2L, 1L, 3L, 4L, 4L, 3L, 4L, 4L, + 4L, 4L, 1L, 3L, 3L, 2L, 1L, 2L, 3L, 4L, 2L, 4L), + .Label = c("active", "archived", "inactive", "removed"), + class = "factor"), + amount = c(1L, 4L, 2L, 3L, 1L, 5L, 1L, 1L, 4L, 2L, 3L, 1L, + 5L, 4L, 2L, 2L, 4L, 3L, 3L, 2L, 4L, 4L, 1L, 2L), + value = c(2.5, 2, 3, 3, 2.5, 3.5, 2.5, 3.5, 3, 2.5, 3.5, 2.5, 2, + 2.5, 3, 3, 3, 3, 3, 3, 2, 2.5, 3, 3) +) +ans = groupingsets(DT[ , .(amount, value)], j = 5, by = character(0L), sets = list(character()), id=TRUE) +test(1961, ans, data.table(grouping=0L, V1=5)) + ################################### # Add new tests above this line #