diff --git a/NEWS.md b/NEWS.md index 03b0f50bff..f7c073d0b4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,8 @@ # 2: 2 6 4 5 ``` +8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`. When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. + ### BUG FIXES 1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix. diff --git a/R/groupingsets.R b/R/groupingsets.R index 63e94d1b62..8a2eddcde9 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -13,7 +13,7 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { sets = lapply(length(by):0L, function(i) by[0L:i]) # redirect to workhorse function jj = substitute(j) - groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label) + groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) } cube = function(x, ...) { @@ -35,13 +35,13 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) # redirect to workhorse function jj = substitute(j) - groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label) + groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame()) } groupingsets = function(x, ...) { UseMethod("groupingsets") } -groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, ...) { +groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, enclos = parent.frame(), ...) { # input data type basic validation if (!is.data.table(x)) stopf("Argument 'x' must be a data.table object") @@ -112,7 +112,10 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe .SDcols = if (".SD" %chin% av) setdiff(names(x), by) else NULL if (length(names(by))) by = unname(by) # 0 rows template data.table to keep colorder and type - empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by] + # inline all arguments that might clash with enclosing environment + pcall = substitute(x[0L, jj, by], list(x = x, jj = jj, by = by)) + if (length(.SDcols)) pcall$.SDcols = .SDcols + empty = eval(pcall, list(.datatable.aware = TRUE), enclos) if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field stopf("When using `id=TRUE` the 'j' expression must not evaluate to a column named 'grouping'.") if (anyDuplicated(names(empty)) > 0L) @@ -150,8 +153,12 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe stopf("Using integer64 class columns require to have 'bit64' package installed.") # nocov int64.by.cols = intersect(int64.cols, by) # aggregate function called for each grouping set + # inline all arguments that might clash with enclosing environment + pcall = substitute(x[, jj], list(x = x, jj = jj)) + if (length(.SDcols)) pcall$.SDcols = .SDcols aggregate.set = function(by.set) { - r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set] + pcall$by = by.set + r = eval(pcall, list(.datatable.aware = TRUE), enclos) if (id) { # integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE # 3267: strtoi("", base = 2L) output apparently unstable across platforms diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9ffcb01fce..8de433dc05 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21252,3 +21252,27 @@ it <- as.ITime('00:00:00') test(2323.1, names(data.frame(COL = it)), "COL") test(2323.2, names(data.frame(b = 1, COL = it)), c("b", "COL")) test(2323.3, names(as.data.frame(it, optional=TRUE)), NULL) + +# 'sets' is a local variable in groupingsets(), cube(), rollup() and shouldn't leak into the 'j' expression +n = 24L +set.seed(25) +DT = data.table( + color = sample(c("green","yellow","red"), n, TRUE), + year = as.Date(sample(paste0(2011:2015,"-01-01"), n, TRUE)), + status = as.factor(sample(c("removed","active","inactive","archived"), n, TRUE)), + amount = sample(1:5, n, TRUE), + value = sample(c(3, 3.5, 2.5, 2), n, TRUE) +) +sets = 0 +test(2324.0, + groupingsets(DT, j = c(list(count=.N + ..sets)), by = c("color","year","status"), sets = list("color", c("year","status"), character()), id=TRUE), + groupingsets(DT, j = c(list(count=.N + 0)), by = c("color","year","status"), sets = list("color", c("year","status"), character()), id=TRUE) +) +test(2324.1, + cube(DT, j = sum(value) + ..sets, by = c("color","year","status"), id=TRUE), + cube(DT, j = sum(value), by = c("color","year","status"), id=TRUE) +) +test(2324.2, + rollup(DT, j = sum(value) + ..sets, by=c("color","year","status"), label="total"), + rollup(DT, j = sum(value), by=c("color","year","status"), label="total") +) diff --git a/man/groupingsets.Rd b/man/groupingsets.Rd index 5d8a309548..7d87175dec 100644 --- a/man/groupingsets.Rd +++ b/man/groupingsets.Rd @@ -15,7 +15,7 @@ rollup(x, \dots) cube(x, \dots) \method{cube}{data.table}(x, j, by, .SDcols, id = FALSE, label = NULL, \dots) groupingsets(x, \dots) -\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, \dots) +\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, enclos = parent.frame(), \dots) } \arguments{ \item{x}{\code{data.table}.} @@ -27,6 +27,7 @@ groupingsets(x, \dots) \item{id}{logical default \code{FALSE}. If \code{TRUE} it will add leading column with bit mask of grouping sets.} \item{jj}{quoted version of \code{j} argument, for convenience. When provided function will ignore \code{j} argument.} \item{label}{label(s) to be used in the 'total' rows in the grouping variable columns of the output, that is, in rows where the grouping variable has been aggregated. Can be a named list of scalars, or a scalar, or \code{NULL}. Defaults to \code{NULL}, which results in the grouping variables having \code{NA} in their 'total' rows. See Details.} + \item{enclos}{the environment containing the symbols referenced by \code{jj}. When writing functions that accept a \code{j} environment for non-standard evaluation by \pkg{data.table}, \code{\link[base]{substitute}()} it and forward it to \code{groupingsets} using the \code{jj} argument, set this to the \code{\link[base]{parent.frame}()} of the function that captures \code{j}.} } \details{ All three functions \code{rollup, cube, groupingsets} are generic methods, \code{data.table} methods are provided.