Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
# 2: 2 6 4 5
```

8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`. When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`.

### BUG FIXES

1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.
Expand Down
17 changes: 12 additions & 5 deletions R/groupingsets.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
sets = lapply(length(by):0L, function(i) by[0L:i])
# redirect to workhorse function
jj = substitute(j)
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label)
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame())
}

cube = function(x, ...) {
Expand All @@ -35,13 +35,13 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) {
sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]])
# redirect to workhorse function
jj = substitute(j)
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label)
groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj, label=label, enclos = parent.frame())
}

groupingsets = function(x, ...) {
UseMethod("groupingsets")
}
groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, ...) {
groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, enclos = parent.frame(), ...) {
# input data type basic validation
if (!is.data.table(x))
stopf("Argument 'x' must be a data.table object")
Expand Down Expand Up @@ -112,7 +112,10 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe
.SDcols = if (".SD" %chin% av) setdiff(names(x), by) else NULL
if (length(names(by))) by = unname(by)
# 0 rows template data.table to keep colorder and type
empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by]
# inline all arguments that might clash with enclosing environment
pcall = substitute(x[0L, jj, by], list(x = x, jj = jj, by = by))
if (length(.SDcols)) pcall$.SDcols = .SDcols
empty = eval(pcall, list(.datatable.aware = TRUE), enclos)
if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field
stopf("When using `id=TRUE` the 'j' expression must not evaluate to a column named 'grouping'.")
if (anyDuplicated(names(empty)) > 0L)
Expand Down Expand Up @@ -150,8 +153,12 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe
stopf("Using integer64 class columns require to have 'bit64' package installed.") # nocov
int64.by.cols = intersect(int64.cols, by)
# aggregate function called for each grouping set
# inline all arguments that might clash with enclosing environment
pcall = substitute(x[, jj], list(x = x, jj = jj))
if (length(.SDcols)) pcall$.SDcols = .SDcols
aggregate.set = function(by.set) {
r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set]
pcall$by = by.set
r = eval(pcall, list(.datatable.aware = TRUE), enclos)
if (id) {
# integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE
# 3267: strtoi("", base = 2L) output apparently unstable across platforms
Expand Down
24 changes: 24 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -21252,3 +21252,27 @@ it <- as.ITime('00:00:00')
test(2323.1, names(data.frame(COL = it)), "COL")
test(2323.2, names(data.frame(b = 1, COL = it)), c("b", "COL"))
test(2323.3, names(as.data.frame(it, optional=TRUE)), NULL)

# 'sets' is a local variable in groupingsets(), cube(), rollup() and shouldn't leak into the 'j' expression
n = 24L
set.seed(25)
DT = data.table(
color = sample(c("green","yellow","red"), n, TRUE),
year = as.Date(sample(paste0(2011:2015,"-01-01"), n, TRUE)),
status = as.factor(sample(c("removed","active","inactive","archived"), n, TRUE)),
amount = sample(1:5, n, TRUE),
value = sample(c(3, 3.5, 2.5, 2), n, TRUE)
)
sets = 0
test(2324.0,
groupingsets(DT, j = c(list(count=.N + ..sets)), by = c("color","year","status"), sets = list("color", c("year","status"), character()), id=TRUE),
groupingsets(DT, j = c(list(count=.N + 0)), by = c("color","year","status"), sets = list("color", c("year","status"), character()), id=TRUE)
)
test(2324.1,
cube(DT, j = sum(value) + ..sets, by = c("color","year","status"), id=TRUE),
cube(DT, j = sum(value), by = c("color","year","status"), id=TRUE)
)
test(2324.2,
rollup(DT, j = sum(value) + ..sets, by=c("color","year","status"), label="total"),
rollup(DT, j = sum(value), by=c("color","year","status"), label="total")
)
3 changes: 2 additions & 1 deletion man/groupingsets.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ rollup(x, \dots)
cube(x, \dots)
\method{cube}{data.table}(x, j, by, .SDcols, id = FALSE, label = NULL, \dots)
groupingsets(x, \dots)
\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, \dots)
\method{groupingsets}{data.table}(x, j, by, sets, .SDcols, id = FALSE, jj, label = NULL, enclos = parent.frame(), \dots)
}
\arguments{
\item{x}{\code{data.table}.}
Expand All @@ -27,6 +27,7 @@ groupingsets(x, \dots)
\item{id}{logical default \code{FALSE}. If \code{TRUE} it will add leading column with bit mask of grouping sets.}
\item{jj}{quoted version of \code{j} argument, for convenience. When provided function will ignore \code{j} argument.}
\item{label}{label(s) to be used in the 'total' rows in the grouping variable columns of the output, that is, in rows where the grouping variable has been aggregated. Can be a named list of scalars, or a scalar, or \code{NULL}. Defaults to \code{NULL}, which results in the grouping variables having \code{NA} in their 'total' rows. See Details.}
\item{enclos}{the environment containing the symbols referenced by \code{jj}. When writing functions that accept a \code{j} environment for non-standard evaluation by \pkg{data.table}, \code{\link[base]{substitute}()} it and forward it to \code{groupingsets} using the \code{jj} argument, set this to the \code{\link[base]{parent.frame}()} of the function that captures \code{j}.}
}
\details{
All three functions \code{rollup, cube, groupingsets} are generic methods, \code{data.table} methods are provided.
Expand Down
Loading