diff --git a/NEWS.md b/NEWS.md index 9163d9eb3f..db9ab2dee3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,8 @@ 13. `dcast`gains `value.var.in.dots`, `value.var.in.LHSdots` and `value.var.in.RHSdots` arguments, [#5824](https://github.com/Rdatatable/data.table/issues/5824). This allows the `value.var` variable(s) in `dcast` to be represented by `...` in the formula (if not otherwise mentioned). Thanks to @iago-pssjd for the report and PR. +14. `all.equal.data.table` gains `ignore.indices` argument, [#6134](https://github.com/Rdatatable/data.table/issues/6134). + ## BUG FIXES 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix. diff --git a/R/setops.R b/R/setops.R index 23dd6ec8f6..b455fffc07 100644 --- a/R/setops.R +++ b/R/setops.R @@ -104,8 +104,8 @@ fsetequal = function(x, y, all=TRUE) { # all.equal ---- -all.equal.data.table = function(target, current, trim.levels=TRUE, check.attributes=TRUE, ignore.col.order=FALSE, ignore.row.order=FALSE, tolerance=sqrt(.Machine$double.eps), ...) { - stopifnot(is.logical(trim.levels), is.logical(check.attributes), is.logical(ignore.col.order), is.logical(ignore.row.order), is.numeric(tolerance), is.data.table(target)) +all.equal.data.table = function(target, current, trim.levels=TRUE, check.attributes=TRUE, ignore.indices=FALSE, ignore.col.order=FALSE, ignore.row.order=FALSE, tolerance=sqrt(.Machine$double.eps), ...) 
{ + stopifnot(is.logical(trim.levels), is.logical(check.attributes), is.logical(ignore.indices), is.logical(ignore.col.order), is.logical(ignore.row.order), is.numeric(tolerance), is.data.table(target)) if (!is.data.table(current)) { if (check.attributes) return(paste0('target is data.table, current is ', data.class(current))) @@ -163,20 +163,23 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu )) } # check index - i1 = indices(target) - i2 = indices(current) - if (!identical(i1, i2)) { - return(gettextf( - "Datasets have different %s. 'target': %s. 'current': %s.", - "indices", - if(length(i1)) brackify(i1) else gettextf("has no index"), - if(length(i2)) brackify(i2) else gettextf("has no index") - )) + if (!ignore.indices) { + i1 = indices(target) + i2 = indices(current) + if (!identical(i1, i2)) { + return(gettextf( + "Datasets have different %s. 'target': %s. 'current': %s.", + "indices", + if(length(i1)) brackify(i1) else gettextf("has no index"), + if(length(i2)) brackify(i2) else gettextf("has no index") + )) + } } # Trim any extra row.names attributes that came from some inheritance # Trim ".internal.selfref" as long as there is no `all.equal.externalptr` method - exclude.attrs = function(x, attrs = c("row.names",".internal.selfref")) x[!names(x) %chin% attrs] + # Trim "index" attribute, in case `ignore.indices` is set (if it is not, we already checked it). 
+ exclude.attrs = function(x, attrs = c("row.names",".internal.selfref", "index")) x[!names(x) %chin% attrs] a1 = exclude.attrs(attributes(target)) a2 = exclude.attrs(attributes(current)) if (length(a1) != length(a2)) return(sprintf("Datasets has different number of (non-excluded) attributes: target %s, current %s", length(a1), length(a2))) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 021b3f19e7..04aa5c4017 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8684,6 +8684,7 @@ DT1 <- data.table(a = 1:4, b = letters[1:4]) DT2 <- data.table(a = 1:4, b = letters[1:4]) setindexv(DT1, "b") test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.") +test(1613.251, all.equal(DT1, DT2, ignore.indices = TRUE), TRUE) test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].") test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 
'current': [a, b].") @@ -8693,6 +8694,7 @@ DT1 <- data.table(a = 1:4, b = letters[1:4]) DT2 <- data.table(a = 1:4, b = letters[1:4]) setattr(DT1, "custom", 1L) test(1613.30, all.equal(DT1, DT2), "Datasets has different number of (non-excluded) attributes: target 3, current 2") +test(1613.301, all.equal(DT1, DT2, ignore.indices = TRUE), "Datasets has different number of (non-excluded) attributes: target 3, current 2") test(1613.31, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) setattr(DT2, "custom2", 2L) test(1613.32, all.equal(DT1, DT2), "Datasets has attributes with different names: [custom, custom2]") diff --git a/man/all.equal.data.table.Rd b/man/all.equal.data.table.Rd index 44d9a29037..a89bcd87ad 100644 --- a/man/all.equal.data.table.Rd +++ b/man/all.equal.data.table.Rd @@ -8,8 +8,8 @@ \usage{ \method{all.equal}{data.table}(target, current, trim.levels=TRUE, check.attributes=TRUE, - ignore.col.order=FALSE, ignore.row.order=FALSE, tolerance=sqrt(.Machine$double.eps), - \dots) + ignore.indices=FALSE, ignore.col.order=FALSE, ignore.row.order=FALSE, + tolerance=sqrt(.Machine$double.eps), \dots) } \arguments{ @@ -27,6 +27,10 @@ A logical indicating whether or not to check attributes, will apply not only to data.table but also attributes of the columns. It will skip \code{c("row.names",".internal.selfref")} data.table attributes. } + \item{ignore.indices}{ + A logical indicating whether or not to ignore differences in indices between the two tables. Since \code{data.table} indices are attributes, \code{ignore.indices} is implied when \code{check.attributes} is \code{FALSE}. + } + \item{ignore.col.order}{ A logical indicating whether or not to ignore columns order in \code{data.table}. }