From 98e1c568db9b7d2d118044c8faddf218ae4cce2d Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 7 Sep 2025 21:33:58 +0200 Subject: [PATCH 01/12] all --- NAMESPACE | 1 + NEWS.md | 47 ++++++++++ R/froll.R | 41 +++++++++ R/frollapply.R | 14 ++- inst/tests/froll.Rraw | 207 +++++++++++++++++++++++++++++++++++------- man/froll.Rd | 4 +- man/frollapply.Rd | 4 +- src/data.table.h | 1 + src/frollR.c | 71 +++++++++++++-- src/init.c | 1 + 10 files changed, 345 insertions(+), 46 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 235f654e85..3fa8575448 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,7 @@ export(frollmean) export(frollsum) export(frollmax) export(frollapply) +export(frolladapt) export(nafill) export(setnafill) export(.Last.updated) diff --git a/NEWS.md b/NEWS.md index 17fb0c1e20..35417c6b5f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,6 +32,19 @@ ``` Additionally argument names in `frollapply` has been renamed from `x` to `X` and `n` to `N` to avoid conflicts with common argument names that may be passed to `...`, aligning to base R API of `lapply`. `x` and `n` continue to work with a warning, for now. +5. Adaptive rolling functions no longer tolerate `NA`s and negative values passed to the `n` argument. + ```r + n = c(2,NA,2) + frollsum(1:3, n, adaptive=TRUE) + #Error in froll(fun = "sum", x = x, n = n, fill = fill, algo = algo, align = align, : + # 'n' must be non-negative integer values (>= 0) + ``` + If the previous behavior for `NA`s is needed for some reason, it can be achieved by replacing `NA`s with a sufficiently large value: + ```r + n = nafill(c(2,NA,2), fill=.Machine$integer.max) + frollsum(1:3, n, adaptive=TRUE) + ``` + ### NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES 1. 
`data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`. @@ -210,6 +223,40 @@ #[1] TRUE ``` +18. New `frolladapt` helper function has been added to aid in preparing adaptive rolling window widths when dealing with _irregularly spaced ordered data_. This lets the user apply a rolling function over a period without having to deal with gaps in the data where some periods might be missing. +```r +idx = as.Date("2022-10-23") + c(0,1,4,5,6,7,9,10,14) +dt = data.table(index=idx, value=seq_along(idx)) +dt +# index value +# <Date> <int> +#1: 2022-10-23 1 +#2: 2022-10-24 2 +#3: 2022-10-27 3 +#4: 2022-10-28 4 +#5: 2022-10-29 5 +#6: 2022-10-30 6 +#7: 2022-11-01 7 +#8: 2022-11-02 8 +#9: 2022-11-06 9 +dt[, c("rollmean3","rollmean3days") := list( + frollmean(value, 3), + frollmean(value, frolladapt(index, 3), adaptive=TRUE) + )] +dt +# index value rollmean3 rollmean3days +# <Date> <int> <num> <num> +#1: 2022-10-23 1 NA NA +#2: 2022-10-24 2 NA NA +#3: 2022-10-27 3 2 3.0 +#4: 2022-10-28 4 3 3.5 +#5: 2022-10-29 5 4 4.0 +#6: 2022-10-30 6 5 5.0 +#7: 2022-11-01 7 6 6.5 +#8: 2022-11-02 8 7 7.5 +#9: 2022-11-06 9 8 9.0 +``` + ### BUG FIXES 1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR. 
diff --git a/R/froll.R b/R/froll.R index 2f834614b2..1ed68d28dc 100644 --- a/R/froll.R +++ b/R/froll.R @@ -111,6 +111,47 @@ make.roll.names = function(x.len, n.len, n, x.nm, n.nm, fun, adaptive) { ans } +# irregularly spaced time series, helper for creating adaptive window size +frolladapt = function(x, n, align="right", partial=FALSE, give.names=FALSE) { + x = unclass(x) + if (!is.numeric(x)) + stopf("'x' must be of a numeric type") + if (!is.integer(x)) + x = as.integer(x) + if (!is.numeric(n)) { + stopf("'n' must be an integer") + } else { + nms = names(n) ## only for give.names + if (!is.integer(n)) { + if (!fitsInInt32(n)) + stopf("'n' must be an integer") + n = as.integer(n) + } + } + if (!length(n)) + stopf("'n' must be non 0 length") + if (anyNA(n)) + stopf("'n' must not have NAs") + if (!identical(align, "right")) + stopf("'align' other than 'right' has not yet been implemented") + if (!isTRUEorFALSE(partial)) + stopf("'partial' must be TRUE or FALSE") + if (!isTRUEorFALSE(give.names)) + stopf("'give.names' must be TRUE or FALSE") + + if (length(n) == 1L) { + ans = .Call(Cfrolladapt, x, n, partial) + } else { + ans = lapply(n, function(.n) .Call(Cfrolladapt, x, .n, partial)) + if (give.names) { + if (is.null(nms)) + nms = paste0("n", as.character(n)) + setattr(ans, "names", nms) + } + } + ans +} + froll = function(fun, x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), na.rm=FALSE, has.nf=NA, adaptive=FALSE, partial=FALSE, give.names=FALSE, hasNA) { stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun)) if (!missing(hasNA)) { diff --git a/R/frollapply.R b/R/frollapply.R index 74de4732fb..37dc9ca1db 100644 --- a/R/frollapply.R +++ b/R/frollapply.R @@ -141,12 +141,14 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," stopf("'N' must be non 0 length") if (!adaptive) { if (is.list(N)) - stopf("'N' must be integer, list is accepted for adaptive TRUE") + stopf("'N' must be an integer, list 
is accepted for adaptive TRUE") else if (!is.numeric(N)) - stopf("'N' must be integer vector") + stopf("'N' must be an integer") nnam = names(N) ## used for give.names if (!is.integer(N)) N = as.integer(N) + if (anyNA(N)) + stopf("'N' must be non-negative integer values (>= 0)") nn = length(N) ## top level loop for vectorized n } else { if (length(unique(len)) > 1L) ## vectorized x requires same nrow for adaptive @@ -156,6 +158,8 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," stopf("length of integer vector(s) provided as list to 'N' argument must be equal to number of observations provided in 'X'") if (!is.integer(N)) N = as.integer(N) + if (anyNA(N)) + stopf("'N' must be non-negative integer values (>= 0)") nn = 1L N = list(N) nnam = character() @@ -165,13 +169,15 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," if (!equal.lengths(N)) stopf("adaptive windows provided in 'N' must not to have different lengths") if (!all(vapply_1b(N, is.numeric, use.names=FALSE))) - stopf("n must be an integer vector or list of an integer vectors") + stopf("'N' must be an integer vector or list of integer vectors") if (!all(vapply_1b(N, is.integer, use.names=FALSE))) N = lapply(N, as.integer) + if (any(vapply_1b(N, anyNA, use.names=FALSE))) + stopf("'N' must be non-negative integer values (>= 0)") nn = length(N) nnam = names(N) } else - stopf("n must be an integer vector or list of an integer vectors") + stopf("'N' must be an integer vector or list of integer vectors") } ## partial if (partial) { diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 452c4ff0c8..92a2b229ed 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -78,22 +78,22 @@ test(6000.011, frollmean(x, n, adaptive=TRUE), list(c(NA, 1, 1.25), c(NA, 1, 1.2 #### error on unsupported type dx = data.table(real=1:10/2, char=letters[1:10]) -test(6000.012, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, 
data.frame or data.table of such") +test(6000.012, frollmean(dx, 3), error="'x' must be of type numeric or logical, or a list, data.frame or data.table of such") dx = data.table(real=1:10/2, fact=factor(letters[1:10])) -test(6000.013, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") +test(6000.013, frollmean(dx, 3), error="'x' must be of type numeric or logical, or a list, data.frame or data.table of such") #dx = data.table(real=1:10/2, logi=logical(10)) #test(6000.014, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric types") # commented out as support added in #3749, tested in .009 dx = data.table(real=1:10/2, list=rep(list(NA), 10)) -test(6000.015, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") +test(6000.015, frollmean(dx, 3), error="'x' must be of type numeric or logical, or a list, data.frame or data.table of such") x = letters[1:10] -test(6000.016, frollmean(x, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") +test(6000.016, frollmean(x, 3), error="'x' must be of type numeric or logical, or a list, data.frame or data.table of such") x = 1:10/2 -test(6000.017, frollmean(x, "a"), error="n must be integer") -test(6000.018, frollmean(x, factor("a")), error="n must be integer") -test(6000.019, frollmean(x, TRUE), error="n must be integer") -test(6000.020, frollmean(x, list(1:10)), error="n must be integer, list is accepted for adaptive TRUE") -test(6000.021, frollmean(x, list(NA), adaptive=TRUE), error="n must be an integer vector or list of an integer vectors") -test(6000.022, frollmean(x, list(c(1:5,1:5), NA), adaptive=TRUE), error="n must be an integer vector or list of an integer vectors") +test(6000.017, frollmean(x, "a"), error="'n' must be an integer") +test(6000.018, frollmean(x, factor("a")), error="'n' must be an integer") +test(6000.019, frollmean(x, TRUE), 
error="'n' must be an integer") +test(6000.020, frollmean(x, list(1:10)), error="'n' must be an integer, list is accepted for adaptive TRUE") +test(6000.021, frollmean(x, list(NA), adaptive=TRUE), error="'n' must be an integer vector or list of integer vectors") +test(6000.022, frollmean(x, list(c(1:5,1:5), NA), adaptive=TRUE), error="'n' must be an integer vector or list of integer vectors") #### various length list vectors l = list(1:6/2, 3:10/4) @@ -371,10 +371,10 @@ test(6000.086, frollmean(list(1:3, 2:4), integer()), error="n must be non 0 leng #### n==0 (k==0, k[i]==0) ## 6000.87 6000.88 moved to 6001. #### n<0 -test(6000.089, frollmean(1:3, -2), error="n must be non-negative integer values") -test(6000.0891, frollmean(1:3, c(-2,2), adaptive=TRUE), error="n must be non-negative integer values") +test(6000.089, frollmean(1:3, -2), error="'n' must be non-negative integer values") +test(6000.0891, frollmean(1:3, c(-2,2), adaptive=TRUE), error="'n' must be non-negative integer values") #### n[[1L]]>0 && n[[2L]]<0 -test(6000.090, frollmean(1:3, c(2, -2)), error="n must be non-negative integer values") +test(6000.090, frollmean(1:3, c(2, -2)), error="'n' must be non-negative integer values") #### n[[1L]]==n[[2L]] test(6000.091, frollmean(1:3, c(2, 2)), list(c(NA_real_, 1.5, 2.5), c(NA_real_, 1.5, 2.5))) test(6000.092, frollmean(list(1:3, 4:6), c(2, 2)), list(c(NA_real_, 1.5, 2.5), c(NA_real_, 1.5, 2.5), c(NA_real_, 4.5, 5.5), c(NA_real_, 4.5, 5.5))) @@ -411,17 +411,17 @@ test(6000.111, frollmean(list(1, 10, 5), 2, align="left"), list(NA_real_, NA_rea test(6000.112, frollmean(5, 2, align="center"), NA_real_) test(6000.113, frollmean(list(1, 10, 5), 2, align="center"), list(NA_real_, NA_real_, NA_real_)) #### n==Inf -test(6000.114, frollmean(1:5, Inf), error="n must be non-negative integer values", warning="NAs introduced by coercion*") +test(6000.114, frollmean(1:5, Inf), error="'n' must be non-negative integer values", warning="NAs introduced by coercion*") #### 
n==c(5, Inf) -test(6000.115, frollmean(1:5, c(5, Inf)), error="n must be non-negative integer values", warning="NAs introduced by coercion*") +test(6000.115, frollmean(1:5, c(5, Inf)), error="'n' must be non-negative integer values", warning="NAs introduced by coercion*") #### is.complex(n) -test(6000.116, frollmean(1:5, 3i), error="n must be integer") +test(6000.116, frollmean(1:5, 3i), error="'n' must be an integer") #### is.character(n) -test(6000.117, frollmean(1:5, "a"), error="n must be integer") +test(6000.117, frollmean(1:5, "a"), error="'n' must be an integer") #### is.factor(n) -test(6000.118, frollmean(1:5, as.factor("a")), error="n must be integer") +test(6000.118, frollmean(1:5, as.factor("a")), error="'n' must be an integer") #### is.list(n) -test(6000.119, frollmean(1:5, list(1:5)), error="n must be integer, list is accepted for adaptive TRUE") +test(6000.119, frollmean(1:5, list(1:5)), error="'n' must be an integer, list is accepted for adaptive TRUE") #### adaptive=NA test(6000.1192, frollmean(1:5, 2, adaptive=NA), error="adaptive must be TRUE or FALSE") #### na.rm=NA @@ -453,7 +453,7 @@ test(6000.1197, frollmean(c(1:5,NA), 2, algo="exact", na.rm=TRUE), output=c( )) options(datatable.verbose=FALSE) #### adaptive=TRUE n=character -test(6000.1198, frollmean(1:5, n=letters[1:5], adaptive=TRUE), error="n must be an integer vector or list of an integer vectors") +test(6000.1198, frollmean(1:5, n=letters[1:5], adaptive=TRUE), error="'n' must be an integer vector or list of integer vectors") #### non-finite values (NA, NaN, Inf, -Inf) ma = function(x, n, na.rm=FALSE, nf.rm=FALSE) { @@ -1005,6 +1005,18 @@ test(6000.502, frollmax(c(5,NaN,1), 1L), c(5,NaN,1)) test(6000.503, frollmax(c(5,1,1,NaN,1,1,1), 2L), c(NA,5,1,NaN,NaN,1,1)) test(6000.504, frollmax(c(5,1,NA,NaN,1,1,1), 2L), c(NA,5,NA,NA,NaN,1,1)) +# n==NA +test(6000.550, frollmean(1:3, NA), error="'n' must be an integer") +test(6000.551, frollmean(1:3, NA_integer_), error="'n' must be non-negative 
integer values (>= 0)") +test(6000.552, frollmean(1:3, NA, algo="exact"), error="'n' must be an integer") +test(6000.553, frollmean(1:3, NA_integer_, algo="exact"), error="'n' must be non-negative integer values (>= 0)") +test(6000.554, frollmean(adaptive=TRUE, 1:3, c(2,NA,2)), error="'n' must be non-negative integer values (>= 0)") +test(6000.555, frollmean(adaptive=TRUE, 1:3, c(2,NA,2), algo="exact"), error="'n' must be non-negative integer values (>= 0)") +test(6000.556, frollapply(FUN=mean, 1:3, NA), error="'N' must be an integer") +test(6000.557, frollapply(FUN=mean, 1:3, NA_integer_), error="'N' must be non-negative integer values (>= 0)") +test(6000.558, frollapply(FUN=mean, adaptive=TRUE, 1:3, c(2,NA,2)), error="'N' must be non-negative integer values (>= 0)") +test(6000.559, frollapply(FUN=mean, adaptive=TRUE, 1:3, list(c(2,NA,2))), error="'N' must be non-negative integer values (>= 0)") + # n==0, k==0, k[i]==0 test(6001.111, frollmean(1:3, 0), c(NaN,NaN,NaN), options=c("datatable.verbose"=TRUE), output="window width of size 0") test(6001.112, frollmean(1:3, 0, fill=99), c(NaN,NaN,NaN)) @@ -1021,7 +1033,8 @@ test(6001.125, frollmean(adaptive=TRUE, 1:3, c(2,0,2), algo="exact"), c(NA,NaN,2 test(6001.126, frollmean(adaptive=TRUE, 1:3, c(2,0,2), fill=99, algo="exact"), c(99,NaN,2.5)) test(6001.127, frollmean(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact"), c(NA,NaN,NA)) test(6001.128, frollmean(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE), c(NA,NaN,2)) -test(6001.129, frollmean(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,NaN,2)) +test(6001.129, frollmean(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE, partial=TRUE), c(1,NaN,2)) +test(6001.130, frollmean(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,NaN,2)) test(6001.181, frollapply(FUN=mean, 1:3, 0), c(NaN,NaN,NaN)) test(6001.182, frollapply(FUN=mean, 1:3, 0, fill=99), c(NaN,NaN,NaN)) test(6001.183, 
frollapply(FUN=mean, c(1:2,NA), 0), c(NaN,NaN,NaN)) @@ -1033,6 +1046,7 @@ test(6001.1913, frollapply(FUN=mean, adaptive=TRUE, list(1:3,2:4), list(c(2,0,2) test(6001.192, frollapply(FUN=mean, adaptive=TRUE, 1:3, c(2,0,2), fill=99), c(99,NaN,2.5)) test(6001.193, frollapply(FUN=mean, adaptive=TRUE, c(1:2,NA), c(2,0,2)), c(NA,NaN,NA)) test(6001.194, frollapply(FUN=mean, adaptive=TRUE, c(1:2,NA), c(2,0,2), na.rm=TRUE), c(NA,NaN,2)) +test(6001.195, frollapply(FUN=mean, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE, partial=TRUE), c(1,NaN,2)) test(6001.211, frollsum(1:3, 0), c(0,0,0), options=c("datatable.verbose"=TRUE), output="window width of size 0") test(6001.212, frollsum(1:3, 0, fill=99), c(0,0,0)) @@ -1049,7 +1063,8 @@ test(6001.225, frollsum(adaptive=TRUE, 1:3, c(2,0,2), algo="exact"), c(NA,0,5)) test(6001.226, frollsum(adaptive=TRUE, 1:3, c(2,0,2), fill=99, algo="exact"), c(99,0,5)) test(6001.227, frollsum(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact"), c(NA,0,NA)) test(6001.228, frollsum(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE), c(NA,0,2)) -test(6001.229, frollsum(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,0,2)) +test(6001.229, frollsum(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE, partial=TRUE), c(1,0,2)) +test(6001.230, frollsum(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,0,2)) test(6001.281, frollapply(FUN=sum, as.numeric(1:3), 0), c(0,0,0)) test(6001.282, frollapply(FUN=sum, as.numeric(1:3), 0, fill=99), c(0,0,0)) test(6001.283, frollapply(FUN=sum, c(1:2,NA_real_), 0), c(0,0,0)) @@ -1061,6 +1076,7 @@ test(6001.2913, frollapply(FUN=sum, adaptive=TRUE, list(as.numeric(1:3), as.nume test(6001.292, frollapply(FUN=sum, adaptive=TRUE, as.numeric(1:3), c(2,0,2), fill=99), c(99,0,5)) test(6001.293, frollapply(FUN=sum, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2)), c(NA,0,NA)) test(6001.294, frollapply(FUN=sum, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), 
na.rm=TRUE), c(NA,0,2)) +test(6001.295, frollapply(FUN=sum, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE, partial=TRUE), c(1,0,2)) test(6001.311, frollmax(1:3, 0), c(-Inf,-Inf,-Inf), options=c("datatable.verbose"=TRUE), output="window width of size 0") test(6001.312, frollmax(1:3, 0, fill=99), c(-Inf,-Inf,-Inf)) @@ -1077,18 +1093,20 @@ test(6001.325, frollmax(adaptive=TRUE, 1:3, c(2,0,2), algo="exact"), c(NA,-Inf,3 test(6001.326, frollmax(adaptive=TRUE, 1:3, c(2,0,2), fill=99, algo="exact"), c(99,-Inf,3)) test(6001.327, frollmax(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact"), c(NA,-Inf,NA)) test(6001.328, frollmax(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE), c(NA,-Inf,2)) -test(6001.329, frollmax(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,-Inf,2)) +test(6001.329, frollmax(adaptive=TRUE, c(1:2,NA), c(2,0,2), algo="exact", na.rm=TRUE, partial=TRUE), c(1,-Inf,2)) +test(6001.330, frollmax(adaptive=TRUE, c(1:2,NA), c(2,0,2), fill=99, algo="exact", na.rm=TRUE), c(99,-Inf,2)) test(6001.381, frollapply(FUN=max, 1:3, 0), c(-Inf,-Inf,-Inf)) test(6001.382, frollapply(FUN=max, 1:3, 0, fill=99), c(-Inf,-Inf,-Inf)) test(6001.383, frollapply(FUN=max, c(1:2,NA_real_), 0), c(-Inf,-Inf,-Inf)) test(6001.384, frollapply(FUN=max, c(1:2,NA_real_), 0, na.rm=TRUE), c(-Inf,-Inf,-Inf)) -test(6001.3910, frollapply(FUN = max, adaptive = TRUE, as.numeric(1:3), c(2,0,2)), c(NA, -Inf, 3)) +test(6001.3910, frollapply(FUN=max, adaptive=TRUE, as.numeric(1:3), c(2,0,2)), c(NA,-Inf,3)) test(6001.3911, frollapply(FUN=max, adaptive=TRUE, list(as.numeric(1:3), as.numeric(2:4)), c(2,0,2)), list(c(NA,-Inf,3), c(NA,-Inf,4))) test(6001.3912, frollapply(FUN=max, adaptive=TRUE, as.numeric(1:3), list(c(2,0,2), c(0,2,0))), list(c(NA,-Inf,3), c(-Inf,2,-Inf))) test(6001.3913, frollapply(FUN=max, adaptive=TRUE, list(as.numeric(1:3), as.numeric(2:4)), list(c(2,0,2), c(0,2,0))), list(c(NA,-Inf,3), c(-Inf,2,-Inf), c(NA,-Inf,4), c(-Inf,3,-Inf))) 
test(6001.392, frollapply(FUN=max, adaptive=TRUE, as.numeric(1:3), c(2,0,2), fill=99), c(99,-Inf,3)) test(6001.393, frollapply(FUN=max, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2)), c(NA,-Inf,NA)) test(6001.394, frollapply(FUN=max, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE), c(NA,-Inf,2)) +test(6001.395, frollapply(FUN=max, adaptive=TRUE, c(1:2,NA_real_), c(2,0,2), na.rm=TRUE, partial=TRUE), c(1,-Inf,2)) ## partial x = 1:6/2 @@ -1293,12 +1311,12 @@ ans = frollapply(FUN=mean, x, n, align="left") ans[(length(x)-n-1L):length(x)] = frollapply(FUN=mean, x[(length(x)-n-1L):length(x)], n, partial=TRUE, align="left") test(6010.123, ans, c(1,1.5,2,2.5,2.75,3)) ans = list(c(0.50,0.75,1.00,1.50,2.00,2.50), c(0.50,0.75,1.00,1.25,1.75,2.25)) -test(6010.131, frollapply(FUN=mean, 1:6/2, list(3L,4L), partial=TRUE), error="'N' must be integer, list is accepted for adaptive TRUE") +test(6010.131, frollapply(FUN=mean, 1:6/2, list(3L,4L), partial=TRUE), error="'N' must be an integer, list is accepted for adaptive TRUE") test(6010.132, frollapply(FUN=mean, 1:6/2, 3:4, partial=TRUE), ans) test(6010.143, frollapply(FUN=mean, 1:4, 2L, align="center", partial=TRUE), error="'partial' cannot be used together with align='center'") test(6010.144, frollapply(FUN=mean, list(1:4, 2:4), n, partial=TRUE), error="'partial' does not support variable length of columns in x") -test(6010.145, frollapply(FUN=mean, x, TRUE, partial=TRUE), error="'N' must be integer vector") -test(6010.146, frollapply(FUN=mean, x, list(TRUE), partial=TRUE), error="'N' must be integer, list is accepted for adaptive TRUE") +test(6010.145, frollapply(FUN=mean, x, TRUE, partial=TRUE), error="'N' must be an integer") +test(6010.146, frollapply(FUN=mean, x, list(TRUE), partial=TRUE), error="'N' must be an integer, list is accepted for adaptive TRUE") ## growable failed if length was set after copy: attempt to set index 1/1 in SET_STRING_ELT old = setDTthreads(1L) test(6010.150, frollapply(c("B","B","C"), 3, unique, 
simplify=FALSE, partial=TRUE), list("B", "B", c("B","C"))) @@ -1321,10 +1339,10 @@ options(datatable.verbose=FALSE) test(6010.203, frollapply(c(1,2,1,1,1,2,3,2), rep(3, 8), uniqueN, adaptive=TRUE), c(NA,NA,2L,2L,1L,2L,3L,2L)) test(6010.204, frollapply(c(1,2,1,1,NA,2,NA,2), rep(3, 8), anyNA, adaptive=TRUE), c(NA,NA,FALSE,FALSE,TRUE,TRUE,TRUE,TRUE)) test(6010.205, frollapply(c(2,2,2,3,4), c(1,3,3,2,3), uniqueN, adaptive=TRUE), c(1L,NA,1L,2L,3L)) ## window width bigger than location -test(6010.206, frollapply(1:5, c(NA,NA,3,2,3), sum, adaptive=TRUE), c(NA,NA,6L,7L,12L)) ## NAs in adaptive window are ok +#test(6010.206, frollapply(1:5, c(NA,NA,3,2,3), sum, adaptive=TRUE), c(NA,NA,6L,7L,12L)) ## NAs in adaptive window are ok ## this no longer true after frolladapt #### test coverage -test(6010.501, frollapply(1:3, "b", sum), error="'N' must be integer") +test(6010.501, frollapply(1:3, "b", sum), error="'N' must be an integer") test(6010.503, frollapply(1:3, integer(), sum), error="'N' must be non 0 length") test(6010.504, frollapply(1:3, 2L, sum, fill=1:2), list(1:2, 3L, 5L)) test(6010.505, frollapply(1:3, 2L, sum, fill=NA_integer_), c(NA,3L,5L)) @@ -1380,8 +1398,8 @@ test(6010.545, frollapply(1:2, 2, sum, simplify=NA), error="must be TRUE or FALS test(6010.561, frollapply(x=1:2, N=2, FUN=sum), c(NA,3L), warning="'x' is deprecated in frollapply, use 'X' instead") test(6010.562, frollapply(X=1:2, n=2, FUN=sum), c(NA,3L), warning="'n' is deprecated in frollapply, use 'N' instead") test(6010.563, frollapply(x=1:2, n=2, FUN=sum), c(NA,3L), warning=c("'x' is deprecated in frollapply, use 'X' instead","'n' is deprecated in frollapply, use 'N' instead")) -test(6010.564, frollapply(1:2, c("a","a"), length, adaptive=TRUE), error="n must be an integer vector or list of an integer vectors") -test(6010.565, frollapply(1:2, list(c("a","a")), length, adaptive=TRUE), error="n must be an integer vector or list of an integer vectors") +test(6010.564, frollapply(1:2, c("a","a"), length, 
adaptive=TRUE), error="'N' must be an integer vector or list of integer vectors") +test(6010.565, frollapply(1:2, list(c("a","a")), length, adaptive=TRUE), error="'N' must be an integer vector or list of integer vectors") test(6010.566, frollapply(1:2, 2, length, by.column=FALSE), error="frollapply by.column=FALSE requires 'X' argument to be") test(6010.567, frollapply(list(1:2, list(c("a","b"))), 2, length, by.column=FALSE), error="frollapply by.column=FALSE got list in 'X' but it is not valid one") test(6010.568, frollapply(list(data.frame(x=1:2), data.frame(x=I(list(1:2)))), 2, length, by.column=FALSE), error="not all columns of data.frames/data.tables are atomic") @@ -1578,6 +1596,133 @@ test(6010.9969, frollapply(FUN=sum, list(x1=c(1,2,3)), list(n1=c(2,2,2)), adapti test(6010.9991, frollapply(list(integer()), 0, function(x) 1), list(NULL)) test(6010.9992, frollapply(list(integer()), list(integer()), str, adaptive=TRUE), list(NULL)) +## frolladapt +test(6015.001, frolladapt(integer(), -1L), error="'n' must be positive integer values") +test(6015.002, frolladapt(integer(), 0L), error="'n' must be positive integer values") +test(6015.003, frolladapt(integer(), 1L), integer()) +test(6015.004, frolladapt(integer(), 2L), integer()) +test(6015.005, frolladapt(integer(), integer()), error="'n' must be non 0 length") +test(6015.006, frolladapt(integer(), NA_integer_), error="'n' must not have NAs") +test(6015.007, frolladapt(integer(), 0), error="'n' must be positive integer values") +test(6015.008, frolladapt(integer(), 1), integer()) +test(6015.011, frolladapt(0L, 0L), error="'n' must be positive integer values") +test(6015.012, frolladapt(0L, 0L, partial=TRUE), error="'n' must be positive integer values") +test(6015.013, frolladapt(0L, 1L), 1L) +test(6015.014, frolladapt(0L, 1L, partial=TRUE), 1L) +test(6015.015, frolladapt(0L, 2L), 2L) +test(6015.016, frolladapt(0L, 2L, partial=TRUE), 1L) +test(6015.017, frolladapt(0L, 0), error="'n' must be positive integer 
values") +test(6015.018, frolladapt(0L, 0, partial=TRUE), error="'n' must be positive integer values") +test(6015.019, frolladapt(0L, 1), 1L) +test(6015.020, frolladapt(0L, 1, partial=TRUE), 1L) +test(6015.021, frolladapt(c(1,3,5), 2), c(2L,1L,1L)) +test(6015.022, frolladapt(c(1,3,5), 2, partial=TRUE), c(1L,1L,1L)) +test(6015.023, frolladapt(c(2,4,6), 2), c(2L,1L,1L)) +test(6015.024, frolladapt(c(2,4,6), 2, partial=TRUE), c(1L,1L,1L)) +test(6015.025, frolladapt(c(1,3,5), 1), c(1L,1L,1L)) +test(6015.026, frolladapt(c(1,3,5), 1, partial=TRUE), c(1L,1L,1L)) +test(6015.027, frolladapt(c(1,3,4,5), 2), c(2L,1L,2L,2L)) +test(6015.028, frolladapt(c(1,3,4,5), 2, partial=TRUE), c(1L,1L,2L,2L)) +test(6015.029, frolladapt(c(-3,-2,-1,1,3), 2), c(2L,2L,2L,1L,1L)) +test(6015.030, frolladapt(c(-3,-2,-1,1,3), 2, partial=TRUE), c(1L,2L,2L,1L,1L)) +test(6015.031, frolladapt(c(-3,-2,-1,1,3), 3), c(3L,3L,3L,2L,2L)) +test(6015.032, frolladapt(c(-3,-2,-1,1,3), 3, partial=TRUE), c(1L,2L,3L,2L,2L)) +idx = c(1:4,6:7,10:14,16:17,23:24) +test(6015.041, frolladapt(idx, 5), c(5L, 5L, 5L, 5L, 4L, 4L, 3L, 3L, 3L, 4L, 5L, 4L, 4L, 1L, 2L)) +test(6015.042, frolladapt(idx, 5, partial=TRUE), c(1L, 2L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 4L, 5L, 4L, 4L, 1L, 2L)) +test(6015.051, frolladapt(list(c(1,3,5), c(2,4,6)), 2), error="'x' must be of a numeric type") +test(6015.052, frolladapt(list(c(1,3,5), c(2,4,6)), 2:3), error="'x' must be of a numeric type") +test(6015.053, frolladapt(c(1,3,5), 2:3), list(c(2L,1L,1L), c(3L,2L,2L))) +test(6015.054, frolladapt(c(1,3,5), 2:3, partial=TRUE), list(c(1L,1L,1L), c(1L,2L,2L))) +test(6015.061, frolladapt(c(1,3,5), 2, give.names=TRUE), c(2L,1L,1L)) +test(6015.062, frolladapt(c(1,3,5), 2:3, give.names=TRUE), list(n2=c(2L,1L,1L), n3=c(3L,2L,2L))) +test(6015.063, frolladapt(c(1,3,5), c(a=2, b=3)), list(c(2L,1L,1L), c(3L,2L,2L))) +test(6015.064, frolladapt(c(1,3,5), c(a=2, b=3), give.names=TRUE), list(a=c(2L,1L,1L), b=c(3L,2L,2L))) + +### verified against slider pkg + 
+#library(slider) +#library(data.table) +#set.seed(108) +#N = 64 +#n = 8 +#x = sample(N, N, TRUE) +#idx = sort(sample(N*64, N)) ## update this for each sparsity +#system.time(s <- slide_index_dbl(x, idx, mean, .before=n-1L, .complete=TRUE)) +#system.time(d <- frollmean(x, frolladapt(idx, n), adaptive=TRUE)) +#all.equal(d, s) +#cat("x = "); dput(x); cat("idx = "); dput(idx); cat("ans = "); dput(s) + +n = 8 +#### completely dense: sort(sample(N*1.0, N)) = 1:N +x = c(63L, 42L, 47L, 16L, 39L, 45L, 47L, 43L, 54L, 29L, 47L, 45L, 45L, 27L, 39L, 26L, 6L, 39L, 23L, 57L, 6L, 22L, 20L, 14L, 24L, 53L, 58L, 31L, 54L, 51L, 55L, 19L, 22L, 21L, 4L, 53L, 14L, 35L, 13L, 49L, 51L, 42L, 46L, 47L, 24L, 59L, 58L, 53L, 36L, 41L, 5L, 57L, 51L, 44L, 21L, 3L, 45L, 12L, 61L, 25L, 47L, 57L, 52L, 57L) +idx = 1:64 +ans = c(NA, NA, NA, NA, NA, NA, NA, 42.75, 41.625, 40, 40, 43.625, 44.375, 42.125, 41.125, 39, 33, 34.25, 31.25, 32.75, 27.875, 27.25, 24.875, 23.375, 25.625, 27.375, 31.75, 28.5, 34.5, 38.125, 42.5, 43.125, 42.875, 38.875, 32.125, 34.875, 29.875, 27.875, 22.625, 26.375, 30, 32.625, 37.875, 37.125, 38.375, 41.375, 47, 47.5, 45.625, 45.5, 40.375, 41.625, 45, 43.125, 38.5, 32.25, 33.375, 29.75, 36.75, 32.75, 32.25, 33.875, 37.75, 44.5) +test(6015.301, frollmean(x, frolladapt(idx, n), adaptive=TRUE), ans) +#### very dense: sort(sample(N*1.1, N)) +x = c(63L, 42L, 47L, 16L, 39L, 45L, 47L, 43L, 54L, 29L, 47L, 45L, 45L, 27L, 39L, 26L, 6L, 39L, 23L, 57L, 6L, 22L, 20L, 14L, 24L, 53L, 58L, 31L, 54L, 51L, 55L, 19L, 22L, 21L, 4L, 53L, 14L, 35L, 13L, 49L, 51L, 42L, 46L, 47L, 24L, 59L, 58L, 53L, 36L, 41L, 5L, 57L, 51L, 44L, 21L, 3L, 45L, 12L, 61L, 25L, 47L, 57L, 52L, 57L) +idx = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 22L, 23L, 24L, 25L, 26L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 63L, 64L, 65L, 66L, 68L, 69L, 70L) +ans = 
c(NA, NA, NA, NA, NA, NA, NA, 42.75, 41.625, 40, 40, 43.625, 44.375, 42.125, 41.125, 39, 33, 34.25, 31.25, 31.6666666666667, 26.1666666666667, 25.5, 27.8333333333333, 23.6666666666667, 23.8333333333333, 28, 28.1428571428571, 31.7142857142857, 36.2857142857143, 40.7142857142857, 46.5714285714286, 43.125, 42.875, 38.875, 32.125, 34.875, 29.875, 24, 23.1428571428571, 27, 31.2857142857143, 36.7142857142857, 35.7142857142857, 40.4285714285714, 38.375, 41.375, 47, 47.5, 45.625, 45.5, 40.375, 41.625, 45, 43.125, 38.5, 32.25, 33.375, 33.2857142857143, 33.8571428571429, 30.1428571428571, 30.5714285714286, 41.1666666666667, 42.3333333333333, 44.4285714285714) +test(6015.302, frollmean(x, frolladapt(idx, n), adaptive=TRUE), ans) +#### moderately dense: sort(sample(N*1.5, N)) +x = c(63L, 42L, 47L, 16L, 39L, 45L, 47L, 43L, 54L, 29L, 47L, 45L, 45L, 27L, 39L, 26L, 6L, 39L, 23L, 57L, 6L, 22L, 20L, 14L, 24L, 53L, 58L, 31L, 54L, 51L, 55L, 19L, 22L, 21L, 4L, 53L, 14L, 35L, 13L, 49L, 51L, 42L, 46L, 47L, 24L, 59L, 58L, 53L, 36L, 41L, 5L, 57L, 51L, 44L, 21L, 3L, 45L, 12L, 61L, 25L, 47L, 57L, 52L, 57L) +idx = c(2L, 3L, 4L, 6L, 7L, 10L, 12L, 13L, 14L, 15L, 17L, 18L, 19L, 20L, 21L, 24L, 25L, 26L, 27L, 29L, 30L, 32L, 33L, 34L, 35L, 36L, 37L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 48L, 54L, 55L, 56L, 57L, 59L, 60L, 62L, 63L, 65L, 66L, 67L, 68L, 69L, 71L, 73L, 76L, 77L, 79L, 80L, 84L, 85L, 86L, 87L, 90L, 91L, 92L, 94L, 95L, 96L) +ans = c(NA, NA, NA, NA, NA, 37.8, 36.75, 38, 45.6, 43.6, 44.1666666666667, 44.1666666666667, 44.2857142857143, 41.4285714285714, 40.8571428571429, 38.1666666666667, 31.3333333333333, 30.3333333333333, 26.6666666666667, 30.2, 26.1666666666667, 25.5, 27.8333333333333, 23.6666666666667, 23.8333333333333, 28, 28.1428571428571, 31.7142857142857, 36.2857142857143, 40.7142857142857, 46.5714285714286, 45.8571428571429, 41.4285714285714, 36.1428571428571, 28.6666666666667, 28.5, 23.6666666666667, 34, 28.75, 32.8, 35.8333333333333, 34, 39.3333333333333, 47, 43.1666666666667, 
44.8333333333333, 46, 47, 46.1666666666667, 45.1666666666667, 33.75, 34.75, 38.5, 39.6, 43.25, 29.75, 32.8, 25, 28.4, 27.8333333333333, 32.1666666666667, 40.4, 48.4, 49.8333333333333) +test(6015.303, frollmean(x, frolladapt(idx, n), adaptive=TRUE), ans) +#### moderately sparse: sort(sample(N*2, N)) +x = c(63L, 42L, 47L, 16L, 39L, 45L, 47L, 43L, 54L, 29L, 47L, 45L, 45L, 27L, 39L, 26L, 6L, 39L, 23L, 57L, 6L, 22L, 20L, 14L, 24L, 53L, 58L, 31L, 54L, 51L, 55L, 19L, 22L, 21L, 4L, 53L, 14L, 35L, 13L, 49L, 51L, 42L, 46L, 47L, 24L, 59L, 58L, 53L, 36L, 41L, 5L, 57L, 51L, 44L, 21L, 3L, 45L, 12L, 61L, 25L, 47L, 57L, 52L, 57L) +idx = c(2L, 3L, 4L, 10L, 12L, 13L, 14L, 15L, 17L, 19L, 20L, 21L, 25L, 26L, 27L, 29L, 30L, 32L, 34L, 35L, 37L, 42L, 44L, 45L, 48L, 54L, 55L, 56L, 57L, 60L, 61L, 63L, 67L, 69L, 71L, 76L, 81L, 82L, 85L, 86L, 87L, 90L, 91L, 92L, 97L, 99L, 101L, 103L, 104L, 105L, 107L, 109L, 112L, 113L, 114L, 115L, 116L, 119L, 120L, 121L, 122L, 124L, 125L, 126L) +ans = c(NA, NA, NA, 35, 27.5, 33.3333333333333, 36.75, 38, 40.6666666666667, 42.8333333333333, 44.1666666666667, 44.1666666666667, 41.5, 38.6, 40.6, 34.25, 28.6, 30.3333333333333, 26.6, 30.2, 26.2, 28.3333333333333, 16, 18.6666666666667, 20, 38.5, 45, 47.3333333333333, 49, 49.4, 50.3333333333333, 42, 36.75, 20.6666666666667, 15.6666666666667, 26, 33.5, 34, 20.6666666666667, 27.75, 32.4, 38.75, 40.2, 41.3333333333333, 39.75, 43.3333333333333, 47, 48.5, 46, 49.4, 38.6, 38.4, 38.5, 39.25, 35.6, 35.2, 36.8333333333333, 29.3333333333333, 31, 27.8333333333333, 32.1666666666667, 40.4, 42.3333333333333, 44.4285714285714) +test(6015.304, frollmean(x, frolladapt(idx, n), adaptive=TRUE), ans) +#### very sparse: sort(sample(N*8, N)) +x = c(63L, 42L, 47L, 16L, 39L, 45L, 47L, 43L, 54L, 29L, 47L, 45L, 45L, 27L, 39L, 26L, 6L, 39L, 23L, 57L, 6L, 22L, 20L, 14L, 24L, 53L, 58L, 31L, 54L, 51L, 55L, 19L, 22L, 21L, 4L, 53L, 14L, 35L, 13L, 49L, 51L, 42L, 46L, 47L, 24L, 59L, 58L, 53L, 36L, 41L, 5L, 57L, 51L, 44L, 21L, 3L, 45L, 12L, 61L, 25L, 
47L, 57L, 52L, 57L) +idx = c(3L, 10L, 42L, 57L, 60L, 97L, 116L, 118L, 130L, 141L, 143L, 154L, 158L, 160L, 172L, 183L, 184L, 185L, 191L, 210L, 214L, 218L, 227L, 229L, 239L, 285L, 293L, 298L, 300L, 301L, 304L, 310L, 325L, 327L, 337L, 348L, 353L, 360L, 362L, 364L, 366L, 376L, 378L, 379L, 383L, 396L, 399L, 401L, 416L, 419L, 429L, 438L, 441L, 447L, 459L, 460L, 474L, 481L, 484L, 489L, 504L, 508L, 509L, 510L) +ans = c(NA, 52.5, 47, 16, 27.5, 45, 47, 45, 54, 29, 38, 45, 45, 39, 39, 26, 16, 23.6666666666667, 22.6666666666667, 57, 31.5, 14, 20, 17, 24, 53, 58, 44.5, 47.6666666666667, 45.3333333333333, 47.75, 37, 22, 21.5, 4, 53, 33.5, 24.5, 24, 32.3333333333333, 37, 42, 44, 45, 39.75, 59, 58.5, 56.6666666666667, 36, 38.5, 5, 57, 54, 47.5, 21, 12, 45, 28.5, 36.5, 43, 47, 52, 52, 53.25) +test(6015.305, frollmean(x, frolladapt(idx, n), adaptive=TRUE), ans) +#### "completely" sparse: sort(sample(N*64, N)) +x = c(63L, 42L, 47L, 16L, 39L, 45L, 47L, 43L, 54L, 29L, 47L, 45L, 45L, 27L, 39L, 26L, 6L, 39L, 23L, 57L, 6L, 22L, 20L, 14L, 24L, 53L, 58L, 31L, 54L, 51L, 55L, 19L, 22L, 21L, 4L, 53L, 14L, 35L, 13L, 49L, 51L, 42L, 46L, 47L, 24L, 59L, 58L, 53L, 36L, 41L, 5L, 57L, 51L, 44L, 21L, 3L, 45L, 12L, 61L, 25L, 47L, 57L, 52L, 57L) +idx = c(3L, 118L, 130L, 301L, 401L, 670L, 684L, 730L, 739L, 741L, 751L, 874L, 878L, 950L, 959L, 1178L, 1309L, 1361L, 1372L, 1453L, 1462L, 1633L, 1652L, 1666L, 1746L, 1840L, 1977L, 2047L, 2105L, 2232L, 2262L, 2287L, 2346L, 2401L, 2431L, 2447L, 2555L, 2563L, 2570L, 2620L, 2745L, 2853L, 2887L, 2938L, 2939L, 2956L, 3020L, 3049L, 3075L, 3114L, 3215L, 3226L, 3232L, 3255L, 3382L, 3397L, 3436L, 3488L, 3725L, 3884L, 3937L, 3944L, 3960L, 4041L) +ans = c(NA, 42, 47, 16, 39, 45, 47, 43, 54, 41.5, 47, 45, 45, 27, 39, 26, 6, 39, 23, 57, 6, 22, 20, 14, 24, 53, 58, 31, 54, 51, 55, 19, 22, 21, 4, 53, 14, 35, 24, 49, 51, 42, 46, 47, 35.5, 59, 58, 53, 36, 41, 5, 57, 54, 44, 21, 3, 45, 12, 61, 25, 47, 52, 52, 57) +test(6015.306, frollmean(x, frolladapt(idx, n), adaptive=TRUE), 
ans) + +#### Time classes + +x = as.Date("2022-10-24") + c(0:1,4,6,7) +test(6015.401, frolladapt(x, 2L), c(2L,2L,1L,1L,2L)) +test(6015.402, frolladapt(x, 2L, partial=TRUE), c(1L,2L,1L,1L,2L)) + +x = as.IDate("2022-10-24") + c(0:1,4,6,7) +test(6015.501, frolladapt(x, 2L), c(2L,2L,1L,1L,2L)) +test(6015.502, frolladapt(x, 2L, partial=TRUE), c(1L,2L,1L,1L,2L)) + +x = as.ITime(as.ITime("19:33:30") + c(0:1,4,6,7)*60) +test(6015.601, frolladapt(x, 2*60), c(120L,120L,1L,1L,2L)) ## 2 minutes +test(6015.602, frolladapt(x, 2*60, partial=TRUE), c(1L,2L,1L,1L,2L)) + +x = as.POSIXct("2022-10-24 19:34:30") + c(0:1,4,6,7)*60 +test(6015.701, frolladapt(x, 2*60), c(120L,120L,1L,1L,2L)) ## 2 minutes +test(6015.702, frolladapt(x, 2*60, partial=TRUE), c(1L,2L,1L,1L,2L)) + +x = as.POSIXct("2022-10-24 19:34:30.005") + c(0:1,4,6,7)*60 +test(6015.801, frolladapt(x, 2*60), c(120L,120L,1L,1L,2L)) +test(6015.802, frolladapt(x, 2*60, partial=TRUE), c(1L,2L,1L,1L,2L)) +x = c(as.POSIXct("2022-10-24 19:34:00.900"), as.POSIXct("2022-10-24 19:36:00.005"), as.POSIXct("2022-10-24 19:38:00.006")) +test(6015.803, frolladapt(x, 2*60), c(120L,1L,1L)) ## sub seconds truncation +test(6015.804, frolladapt(x, 2*60, partial=TRUE), c(1L,1L,1L)) +test(6015.805, frolladapt(as.POSIXct(round(x)), 2*60), c(120L,2L,1L)) +test(6015.806, frolladapt(as.POSIXct(round(x)), 2*60, partial=TRUE), c(1L,2L,1L)) + +test(6015.901, frolladapt(TRUE, 1L), error="'x' must be of a numeric type") +test(6015.902, frolladapt(1L, FALSE), error="'n' must be an integer") +test(6015.903, frolladapt(1L, list(1L)), error="'n' must be an integer") +test(6015.904, frolladapt(1L, 1.1), error="'n' must be an integer") +test(6015.905, frolladapt(1L, 1L, partial=NA), error="must be TRUE or FALSE") +test(6015.906, frolladapt(1L, 1L, give.names=NA), error="must be TRUE or FALSE") +test(6015.907, frolladapt(c(1L,3L,2L), 2L), error="be sorted, have no duplicates, have no NAs") +test(6015.908, frolladapt(c(1L,2L,2L), 2L), error="be sorted, have no 
duplicates, have no NAs") +test(6015.909, frolladapt(c(1L,2L,NA_integer_), 2L), error="be sorted, have no duplicates, have no NAs") ## loop that checks for sorted will detect NAs as well, except for first element +test(6015.910, frolladapt(c(NA_integer_,1L,2L), 2L), error="be sorted, have no duplicates, have no NAs") ## first NA is detected by extra check + ## batch validation set.seed(108) makeNA = function(x, ratio=0.1, nf=FALSE) { diff --git a/man/froll.Rd b/man/froll.Rd index 28eab8aefb..1315d5d572 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -77,7 +77,7 @@ For a non \emph{vectorized} input (\code{x} is not a list, and \code{n} specify single rolling window) a \code{vector} is returned, for convenience. Thus, rolling functions can be used conveniently within \code{data.table} syntax. For a \emph{vectorized} input a list is returned. } \note{ - Be aware that rolling functions operate on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, then one has to ensure that there are no gaps in input or use adaptive rolling function to handle gaps by specifying expected window sizes. For details see \href{https://github.com/Rdatatable/data.table/issues/3241}{issue #3241}. + Be aware that rolling functions operate on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, then one has to ensure that there are no gaps in the input, or use adaptive rolling function to handle gaps, for which we provide helper function \code{\link{frolladapt}} to generate adaptive window size. } \section{\code{has.nf} argument}{ \code{has.nf} can be used to speed up processing in cases when it is known if \code{x} contains (or not) non-finite values (\code{NA}, \code{NaN}, \code{Inf}, \code{-Inf}). 
@@ -214,7 +214,7 @@ errs = sapply(lapply(anserr, abs), sum, na.rm=TRUE) sapply(errs, format, scientific=FALSE) # roundoff } \seealso{ - \code{\link{frollapply}}, \code{\link{shift}}, \code{\link{data.table}}, \code{\link{setDTthreads}} + \code{\link{frollapply}}, \code{\link{frolladapt}}, \code{\link{shift}}, \code{\link{data.table}}, \code{\link{setDTthreads}} } \references{ \href{https://en.wikipedia.org/wiki/Round-off_error}{Round-off error} diff --git a/man/frollapply.Rd b/man/frollapply.Rd index def6473a5d..0f186e685b 100644 --- a/man/frollapply.Rd +++ b/man/frollapply.Rd @@ -34,7 +34,7 @@ } } \note{ - Be aware that rolling functions operates on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, then one has to ensure that there are no gaps in input or use adaptive rolling function to handle gaps by specifying expected window sizes. For details see \href{https://github.com/Rdatatable/data.table/issues/3241}{issue #3241}. + Be aware that rolling functions operate on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, then one has to ensure that there are no gaps in the input, or use adaptive rolling function to handle gaps, for which we provide helper function \code{\link{frolladapt}} to generate adaptive window size. } \section{\code{by.column} argument}{ Setting \code{by.column} to \code{FALSE} allows to apply function on multiple variables rather than a single vector. Then \code{X} expects to be data.table, data.table or a list of equal length vectors, and window size provided in \code{N} refers to number of rows (or length of a vectors in a list). See examples for use cases. Error \emph{"incorrect number of dimensions"} can be commonly observed when \code{by.column} was not set to \code{FALSE} when \code{FUN} expects its input to be a data.frame/data.table. 
@@ -267,6 +267,6 @@ coef.fill = c("(Intercept)"=NA_real_, "v1"=NA_real_) frollapply(x, 4, f, by.column=FALSE, fill=coef.fill) } \seealso{ - \code{\link{froll}}, \code{\link{shift}}, \code{\link{data.table}}, \code{\link{setDTthreads}} + \code{\link{froll}}, \code{\link{frolladapt}}, \code{\link{shift}}, \code{\link{data.table}}, \code{\link{setDTthreads}} } \keyword{ data } diff --git a/src/data.table.h b/src/data.table.h index badcbac1b2..664279b574 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -247,6 +247,7 @@ void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi // frollR.c SEXP frollfunR(SEXP fun, SEXP xobj, SEXP kobj, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasnf, SEXP adaptive); +SEXP frolladapt(SEXP xobj, SEXP kobj, SEXP partial); // frollapply.c SEXP memcpyVector(SEXP dest, SEXP src, SEXP offset, SEXP size); diff --git a/src/frollR.c b/src/frollR.c index 2566f7fc7b..443d089222 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -15,7 +15,7 @@ SEXP coerceX(SEXP obj) { for (R_len_t i=0; i= 0) i++; if (i != nk) - error(_("n must be non-negative integer values (>= 0)")); + error(_("'n' must be non-negative integer values (>= 0)")); } else { if (isVectorAtomic(obj)) { ans = PROTECT(allocVector(VECSXP, 1)); protecti++; @@ -50,7 +50,7 @@ SEXP coerceK(SEXP obj, bool adaptive) { } else if (isReal(obj)) { SET_VECTOR_ELT(ans, 0, coerceVector(obj, INTSXP)); } else { - error(_("n must be an integer vector or list of an integer vectors")); + error(_("'n' must be an integer vector or list of integer vectors")); } } else { int nk = length(obj); @@ -61,7 +61,7 @@ SEXP coerceK(SEXP obj, bool adaptive) { } else if (isReal(VECTOR_ELT(obj, i))) { SET_VECTOR_ELT(ans, i, coerceVector(VECTOR_ELT(obj, i), INTSXP)); } else { - error(_("n must be an integer vector or list of an integer vectors")); + error(_("'n' must be an integer vector or list of integer vectors")); } } } @@ -71,7 +71,7 @@ SEXP coerceK(SEXP obj, bool adaptive) { R_len_t ii 
= 0; while (ii < nx && iik[ii] >= 0) ii++; if (ii != nx) - error(_("n must be non-negative integer values (>= 0)")); + error(_("'n' must be non-negative integer values (>= 0)")); } } UNPROTECT(protecti); @@ -208,3 +208,60 @@ SEXP frollfunR(SEXP fun, SEXP xobj, SEXP kobj, SEXP fill, SEXP algo, SEXP align, UNPROTECT(protecti); return isVectorAtomic(xobj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans; } + +// helper called from R to generate adaptive window for irregularly spaced time series +SEXP frolladapt(SEXP xobj, SEXP kobj, SEXP partial) { + + bool p = LOGICAL(partial)[0]; + int n = INTEGER(kobj)[0]; + if (n < 1L) + error(_("'n' must be positive integer values (>= 1)")); + int *x = INTEGER(xobj); + int64_t len = XLENGTH(xobj); // can be 0 + + if (len && x[0] == NA_INTEGER) + error(_("index provided to 'x' must: be sorted, have no duplicates, have no NAs")); // error text for consistency to the one below + for (int64_t i=1; i n) { + error(_("internal error: an > n, should not increment i in the first place")); // # nocov + } else if (an == n) { // an is same size as n, so we either have no gaps or will need to shrink an by j++ + if (lhs == rhs+n-1) { // no gaps - or a k gaps and a k dups? 
+ ians[i] = n; // could skip if pre-fill + i++; + j++; + } else if (lhs > rhs+n-1) { // need to shrink an + j++; + } else { + error(_("internal error: not sorted, should be been detected by now")); // # nocov + } + } else if (an < n) { // there are some gaps + if (lhs == rhs+n-1) { // gap and rhs matches the bound, so increment i and j + ians[i] = an; + i++; + j++; + } else if (lhs > rhs+n-1L) { // need to shrink an + ians[i] = an; // likely to be overwritten by smaller an if shrinking continues because i is not incremented in this iteration + j++; + } else if (lhs < rhs+n-1L) { + ians[i] = !p && lhs Date: Sun, 7 Sep 2025 22:22:07 +0200 Subject: [PATCH 02/12] extend news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 35417c6b5f..21b5656ce5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -223,7 +223,7 @@ #[1] TRUE ``` -18. New `frolladapt` helper function has been added to aid in preparation of adaptive length of rolling window width when dealing with _irregularly spaced ordered data_. This lets the user to apply a rolling function over a period without having to deal with gaps in a data where some periods might be missing. +18. New `frolladapt` helper function has been added to aid in preparation of adaptive length of rolling window width when dealing with _irregularly spaced ordered data_. This lets the user to apply a rolling function over a period without having to deal with gaps in a data where some periods might be missing, [#3241](https://github.com/Rdatatable/data.table/issues/3241). Thanks to @jangorecki for implementation. 
```r idx = as.Date("2022-10-23") + c(0,1,4,5,6,7,9,10,14) dt = data.table(index=idx, value=seq_along(idx)) From f1688459128086de32acba5281b821fbcf4f2be5 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 7 Sep 2025 22:35:36 +0200 Subject: [PATCH 03/12] mark experimental --- man/frolladapt.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/frolladapt.Rd b/man/frolladapt.Rd index 3a9f76a1ba..4c2d1bd948 100644 --- a/man/frolladapt.Rd +++ b/man/frolladapt.Rd @@ -2,7 +2,7 @@ \alias{frolladapt} \title{Adapt rolling window to irregularly spaced time series} \description{ - Helper function to generate adaptive window size based on the irregularly spaced time series index. Generated adaptive window can be then used in rolling functions. See \code{\link{froll}} and \code{\link{frollapply}} for details. + Helper function to generate adaptive window size based on the irregularly spaced time series index. Experimental. Generated adaptive window can then be used in rolling functions. See \code{\link{froll}} and \code{\link{frollapply}} for details. 
} \usage{ frolladapt(x, n, align="right", partial=FALSE, give.names=FALSE) From faae74302c04b1d435c21af97654587d16f54ff4 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 7 Sep 2025 22:55:05 +0200 Subject: [PATCH 04/12] codecov --- inst/tests/froll.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 9ba10ed665..4de9001f1a 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -1597,6 +1597,7 @@ test(6010.9991, frollapply(list(integer()), 0, function(x) 1), list(NULL)) test(6010.9992, frollapply(list(integer()), list(integer()), str, adaptive=TRUE), list(NULL)) ## frolladapt +test(6015.001, frolladapt(1:3, 2, align="center"), error="'align' other than 'right' has not yet been implemented") test(6015.001, frolladapt(integer(), -1L), error="'n' must be positive integer values") test(6015.002, frolladapt(integer(), 0L), error="'n' must be positive integer values") test(6015.003, frolladapt(integer(), 1L), integer()) From 59c174a7d440afb3ac633f470f5789cf4ae369ff Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 7 Sep 2025 23:00:44 +0200 Subject: [PATCH 05/12] also test for negative n --- R/frollapply.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/frollapply.R b/R/frollapply.R index 37dc9ca1db..3863e99c5c 100644 --- a/R/frollapply.R +++ b/R/frollapply.R @@ -61,6 +61,7 @@ all_data.frame = function(x) all(vapply_1b(x, is.data.frame, use.names=FALSE)) all_list = function(x) all(vapply_1b(x, is.list, use.names=FALSE)) equal.lengths = function(x) length(unique(lengths(x))) <= 1L equal.nrows = function(x) length(unique(vapply(x, nrow, 0L))) <= 1L +anyNAneg = function(x) anyNA(x) || any(x < 0L) frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","left","center"), adaptive=FALSE, partial=FALSE, give.names=FALSE, simplify=TRUE, x, n) { if (!missing(x)) { @@ -147,7 +148,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, 
align=c("right"," nnam = names(N) ## used for give.names if (!is.integer(N)) N = as.integer(N) - if (anyNA(N)) + if (anyNAneg(N)) stopf("'N' must be non-negative integer values (>= 0)") nn = length(N) ## top level loop for vectorized n } else { @@ -158,7 +159,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," stopf("length of integer vector(s) provided as list to 'N' argument must be equal to number of observations provided in 'X'") if (!is.integer(N)) N = as.integer(N) - if (anyNA(N)) + if (anyNAneg(N)) stopf("'N' must be non-negative integer values (>= 0)") nn = 1L N = list(N) @@ -172,7 +173,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," stopf("'N' must be an integer vector or list of integer vectors") if (!all(vapply_1b(N, is.integer, use.names=FALSE))) N = lapply(N, as.integer) - if (any(vapply_1b(N, anyNA, use.names=FALSE))) + if (any(vapply_1b(N, anyNAneg, use.names=FALSE))) stopf("'N' must be non-negative integer values (>= 0)") nn = length(N) nnam = names(N) From 656bbba0480db575f0a7b3b9e26428223c5d1d8b Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 7 Sep 2025 23:04:47 +0200 Subject: [PATCH 06/12] test number --- inst/tests/froll.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 4de9001f1a..4b3cf509aa 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -1597,7 +1597,7 @@ test(6010.9991, frollapply(list(integer()), 0, function(x) 1), list(NULL)) test(6010.9992, frollapply(list(integer()), list(integer()), str, adaptive=TRUE), list(NULL)) ## frolladapt -test(6015.001, frolladapt(1:3, 2, align="center"), error="'align' other than 'right' has not yet been implemented") +test(6015.000, frolladapt(1:3, 2, align="center"), error="'align' other than 'right' has not yet been implemented") test(6015.001, frolladapt(integer(), -1L), error="'n' must be positive integer values") test(6015.002, 
frolladapt(integer(), 0L), error="'n' must be positive integer values") test(6015.003, frolladapt(integer(), 1L), integer()) From a9698435985f4f7ef9d012ae4cdb7bf46f3bcc69 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 7 Sep 2025 22:43:21 -0700 Subject: [PATCH 07/12] trailing ws --- src/frollR.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frollR.c b/src/frollR.c index 443d089222..c8720f2e2c 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -264,4 +264,4 @@ SEXP frolladapt(SEXP xobj, SEXP kobj, SEXP partial) { } UNPROTECT(1); return ans; -} \ No newline at end of file +} From c284c4cc1f21377f5e315181e223a580be8872f7 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 8 Sep 2025 08:14:35 +0200 Subject: [PATCH 08/12] unit tests --- inst/tests/froll.Rraw | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 4b3cf509aa..175be9ac7a 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -1005,7 +1005,7 @@ test(6000.502, frollmax(c(5,NaN,1), 1L), c(5,NaN,1)) test(6000.503, frollmax(c(5,1,1,NaN,1,1,1), 2L), c(NA,5,1,NaN,NaN,1,1)) test(6000.504, frollmax(c(5,1,NA,NaN,1,1,1), 2L), c(NA,5,NA,NA,NaN,1,1)) -# n==NA +# n==NA, n<0 test(6000.550, frollmean(1:3, NA), error="'n' must be an integer") test(6000.551, frollmean(1:3, NA_integer_), error="'n' must be non-negative integer values (>= 0)") test(6000.552, frollmean(1:3, NA, algo="exact"), error="'n' must be an integer") @@ -1016,6 +1016,11 @@ test(6000.556, frollapply(FUN=mean, 1:3, NA), error="'N' must be an integer") test(6000.557, frollapply(FUN=mean, 1:3, NA_integer_), error="'N' must be non-negative integer values (>= 0)") test(6000.558, frollapply(FUN=mean, adaptive=TRUE, 1:3, c(2,NA,2)), error="'N' must be non-negative integer values (>= 0)") test(6000.559, frollapply(FUN=mean, adaptive=TRUE, 1:3, list(c(2,NA,2))), error="'N' must be non-negative integer values (>= 0)") +test(6000.550, frollmean(1:3, -1), 
error="'n' must be non-negative integer values (>= 0)") +test(6000.552, frollmean(1:3, -1, algo="exact"), error="'n' must be non-negative integer values (>= 0)") +test(6000.560, frollapply(FUN=mean, 1:3, -1), error="'N' must be non-negative integer values (>= 0)") +test(6000.561, frollapply(FUN=mean, 1:3, c(0,-1,1), adaptive=TRUE), error="'N' must be non-negative integer values (>= 0)") +test(6000.562, frollapply(FUN=mean, 1:3, list(c(0,-1,1)), adaptive=TRUE), error="'N' must be non-negative integer values (>= 0)") # n==0, k==0, k[i]==0 test(6001.111, frollmean(1:3, 0), c(NaN,NaN,NaN), options=c("datatable.verbose"=TRUE), output="window width of size 0") From ff55a0bebaef9833e37a0a9cd798c0811b3e5746 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 8 Sep 2025 08:15:05 +0200 Subject: [PATCH 09/12] unit tests, after save file --- inst/tests/froll.Rraw | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 175be9ac7a..f9c8e1eb85 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -1016,11 +1016,11 @@ test(6000.556, frollapply(FUN=mean, 1:3, NA), error="'N' must be an integer") test(6000.557, frollapply(FUN=mean, 1:3, NA_integer_), error="'N' must be non-negative integer values (>= 0)") test(6000.558, frollapply(FUN=mean, adaptive=TRUE, 1:3, c(2,NA,2)), error="'N' must be non-negative integer values (>= 0)") test(6000.559, frollapply(FUN=mean, adaptive=TRUE, 1:3, list(c(2,NA,2))), error="'N' must be non-negative integer values (>= 0)") -test(6000.550, frollmean(1:3, -1), error="'n' must be non-negative integer values (>= 0)") -test(6000.552, frollmean(1:3, -1, algo="exact"), error="'n' must be non-negative integer values (>= 0)") -test(6000.560, frollapply(FUN=mean, 1:3, -1), error="'N' must be non-negative integer values (>= 0)") -test(6000.561, frollapply(FUN=mean, 1:3, c(0,-1,1), adaptive=TRUE), error="'N' must be non-negative integer values (>= 0)") -test(6000.562, 
frollapply(FUN=mean, 1:3, list(c(0,-1,1)), adaptive=TRUE), error="'N' must be non-negative integer values (>= 0)") +test(6000.560, frollmean(1:3, -1), error="'n' must be non-negative integer values (>= 0)") +test(6000.561, frollmean(1:3, -1, algo="exact"), error="'n' must be non-negative integer values (>= 0)") +test(6000.562, frollapply(FUN=mean, 1:3, -1), error="'N' must be non-negative integer values (>= 0)") +test(6000.563, frollapply(FUN=mean, 1:3, c(0,-1,1), adaptive=TRUE), error="'N' must be non-negative integer values (>= 0)") +test(6000.564, frollapply(FUN=mean, 1:3, list(c(0,-1,1)), adaptive=TRUE), error="'N' must be non-negative integer values (>= 0)") # n==0, k==0, k[i]==0 test(6001.111, frollmean(1:3, 0), c(NaN,NaN,NaN), options=c("datatable.verbose"=TRUE), output="window width of size 0") From 85ec622faa577a1b7349746fb9ce1c78c7208c78 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 8 Sep 2025 08:21:50 +0200 Subject: [PATCH 10/12] Apply suggestions from code review Co-authored-by: Michael Chirico --- R/froll.R | 4 ++-- src/frollR.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/froll.R b/R/froll.R index 1ed68d28dc..a6d64fcdfa 100644 --- a/R/froll.R +++ b/R/froll.R @@ -135,9 +135,9 @@ frolladapt = function(x, n, align="right", partial=FALSE, give.names=FALSE) { if (!identical(align, "right")) stopf("'align' other than 'right' has not yet been implemented") if (!isTRUEorFALSE(partial)) - stopf("'partial' must be TRUE or FALSE") + stopf("'%s' must be TRUE or FALSE", "partial") if (!isTRUEorFALSE(give.names)) - stopf("'give.names' must be TRUE or FALSE") + stopf("'%s' must be TRUE or FALSE", "give.names") if (length(n) == 1L) { ans = .Call(Cfrolladapt, x, n, partial) diff --git a/src/frollR.c b/src/frollR.c index c8720f2e2c..e97cb3e705 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -216,7 +216,7 @@ SEXP frolladapt(SEXP xobj, SEXP kobj, SEXP partial) { int n = INTEGER(kobj)[0]; if (n < 1L) error(_("'n' must be positive integer 
values (>= 1)")); - int *x = INTEGER(xobj); + int *x = INTEGER_RO(xobj); int64_t len = XLENGTH(xobj); // can be 0 if (len && x[0] == NA_INTEGER) From 50061db5594f649c933613d0e339eac8c882a878 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 8 Sep 2025 08:28:50 +0200 Subject: [PATCH 11/12] frolladapt nicer news example --- NEWS.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/NEWS.md b/NEWS.md index 21b5656ce5..f32bbc2791 100644 --- a/NEWS.md +++ b/NEWS.md @@ -225,20 +225,20 @@ 18. New `frolladapt` helper function has been added to aid in preparation of adaptive length of rolling window width when dealing with _irregularly spaced ordered data_. This lets the user to apply a rolling function over a period without having to deal with gaps in a data where some periods might be missing, [#3241](https://github.com/Rdatatable/data.table/issues/3241). Thanks to @jangorecki for implementation. ```r -idx = as.Date("2022-10-23") + c(0,1,4,5,6,7,9,10,14) +idx = as.Date("2025-09-08") + c(0,1,4,5,6,7,9,10,14) dt = data.table(index=idx, value=seq_along(idx)) dt # index value # -#1: 2022-10-23 1 -#2: 2022-10-24 2 -#3: 2022-10-27 3 -#4: 2022-10-28 4 -#5: 2022-10-29 5 -#6: 2022-10-30 6 -#7: 2022-11-01 7 -#8: 2022-11-02 8 -#9: 2022-11-06 9 +#1: 2025-09-08 1 +#2: 2025-09-09 2 +#3: 2025-09-12 3 +#4: 2025-09-13 4 +#5: 2025-09-14 5 +#6: 2025-09-15 6 +#7: 2025-09-17 7 +#8: 2025-09-18 8 +#9: 2025-09-22 9 dt[, c("rollmean3","rollmean3days") := list( frollmean(value, 3), frollmean(value, frolladapt(index, 3), adaptive=TRUE) @@ -246,15 +246,15 @@ dt[, c("rollmean3","rollmean3days") := list( dt # index value rollmean3 rollmean3days # -#1: 2022-10-23 1 NA NA -#2: 2022-10-24 2 NA NA -#3: 2022-10-27 3 2 3.0 -#4: 2022-10-28 4 3 3.5 -#5: 2022-10-29 5 4 4.0 -#6: 2022-10-30 6 5 5.0 -#7: 2022-11-01 7 6 6.5 -#8: 2022-11-02 8 7 7.5 -#9: 2022-11-06 9 8 9.0 +#1: 2025-09-08 1 NA NA +#2: 2025-09-09 2 NA NA +#3: 2025-09-12 3 2 3.0 +#4: 
2025-09-13 4 3 3.5 +#5: 2025-09-14 5 4 4.0 +#6: 2025-09-15 6 5 5.0 +#7: 2025-09-17 7 6 6.5 +#8: 2025-09-18 8 7 7.5 +#9: 2025-09-22 9 8 9.0 ``` ### BUG FIXES From 7268f40f65edcae06262d6c7c3c18b0c7805e5eb Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 9 Sep 2025 10:14:48 +0200 Subject: [PATCH 12/12] fix missed conflict in merge --- NEWS.md | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/NEWS.md b/NEWS.md index 997cd47f83..e7f0a839f6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -212,41 +212,6 @@ #[1] TRUE ``` -<<<<<<< HEAD -18. New `frolladapt` helper function has been added to aid in preparation of adaptive length of rolling window width when dealing with _irregularly spaced ordered data_. This lets the user to apply a rolling function over a period without having to deal with gaps in a data where some periods might be missing, [#3241](https://github.com/Rdatatable/data.table/issues/3241). Thanks to @jangorecki for implementation. -```r -idx = as.Date("2025-09-08") + c(0,1,4,5,6,7,9,10,14) -dt = data.table(index=idx, value=seq_along(idx)) -dt -# index value -# -#1: 2025-09-08 1 -#2: 2025-09-09 2 -#3: 2025-09-12 3 -#4: 2025-09-13 4 -#5: 2025-09-14 5 -#6: 2025-09-15 6 -#7: 2025-09-17 7 -#8: 2025-09-18 8 -#9: 2025-09-22 9 -dt[, c("rollmean3","rollmean3days") := list( - frollmean(value, 3), - frollmean(value, frolladapt(index, 3), adaptive=TRUE) - )] -dt -# index value rollmean3 rollmean3days -# -#1: 2025-09-08 1 NA NA -#2: 2025-09-09 2 NA NA -#3: 2025-09-12 3 2 3.0 -#4: 2025-09-13 4 3 3.5 -#5: 2025-09-14 5 4 4.0 -#6: 2025-09-15 6 5 5.0 -#7: 2025-09-17 7 6 6.5 -#8: 2025-09-18 8 7 7.5 -#9: 2025-09-22 9 8 9.0 -``` -======= 18. New helper `frolladapt` to facilitate applying rolling functions over windows of fixed calendar-time width in irregularly-spaced data sets, thereby bypassing the need to "augment" such data with placeholder rows, [#3241](https://github.com/Rdatatable/data.table/issues/3241). 
Thanks to @jangorecki for implementation. ```r idx = as.Date("2025-09-05") + c(0,4,7,8,9,10,12,13,17) @@ -280,7 +245,6 @@ dt #8: 2025-09-18 8 7 7.5 #9: 2025-09-22 9 8 9.0 ``` ->>>>>>> master ### BUG FIXES