diff --git a/NAMESPACE b/NAMESPACE index c22782440a..dec4beee18 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ S3method(cube, data.table) S3method(rollup, data.table) export(frollmean) export(frollsum) +export(frollmax) export(frollapply) export(nafill) export(setnafill) diff --git a/NEWS.md b/NEWS.md index 4f4a2f417a..f03bdabe78 100644 --- a/NEWS.md +++ b/NEWS.md @@ -296,6 +296,41 @@ 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. +42. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been added: +- new function `frollmax`, applies `max` over a rolling window. +- support for `align="left"` for adaptive rolling function. +- support for `adaptive=TRUE` in `frollapply`. +- better support for non-double data types in `frollapply`. +- better support for `Inf` and `-Inf` support in `algo="fast"` implementation. +- `partial` argument to trim window width to available observations rather than returning `NA` whenever window is not complete. + +For a comprehensive description about all available features see `?froll` manual. + +Adaptive `frollmax` has observed to be up to 50 times faster than second fastest solution (data.table self-join + `max` + `by=.EACHI`). +```r +set.seed(108) +setDTthreads(8) +x = data.table( + value = cumsum(rnorm(1e6, 0.1)), + end_window = 1:1e6 + sample(50:500, 1e6, TRUE), + row = 1:1e6 +)[, "end_window" := pmin(end_window, .N) + ][, "len_window" := end_window-row+1L] + +baser = function(x) x[, mapply(function(from, to) max(value[from:to]), row, end_window)] +sj = function(x) x[x, max(value), on=.(row >= row, row <= end_window), by=.EACHI]$V1 +fmax = function(x) x[, frollmax(value, len_window, adaptive=TRUE, align="left", hasNA=FALSE)] +microbenchmark::microbenchmark( + baser(x), sj(x), fmax(x), + times=10, check="identical" +) +#Unit: milliseconds +# expr min lq mean median uq max neval +# baser(x) 4290.98557 4529.82841 4573.94115 4604.85827 4654.39342 4883.991 10 +# sj(x) 3600.42771 3752.19359 4118.21755 4235.45856 4329.08728 4884.080 10 +# fmax(x) 64.48627 73.07978 88.84932 76.64569 82.56115 198.438 10 +``` + ## BUG FIXES 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. diff --git a/R/froll.R b/R/froll.R index df901f0b84..4982d4ab39 100644 --- a/R/froll.R +++ b/R/froll.R @@ -2,8 +2,24 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", " stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun)) algo = match.arg(algo) align = match.arg(align) + leftadaptive = isTRUE(adaptive) && align=="left" ## support for left added in #5441 + if (leftadaptive) { + rev2 = function(x) if (is.list(x)) sapply(x, rev, simplify=FALSE) else rev(x) + verbose = getOption("datatable.verbose") + if (verbose) + cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n") + x = rev2(x) + n = rev2(n) + align = "right" + } ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, hasNA, adaptive) - ans + if (!leftadaptive) + ans + else { + if (verbose) + cat("froll: adaptive=TRUE && align='left' post-processing from align='right'\n") + rev2(ans) + } } frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { @@ -12,9 +28,14 @@ frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "l frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive) } -frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center")) { +frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) { + froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive) +} +frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive) { FUN = match.fun(FUN) align = match.arg(align) + if (!missing(adaptive)) + stopf("frollapply does not support 'adaptive' argument") rho = new.env() ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho) ans diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index f6a4f96a80..60f548fe8f 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -310,7 +310,7 @@ test(6000.0671, frollmean(c(1:2,NA,4:10), 4), c(rep(NA_real_, 6), 5.5, 6.5, 7.5, "frollfunR: 1:", "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0", "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*" )) test(6000.0672, frollmean(c(1:2,NA,4:10), 4, hasNA=FALSE), c(rep(NA_real_, 6), 5.5, 6.5, 7.5, 8.5), output=c( @@ -319,7 +319,7 @@ test(6000.0672, frollmean(c(1:2,NA,4:10), 4, hasNA=FALSE), c(rep(NA_real_, 6), 5 "frollfunR: 1:", "frollmeanFast: running for input length 10, window 4, hasna -1, narm 0", "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*" ), warning="hasNA=FALSE used but NA.*are present in input, use default hasNA=NA to avoid this warning") test(6000.0673, frollmean(c(1:2,NA,4:10), 2, hasNA=FALSE), c(NA, 1.5, NA, NA, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5), output=c( @@ -328,7 +328,7 @@ test(6000.0673, frollmean(c(1:2,NA,4:10), 2, hasNA=FALSE), c(NA, 1.5, NA, NA, 4. "frollfunR: 1:", "frollmeanFast: running for input length 10, window 2, hasna -1, narm 0", "frollmeanFast: NA.*are present in input, re-running with extra care for NAs", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*" ), warning="hasNA=FALSE used but NA.*are present in input, use default hasNA=NA to avoid this warning") test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4), 5.5, 6.5, 7.5, 8.5, NA, NA), output=c( @@ -336,8 +336,8 @@ test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4) "frollfunR: 1 column.*1 window.*", "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0", "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs", - "frollmean: align 0, shift answer by -2", - "frollmean: processing algo 0 took.*", + "frollfun: align 0, shift answer by -2", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*" )) options(datatable.verbose=FALSE) @@ -383,7 +383,7 @@ test(6000.093, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_ test(6000.0931, frollmean(list(1:3, 4:6), 4, align="center"), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_))) test(6000.0932, frollmean(list(1:3, 4:6), 4, align="left"), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_))) options(datatable.verbose=TRUE) -test(6000.0933, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)), output="frollmean: window width longer than input vector, returning all NA vector") +test(6000.0933, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)), output="frollfun: window width longer than input vector, returning all NA vector") options(datatable.verbose=FALSE) #### n==length(x) test(6000.094, frollmean(list(1:3, 4:6), 3), list(c(NA_real_, NA_real_, 2), c(NA_real_, NA_real_, 5))) @@ -436,9 +436,9 @@ test(6000.1196, frollmean(c(1:5,NA), 1:6, algo="exact", na.rm=TRUE, adaptive=TRU "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*, not entering parallel execution here because algo='exact' will compute results in parallel", "frollfunR: 1:", - "fadaptiverollmeanExact: running in parallel for input length 6, hasna 0, narm 1", - "fadaptiverollmeanExact: NA.*are present in input, re-running with extra care for NAs", - "fadaptiverollmean: processing algo 1 took.*", + "frolladaptivemeanExact: running in parallel for input length 6, hasna 0, narm 1", + "frolladaptivemeanExact: NA.*are present in input, re-running with extra care for NAs", + "frolladaptivefun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*" )) #### exact na.rm=TRUE verbose=TRUE @@ -448,7 +448,7 @@ test(6000.1197, frollmean(c(1:5,NA), 2, algo="exact", na.rm=TRUE), output=c( "frollfunR: 1:", "frollmeanExact: running in parallel for input length 6, window 2, hasna 0, narm 1", "frollmeanExact: NA.*are present in input, re-running with extra care for NAs", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*" )) options(datatable.verbose=FALSE) @@ -563,9 +563,16 @@ if (FALSE) { #### adaptive limitations test(6000.145, frollmean(1:2, 1:2, adaptive=TRUE, align="right"), c(1, 1.5)) -test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align argument different than 'right' is not implemented") -test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented") -test(6000.148, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately") +test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align 'center' is not implemented") +test(6000.147, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately") + +#### adaptive align - added in #5441 +options(datatable.verbose=TRUE) +test(6000.148, frollsum(c(1,3,4,2,0), c(3,2,2,3,2), adaptive=TRUE, align="left"), c(8,7,6,NA,NA), output=c("processing from align='right'")) +options(datatable.verbose=FALSE) +test(6000.1481, frollsum(c(1,3,4,2,0), list(c(3,2,2,3,2), c(3,3,3,3,3)), adaptive=TRUE, align="left"), list(c(8,7,6,NA,NA), c(8,9,6,NA,NA))) +test(6000.1482, frollsum(list(c(1,3,4,2,0), c(3,1,4,2,0)), c(3,2,2,3,2), adaptive=TRUE, align="left"), list(c(8,7,6,NA,NA), c(8,5,6,NA,NA))) +test(6000.1483, frollsum(list(c(1,3,4,2,0), c(3,1,4,2,0)), list(c(3,2,2,3,2), c(3,3,3,3,3)), adaptive=TRUE, align="left"), list(c(8,7,6,NA,NA),c(8,9,6,NA,NA),c(8,5,6,NA,NA),c(8,7,6,NA,NA))) #### adaptive exact fastama = function(x, n, na.rm, fill=NA) { @@ -664,81 +671,81 @@ test(6000.171, frollmean(x, n), output=c( "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.172, frollmean(list(x, x+1), n), output=c( "frollfunR: allocating memory for results 2x1", "frollfunR: 2 column.*1 window.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: 2:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.173, frollmean(x, c(n, n+1)), output=c( "frollfunR: allocating memory for results 1x2", "frollfunR: 1 column.*2 window.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: 2:", "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.174, frollmean(list(x, x+1), c(n, n+1)), output=c( "frollfunR: allocating memory for results 2x2", "frollfunR: 2 column.*2 window.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: 2:", "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: 3:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: 4:", "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.175, frollmean(x, n, algo="exact"), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*")) test(6000.176, frollmean(x, n, align="center"), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: align 0, shift answer by -1", - "frollmean: processing algo 0 took.*", + "frollfun: align 0, shift answer by -1", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.177, frollmean(x, n, align="left"), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", - "frollmean: align -1, shift answer by -2", - "frollmean: processing algo 0 took.*", + "frollfun: align -1, shift answer by -2", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) nn = c(1:4,2:3,1:4) test(6000.178, frollmean(x, nn, adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", - "fadaptiverollmeanFast: running for input length 10, hasna 0, narm 0", - "fadaptiverollmean: processing algo 0 took.*", + "frolladaptivemeanFast: running for input length 10, hasna 0, narm 0", + "frolladaptivefun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.179, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", - "fadaptiverollmeanExact: running in parallel for input length 10, hasna 0, narm 0", - "fadaptiverollmean: processing algo 1 took.*", + "frolladaptivemeanExact: running in parallel for input length 10, hasna 0, narm 0", + "frolladaptivefun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*")) x[8] = NA @@ -748,7 +755,7 @@ test(6000.180, frollmean(x, n), output=c( "frollfunR: 1:", "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0", "frollmeanFast: NA.*are present in input, re-running with extra care for NAs", - "frollmean: processing algo 0 took.*", + "frollfun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.181, frollmean(x, n, algo="exact"), output=c( "frollfunR: allocating memory for results 1x1", @@ -756,23 +763,23 @@ test(6000.181, frollmean(x, n, algo="exact"), output=c( "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0", "frollmeanExact: NA.*are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*")) test(6000.182, frollmean(x, nn, adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", - "fadaptiverollmeanFast: running for input length 10, hasna 0, narm 0", - "fadaptiverollmeanFast: NA.*are present in input, re-running with extra care for NAs", - "fadaptiverollmean: processing algo 0 took.*", + "frolladaptivemeanFast: running for input length 10, hasna 0, narm 0", + "frolladaptivemeanFast: NA.*are present in input, re-running with extra care for NAs", + "frolladaptivefun: processing fun 0 algo 0 took.*", "frollfunR: processing.*took.*")) test(6000.183, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c( "frollfunR: allocating memory for results 1x1", "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", - "fadaptiverollmeanExact: running in parallel for input length 10, hasna 0, narm 0", - "fadaptiverollmeanExact: NA.*are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run", - "fadaptiverollmean: processing algo 1 took.*", + "frolladaptivemeanExact: running in parallel for input length 10, hasna 0, narm 0", + "frolladaptivemeanExact: NA.*are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run", + "frolladaptivefun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*")) d = as.data.table(list(1:10/2, 10:1/4)) @@ -781,7 +788,7 @@ test(6000.184, frollmean(d[,1], 3, algo="exact"), output=c( "frollfunR: 1 column.*1 window.*", "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*" )) test(6000.185, frollmean(d, 3:4, algo="exact"), output=c( @@ -789,16 +796,16 @@ test(6000.185, frollmean(d, 3:4, algo="exact"), output=c( "frollfunR: 2 column.*2 window.*", "frollfunR: 1:", "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: 2:", "frollmeanExact: running in parallel for input length 10, window 4, hasna 0, narm 0", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: 3:", "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: 4:", "frollmeanExact: running in parallel for input length 10, window 4, hasna 0, narm 0", - "frollmean: processing algo 1 took.*", + "frollfun: processing fun 0 algo 1 took.*", "frollfunR: processing.*took.*" )) options(datatable.verbose=FALSE) @@ -839,13 +846,66 @@ options(datatable.verbose=TRUE) test(6000.219, frollsum(c(1:2,NA,4:10), rep(4L,10), adaptive=TRUE, algo="exact", na.rm=TRUE), c(rep(NA_real_, 3L), 7, 11, 15, 22, 26, 30, 34), output="re-running with extra care for NAs") test(6000.220, frollsum(c(1:2,NA,4:10), rep(4L,10), adaptive=TRUE, algo="exact"), c(rep(NA_real_, 6), 22, 26, 30, 34), output="NAs were handled already, no need to re-run") test(6000.221, frollsum(1:3, 2), c(NA, 3, 5), output="frollsumFast: running for input length") -test(6000.222, frollsum(1:3, 2, align="left"), c(3, 5, NA), output="frollsum: align") +test(6000.222, frollsum(1:3, 2, align="left"), c(3, 5, NA), output="frollfun: align") test(6000.223, frollsum(c(1,2,NA), 2), c(NA, 3, NA), output="re-running with extra care for NAs") test(6000.224, frollsum(c(NA,2,3), 2), c(NA, NA, 5), output="skip non-NA attempt and run with extra care for NAs") -test(6000.225, frollsum(1:3, c(2,2,2), adaptive=TRUE), c(NA, 3, 5), output="fadaptiverollsumFast: running for input length") +test(6000.225, frollsum(1:3, c(2,2,2), adaptive=TRUE), c(NA, 3, 5), output="frolladaptivesumFast: running for input length") test(6000.226, frollsum(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 5), output="re-running with extra care for NAs") options(datatable.verbose=FALSE) +## frollmax +options(datatable.verbose=TRUE) ## adaptive frollmax no fast algo +test(6000.3, frollmax(1:4, c(2,2,2,2), adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") +test(6000.3001, frollmax(1:4, c(2,2,2,2), algo="fast", adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1") +test(6000.3002, frollmax(1:4, c(2,2,2,2), algo="exact", adaptive=TRUE), notOutput="frolladaptivefun: algo 0 not implemented, fall back to 1") +options(datatable.verbose=FALSE) +n = c(3,2,2,4,2,1,4,8) +x = c(7,2,3,6,3,2,6,6) # no NA +test(6000.3111, frollmax(x, n, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # hasNA=NA # narm=F +test(6000.3112, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # narm=T +test(6000.3121, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # hasNA=F +test(6000.3122, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense") +test(6000.3131, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # hasNA=T +test(6000.3132, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) +x = c(7,2,NA,6,3,NA,6,6) # NA +test(6000.3211, frollmax(x, n, adaptive=TRUE), c(NA,7,NA,NA,6,NA,NA,NA)) +test(6000.3212, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) +test(6000.3221, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) ## expected incorrect results, see manual hasNA section for details, added in #5441 +test(6000.3222, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense") +test(6000.3231, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,NA,NA,6,NA,NA,NA)) +test(6000.3232, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) +x = rep(NA_real_, 8) # all NA +test(6000.3241, frollmax(x, n, adaptive=TRUE), rep(NA_real_, 8)) +test(6000.3242, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA, rep(-Inf, 7))) +test(6000.3251, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA, rep(-Inf, 7))) +test(6000.3252, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense") +test(6000.3261, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), rep(NA_real_, 8)) +test(6000.3262, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA, rep(-Inf, 7))) +x = c(NA,NaN,NA,NaN,NaN,NaN,NA,NA) # all NaN/NA +test(6000.3271, frollmax(x, n, adaptive=TRUE), c(NA,NA,NA,NA,NaN,NaN,NA,NA)) +test(6000.3272, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf)) +test(6000.3281, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf)) ## expected incorrect results, see manual hasNA section for details, added in #5441 +test(6000.3282, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense") +test(6000.3291, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,NA,NA,NA,NaN,NaN,NA,NA)) +test(6000.3292, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf)) +x = c(7,2,NA,6,3,Inf,6,6) # Inf +test(6000.3311, frollmax(x, n, adaptive=TRUE), c(NA,7,NA,NA,6,Inf,Inf,NA)) +test(6000.3312, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,Inf,Inf,Inf)) +test(6000.3321, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,2,7,6,Inf,Inf,Inf)) ## expected incorrect results, see manual hasNA section for details, added in #5441 +test(6000.3322, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense") +test(6000.3331, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,NA,NA,6,Inf,Inf,NA)) +test(6000.3332, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,Inf,Inf,Inf)) +x = c(7,2,-Inf,6,3,NA,6,6) # -Inf +test(6000.3341, frollmax(x, n, adaptive=TRUE), c(NA,7,2,7,6,NA,NA,NA)) +test(6000.3342, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) +test(6000.3351, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) ## expected incorrect results, see manual hasNA section for details, added in #5441 +test(6000.3352, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense") +test(6000.3361, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,2,7,6,NA,NA,NA)) +test(6000.3362, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) + +#TODO frollmax dev +#TODO frollmax tests + ## validation set.seed(108) @@ -1063,6 +1123,7 @@ f = function(x) { #test(6010.106, head(frollapply(1:5, 3, f), 3L), c(NA_real_,NA_real_,1), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*")) # only head 3 is valid, rest is undefined as REAL is applied on logical type, can return garbage or fail with REAL error options(datatable.verbose=FALSE) #### test coverage +test(6010.5, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), error="frollapply does not support 'adaptive' argument") test(6010.501, frollapply(1:3, "b", sum), error="n must be integer") test(6010.502, frollapply(1:3, 2.5, sum), error="n must be integer") test(6010.503, frollapply(1:3, integer(), sum), error="n must be non 0 length") diff --git a/man/froll.Rd b/man/froll.Rd index 090b397a90..576da269c7 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -7,113 +7,94 @@ \alias{rollmean} \alias{frollmean} \alias{rollsum} +\alias{rollmax} \alias{frollsum} +\alias{frollmax} \alias{rollapply} \alias{frollapply} \title{Rolling functions} \description{ - Fast rolling functions to calculate aggregates on sliding windows. Function name and arguments are experimental. + Fast rolling functions to calculate aggregates on sliding windows. } \usage{ -frollmean(x, n, fill=NA, algo=c("fast", "exact"), - align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) -frollsum(x, n, fill=NA, algo=c("fast","exact"), - align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) -frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) + frollmean(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), + na.rm=FALSE, hasNA=NA, adaptive=FALSE) + frollsum(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), + na.rm=FALSE, hasNA=NA, adaptive=FALSE) + frollmax(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"), + na.rm=FALSE, hasNA=NA, adaptive=FALSE) + frollapply(x, n, FUN, \dots, fill=NA, align=c("right","left","center"), adaptive) } \arguments{ \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. } - \item{n}{ Integer vector giving rolling window size(s). This is the \emph{total} number of included values. Adaptive rolling functions also accept a list of integer vectors. } + \item{n}{ Integer vector giving rolling window size(s). This is the \emph{total} number of included values in aggregate function. Adaptive rolling functions also accept a list of integer vectors when applying multiple window sizes. } \item{fill}{ Numeric; value to pad by. Defaults to \code{NA}. } - \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (but more accurate) algorithm is used. It - suffers less from floating point rounding errors by performing an extra pass, and carefully handles all non-finite values. - It will use mutiple cores where available. See Details for more information. } + \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (in some cases more accurate) algorithm is used. See \emph{Implementation} section below for details. } \item{align}{ Character, specifying the "alignment" of the rolling window, defaulting to \code{"right"}. \code{"right"} covers preceding rows (the window \emph{ends} on the current value); \code{"left"} covers following rows (the window \emph{starts} on the current value); \code{"center"} is halfway in between (the window is \emph{centered} on the current value, biased towards \code{"left"} when \code{n} is even). } - \item{na.rm}{ Logical, default \code{FALSE}. Should missing values be removed when - calculating window? For details on handling other non-finite values, see Details. } - \item{hasNA}{ Logical. If it is known that \code{x} contains \code{NA} - then setting this to \code{TRUE} will speed up calculation. Defaults to \code{NA}. } - \item{adaptive}{ Logical, default \code{FALSE}. Should the rolling function be calculated adaptively? See Details below. } - \item{FUN}{ The function to be applied to the rolling window; see Details for restrictions. } + \item{na.rm}{ Logical, default \code{FALSE}. Should missing values be removed when calculating window? } + \item{hasNA}{ Logical. If it is known that \code{x} contains \code{NA} (or \code{Nan}) then setting this to \code{TRUE} will speed up calculation. Defaults to \code{NA}. See \emph{hasNA argument} section below for details. } + \item{adaptive}{ Logical, default \code{FALSE}. Should the rolling function be calculated adaptively? See \emph{Adaptive rolling functions} section below for details. } + \item{FUN}{ The function to be applied to the rolling window in \code{frollapply}; See \emph{frollaply} section below for details. } \item{\dots}{ Extra arguments passed to \code{FUN} in \code{frollapply}. } } \details{ - \code{froll*} functions accept vectors, lists, \code{data.frame}s or - \code{data.table}s. They always return a list except when the input is a - \code{vector} and \code{length(n)==1}, in which case a \code{vector} - is returned, for convenience. Thus, rolling functions can be used - conveniently within \code{data.table} syntax. + \code{froll*} functions accept vector, list, \code{data.frame} or \code{data.table}. Functions operate on a single vector, when passing a non-atomic input, then function is applied column-by-column, not to a complete set of column at once. - Argument \code{n} allows multiple values to apply rolling functions on - multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list. - Each list element must be integer vector of window sizes corresponding - to every single observation in each column; see Examples. + Argument \code{n} allows multiple values to apply rolling function on multiple window sizes. If \code{adaptive=TRUE}, then \code{n} can be a list to specify multiple window sizes for adaptive rolling computation. See \emph{Adaptive rolling functions} section below for details. - When \code{algo="fast"} an \emph{"on-line"} algorithm is used, and - all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. - Setting \code{algo="exact"} will make rolling functions to use a more - computationally-intensive algorithm that suffers less from floating point - rounding error (the same consideration applies to \code{\link[base]{mean}}). - \code{algo="exact"} also handles \code{NaN, +Inf, -Inf} consistently to - base R. In case of some functions (like \emph{mean}), it will additionally - make extra pass to perform floating point error correction. Error - corrections might not be truly exact on some platforms (like Windows) - when using multiple threads. + When multiple columns and/or multiple windows width are provided, then computation run in parallel (except for \code{frollapply}. The exception is for \code{algo="exact"}, which runs in parallel even for single column and single window width. By default data.table uses only half of available CPUs, see \code{\link{setDTthreads}} for details on how to tune CPU usage. - Adaptive rolling functions are a special case where each - observation has its own corresponding rolling window width. Due to the logic - of adaptive rolling functions, the following restrictions apply: - \itemize{ - \item{ \code{align} only \code{"right"}. } - \item{ if list of vectors is passed to \code{x}, then all - vectors within it must have equal length. } - } - - When multiple columns or multiple windows width are provided, then they - are run in parallel. The exception is for \code{algo="exact"}, which runs in - parallel already. - - \code{frollapply} computes rolling aggregate on arbitrary R functions. - The input \code{x} (first argument) to the function \code{FUN} - is coerced to \emph{numeric} beforehand and \code{FUN} - has to return a scalar \emph{numeric} value. Checks for that are made only - during the first iteration when \code{FUN} is evaluated. Edge cases can be - found in examples below. Any R function is supported, but it is not optimized - using our own C implementation -- hence, for example, using \code{frollapply} - to compute a rolling average is inefficient. It is also always single-threaded - because there is no thread-safe API to R's C \code{eval}. Nevertheless we've - seen the computation speed up vis-a-vis versions implemented in base R. + Setting \code{options(datatable.verbose=TRUE)} will display various information about how rolling function processed. It will not print information in a real-time but only at the end of the processing. } \value{ - A list except when the input is a \code{vector} and - \code{length(n)==1} in which case a \code{vector} is returned. + A list except when the input is a \code{vector} and \code{length(n)==1}, in which case a \code{vector} is returned, for convenience. Thus, rolling functions can be used conveniently within \code{data.table} syntax. } \note{ - Users coming from most popular package for rolling functions - \code{zoo} might expect following differences in \code{data.table} - implementation. + Be aware that rolling functions operates on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, then one has to ensure that there are no gaps in input. For details see \href{https://github.com/Rdatatable/data.table/issues/3241}{issue #3241}. +} +\section{\code{hasNA} argument}{ + \code{hasNA} can be used to speed up processing in cases when it is known if \code{x} contains \code{NA} (or \code{NaN}). + \itemize{ + \item{ Default \code{hasNA=NA} will use faster NA inaware implementation, but when NAs are detected it will re-run NA aware implementation. } + \item{ \code{hasNA=TRUE} will use NA aware implementation straightaway. } + \item{ \code{hasNA=FALSE} will use faster NA inaware implementation. Then depending on the rolling function it will either: + \itemize{ + \item{ (\emph{mean, sum}) detect NAs, raise warning, re-run NA aware. } + \item{ (\emph{max}) not detect NAs and may silently produce incorrect answer. } + } + Therefore \code{hasNA=FALSE} should be used with care. } + } +} +\section{Implementation}{ \itemize{ - \item{ rolling function will always return result of the same length - as input. } + \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and all of \code{NaN, +Inf, -Inf} (Inf are to be changed!!) are treated as \code{NA}. Not all functions have \emph{fast} implementation available. As of now \emph{max} and \code{adaptive=TRUE} does not have, therefore it will automatically fall back to \emph{exact} implementation. \code{datatable.verbose} option can be used to check that. } + \item{ \code{algo="exact"} will make rolling functions to use a more computationally-intensive algorithm. For each observation from input vector it will compute a function on a window from scratch (complexity \eqn{O(n^2)}). Depeneding on the function, this algorithm may suffers less from floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}). Algorithm also handles \code{NaN, +Inf, -Inf} consistently to base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA}is \code{FALSE} but NAs are present. In case of some functions (like \emph{mean}), it will additionally make extra pass to perform floating point error correction. Error corrections might not be truly exact on some platforms (like Windows) when using multiple threads. } + } +} +\section{Adaptive rolling functions}{ + Adaptive rolling functions are a special case where each observation has its own corresponding rolling window width. Therefore values passed to \code{n} argument must be series corresponding to observations in \code{x}. If multiple windows is meant to be computed then a list of integer vectors is expected; each list element must be an integer vector of window size corresponding to observations in \code{x}; see Examples. Due to the logic or implementation of adaptive rolling functions, the following restrictions apply + \itemize{ + \item{ \code{align} does not support \code{"center"}. } + \item{ if list of vectors is passed to \code{x}, then all vectors within it must have equal length. } + \item{ functionality in not suported in \code{frollapply} (to be changed). } + } +} +\section{\code{frollapply}}{ + \code{frollapply} computes rolling aggregate on arbitrary R functions. \code{adaptive} argument is not supported (to be changed). The input \code{x} (first argument) to the function \code{FUN} is coerced to \emph{numeric} beforehand(to be changed) and \code{FUN} has to return a scalar \emph{numeric} value (to be changed). Checks for that are made only during the first iteration when \code{FUN} is evaluated. Edge cases can be found in examples below. Any R function is supported, but it is not optimized using our own C implementation -- hence, for example, using \code{frollapply} to compute a rolling average is inefficient. It is also always single-threaded because there is no thread-safe API to R's C \code{eval}. Nevertheless we've seen the computation speed up vis-a-vis versions implemented in base R. +} +\section{\code{zoo} package users notice}{ + Users coming from most popular package for rolling functions \code{zoo} might expect following differences in \code{data.table} implementation + \itemize{ + \item{ rolling function will always return result of the same length as input. } \item{ \code{fill} defaults to \code{NA}. } - \item{ \code{fill} accepts only constant values. It does not support - for \emph{na.locf} or other functions. } + \item{ \code{fill} accepts only constant values. No support for \emph{na.locf} or other functions. } \item{ \code{align} defaults to \code{"right"}. } - \item{ \code{na.rm} is respected, and other functions are not needed - when input contains \code{NA}. } - \item{ integers and logical are always coerced to double. } - \item{ when \code{adaptive=FALSE} (default), then \code{n} must be a - numeric vector. List is not accepted. } - \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of - length equal to \code{nrow(x)}, or list of such vectors. } - \item{ \code{partial} window feature is not supported, although it can - be accomplished by using \code{adaptive=TRUE}, see examples. \code{NA} is always returned for incomplete windows. } + \item{ \code{na.rm} is respected, and other functions are not needed when input contains \code{NA}. } + \item{ integers and logical are always coerced to double (to be changed for frollapply). } + \item{ when \code{adaptive=FALSE} (default), then \code{n} must be a numeric vector. List is not accepted. } + \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of length equal to \code{nrow(x)}, or list of such vectors. } + \item{ \code{partial} window feature is not supported, although it can be accomplished by using \code{adaptive=TRUE}, see examples (to be changed). \code{NA} is always returned for incomplete windows. } } - - Be aware that rolling functions operates on the physical order of input. - If the intent is to roll values in a vector by a logical window, for - example an hour, or a day, one has to ensure that there are no gaps in - input. For details see \href{https://github.com/Rdatatable/data.table/issues/3241}{issue #3241}. } \examples{ d = as.data.table(list(1:6/2, 3:8/4)) @@ -200,7 +181,7 @@ f = function(x) { ## FUN is not type-stable try(frollapply(1:5, 3, f)) } \seealso{ - \code{\link{shift}}, \code{\link{data.table}} + \code{\link{shift}}, \code{\link{data.table}}, \code{\link{setDTthreads}} } \references{ \href{https://en.wikipedia.org/wiki/Round-off_error}{Round-off error} diff --git a/src/data.table.h b/src/data.table.h index b966e86c08..db0c4c5045 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -196,22 +196,29 @@ void initDTthreads(); int getDTthreads(const int64_t n, const bool throttle); void avoid_openmp_hang_within_fork(); +typedef enum { // adding rolling functions here and in frollfunR in frollR.c + MEAN = 0, + SUM = 1, + MAX = 2 +} rollfun_t; // froll.c -void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); +void frollfun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); -void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose); void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); +void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); +void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose); void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose); // frolladaptive.c -void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); -void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); -void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); -void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); -void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); -void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void frolladaptivefun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void frolladaptivemeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void frolladaptivemeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); +//void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now +void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // frollR.c SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive); diff --git a/src/froll.c b/src/froll.c index 3ab7bd927a..37a391218d 100644 --- a/src/froll.c +++ b/src/froll.c @@ -1,43 +1,65 @@ #include "data.table.h" -/* fast rolling mean - router +/* rolling fun - router for fun and algo * early stopping for window bigger than input - * also handles 'align' in single place - * algo = 0: frollmeanFast + * handles 'align' in single place for center or left + * rfun enum rollfun_t routes to rolling function + * algo = 0: fast * adding/removing in/out of sliding window of observations - * algo = 1: frollmeanExact - * recalculate whole mean for each observation, roundoff correction is adjusted, also support for NaN and Inf + * algo = 1: exact + * recalculate whole fun for each observation, for mean roundoff correction is adjusted, also support for NaN and Inf */ -void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { - if (nx < k) { // if window width bigger than input just return vector of fill values +void frollfun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { + double tic = 0; + if (verbose) + tic = omp_get_wtime(); + if (nx < k) { // if window width bigger than input just return vector of fill values if (verbose) snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__); // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586 - for (int i=0; idbl_v[i] = fill; } return; } - double tic = 0; - if (verbose) - tic = omp_get_wtime(); - if (algo==0) { - frollmeanFast(x, nx, ans, k, fill, narm, hasna, verbose); - } else if (algo==1) { - frollmeanExact(x, nx, ans, k, fill, narm, hasna, verbose); + switch (rfun) { + case MEAN : + if (algo==0) { + frollmeanFast(x, nx, ans, k, fill, narm, hasna, verbose); + } else if (algo==1) { + frollmeanExact(x, nx, ans, k, fill, narm, hasna, verbose); + } + break; + case SUM : + if (algo==0) { + frollsumFast(x, nx, ans, k, fill, narm, hasna, verbose); + } else if (algo==1) { + frollsumExact(x, nx, ans, k, fill, narm, hasna, verbose); + } + break; + case MAX : + if (algo==0) { + frollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); + } else if (algo==1) { + frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose); + } + break; + default: + error(_("Internal error: Unknown rfun value in froll: %d"), rfun); // #nocov } - if (ans->status < 3 && align < 1) { // align center or left, only when no errors occurred - int k_ = align==-1 ? k-1 : floor(k/2); // offset to shift + if (align < 1 && ans->status < 3) { + int k_ = align==-1 ? k-1 : floor(k/2); // offset to shift if (verbose) snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_); memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double)); // apply shift to achieve expected align - for (uint64_t i=nx-k_; idbl_v[i] = fill; } } if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); + snprintf(end(ans->message[0]), 500, _("%s: processing fun %d algo %u took %.3fs\n"), __func__, rfun, algo, omp_get_wtime()-tic); } + /* fast rolling mean - fast * when no info on NA (hasNA argument) then assume no NAs run faster version * rollmean implemented as single pass sliding window for align="right" @@ -216,36 +238,9 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool } } -/* fast rolling sum */ -void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) { - if (nx < k) { - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__); - for (int i=0; idbl_v[i] = fill; - } - return; - } - double tic = 0; - if (verbose) - tic = omp_get_wtime(); - if (algo==0) { - frollsumFast(x, nx, ans, k, fill, narm, hasna, verbose); - } else if (algo==1) { - frollsumExact(x, nx, ans, k, fill, narm, hasna, verbose); - } - if (ans->status < 3 && align < 1) { - int k_ = align==-1 ? k-1 : floor(k/2); - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_); - memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double)); - for (uint64_t i=nx-k_; idbl_v[i] = fill; - } - } - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); -} +/* fast rolling sum - fast + * same as mean fast + */ void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollsumFast", (uint64_t)nx, k, hasna, (int)narm); @@ -330,6 +325,9 @@ void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n } } } +/* fast rolling sum - exact + * same as mean exact + */ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { if (verbose) snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollsumExact", (uint64_t)nx, k, hasna, (int)narm); @@ -397,11 +395,253 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool } } +inline void windowmax(double *x, uint64_t o, int k, double *w, uint64_t *iw) { + for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); + if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test! + iw[0] = o+i-k+1; + w[0] = x[iw[0]]; + } + } +} +inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, double *w, uint64_t *iw) { + for (int i=0; i= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]); + if (R_FINITE(x[o+i-k+1])) { + if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test! + iw[0] = o+i-k+1; + w[0] = x[iw[0]]; + } + } else if (narm) { + nc[0]++; + } else { + w[0] = NA_REAL; + } + } +} +/* fast rolling max - fast + * fast online algorithm do single pass over elements keeping track of recent max and its index + * if index of max is within progressing window then it keeps running single pass + * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop on current complete window + * new max is used to continue outer single pass as long as new max index is not leaving the running window + * should scale well for bigger window size, may carry overhead for small window, needs benchmarking + * TODO NA handling + */ +void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm); + double w = R_NegInf; // window max + uint64_t cmax = 0; // counter of nested loops for verbose + uint64_t iw = 0; // index of window max + bool truehasna = hasna>0; + if (!truehasna) { + int i; + for (i=0; i= w) { // >= rather than > because we track most recent maximum using iw + iw = i; + w = x[iw]; + } + ans->dbl_v[i] = fill; + } + if (!truehasna && !R_FINITE(x[i])) { + if (x[i] >= w) { + iw = i; + w = x[iw]; + } else { + // ensure iw ok, case when all window was -Inf or NA? + } + ans->dbl_v[i] = w; + } else { + truehasna = true; + } + if (!truehasna) { + for (uint64_t i=k; i= w) { + //Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw); + iw = i; + w = x[iw]; + } else { + if (iw > i-k) { // max is still within window + // w and iw are still ok + //Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]); + } else { // max left the window, need to find max in this window + //Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k); + w = R_NegInf; + iw = i-k; + windowmax(x, i, k, &w, &iw); + //Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw); + cmax++; + } + } + ans->dbl_v[i] = w; + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax); + if (truehasna) { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + w = R_NegInf; + cmax = 0; + } + } else { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__); + w = R_NegInf; + cmax = 0; + } + } + if (truehasna) { + int nc = 0; + int i; + for (i=0; i= w) { + iw = i; + w = x[iw]; + } + } else { + nc++; + } + ans->dbl_v[i] = fill; + } + if (R_FINITE(x[i])) { + if (x[i] >= w) { + iw = i; + w = x[iw]; + } + } else { + nc++; + } + if (nc == 0) { + ans->dbl_v[i] = w; + } else if (nc == k) { + ans->dbl_v[i] = narm ? R_NegInf : NA_REAL; + } else { + ans->dbl_v[i] = narm ? w : NA_REAL; + } + for (uint64_t i=k; i= w) { + iw = i; + w = x[iw]; + } else { + if (iw > i-k) { // max is still within window + // w and iw are still ok + } else { // max left the window, need to find max in this window + w = R_NegInf; + iw = i-k; + windowmaxnarm(x, i, k, narm, &nc, &w, &iw); + //Rprintf("iteration %d, windowmaxnarm found new max %f at %d\n", i, w, iw); + cmax++; + } + } + ans->dbl_v[i] = w; + } else { + nc++; + } + if (!R_FINITE(x[i-k])) { + nc--; + } + if (nc == 0) { + ans->dbl_v[i] = w; + } else if (nc == k) { + ans->dbl_v[i] = narm ? R_NegInf : NA_REAL; + } else { + ans->dbl_v[i] = narm ? w : NA_REAL; + } + } + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: windowmaxnarm called %"PRIu64" time(s)\n"), __func__, cmax); + } +} +/* fast rolling max - exact + * for each observation in x compute max in window from scratch + * no proper support for NAs yet + */ +void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxExact", (uint64_t)nx, k, hasna, (int)narm); + for (int i=0; idbl_v[i] = fill; + } + bool truehasna = hasna>0; + if (!truehasna || !narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=k-1; i w) + w = x[i+j]; + } + if (R_FINITE(w)) { + ans->dbl_v[i] = w; + } else { + if (!narm) { + ans->dbl_v[i] = w; + } + truehasna = true; + } + } + if (truehasna) { + if (hasna==-1) { + ans->status = 2; + snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__); + } + if (verbose) { + if (narm) { + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__); + } else { + snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__); + } + } + } + } + if (truehasna && narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=k-1; i w) { + w = x[i+j]; + } + } + if (nc < k) { + ans->dbl_v[i] = w; + } else { + ans->dbl_v[i] = R_NegInf; + } + } + } +} + /* fast rolling any R function * not plain C, not thread safe * R eval() allocates */ void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose) { + // early stopping for window bigger than input if (nx < k) { if (verbose) Rprintf(_("%s: window width longer than input vector, returning all NA vector\n"), __func__); @@ -450,6 +690,7 @@ void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, UNPROTECT(1); // evali } } + // align if (ans->status < 3 && align < 1) { int k_ = align==-1 ? k-1 : floor(k/2); if (verbose) diff --git a/src/frollR.c b/src/frollR.c index 74cc7dd4ef..bb7d1cf792 100644 --- a/src/frollR.c +++ b/src/frollR.c @@ -108,8 +108,8 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX else error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "align", "rolling"); // # nocov - if (badaptive && ialign!=1) - error(_("using adaptive TRUE and align argument different than 'right' is not implemented")); + if (badaptive && ialign==0) // support for left added in #5441 + error(_("using adaptive TRUE and align 'center' is not implemented")); SEXP ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++; // allocate list to keep results if (verbose) @@ -132,11 +132,13 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX dx[i] = REAL(VECTOR_ELT(x, i)); // assign source columns to C pointers } - enum {MEAN, SUM} sfun; + rollfun_t rfun; // adding fun needs to be here and data.table.h if (!strcmp(CHAR(STRING_ELT(fun, 0)), "mean")) { - sfun = MEAN; + rfun = MEAN; } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "sum")) { - sfun = SUM; + rfun = SUM; + } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "max")) { + rfun = MAX; } else { error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "fun", "rolling"); // # nocov } @@ -152,7 +154,7 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX int ihasna = // plain C tri-state boolean as integer LOGICAL(hasna)[0]==NA_LOGICAL ? 0 : // hasna NA, default, no info about NA LOGICAL(hasna)[0]==TRUE ? 1 : // hasna TRUE, might be some NAs - -1; // hasna FALSE, there should be no NAs + -1; // hasna FALSE, there should be no NAs // or there must be no NAs for rollmax #5441 unsigned int ialgo; // decode algo to integer if (!strcmp(CHAR(STRING_ELT(algo, 0)), "fast")) @@ -180,21 +182,10 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX #pragma omp parallel for if (ialgo==0) schedule(dynamic) collapse(2) num_threads(getDTthreads(nx*nk, false)) for (R_len_t i=0; imessage[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1); + } + frolladaptivemaxExact(x, nx, ans, k, fill, narm, hasna, verbose); + break; + default: + error(_("Internal error: Unknown rfun value in froll: %d"), rfun); // #nocov } if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); - // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586 + snprintf(end(ans->message[0]), 500, _("%s: processing fun %d algo %u took %.3fs\n"), __func__, rfun, algo, omp_get_wtime()-tic); } -/* fast adaptive rolling mean - fast + +/* fast rolling adaptive mean - fast * when no info on NA (hasNA argument) then assume no NAs run faster version * adaptive rollmean implemented as cumsum first pass, then diff cumsum by indexes `i` to `i-k[i]` * if NAs detected re-run rollmean implemented as cumsum with NA support */ -void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { +void frolladaptivemeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmeanFast", (uint64_t)nx, hasna, (int) narm); + snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivemeanFast", (uint64_t)nx, hasna, (int) narm); bool truehasna = hasna>0; // flag to re-run if NAs detected long double w = 0.0; double *cs = malloc(nx*sizeof(double)); // cumsum vector, same as double cs[nx] but no segfault @@ -104,14 +125,14 @@ void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fi } // end of truehasna free(cs); } -/* fast adaptive rolling mean exact +/* fast rolling adaptive mean exact * extra nested loop to calculate mean of each obs and error correction * requires much more cpu * uses multiple cores */ -void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { +void frolladaptivemeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmeanExact", (uint64_t)nx, hasna, (int) narm); + snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivemeanExact", (uint64_t)nx, hasna, (int) narm); bool truehasna = hasna>0; // flag to re-run if NAs detected if (!truehasna || !narm) { // narm=FALSE handled here as NAs properly propagated in exact algo #pragma omp parallel for num_threads(getDTthreads(nx, true)) @@ -200,22 +221,12 @@ void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double f } // end of truehasna } -/* fast adaptive rolling sum */ -void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { - double tic = 0; - if (verbose) - tic = omp_get_wtime(); - if (algo==0) { - fadaptiverollsumFast(x, nx, ans, k, fill, narm, hasna, verbose); - } else if (algo==1) { - fadaptiverollsumExact(x, nx, ans, k, fill, narm, hasna, verbose); - } - if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic); -} -void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { +/* fast rolling adaptive sum - fast + * same as adaptive mean fast + */ +void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumFast", (uint64_t)nx, hasna, (int) narm); + snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivesumFast", (uint64_t)nx, hasna, (int) narm); bool truehasna = hasna>0; long double w = 0.0; double *cs = malloc(nx*sizeof(double)); @@ -293,9 +304,12 @@ void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fil } free(cs); } -void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { +/* fast rolling adaptive sum - exact + * same as adaptive mean exact + */ +void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { if (verbose) - snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumExact", (uint64_t)nx, hasna, (int) narm); + snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivesumExact", (uint64_t)nx, hasna, (int) narm); bool truehasna = hasna>0; if (!truehasna || !narm) { #pragma omp parallel for num_threads(getDTthreads(nx, true)) @@ -364,3 +378,66 @@ void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi } } } + +/* fast rolling adaptive max - exact + * for hasNA=FALSE it will not detect if any NAs were in the input, therefore could produce incorrect result, well documented + */ +void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) { + if (verbose) + snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivemaxExact", (uint64_t)nx, hasna, (int) narm); + if (hasna==-1) { // fastest we can get for adaptive max as there is no algo='fast', therefore we drop any NA checks when hasNA=FALSE + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; + } else { + double w = R_NegInf; + for (int j=-k[i]+1; j<=0; j++) { //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w); + if (x[i+j] > w) + w = x[i+j]; + } + ans->dbl_v[i] = w; + } + } + } else { + if (narm) { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; + } else { + int nc = 0; + double w = R_NegInf; + for (int j=-k[i]+1; j<=0; j++) { + if (ISNAN(x[i+j])) + nc++; + else if (x[i+j] > w) + w = x[i+j]; + } + if (nc < k[i]) + ans->dbl_v[i] = w; + else + ans->dbl_v[i] = R_NegInf; + } + } + } else { + #pragma omp parallel for num_threads(getDTthreads(nx, true)) + for (uint64_t i=0; idbl_v[i] = fill; + } else { + double w = R_NegInf; + for (int j=-k[i]+1; j<=0; j++) { + if (ISNAN(x[i+j])) { + w = NA_REAL; + break; + } else if (x[i+j] > w) { + w = x[i+j]; + } + } + ans->dbl_v[i] = w; + } + } + } + } +}