From cca5b7912aeee1826792d983f531f36736c9fb91 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Thu, 18 Aug 2022 21:57:34 +0200
Subject: [PATCH 01/16] frollmax exact, buggy fast, no fast adaptive

---
 NAMESPACE           |   1 +
 R/froll.R           |   3 +
 src/data.table.h    |   6 ++
 src/froll.c         | 231 ++++++++++++++++++++++++++++++++++++++++++++
 src/frollR.c        |  11 ++-
 src/frolladaptive.c | 163 +++++++++++++++++++++++++++++++
 6 files changed, 414 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index c22782440a..dec4beee18 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -51,6 +51,7 @@ S3method(cube, data.table)
 S3method(rollup, data.table)
 export(frollmean)
 export(frollsum)
+export(frollmax)
 export(frollapply)
 export(nafill)
 export(setnafill)
diff --git a/R/froll.R b/R/froll.R
index df901f0b84..41a816be04 100644
--- a/R/froll.R
+++ b/R/froll.R
@@ -12,6 +12,9 @@ frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "l
 frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
   froll(fun="sum", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive)
 }
+frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
+  froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive)
+}
 frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center")) {
   FUN = match.fun(FUN)
   align = match.arg(align)
diff --git a/src/data.table.h b/src/data.table.h
index b966e86c08..4109c52e9a 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -203,6 +203,9 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
 void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
+void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
+void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
+void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
 
 // frolladaptive.c
@@ -212,6 +215,9 @@ void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double f
 void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 
 // frollR.c
 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
diff --git a/src/froll.c b/src/froll.c
index 3ab7bd927a..e2d4c802d9 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -397,6 +397,237 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
   }
 }
 
+/* fast rolling max */
+void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) {
+  if (nx < k) {
+    if (verbose)
+      snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
+    for (int i=0; i<nx; i++) {
+      ans->dbl_v[i] = fill;
+    }
+    return;
+  }
+  double tic = 0;
+  if (verbose)
+    tic = omp_get_wtime();
+  if (algo==0) {
+    frollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose);
+  } else if (algo==1) {
+    frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
+  }
+  if (ans->status < 3 && align < 1) {
+    int k_ = align==-1 ? k-1 : floor(k/2);
+    if (verbose)
+      snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
+    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double));
+    for (uint64_t i=nx-k_; i<nx; i++) {
+      ans->dbl_v[i] = fill;
+    }
+  }
+  if (verbose)
+    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
+}
+void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
+  /*
+   * fast online algorithm do single pass over elements keeping track of recent max and its index
+   * if index of max is within progressing window then it keep running single pass
+   * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop
+   * new max is used to continue outer single pass
+   *
+   * should scale well for bigger window size, may carry overhead for small window, needs benchmarking
+   */
+  if (verbose)
+    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm);
+  double w = R_NegInf;
+  uint64_t iw = 0;
+  bool truehasna = hasna>0;
+  if (!truehasna) {
+    int i;
+    for (i=0; i<k-1; i++) { // #loop_counter_not_local_scope_ok
+      if (x[i] >= w) { // >= rather than > because we track most recent maximum using iw
+        iw = i;
+        w = x[iw];
+      }
+      ans->dbl_v[i] = fill;
+    }
+    if (x[i] >= w) {
+      iw = i;
+      w = x[iw];
+    } else {
+      // ensure iw ok, case when all window was -Inf or NA?
+    }
+    ans->dbl_v[i] = w;
+    if (R_FINITE(w)) {
+      for (uint64_t i=k; i<nx; i++) {
+        if (x[i] >= w) {
+          iw = i;
+          w = x[iw];
+        } else {
+          if (iw > i-k) { // max is still within window
+            // w and iw are still ok
+          } else { // max left the window, need to find max in this window
+            for (uint64_t ii=0, w=R_NegInf; i<k-1; ii++) {
+              if (x[i+ii-k] >= w) { // what if that is never satisfied? test!
+                iw = i+ii-k;
+                w = x[iw];
+              }
+            }
+          }
+        }
+        ans->dbl_v[i] = w;
+      }
+      if (!R_FINITE(w)) {
+        if (hasna==-1) {
+          ans->status = 2;
+          snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
+        }
+        if (verbose)
+          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
+        w = 0.0;
+        truehasna = true;
+      }
+    } else {
+      if (hasna==-1) {
+        ans->status = 2;
+        snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
+      }
+      if (verbose)
+        snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__);
+      w = 0.0;
+      truehasna = true;
+    }
+  }
+  if (truehasna) {
+    int nc = 0;
+    int i;
+    double w = R_NegInf; // could be NA from !truehasna section above?
+    for (i=0; i<k-1; i++) { // #loop_counter_not_local_scope_ok
+      if (R_FINITE(x[i])) {
+        if (x[i] >= w) {
+          iw = i;
+          w = x[iw];
+        }
+      } else {
+        nc++;
+      }
+      ans->dbl_v[i] = fill;
+    }
+    if (R_FINITE(x[i])) {
+      if (x[i] >= w) {
+        iw = i;
+        w = x[iw];
+      }
+    } else {
+      nc++;
+    }
+    if (nc == 0) {
+      ans->dbl_v[i] = w;
+    } else if (nc == k) {
+      ans->dbl_v[i] = narm ? R_NegInf : NA_REAL;
+    } else {
+      ans->dbl_v[i] = narm ? w : NA_REAL;
+    }
+    for (uint64_t i=k; i<nx; i++) {
+      if (R_FINITE(x[i])) {
+        if (x[i] >= w) {
+          iw = i;
+          w = x[iw];
+        } else {
+          if (iw > i-k) { // max is still within window
+            // w and iw are still ok
+          } else { // max left the window, need to find max in this window
+            for (uint64_t ii=0, w=R_NegInf; i<k-1; ii++) {
+              nc = 0;
+              if (R_FINITE(x[i+ii-k])) {
+                if (x[i+ii-k] >= w) { // what if that is never satisfied? test!
+                  iw = i+ii-k;
+                  w = x[iw];
+                }
+              } else {
+                nc++; // this needs heavy testing
+              }
+            }
+          }
+        }
+        ans->dbl_v[i] = w;
+      } else {
+        nc++;
+      }
+      if (!R_FINITE(x[i-k])) {
+        nc--;
+      }
+      if (nc == 0) {
+        ans->dbl_v[i] = w;
+      } else if (nc == k) {
+        ans->dbl_v[i] = narm ? R_NegInf : NA_REAL;
+      } else {
+        ans->dbl_v[i] = narm ? w : NA_REAL;
+      }
+    }
+  }
+}
+void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
+  if (verbose)
+    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxExact", (uint64_t)nx, k, hasna, (int)narm);
+  for (int i=0; i<k-1; i++) {
+    ans->dbl_v[i] = fill;
+  }
+  bool truehasna = hasna>0;
+  if (!truehasna || !narm) {
+    #pragma omp parallel for num_threads(getDTthreads(nx, true))
+    for (uint64_t i=k-1; i<nx; i++) {
+      if (narm && truehasna) {
+        continue;
+      }
+      double w = R_NegInf;
+      for (int j=-k+1; j<=0; j++) {
+        if (x[i+j] > w)
+          w = x[i+j];
+      }
+      if (R_FINITE(w)) {
+        ans->dbl_v[i] = w;
+      } else {
+        if (!narm) {
+          ans->dbl_v[i] = w;
+        }
+        truehasna = true;
+      }
+    }
+    if (truehasna) {
+      if (hasna==-1) {
+        ans->status = 2;
+        snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
+      }
+      if (verbose) {
+        if (narm) {
+          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
+        } else {
+          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__);
+        }
+      }
+    }
+  }
+  if (truehasna && narm) {
+    #pragma omp parallel for num_threads(getDTthreads(nx, true))
+    for (uint64_t i=k-1; i<nx; i++) {
+      double w = R_NegInf;
+      int nc = 0;
+      for (int j=-k+1; j<=0; j++) {
+        if (ISNAN(x[i+j])) {
+          nc++;
+        } else if (x[i+j] > w) {
+          w = x[i+j];
+        }
+      }
+      if (nc < k) {
+          ans->dbl_v[i] = w;
+      } else {
+          ans->dbl_v[i] = R_NegInf;
+        }
+    }
+  }
+}
+
 /* fast rolling any R function
  * not plain C, not thread safe
  * R eval() allocates
diff --git a/src/frollR.c b/src/frollR.c
index 74cc7dd4ef..3d70b0aa4b 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -132,11 +132,13 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
     dx[i] = REAL(VECTOR_ELT(x, i));                             // assign source columns to C pointers
   }
 
-  enum {MEAN, SUM} sfun;
+  enum {MEAN, SUM, MAX} sfun;
   if (!strcmp(CHAR(STRING_ELT(fun, 0)), "mean")) {
     sfun = MEAN;
   } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "sum")) {
     sfun = SUM;
+  } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "max")) {
+    sfun = MAX;
   } else {
     error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "fun", "rolling"); // # nocov
   }
@@ -161,6 +163,7 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
     ialgo = 1;                                                  // exact = 1
   else
     error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "algo", "rolling"); // # nocov
+  if (badaptive && ialgo == 0 && sfun==MAX) error("frollmax adaptive algo fast not yet implemented");
 
   int* iik = NULL;
   if (!badaptive) {
@@ -193,6 +196,12 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
         else
           fadaptiverollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
         break;
+      case MAX :
+        if (!badaptive)
+          frollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
+        else
+          fadaptiverollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
+        break;
       default:
         error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
       }
diff --git a/src/frolladaptive.c b/src/frolladaptive.c
index e005cef693..46d24acd19 100644
--- a/src/frolladaptive.c
+++ b/src/frolladaptive.c
@@ -364,3 +364,166 @@ void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi
     }
   }
 }
+
+/* fast adaptive rolling sum */
+void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+  double tic = 0;
+  if (verbose)
+    tic = omp_get_wtime();
+  if (algo==0) {
+    fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose);
+  } else if (algo==1) {
+    fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
+  }
+  if (verbose)
+    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
+}
+void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+  // TODO
+  if (verbose)
+    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumFast", (uint64_t)nx, hasna, (int) narm);
+  bool truehasna = hasna>0;
+  long double w = 0.0;
+  double *cs = malloc(nx*sizeof(double));
+  if (!cs) {                                                    // # nocov start
+    ans->status = 3;
+    snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cumsum"), __func__);
+    free(cs);
+    return;
+  }                                                             // # nocov end
+  if (!truehasna) {
+    for (uint64_t i=0; i<nx; i++) {
+      w += x[i];
+      cs[i] = (double) w;
+    }
+    if (R_FINITE((double) w)) {
+      #pragma omp parallel for num_threads(getDTthreads(nx, true))
+      for (uint64_t i=0; i<nx; i++) {
+        if (i+1 == k[i]) {
+          ans->dbl_v[i] = cs[i];
+        } else if (i+1 > k[i]) {
+          ans->dbl_v[i] = cs[i]-cs[i-k[i]];
+        } else {
+          ans->dbl_v[i] = fill;
+        }
+      }
+    } else {
+      if (hasna==-1) {
+        ans->status = 2;
+        snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
+      }
+      if (verbose)
+        snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
+      w = 0.0;
+      truehasna = true;
+    }
+  }
+  if (truehasna) {
+    uint64_t nc = 0;
+    uint64_t *cn = malloc(nx*sizeof(uint64_t));
+    if (!cn) {                                                  // # nocov start
+      ans->status = 3;
+      snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cum NA counter"), __func__);
+      free(cs);
+      free(cn);
+      return;
+    }                                                           // # nocov end
+    for (uint64_t i=0; i<nx; i++) {
+      if (R_FINITE(x[i])) {
+        w += x[i];
+      } else {
+        nc++;
+      }
+      cs[i] = (double) w;
+      cn[i] = nc;
+    }
+#pragma omp parallel for num_threads(getDTthreads(nx, true))
+    for (uint64_t i=0; i<nx; i++) {
+      if (i+1 < k[i]) {
+        ans->dbl_v[i] = fill;
+      } else if (!narm) {
+        if (i+1 == k[i]) {
+          ans->dbl_v[i] = cn[i]>0 ? NA_REAL : cs[i];
+        } else if (i+1 > k[i]) {
+          ans->dbl_v[i] = (cn[i] - cn[i-k[i]])>0 ? NA_REAL : cs[i]-cs[i-k[i]];
+        }
+      } else if (i+1 == k[i]) {
+        int thisk = k[i] - ((int) cn[i]);
+        ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i];
+      } else if (i+1 > k[i]) {
+        int thisk = k[i] - ((int) (cn[i] - cn[i-k[i]]));
+        ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i]-cs[i-k[i]];
+      }
+    }
+    free(cn);
+  }
+  free(cs);
+}
+void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+  if (verbose)
+    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm);
+  bool truehasna = hasna>0;
+  if (!truehasna || !narm) {
+    #pragma omp parallel for num_threads(getDTthreads(nx, true))
+    for (uint64_t i=0; i<nx; i++) {
+      if (narm && truehasna) {
+        continue;
+      }
+      if (i+1 < k[i]) {
+        ans->dbl_v[i] = fill;
+      } else {
+        double w = R_NegInf;
+        for (int j=-k[i]+1; j<=0; j++) {
+          // should be used with setDTthreads(1)
+          //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w);
+          if (x[i+j] > w)
+            w = x[i+j];
+        }
+        if (R_FINITE(w)) {
+          ans->dbl_v[i] = w;
+        } else {
+          if (!narm) {
+            ans->dbl_v[i] = w;
+          }
+          truehasna = true;
+        }
+      }
+    }
+    if (truehasna) {
+      if (hasna==-1) {
+        ans->status = 2;
+        snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
+      }
+      if (verbose) {
+        if (narm) {
+          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
+        } else {
+          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__);
+        }
+      }
+    }
+  }
+  if (truehasna && narm) {
+    #pragma omp parallel for num_threads(getDTthreads(nx, true))
+    for (uint64_t i=0; i<nx; i++) {
+      if (i+1 < k[i]) {
+        ans->dbl_v[i] = fill;
+      } else {
+        double w = R_NegInf;
+        int nc = 0;
+        for (int j=-k[i]+1; j<=0; j++) {
+          if (ISNAN(x[i+j])) {
+            nc++;
+          } else if (x[i+j] > w) {
+            w = x[i+j];
+          }
+        }
+        if (nc < k[i]) {
+          ans->dbl_v[i] = w;
+        } else {
+          ans->dbl_v[i] = R_NegInf;
+        }
+      }
+    }
+  }
+}

From 14555b2b3ef3498242110bfd64c3fc8570f4f7aa Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Fri, 19 Aug 2022 12:37:39 +0200
Subject: [PATCH 02/16] frollmax fast fixing bugs

---
 src/froll.c | 97 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 68 insertions(+), 29 deletions(-)

diff --git a/src/froll.c b/src/froll.c
index e2d4c802d9..39e7194c32 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -427,6 +427,30 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
+inline void windowmax(double *x, uint64_t o, int k, double *w, uint64_t *iw) {
+  for (int i=0; i<k-1; i++) {
+    //Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k+1] >= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]);
+    if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test!
+      iw[0] = o+i-k+1;
+      w[0] = x[iw[0]];
+    }
+  }
+}
+inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, double *w, uint64_t *iw) {
+  for (int i=0; i<k-1; i++) {
+    //Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k+1] >= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]);
+    if (R_FINITE(x[o+i-k+1])) {
+      if (x[o+i-k+1] >= w[0]) { // what if that is never satisfied? test!
+        iw[0] = o+i-k+1;
+        w[0] = x[iw[0]];
+      }
+    } else if (narm) {
+      nc[0]++;
+    } else {
+      w[0] = NA_REAL;
+    }
+  }
+}
 void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   /*
    * fast online algorithm do single pass over elements keeping track of recent max and its index
@@ -438,52 +462,71 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
    */
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm);
-  double w = R_NegInf;
-  uint64_t iw = 0;
+  double w = R_NegInf; // window max
+  uint64_t cmax = 0; // counter of nested loops for verbose
+  uint64_t iw = 0; // index of window max
   bool truehasna = hasna>0;
   if (!truehasna) {
+    bool hadna = false;
     int i;
     for (i=0; i<k-1; i++) { // #loop_counter_not_local_scope_ok
+      if (!R_FINITE(x[i])) {
+        hadna = true;
+        break;
+      }
       if (x[i] >= w) { // >= rather than > because we track most recent maximum using iw
         iw = i;
         w = x[iw];
       }
       ans->dbl_v[i] = fill;
     }
-    if (x[i] >= w) {
-      iw = i;
-      w = x[iw];
+    if (!hadna && !R_FINITE(x[i])) {
+      if (x[i] >= w) {
+        iw = i;
+        w = x[iw];
+      } else {
+        // ensure iw ok, case when all window was -Inf or NA?
+      }
+      ans->dbl_v[i] = w;
     } else {
-      // ensure iw ok, case when all window was -Inf or NA?
+      hadna = true;
     }
-    ans->dbl_v[i] = w;
-    if (R_FINITE(w)) {
+    if (!hadna) {
       for (uint64_t i=k; i<nx; i++) {
+        if (!R_FINITE(x[i])) {
+          hadna = true;
+          break;
+        }
         if (x[i] >= w) {
+          //Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw);
           iw = i;
           w = x[iw];
         } else {
           if (iw > i-k) { // max is still within window
             // w and iw are still ok
+            //Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]);
           } else { // max left the window, need to find max in this window
-            for (uint64_t ii=0, w=R_NegInf; i<k-1; ii++) {
-              if (x[i+ii-k] >= w) { // what if that is never satisfied? test!
-                iw = i+ii-k;
-                w = x[iw];
-              }
-            }
+            //Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k);
+            w = R_NegInf;
+            iw = i-k;
+            windowmax(x, i, k, &w, &iw);
+            //Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw);
+            cmax++;
           }
         }
         ans->dbl_v[i] = w;
       }
-      if (!R_FINITE(w)) {
+      if (verbose)
+        snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax);
+      if (hadna) {
         if (hasna==-1) {
           ans->status = 2;
           snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
         }
         if (verbose)
           snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
-        w = 0.0;
+        w = R_NegInf;
+        cmax = 0;
         truehasna = true;
       }
     } else {
@@ -493,14 +536,14 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
       }
       if (verbose)
         snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__);
-      w = 0.0;
+      w = R_NegInf;
+      cmax = 0;
       truehasna = true;
     }
   }
   if (truehasna) {
     int nc = 0;
     int i;
-    double w = R_NegInf; // could be NA from !truehasna section above?
     for (i=0; i<k-1; i++) { // #loop_counter_not_local_scope_ok
       if (R_FINITE(x[i])) {
         if (x[i] >= w) {
@@ -536,17 +579,11 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
           if (iw > i-k) { // max is still within window
             // w and iw are still ok
           } else { // max left the window, need to find max in this window
-            for (uint64_t ii=0, w=R_NegInf; i<k-1; ii++) {
-              nc = 0;
-              if (R_FINITE(x[i+ii-k])) {
-                if (x[i+ii-k] >= w) { // what if that is never satisfied? test!
-                  iw = i+ii-k;
-                  w = x[iw];
-                }
-              } else {
-                nc++; // this needs heavy testing
-              }
-            }
+            w = R_NegInf;
+            iw = i-k;
+            windowmaxnarm(x, i, k, narm, &nc, &w, &iw);
+            //Rprintf("iteration %d, windowmaxnarm found new max %f at %d\n", i, w, iw);
+            cmax++;
           }
         }
         ans->dbl_v[i] = w;
@@ -564,6 +601,8 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
         ans->dbl_v[i] = narm ? w : NA_REAL;
       }
     }
+    if (verbose)
+      snprintf(end(ans->message[0]), 500, _("%s: windowmaxnarm called %"PRIu64" time(s)\n"), __func__, cmax);
   }
 }
 void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {

From 437b928801f3509e5c02949caf545d63ef68e4a2 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Fri, 19 Aug 2022 12:39:38 +0200
Subject: [PATCH 03/16] frollmax man to fix CRAN check

---
 man/froll.Rd | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/man/froll.Rd b/man/froll.Rd
index 090b397a90..143c585c1d 100644
--- a/man/froll.Rd
+++ b/man/froll.Rd
@@ -7,7 +7,9 @@
 \alias{rollmean}
 \alias{frollmean}
 \alias{rollsum}
+\alias{rollmax}
 \alias{frollsum}
+\alias{frollmax}
 \alias{rollapply}
 \alias{frollapply}
 \title{Rolling functions}
@@ -19,6 +21,8 @@ frollmean(x, n, fill=NA, algo=c("fast", "exact"),
           align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
 frollsum(x, n, fill=NA, algo=c("fast","exact"),
          align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
+frollmax(x, n, fill=NA, algo=c("fast","exact"),
+         align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
 frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
 }
 \arguments{

From 2fd0faf12b9c6e08caf0099c67e559177292f08d Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Fri, 19 Aug 2022 15:05:41 +0200
Subject: [PATCH 04/16] frollmax fast adaptive non NA, dev

---
 src/froll.c         |  15 +++---
 src/frollR.c        |   1 -
 src/frolladaptive.c | 121 +++++++++++++++++++++-----------------------
 3 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/src/froll.c b/src/froll.c
index 39e7194c32..3b900077f8 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -467,11 +467,10 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
   uint64_t iw = 0; // index of window max
   bool truehasna = hasna>0;
   if (!truehasna) {
-    bool hadna = false;
     int i;
     for (i=0; i<k-1; i++) { // #loop_counter_not_local_scope_ok
       if (!R_FINITE(x[i])) {
-        hadna = true;
+        truehasna = true;
         break;
       }
       if (x[i] >= w) { // >= rather than > because we track most recent maximum using iw
@@ -480,7 +479,7 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
       }
       ans->dbl_v[i] = fill;
     }
-    if (!hadna && !R_FINITE(x[i])) {
+    if (!truehasna && !R_FINITE(x[i])) {
       if (x[i] >= w) {
         iw = i;
         w = x[iw];
@@ -489,12 +488,12 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
       }
       ans->dbl_v[i] = w;
     } else {
-      hadna = true;
+      truehasna = true;
     }
-    if (!hadna) {
+    if (!truehasna) {
       for (uint64_t i=k; i<nx; i++) {
         if (!R_FINITE(x[i])) {
-          hadna = true;
+          truehasna = true;
           break;
         }
         if (x[i] >= w) {
@@ -518,7 +517,7 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
       }
       if (verbose)
         snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax);
-      if (hadna) {
+      if (truehasna) {
         if (hasna==-1) {
           ans->status = 2;
           snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
@@ -527,7 +526,6 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
           snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
         w = R_NegInf;
         cmax = 0;
-        truehasna = true;
       }
     } else {
       if (hasna==-1) {
@@ -538,7 +536,6 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
         snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, skip non-NA attempt and run with extra care for NAs\n"), __func__);
       w = R_NegInf;
       cmax = 0;
-      truehasna = true;
     }
   }
   if (truehasna) {
diff --git a/src/frollR.c b/src/frollR.c
index 3d70b0aa4b..91e988c434 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -163,7 +163,6 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
     ialgo = 1;                                                  // exact = 1
   else
     error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "algo", "rolling"); // # nocov
-  if (badaptive && ialgo == 0 && sfun==MAX) error("frollmax adaptive algo fast not yet implemented");
 
   int* iik = NULL;
   if (!badaptive) {
diff --git a/src/frolladaptive.c b/src/frolladaptive.c
index 46d24acd19..eb07dbfc74 100644
--- a/src/frolladaptive.c
+++ b/src/frolladaptive.c
@@ -365,7 +365,7 @@ void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi
   }
 }
 
-/* fast adaptive rolling sum */
+/* fast adaptive rolling max */
 void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   double tic = 0;
   if (verbose)
@@ -378,86 +378,83 @@ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
+inline void windowadaptivemax(double *x, uint64_t o, int *k, double *w, uint64_t *iw) {
+  for (int i=0; i<k[o+i]-1; i++) {
+    Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k[o+i]+1] >= w[0]: x[%d+%d-%d+1] >= w[0]: %f >= %f: %d\n",
+                                  i,         o,           x[o],                                     o, i, k[o+i],   x[o+i-k[o+i]+1], w[0], x[o+i-k[o+i]+1] >= w[0]);
+    if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test!
+      iw[0] = o+i-k[o+i]+1;
+      w[0] = x[iw[0]];
+    }
+  }
+}
+inline void windowadaptivemaxnarm(double *x, uint64_t o, int *k, bool narm, int *nc, double *w, uint64_t *iw) {
+  for (int i=0; i<k[o+i]-1; i++) {
+    //Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k+1] >= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]);
+    if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test!
+      iw[0] = o+i-k[o+i]+1;
+      w[0] = x[iw[0]];
+    }
+  }
+}
 void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
-  // TODO
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumFast", (uint64_t)nx, hasna, (int) narm);
+    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxFast", (uint64_t)nx, hasna, (int) narm);
+  double w = R_NegInf; // window max
+  uint64_t cmax = 0; // counter of nested loops for verbose
+  uint64_t iw = 0; // index of window max
   bool truehasna = hasna>0;
-  long double w = 0.0;
-  double *cs = malloc(nx*sizeof(double));
-  if (!cs) {                                                    // # nocov start
-    ans->status = 3;
-    snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cumsum"), __func__);
-    free(cs);
-    return;
-  }                                                             // # nocov end
   if (!truehasna) {
     for (uint64_t i=0; i<nx; i++) {
-      w += x[i];
-      cs[i] = (double) w;
-    }
-    if (R_FINITE((double) w)) {
-      #pragma omp parallel for num_threads(getDTthreads(nx, true))
-      for (uint64_t i=0; i<nx; i++) {
-        if (i+1 == k[i]) {
-          ans->dbl_v[i] = cs[i];
-        } else if (i+1 > k[i]) {
-          ans->dbl_v[i] = cs[i]-cs[i-k[i]];
+      if (!R_FINITE(x[i])) {
+        truehasna = true;
+      } else {
+        if (x[i] >= w) {
+          Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw);
+          iw = i;
+          w = x[iw];
         } else {
+          //Rprintf("iteration %d, test if (iw > i-k[i]): (%d > %d-%d): %d\n",i,                   iw, i, k[i], iw > i-k[i]);
+          //Rprintf("i-k[i] < 0: %d-k[%d] < 0: %d-%d < 0: %d < 0: %d\n", i, i, i, k[i], i-k[i], i-k[i] < 0);
+          //Rprintf("i < k[i]: %d\n", i < k[i]);
+          if (i < k[i] || iw > i-k[i]) { // max is still within window
+            Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]);
+          } else { // max left the window, need to find max in this window
+            Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k[i]);
+            w = R_NegInf;
+            iw = i-k[i];
+            windowadaptivemax(x, i, k, &w, &iw);
+            Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw);
+            cmax++;
+          }
+        }
+        if (i+1 < k[i]) {
+          Rprintf("iteration %d, window size %d too big, skip\n", i, k[i]);
           ans->dbl_v[i] = fill;
+        } else {
+          ans->dbl_v[i] = w;
         }
       }
-    } else {
+    }
+    if (verbose)
+      snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax);
+    if (truehasna) {
       if (hasna==-1) {
         ans->status = 2;
         snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
       }
       if (verbose)
         snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
-      w = 0.0;
-      truehasna = true;
+      w = R_NegInf;
+      cmax = 0;
     }
   }
   if (truehasna) {
-    uint64_t nc = 0;
-    uint64_t *cn = malloc(nx*sizeof(uint64_t));
-    if (!cn) {                                                  // # nocov start
-      ans->status = 3;
-      snprintf(ans->message[3], 500, _("%s: Unable to allocate memory for cum NA counter"), __func__);
-      free(cs);
-      free(cn);
-      return;
-    }                                                           // # nocov end
-    for (uint64_t i=0; i<nx; i++) {
-      if (R_FINITE(x[i])) {
-        w += x[i];
-      } else {
-        nc++;
-      }
-      cs[i] = (double) w;
-      cn[i] = nc;
-    }
-#pragma omp parallel for num_threads(getDTthreads(nx, true))
-    for (uint64_t i=0; i<nx; i++) {
-      if (i+1 < k[i]) {
-        ans->dbl_v[i] = fill;
-      } else if (!narm) {
-        if (i+1 == k[i]) {
-          ans->dbl_v[i] = cn[i]>0 ? NA_REAL : cs[i];
-        } else if (i+1 > k[i]) {
-          ans->dbl_v[i] = (cn[i] - cn[i-k[i]])>0 ? NA_REAL : cs[i]-cs[i-k[i]];
-        }
-      } else if (i+1 == k[i]) {
-        int thisk = k[i] - ((int) cn[i]);
-        ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i];
-      } else if (i+1 > k[i]) {
-        int thisk = k[i] - ((int) (cn[i] - cn[i-k[i]]));
-        ans->dbl_v[i] = thisk==0 ? 0.0 : cs[i]-cs[i-k[i]];
-      }
-    }
-    free(cn);
+    ans->status = 3;
+    snprintf(ans->message[3], 500, _("%s: frollmax adaptive algo='fast' having NAs not yet implemented, use algo='exact'"), __func__);
+    return;
+    // TODO
   }
-  free(cs);
 }
 void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)

From 6c3201af5eee80203184421a6abb7f938fa512c8 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 11:26:55 +0200
Subject: [PATCH 05/16] froll docs, adaptive left

---
 R/froll.R             | 23 ++++++++++-
 inst/tests/froll.Rraw |  4 +-
 man/froll.Rd          | 89 +++++++++++++++++++++++++++++--------------
 src/froll.c           | 19 +++++----
 src/frollR.c          |  4 +-
 5 files changed, 96 insertions(+), 43 deletions(-)

diff --git a/R/froll.R b/R/froll.R
index 41a816be04..be69c6f457 100644
--- a/R/froll.R
+++ b/R/froll.R
@@ -2,8 +2,25 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
   stopifnot(!missing(fun), is.character(fun), length(fun)==1L, !is.na(fun))
   algo = match.arg(algo)
   align = match.arg(align)
+  leftadaptive = isTRUE(adaptive) && align=="left"  ## support for left added in #5441
+  if (leftadaptive) {
+    rev2 = function(x) if (is.list(x)) sapply(x, rev, simplify=FALSE) else rev(x)
+    verbose = getOption("datatable.verbose")
+    if (verbose)
+      cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
+    ## TODO test atomic x but list of lists n (multiple windows)!
+    x = rev2(x)
+    n = rev2(n)
+    align = "right"
+  }
   ans = .Call(CfrollfunR, fun, x, n, fill, algo, align, na.rm, hasNA, adaptive)
-  ans
+  if (!leftadaptive)
+    ans
+  else {
+    if (verbose)
+      cat("froll: adaptive=TRUE && align='left' post-processing from align='right'\n")
+    rev2(ans)
+  }
 }
 
 frollmean = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
@@ -15,9 +32,11 @@ frollsum = function(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "lef
 frollmax = function(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) {
   froll(fun="max", x=x, n=n, fill=fill, algo=algo, align=align, na.rm=na.rm, hasNA=hasNA, adaptive=adaptive)
 }
-frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center")) {
+frollapply = function(x, n, FUN, ..., fill=NA, align=c("right", "left", "center"), adaptive) {
   FUN = match.fun(FUN)
   align = match.arg(align)
+  if (!missing(adaptive))
+    stopf("frollapply does not support 'adaptive' argument")
   rho = new.env()
   ans = .Call(CfrollapplyR, FUN, x, n, fill, align, rho)
   ans
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index f6a4f96a80..54c67e5628 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -563,8 +563,8 @@ if (FALSE) {
 
 #### adaptive limitations
 test(6000.145, frollmean(1:2, 1:2, adaptive=TRUE, align="right"), c(1, 1.5))
-test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align argument different than 'right' is not implemented")
-test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented")
+test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align 'center' is not implemented")
+##test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented") ## support added in #5441, TODO add tests
 test(6000.148, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately")
 
 #### adaptive exact
diff --git a/man/froll.Rd b/man/froll.Rd
index 143c585c1d..800709a408 100644
--- a/man/froll.Rd
+++ b/man/froll.Rd
@@ -23,7 +23,7 @@ frollsum(x, n, fill=NA, algo=c("fast","exact"),
          align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
 frollmax(x, n, fill=NA, algo=c("fast","exact"),
          align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
-frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
+frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adaptive)
 }
 \arguments{
   \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. }
@@ -49,36 +49,65 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
   conveniently within \code{data.table} syntax.
 
   Argument \code{n} allows multiple values to apply rolling functions on
-  multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list.
-  Each list element must be integer vector of window sizes corresponding
-  to every single observation in each column; see Examples.
+  multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list,
+  see \emph{Adaptive rolling functions} section below for details.
 
-  When \code{algo="fast"} an \emph{"on-line"} algorithm is used, and
-  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}.
-  Setting \code{algo="exact"} will make rolling functions to use a more
-  computationally-intensive algorithm that suffers less from floating point
-  rounding error (the same consideration applies to \code{\link[base]{mean}}).
-  \code{algo="exact"} also handles \code{NaN, +Inf, -Inf} consistently to
-  base R. In case of some functions (like \emph{mean}), it will additionally
+  When multiple columns or multiple windows width are provided, then they
+  are run in parallel. The exception is for \code{algo="exact"}, which runs in
+  parallel already.
+}
+\section{\code{hasNA} argument}{
+  \code{hasNA} can be used to speed up processing in cases when it is known
+  if \code{x} contains \code{NA, NaN, +Inf, -Inf}.
+  \itemize{
+    \item{ Default \code{hasNA=NA} will use faster NA inaware implementation,
+    but when NAs are detected it will re-run NA aware implementation. }
+    \item{ \code{hasNA=TRUE} will use NA aware implementation straightaway. }
+    \item{ \code{hasNA=FALSE} will use faster NA inaware implementation.
+    Then depending on the rolling function it will either
+    \itemize{
+      \item{ (\emph{mean, sum}) detect NAs, raise warning, re-run NA aware. }
+      \item{ (\emph{max}) not detect NAs and may silently produce incorrect
+      answer. }}
+    Therefore \code{hasNA=FALSE} should be used with care.
+    }
+  }
+}
+\section{Implementation}{
+  \itemize{
+    \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and
+  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. }
+    \item{ \code{algo="exact"} will make rolling functions to use a more
+  computationally-intensive algorithm. For each observation from input vector
+  it will compute a function on a window from scratch (complexity \eqn{O(n^2)}).
+  Depeneding on the function, this algorithm may suffers less from
+  floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}).
+  Algorithm also handles \code{NaN, +Inf, -Inf} consistently to
+  base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA}is \code{FALSE}
+  but NAs are present. In case of some functions (like \emph{mean}), it will additionally
   make extra pass to perform floating point error correction. Error
   corrections might not be truly exact on some platforms (like Windows)
-  when using multiple threads.
-
+  when using multiple threads. }
+  }
+}
+\section{Adaptive rolling functions}{
   Adaptive rolling functions are a special case where each
-  observation has its own corresponding rolling window width. Due to the logic
-  of adaptive rolling functions, the following restrictions apply:
+  observation has its own corresponding rolling window width. \code{n}
+  argument must be a list, then each list element must be an integer vector
+  of window sizes corresponding to every single observation in each column;
+  see Examples. Due to the logic or implementation of adaptive rolling
+  functions, the following restrictions apply:
   \itemize{
-    \item{ \code{align} only \code{"right"}. }
+    \item{ \code{align} does not support \code{"center"}. }
     \item{ if list of vectors is passed to \code{x}, then all
       vectors within it must have equal length. }
+    \item{ functionality in not suported in \code{frollapply}. }
   }
-
-  When multiple columns or multiple windows width are provided, then they
-  are run in parallel. The exception is for \code{algo="exact"}, which runs in
-  parallel already.
-
+}
+\section{\code{frollapply}}{
   \code{frollapply} computes rolling aggregate on arbitrary R functions.
-  The input \code{x} (first argument) to the function \code{FUN}
+  \code{adaptive} argument is not supported. The input
+  \code{x} (first argument) to the function \code{FUN}
   is coerced to \emph{numeric} beforehand and \code{FUN}
   has to return a scalar \emph{numeric} value. Checks for that are made only
   during the first iteration when \code{FUN} is evaluated. Edge cases can be
@@ -88,11 +117,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
   because there is no thread-safe API to R's C \code{eval}. Nevertheless we've
   seen the computation speed up vis-a-vis versions implemented in base R.
 }
-\value{
-  A list except when the input is a \code{vector} and
-  \code{length(n)==1} in which case a \code{vector} is returned.
-}
-\note{
+\section{\code{zoo} package users notice}{
   Users coming from most popular package for rolling functions
   \code{zoo} might expect following differences in \code{data.table}
   implementation.
@@ -111,9 +136,15 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"))
     \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of
       length equal to \code{nrow(x)}, or list of such vectors. }
     \item{ \code{partial} window feature is not supported, although it can
-      be accomplished by using \code{adaptive=TRUE}, see examples. \code{NA} is always returned for incomplete windows. }
+      be accomplished by using \code{adaptive=TRUE}, see examples.
+      \code{NA} is always returned for incomplete windows. }
   }
-
+}
+\value{
+  A list except when the input is a \code{vector} and
+  \code{length(n)==1} in which case a \code{vector} is returned.
+}
+\note{
   Be aware that rolling functions operates on the physical order of input.
   If the intent is to roll values in a vector by a logical window, for
   example an hour, or a day, one has to ensure that there are no gaps in
diff --git a/src/froll.c b/src/froll.c
index 3b900077f8..cc4836c06b 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -451,15 +451,14 @@ inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, doub
     }
   }
 }
+/* fast rolling max - fast
+ * fast online algorithm do single pass over elements keeping track of recent max and its index
+ * if index of max is within progressing window then it keep running single pass
+ * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop
+ * new max is used to continue outer single pass
+ * should scale well for bigger window size, may carry overhead for small window, needs benchmarking
+ */
 void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
-  /*
-   * fast online algorithm do single pass over elements keeping track of recent max and its index
-   * if index of max is within progressing window then it keep running single pass
-   * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop
-   * new max is used to continue outer single pass
-   *
-   * should scale well for bigger window size, may carry overhead for small window, needs benchmarking
-   */
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxFast", (uint64_t)nx, k, hasna, (int)narm);
   double w = R_NegInf; // window max
@@ -602,6 +601,10 @@ void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
       snprintf(end(ans->message[0]), 500, _("%s: windowmaxnarm called %"PRIu64" time(s)\n"), __func__, cmax);
   }
 }
+/* fast rolling max - exact
+ * for each observation in x compute max in window from scratch
+ * no proper support for NAs yet
+ */
 void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollmaxExact", (uint64_t)nx, k, hasna, (int)narm);
diff --git a/src/frollR.c b/src/frollR.c
index 91e988c434..ac138f220c 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -108,8 +108,8 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   else
     error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "align", "rolling"); // # nocov
 
-  if (badaptive && ialign!=1)
-    error(_("using adaptive TRUE and align argument different than 'right' is not implemented"));
+  if (badaptive && ialign==0) // support for left added in #5441
+    error(_("using adaptive TRUE and align 'center' is not implemented"));
 
   SEXP ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++; // allocate list to keep results
   if (verbose)

From 4a6f06364f7e6d29f26cd054a31411178bb790f1 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 11:48:57 +0200
Subject: [PATCH 06/16] no frollmax fast adaptive

---
 man/froll.Rd        |  9 +++--
 src/data.table.h    |  2 +-
 src/frolladaptive.c | 87 +++------------------------------------------
 3 files changed, 13 insertions(+), 85 deletions(-)

diff --git a/man/froll.Rd b/man/froll.Rd
index 800709a408..be78b26907 100644
--- a/man/froll.Rd
+++ b/man/froll.Rd
@@ -54,7 +54,8 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti
 
   When multiple columns or multiple windows width are provided, then they
   are run in parallel. The exception is for \code{algo="exact"}, which runs in
-  parallel already.
+  parallel already. By default data.table uses only half of available CPUs,
+  see \code{\link{setDTthreads}} for details.
 }
 \section{\code{hasNA} argument}{
   \code{hasNA} can be used to speed up processing in cases when it is known
@@ -76,7 +77,11 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adapti
 \section{Implementation}{
   \itemize{
     \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and
-  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. }
+  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. Not all functions
+  have \emph{fast} implementation available. As of now
+  \emph{max} and \code{adaptive=TRUE} does not have, therefore it will
+  automatically fall back to \emph{exact} implementation. \code{datatable.verbose}
+  option can be used to check that. }
     \item{ \code{algo="exact"} will make rolling functions to use a more
   computationally-intensive algorithm. For each observation from input vector
   it will compute a function on a window from scratch (complexity \eqn{O(n^2)}).
diff --git a/src/data.table.h b/src/data.table.h
index 4109c52e9a..4f74f1e198 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -216,7 +216,7 @@ void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int
 void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
 void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 
 // frollR.c
diff --git a/src/frolladaptive.c b/src/frolladaptive.c
index eb07dbfc74..e4e1983f94 100644
--- a/src/frolladaptive.c
+++ b/src/frolladaptive.c
@@ -370,92 +370,15 @@ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
-  if (algo==0) {
-    fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose);
-  } else if (algo==1) {
-    fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
+  if (algo==0 && verbose) {
+    //fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); // fadaptiverollmaxFast does not exists as of now
+    snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1);
   }
+  fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
-inline void windowadaptivemax(double *x, uint64_t o, int *k, double *w, uint64_t *iw) {
-  for (int i=0; i<k[o+i]-1; i++) {
-    Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k[o+i]+1] >= w[0]: x[%d+%d-%d+1] >= w[0]: %f >= %f: %d\n",
-                                  i,         o,           x[o],                                     o, i, k[o+i],   x[o+i-k[o+i]+1], w[0], x[o+i-k[o+i]+1] >= w[0]);
-    if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test!
-      iw[0] = o+i-k[o+i]+1;
-      w[0] = x[iw[0]];
-    }
-  }
-}
-inline void windowadaptivemaxnarm(double *x, uint64_t o, int *k, bool narm, int *nc, double *w, uint64_t *iw) {
-  for (int i=0; i<k[o+i]-1; i++) {
-    //Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k+1] >= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]);
-    if (x[o+i-k[o+i]+1] >= w[0]) { // what if that is never satisfied? test!
-      iw[0] = o+i-k[o+i]+1;
-      w[0] = x[iw[0]];
-    }
-  }
-}
-void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
-  if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxFast", (uint64_t)nx, hasna, (int) narm);
-  double w = R_NegInf; // window max
-  uint64_t cmax = 0; // counter of nested loops for verbose
-  uint64_t iw = 0; // index of window max
-  bool truehasna = hasna>0;
-  if (!truehasna) {
-    for (uint64_t i=0; i<nx; i++) {
-      if (!R_FINITE(x[i])) {
-        truehasna = true;
-      } else {
-        if (x[i] >= w) {
-          Rprintf("iteration %d, new max %f at %d, old max %f at %d\n", i, x[i], i, w, iw);
-          iw = i;
-          w = x[iw];
-        } else {
-          //Rprintf("iteration %d, test if (iw > i-k[i]): (%d > %d-%d): %d\n",i,                   iw, i, k[i], iw > i-k[i]);
-          //Rprintf("i-k[i] < 0: %d-k[%d] < 0: %d-%d < 0: %d < 0: %d\n", i, i, i, k[i], i-k[i], i-k[i] < 0);
-          //Rprintf("i < k[i]: %d\n", i < k[i]);
-          if (i < k[i] || iw > i-k[i]) { // max is still within window
-            Rprintf("iteration %d, max %f at %d bigger than current value %f\n", i, w, iw, x[i]);
-          } else { // max left the window, need to find max in this window
-            Rprintf("iteration %d, max %f at %d left the window, call windowmax from %d of size %d\n", i, w, iw, i, k[i]);
-            w = R_NegInf;
-            iw = i-k[i];
-            windowadaptivemax(x, i, k, &w, &iw);
-            Rprintf("iteration %d, windowmax found new max %f at %d\n", i, w, iw);
-            cmax++;
-          }
-        }
-        if (i+1 < k[i]) {
-          Rprintf("iteration %d, window size %d too big, skip\n", i, k[i]);
-          ans->dbl_v[i] = fill;
-        } else {
-          ans->dbl_v[i] = w;
-        }
-      }
-    }
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: windowmax called %"PRIu64" time(s)\n"), __func__, cmax);
-    if (truehasna) {
-      if (hasna==-1) {
-        ans->status = 2;
-        snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
-      }
-      if (verbose)
-        snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
-      w = R_NegInf;
-      cmax = 0;
-    }
-  }
-  if (truehasna) {
-    ans->status = 3;
-    snprintf(ans->message[3], 500, _("%s: frollmax adaptive algo='fast' having NAs not yet implemented, use algo='exact'"), __func__);
-    return;
-    // TODO
-  }
-}
+//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
 void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm);

From feef63d923831c9f6874267d0c47da478c8b35be Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 18:41:45 +0200
Subject: [PATCH 07/16] frollmax adaptive exact NAs handling

---
 src/froll.c         |  7 ++--
 src/frollR.c        |  2 +-
 src/frolladaptive.c | 82 +++++++++++++++++++++------------------------
 3 files changed, 43 insertions(+), 48 deletions(-)

diff --git a/src/froll.c b/src/froll.c
index cc4836c06b..8484b09d43 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -453,10 +453,11 @@ inline void windowmaxnarm(double *x, uint64_t o, int k, bool narm, int *nc, doub
 }
 /* fast rolling max - fast
  * fast online algorithm do single pass over elements keeping track of recent max and its index
- * if index of max is within progressing window then it keep running single pass
- * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop
- * new max is used to continue outer single pass
+ * if index of max is within progressing window then it keeps running single pass
+ * whenever max is leaving the window (index of max is outside of iterator minus window size) then new maximum is computed via nested loop on current complete window
+ * new max is used to continue outer single pass as long as new max index is not leaving the running window
  * should scale well for bigger window size, may carry overhead for small window, needs benchmarking
+ * TODO NA handling
  */
 void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
diff --git a/src/frollR.c b/src/frollR.c
index ac138f220c..ee4d7fd391 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -154,7 +154,7 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   int ihasna =                                                  // plain C tri-state boolean as integer
     LOGICAL(hasna)[0]==NA_LOGICAL ? 0 :                         // hasna NA, default, no info about NA
     LOGICAL(hasna)[0]==TRUE ? 1 :                               // hasna TRUE, might be some NAs
-    -1;                                                         // hasna FALSE, there should be no NAs
+    -1;                                                         // hasna FALSE, there should be no NAs // or there must be no NAs for rollmax #5441
 
   unsigned int ialgo;                                           // decode algo to integer
   if (!strcmp(CHAR(STRING_ELT(algo, 0)), "fast"))
diff --git a/src/frolladaptive.c b/src/frolladaptive.c
index e4e1983f94..f0a40fb9ca 100644
--- a/src/frolladaptive.c
+++ b/src/frolladaptive.c
@@ -379,69 +379,63 @@ void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
 //void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
+/* fast adaptive rolling max - exact
+ * for hasNA=FALSE it will not detect if any NAs were in the input, therefore could produce incorrect result, well documented
+ */
 void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm);
-  bool truehasna = hasna>0;
-  if (!truehasna || !narm) {
+  if (hasna==-1) { // fastest we can get for adaptive max as there is no algo='fast', therefore we drop any NA checks when hasNA=FALSE
     #pragma omp parallel for num_threads(getDTthreads(nx, true))
     for (uint64_t i=0; i<nx; i++) {
-      if (narm && truehasna) {
-        continue;
-      }
       if (i+1 < k[i]) {
         ans->dbl_v[i] = fill;
       } else {
         double w = R_NegInf;
-        for (int j=-k[i]+1; j<=0; j++) {
-          // should be used with setDTthreads(1)
-          //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w);
+        for (int j=-k[i]+1; j<=0; j++) { //Rprintf("x[%d+%d] > w: %f > %f: %d\n", i, j, x[i+j], w, x[i+j] > w);
           if (x[i+j] > w)
             w = x[i+j];
         }
-        if (R_FINITE(w)) {
-          ans->dbl_v[i] = w;
-        } else {
-          if (!narm) {
-            ans->dbl_v[i] = w;
-          }
-          truehasna = true;
-        }
+        ans->dbl_v[i] = w;
       }
     }
-    if (truehasna) {
-      if (hasna==-1) {
-        ans->status = 2;
-        snprintf(end(ans->message[2]), 500, _("%s: hasNA=FALSE used but NA (or other non-finite) value(s) are present in input, use default hasNA=NA to avoid this warning"), __func__);
-      }
-      if (verbose) {
-        if (narm) {
-          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, re-running with extra care for NAs\n"), __func__);
+  } else {
+    if (narm) {
+      #pragma omp parallel for num_threads(getDTthreads(nx, true))
+      for (uint64_t i=0; i<nx; i++) {
+        if (i+1 < k[i]) {
+          ans->dbl_v[i] = fill;
         } else {
-          snprintf(end(ans->message[0]), 500, _("%s: NA (or other non-finite) value(s) are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run\n"), __func__);
+          int nc = 0;
+          double w = R_NegInf;
+          for (int j=-k[i]+1; j<=0; j++) {
+            if (ISNAN(x[i+j]))
+              nc++;
+            else if (x[i+j] > w)
+              w = x[i+j];
+          }
+          if (nc < k[i])
+            ans->dbl_v[i] = w;
+          else
+            ans->dbl_v[i] = R_NegInf;
         }
       }
-    }
-  }
-  if (truehasna && narm) {
-    #pragma omp parallel for num_threads(getDTthreads(nx, true))
-    for (uint64_t i=0; i<nx; i++) {
-      if (i+1 < k[i]) {
-        ans->dbl_v[i] = fill;
-      } else {
-        double w = R_NegInf;
-        int nc = 0;
-        for (int j=-k[i]+1; j<=0; j++) {
-          if (ISNAN(x[i+j])) {
-            nc++;
-          } else if (x[i+j] > w) {
-            w = x[i+j];
+    } else {
+      #pragma omp parallel for num_threads(getDTthreads(nx, true))
+      for (uint64_t i=0; i<nx; i++) {
+        if (i+1 < k[i]) {
+          ans->dbl_v[i] = fill;
+        } else {
+          double w = R_NegInf;
+          for (int j=-k[i]+1; j<=0; j++) {
+            if (ISNAN(x[i+j])) {
+              w = NA_REAL;
+              break;
+            } else if (x[i+j] > w) {
+              w = x[i+j];
+            }
           }
-        }
-        if (nc < k[i]) {
           ans->dbl_v[i] = w;
-        } else {
-          ans->dbl_v[i] = R_NegInf;
         }
       }
     }

From 63ea485ed24eb3c7ec75f3e57f0f0b4681b76165 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 19:40:06 +0200
Subject: [PATCH 08/16] PR summary in news

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 4f4a2f417a..3fe0bb3b81 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -296,6 +296,8 @@
 
 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error.
 
+42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needed 1left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has observed to be up to 50 time faster than second fastest solution using `max` and grouping `by=.EACHI`.
+
 ## BUG FIXES
 
 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.

From 63f2e7ddb44d9e95e20e44141d2a474d6165676e Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 20:02:22 +0200
Subject: [PATCH 09/16] align happens in one place, less duplicated code

---
 inst/tests/froll.Rraw |  8 ++++----
 src/froll.c           | 27 ---------------------------
 src/frollR.c          | 14 ++++++++++++++
 3 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index 54c67e5628..d097e1e078 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -336,8 +336,8 @@ test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4)
   "frollfunR: 1 column.*1 window.*",
   "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0",
   "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs",
-  "frollmean: align 0, shift answer by -2",
   "frollmean: processing algo 0 took.*",
+  "frollfunR: align 0, shift answer by -2",
   "frollfunR: processing.*took.*"
 ))
 options(datatable.verbose=FALSE)
@@ -714,16 +714,16 @@ test(6000.176, frollmean(x, n, align="center"), output=c(
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: align 0, shift answer by -1",
   "frollmean: processing algo 0 took.*",
+  "frollfunR: align 0, shift answer by -1",
   "frollfunR: processing.*took.*"))
 test(6000.177, frollmean(x, n, align="left"), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: align -1, shift answer by -2",
   "frollmean: processing algo 0 took.*",
+  "frollfunR: align -1, shift answer by -2",
   "frollfunR: processing.*took.*"))
 nn = c(1:4,2:3,1:4)
 test(6000.178, frollmean(x, nn, adaptive=TRUE), output=c(
@@ -839,7 +839,7 @@ options(datatable.verbose=TRUE)
 test(6000.219, frollsum(c(1:2,NA,4:10), rep(4L,10), adaptive=TRUE, algo="exact", na.rm=TRUE), c(rep(NA_real_, 3L), 7, 11, 15, 22, 26, 30, 34), output="re-running with extra care for NAs")
 test(6000.220, frollsum(c(1:2,NA,4:10), rep(4L,10), adaptive=TRUE, algo="exact"), c(rep(NA_real_, 6), 22, 26, 30, 34), output="NAs were handled already, no need to re-run")
 test(6000.221, frollsum(1:3, 2), c(NA, 3, 5), output="frollsumFast: running for input length")
-test(6000.222, frollsum(1:3, 2, align="left"), c(3, 5, NA), output="frollsum: align")
+test(6000.222, frollsum(1:3, 2, align="left"), c(3, 5, NA), output="frollfunR: align")
 test(6000.223, frollsum(c(1,2,NA), 2), c(NA, 3, NA), output="re-running with extra care for NAs")
 test(6000.224, frollsum(c(NA,2,3), 2), c(NA, NA, 5), output="skip non-NA attempt and run with extra care for NAs")
 test(6000.225, frollsum(1:3, c(2,2,2), adaptive=TRUE), c(NA, 3, 5), output="fadaptiverollsumFast: running for input length")
diff --git a/src/froll.c b/src/froll.c
index 8484b09d43..1bfa2c66c9 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -26,15 +26,6 @@ void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int
   } else if (algo==1) {
     frollmeanExact(x, nx, ans, k, fill, narm, hasna, verbose);
   }
-  if (ans->status < 3 && align < 1) {                           // align center or left, only when no errors occurred
-    int k_ = align==-1 ? k-1 : floor(k/2);                      // offset to shift
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
-    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double)); // apply shift to achieve expected align
-    for (uint64_t i=nx-k_; i<nx; i++) {                         // fill from right side
-      ans->dbl_v[i] = fill;
-    }
-  }
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
@@ -234,15 +225,6 @@ void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int
   } else if (algo==1) {
     frollsumExact(x, nx, ans, k, fill, narm, hasna, verbose);
   }
-  if (ans->status < 3 && align < 1) {
-    int k_ = align==-1 ? k-1 : floor(k/2);
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
-    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double));
-    for (uint64_t i=nx-k_; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-  }
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
@@ -415,15 +397,6 @@ void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int
   } else if (algo==1) {
     frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
   }
-  if (ans->status < 3 && align < 1) {
-    int k_ = align==-1 ? k-1 : floor(k/2);
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
-    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double));
-    for (uint64_t i=nx-k_; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-  }
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
 }
diff --git a/src/frollR.c b/src/frollR.c
index ee4d7fd391..bc3176ff75 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -209,6 +209,20 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
 
   ansMsg(dans, nx*nk, verbose, __func__);                       // raise errors and warnings, as of now messages are not being produced
 
+  if (ialign < 1 && !badaptive) {                               // align center or left, adaptive align on R level
+    for (R_len_t i=0; i<nx; i++) {                              // loop over multiple columns
+      for (R_len_t j=0; j<nk; j++) {                            // loop over multiple windows
+        int k_ = ialign==-1 ? iik[j]-1 : floor(iik[j]/2);       // offset to shift
+        if (verbose)
+          Rprintf(_("%s: align %d, shift answer by %d\n"), __func__, ialign, -k_);
+        memmove((char *)dans[i*nk+j].dbl_v, (char *)dans[i*nk+j].dbl_v + (k_*sizeof(double)), (inx[i]-k_)*sizeof(double)); // apply shift to achieve expected align
+        for (uint64_t ii=inx[i]-k_; ii<inx[i]; ii++) {          // fill from right side
+          dans[i*nk+j].dbl_v[ii] = dfill;
+        }
+      }
+    }
+  }
+
   if (verbose)
     Rprintf(_("%s: processing of %d column(s) and %d window(s) took %.3fs\n"), __func__, nx, nk, omp_get_wtime()-tic);
 

From 53414095dcbb7305761e2b4600e01ab9bc5a8297 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 20:32:08 +0200
Subject: [PATCH 10/16] push up even more to frollR to reduce code duplication

---
 inst/tests/froll.Rraw |  2 +-
 src/data.table.h      |  6 ++--
 src/froll.c           | 33 ++-----------------
 src/frollR.c          | 77 +++++++++++++++++++++++++------------------
 4 files changed, 52 insertions(+), 66 deletions(-)

diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index d097e1e078..97fa033e01 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -383,7 +383,7 @@ test(6000.093, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_
 test(6000.0931, frollmean(list(1:3, 4:6), 4, align="center"), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)))
 test(6000.0932, frollmean(list(1:3, 4:6), 4, align="left"), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)))
 options(datatable.verbose=TRUE)
-test(6000.0933, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)), output="frollmean: window width longer than input vector, returning all NA vector")
+test(6000.0933, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)), output="frollfunR: window width longer than input vector, returning all NA vector")
 options(datatable.verbose=FALSE)
 #### n==length(x)
 test(6000.094, frollmean(list(1:3, 4:6), 3), list(c(NA_real_, NA_real_, 2), c(NA_real_, NA_real_, 5)))
diff --git a/src/data.table.h b/src/data.table.h
index 4f74f1e198..99aa2eb069 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -197,13 +197,13 @@ int getDTthreads(const int64_t n, const bool throttle);
 void avoid_openmp_hang_within_fork();
 
 // froll.c
-void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
+void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
-void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
+void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
-void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
+void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
diff --git a/src/froll.c b/src/froll.c
index 1bfa2c66c9..3e438e4a29 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -1,23 +1,12 @@
 #include "data.table.h"
 
 /* fast rolling mean - router
- * early stopping for window bigger than input
- * also handles 'align' in single place
  * algo = 0: frollmeanFast
  *   adding/removing in/out of sliding window of observations
  * algo = 1: frollmeanExact
  *   recalculate whole mean for each observation, roundoff correction is adjusted, also support for NaN and Inf
  */
-void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) {
-  if (nx < k) {                                                 // if window width bigger than input just return vector of fill values
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
-    // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
-    for (int i=0; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-    return;
-  }
+void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
@@ -208,15 +197,7 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
 }
 
 /* fast rolling sum */
-void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) {
-  if (nx < k) {
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
-    for (int i=0; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-    return;
-  }
+void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
@@ -380,15 +361,7 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
 }
 
 /* fast rolling max */
-void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) {
-  if (nx < k) {
-    if (verbose)
-      snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
-    for (int i=0; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-    return;
-  }
+void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
diff --git a/src/frollR.c b/src/frollR.c
index bc3176ff75..6402bd95bf 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -182,47 +182,60 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   #pragma omp parallel for if (ialgo==0) schedule(dynamic) collapse(2) num_threads(getDTthreads(nx*nk, false))
   for (R_len_t i=0; i<nx; i++) {                                // loop over multiple columns
     for (R_len_t j=0; j<nk; j++) {                              // loop over multiple windows
-      switch (sfun) {
-      case MEAN :
-        if (!badaptive)
-          frollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
-        else
+      if (!badaptive) {
+        // early stopping for window bigger than input
+        if (inx[i] < iik[j]) {                      // if window width bigger than input just return vector of fill values
+          if (verbose)
+            snprintf(end(dans[i*nk+j].message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
+          // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
+          for (uint64_t ii=0; ii<inx[i]; ii++) {
+            dans[i*nk+j].dbl_v[ii] = dfill;
+          }
+          continue;
+        }
+        switch (sfun) {
+        case MEAN :
+          frollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], dfill, bnarm, ihasna, verbose);
+          break;
+        case SUM :
+          frollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], dfill, bnarm, ihasna, verbose);
+          break;
+        case MAX :
+          frollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], dfill, bnarm, ihasna, verbose);
+          break;
+        default:
+          error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
+        }
+        // handles 'align' in single place for center or left, adaptive align on R level
+        if (ialign < 1 && dans[i*nk+j].status < 3) {
+          int k_ = ialign==-1 ? iik[j]-1 : floor(iik[j]/2);       // offset to shift
+          if (verbose)
+            snprintf(end(dans[i*nk+j].message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, ialign, -k_);
+          memmove((char *)dans[i*nk+j].dbl_v, (char *)dans[i*nk+j].dbl_v + (k_*sizeof(double)), (inx[i]-k_)*sizeof(double)); // apply shift to achieve expected align
+          for (uint64_t ii=inx[i]-k_; ii<inx[i]; ii++) {          // fill from right side
+            dans[i*nk+j].dbl_v[ii] = dfill;
+          }
+        }
+      } else {
+        switch (sfun) {
+        case MEAN :
           fadaptiverollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
-        break;
-      case SUM :
-        if (!badaptive)
-          frollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
-        else
+          break;
+        case SUM :
           fadaptiverollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
-        break;
-      case MAX :
-        if (!badaptive)
-          frollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
-        else
+          break;
+        case MAX :
           fadaptiverollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
-        break;
-      default:
-        error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
+          break;
+        default:
+          error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
+        }
       }
     }
   }
 
   ansMsg(dans, nx*nk, verbose, __func__);                       // raise errors and warnings, as of now messages are not being produced
 
-  if (ialign < 1 && !badaptive) {                               // align center or left, adaptive align on R level
-    for (R_len_t i=0; i<nx; i++) {                              // loop over multiple columns
-      for (R_len_t j=0; j<nk; j++) {                            // loop over multiple windows
-        int k_ = ialign==-1 ? iik[j]-1 : floor(iik[j]/2);       // offset to shift
-        if (verbose)
-          Rprintf(_("%s: align %d, shift answer by %d\n"), __func__, ialign, -k_);
-        memmove((char *)dans[i*nk+j].dbl_v, (char *)dans[i*nk+j].dbl_v + (k_*sizeof(double)), (inx[i]-k_)*sizeof(double)); // apply shift to achieve expected align
-        for (uint64_t ii=inx[i]-k_; ii<inx[i]; ii++) {          // fill from right side
-          dans[i*nk+j].dbl_v[ii] = dfill;
-        }
-      }
-    }
-  }
-
   if (verbose)
     Rprintf(_("%s: processing of %d column(s) and %d window(s) took %.3fs\n"), __func__, nx, nk, omp_get_wtime()-tic);
 

From c4675be01b0d7fa38d23fe05e2bcc4dc95a2095b Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 21 Aug 2022 20:42:52 +0200
Subject: [PATCH 11/16] frollapply push up align arg and early stopping up

---
 inst/tests/froll.Rraw |  4 ++--
 src/data.table.h      |  2 +-
 src/froll.c           | 19 +------------------
 src/frollR.c          | 23 ++++++++++++++++++++++-
 4 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index 97fa033e01..f9cdce46ec 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -1091,9 +1091,9 @@ test(6010.525, frollapply(c(1,2,3,NA,NA,NA,NA), 3L, mean, na.rm=TRUE), c(NA,NA,2
 test(6010.526, frollapply(numeric(), 3L, sum), numeric())
 test(6010.527, frollapply(1:5, 3L, toString), error="frollapply: results from provided FUN are not of type double")
 options(datatable.verbose=TRUE)
-test(6010.551, frollapply(1:3, 4L, sum), c(NA_real_,NA_real_,NA_real_), output=c("frollapplyR: allocating memory.*","frollapply: window width longer than input vector.*","frollapplyR: processing.*took.*"))
+test(6010.551, frollapply(1:3, 4L, sum), c(NA_real_,NA_real_,NA_real_), output=c("frollapplyR: allocating memory.*","frollapplyR: window width longer than input vector.*","frollapplyR: processing.*took.*"))
 test(6010.552, frollapply(1:5, 3L, sum), c(NA,NA,6,9,12), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*"))
-test(6010.553, frollapply(1:5, 3L, sum, align="center"), c(NA,6,9,12,NA), output=c("frollapplyR: allocating memory.*","frollapply: align 0, shift.*","frollapply: took.*","frollapplyR: processing.*took.*"))
+test(6010.553, frollapply(1:5, 3L, sum, align="center"), c(NA,6,9,12,NA), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: align 0, shift.*","frollapplyR: processing.*took.*"))
 options(datatable.verbose=FALSE)
 ma = function(x, n, na.rm=FALSE) {
   ans = rep(NA_real_, nx<-length(x))
diff --git a/src/data.table.h b/src/data.table.h
index 99aa2eb069..c59c0e55a5 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -206,7 +206,7 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
 void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
-void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
+void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose);
 
 // frolladaptive.c
 void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
diff --git a/src/froll.c b/src/froll.c
index 3e438e4a29..754daf2cd7 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -618,15 +618,7 @@ void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
  * not plain C, not thread safe
  * R eval() allocates
  */
-void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose) {
-  if (nx < k) {
-    if (verbose)
-      Rprintf(_("%s: window width longer than input vector, returning all NA vector\n"), __func__);
-    for (int i=0; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-    return;
-  }
+void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose) {
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
@@ -667,15 +659,6 @@ void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align,
       UNPROTECT(1); // evali
     }
   }
-  if (ans->status < 3 && align < 1) {
-    int k_ = align==-1 ? k-1 : floor(k/2);
-    if (verbose)
-      Rprintf(_("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
-    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double));
-    for (int64_t i=nx-k_; i<nx; i++) {
-      ans->dbl_v[i] = fill;
-    }
-  }
   if (verbose)
     Rprintf(_("%s: took %.3fs\n"), __func__, omp_get_wtime()-tic);
 }
diff --git a/src/frollR.c b/src/frollR.c
index 6402bd95bf..89f9d425e3 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -321,7 +321,28 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
     pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
 
     for (R_len_t i=0; i<nx; i++) {
-      frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
+      // early stopping for window bigger than input
+      if (inx[i] < ik[j]) {
+        if (verbose)
+          Rprintf(_("%s: window width longer than input vector, returning all NA vector\n"), __func__);
+        for (uint64_t ii=0; ii<inx[i]; ii++) {
+          dans[i*nk+j].dbl_v[ii] = dfill;
+        }
+        continue;
+      }
+
+      frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], dfill, pc, rho, verbose);
+
+      // align
+      if (ialign < 1 && dans[i*nk+j].status < 3) {
+        int k_ = ialign==-1 ? ik[j]-1 : floor(ik[j]/2);
+        if (verbose)
+          Rprintf(_("%s: align %d, shift answer by %d\n"), __func__, ialign, -k_);
+        memmove((char *)dans[i*nk+j].dbl_v, (char *)dans[i*nk+j].dbl_v + (k_*sizeof(double)), (inx[i]-k_)*sizeof(double));
+        for (uint64_t ii=inx[i]-k_; ii<inx[i]; ii++) {
+          dans[i*nk+j].dbl_v[ii] = dfill;
+        }
+      }
     }
 
     UNPROTECT(2);

From ccd5c43fd1d7aa295ca9a69bc7d0bfe0a4eb776a Mon Sep 17 00:00:00 2001
From: Jan Gorecki <J.Gorecki@wit.edu.pl>
Date: Mon, 22 Aug 2022 11:59:38 +0200
Subject: [PATCH 12/16] typo fix in NEWS.md

Co-authored-by: Marco Colombo <m.colombo@ed.ac.uk>
---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 3fe0bb3b81..8718fc8ffd 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -296,7 +296,7 @@
 
 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error.
 
-42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needed 1left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has observed to be up to 50 time faster than second fastest solution using `max` and grouping `by=.EACHI`.
+42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has observed to be up to 50 time faster than second fastest solution using `max` and grouping `by=.EACHI`.
 
 ## BUG FIXES
 

From 2494b970efc2ad7886f0493f8fb57c7cf8a497a4 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 22 Aug 2022 12:27:45 +0200
Subject: [PATCH 13/16] keep R agnostic C code in froll.c, yet deduplicated

---
 inst/tests/froll.Rraw |  90 ++++++++++++++++-----------------
 src/data.table.h      |  27 +++++-----
 src/froll.c           | 115 ++++++++++++++++++++++++++++--------------
 src/frollR.c          |  79 +++--------------------------
 src/frolladaptive.c   | 104 +++++++++++++++++++-------------------
 5 files changed, 196 insertions(+), 219 deletions(-)

diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index f9cdce46ec..5e350c5397 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -310,7 +310,7 @@ test(6000.0671, frollmean(c(1:2,NA,4:10), 4), c(rep(NA_real_, 6), 5.5, 6.5, 7.5,
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0",
   "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"
 ))
 test(6000.0672, frollmean(c(1:2,NA,4:10), 4, hasNA=FALSE), c(rep(NA_real_, 6), 5.5, 6.5, 7.5, 8.5), output=c(
@@ -319,7 +319,7 @@ test(6000.0672, frollmean(c(1:2,NA,4:10), 4, hasNA=FALSE), c(rep(NA_real_, 6), 5
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 4, hasna -1, narm 0",
   "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"
 ), warning="hasNA=FALSE used but NA.*are present in input, use default hasNA=NA to avoid this warning")
 test(6000.0673, frollmean(c(1:2,NA,4:10), 2, hasNA=FALSE), c(NA, 1.5, NA, NA, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5), output=c(
@@ -328,7 +328,7 @@ test(6000.0673, frollmean(c(1:2,NA,4:10), 2, hasNA=FALSE), c(NA, 1.5, NA, NA, 4.
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 2, hasna -1, narm 0",
   "frollmeanFast: NA.*are present in input, re-running with extra care for NAs",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"
 ), warning="hasNA=FALSE used but NA.*are present in input, use default hasNA=NA to avoid this warning")
 test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4), 5.5, 6.5, 7.5, 8.5, NA, NA), output=c(
@@ -336,8 +336,8 @@ test(6000.0674, frollmean(c(1:2,NA,4:10), 4, align="center"), c(rep(NA_real_, 4)
   "frollfunR: 1 column.*1 window.*",
   "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0",
   "frollmeanFast: NA.*are present in input, skip non-NA attempt and run with extra care for NAs",
-  "frollmean: processing algo 0 took.*",
-  "frollfunR: align 0, shift answer by -2",
+  "frollfun: align 0, shift answer by -2",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"
 ))
 options(datatable.verbose=FALSE)
@@ -383,7 +383,7 @@ test(6000.093, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_
 test(6000.0931, frollmean(list(1:3, 4:6), 4, align="center"), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)))
 test(6000.0932, frollmean(list(1:3, 4:6), 4, align="left"), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)))
 options(datatable.verbose=TRUE)
-test(6000.0933, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)), output="frollfunR: window width longer than input vector, returning all NA vector")
+test(6000.0933, frollmean(list(1:3, 4:6), 4), list(c(NA_real_, NA_real_, NA_real_), c(NA_real_, NA_real_, NA_real_)), output="frollfun: window width longer than input vector, returning all NA vector")
 options(datatable.verbose=FALSE)
 #### n==length(x)
 test(6000.094, frollmean(list(1:3, 4:6), 3), list(c(NA_real_, NA_real_, 2), c(NA_real_, NA_real_, 5)))
@@ -436,9 +436,9 @@ test(6000.1196, frollmean(c(1:5,NA), 1:6, algo="exact", na.rm=TRUE, adaptive=TRU
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*, not entering parallel execution here because algo='exact' will compute results in parallel",
   "frollfunR: 1:",
-  "fadaptiverollmeanExact: running in parallel for input length 6, hasna 0, narm 1",
-  "fadaptiverollmeanExact: NA.*are present in input, re-running with extra care for NAs",
-  "fadaptiverollmean: processing algo 1 took.*",
+  "frolladaptivemeanExact: running in parallel for input length 6, hasna 0, narm 1",
+  "frolladaptivemeanExact: NA.*are present in input, re-running with extra care for NAs",
+  "frolladaptivefun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"
 ))
 #### exact na.rm=TRUE verbose=TRUE
@@ -448,7 +448,7 @@ test(6000.1197, frollmean(c(1:5,NA), 2, algo="exact", na.rm=TRUE), output=c(
   "frollfunR: 1:",
   "frollmeanExact: running in parallel for input length 6, window 2, hasna 0, narm 1",
   "frollmeanExact: NA.*are present in input, re-running with extra care for NAs",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"
 ))
 options(datatable.verbose=FALSE)
@@ -664,81 +664,81 @@ test(6000.171, frollmean(x, n), output=c(
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.172, frollmean(list(x, x+1), n), output=c(
   "frollfunR: allocating memory for results 2x1",
   "frollfunR: 2 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: 2:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.173, frollmean(x, c(n, n+1)), output=c(
   "frollfunR: allocating memory for results 1x2",
   "frollfunR: 1 column.*2 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: 2:",
   "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.174, frollmean(list(x, x+1), c(n, n+1)), output=c(
   "frollfunR: allocating memory for results 2x2",
   "frollfunR: 2 column.*2 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: 2:",
   "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: 3:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: 4:",
   "frollmeanFast: running for input length 10, window 4, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.175, frollmean(x, n, algo="exact"), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.176, frollmean(x, n, align="center"), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
-  "frollfunR: align 0, shift answer by -1",
+  "frollfun: align 0, shift answer by -1",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.177, frollmean(x, n, align="left"), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 0 took.*",
-  "frollfunR: align -1, shift answer by -2",
+  "frollfun: align -1, shift answer by -2",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 nn = c(1:4,2:3,1:4)
 test(6000.178, frollmean(x, nn, adaptive=TRUE), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
-  "fadaptiverollmeanFast: running for input length 10, hasna 0, narm 0",
-  "fadaptiverollmean: processing algo 0 took.*",
+  "frolladaptivemeanFast: running for input length 10, hasna 0, narm 0",
+  "frolladaptivefun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.179, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
-  "fadaptiverollmeanExact: running in parallel for input length 10, hasna 0, narm 0",
-  "fadaptiverollmean: processing algo 1 took.*",
+  "frolladaptivemeanExact: running in parallel for input length 10, hasna 0, narm 0",
+  "frolladaptivefun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"))
 
 x[8] = NA
@@ -748,7 +748,7 @@ test(6000.180, frollmean(x, n), output=c(
   "frollfunR: 1:",
   "frollmeanFast: running for input length 10, window 3, hasna 0, narm 0",
   "frollmeanFast: NA.*are present in input, re-running with extra care for NAs",
-  "frollmean: processing algo 0 took.*",
+  "frollfun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.181, frollmean(x, n, algo="exact"), output=c(
   "frollfunR: allocating memory for results 1x1",
@@ -756,23 +756,23 @@ test(6000.181, frollmean(x, n, algo="exact"), output=c(
   "frollfunR: 1:",
   "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0",
   "frollmeanExact: NA.*are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.182, frollmean(x, nn, adaptive=TRUE), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
-  "fadaptiverollmeanFast: running for input length 10, hasna 0, narm 0",
-  "fadaptiverollmeanFast: NA.*are present in input, re-running with extra care for NAs",
-  "fadaptiverollmean: processing algo 0 took.*",
+  "frolladaptivemeanFast: running for input length 10, hasna 0, narm 0",
+  "frolladaptivemeanFast: NA.*are present in input, re-running with extra care for NAs",
+  "frolladaptivefun: processing fun 0 algo 0 took.*",
   "frollfunR: processing.*took.*"))
 test(6000.183, frollmean(x, nn, algo="exact", adaptive=TRUE), output=c(
   "frollfunR: allocating memory for results 1x1",
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
-  "fadaptiverollmeanExact: running in parallel for input length 10, hasna 0, narm 0",
-  "fadaptiverollmeanExact: NA.*are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run",
-  "fadaptiverollmean: processing algo 1 took.*",
+  "frolladaptivemeanExact: running in parallel for input length 10, hasna 0, narm 0",
+  "frolladaptivemeanExact: NA.*are present in input, na.rm was FALSE so in 'exact' implementation NAs were handled already, no need to re-run",
+  "frolladaptivefun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"))
 
 d = as.data.table(list(1:10/2, 10:1/4))
@@ -781,7 +781,7 @@ test(6000.184, frollmean(d[,1], 3, algo="exact"), output=c(
   "frollfunR: 1 column.*1 window.*",
   "frollfunR: 1:",
   "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"
 ))
 test(6000.185, frollmean(d, 3:4, algo="exact"), output=c(
@@ -789,16 +789,16 @@ test(6000.185, frollmean(d, 3:4, algo="exact"), output=c(
   "frollfunR: 2 column.*2 window.*",
   "frollfunR: 1:",
   "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: 2:",
   "frollmeanExact: running in parallel for input length 10, window 4, hasna 0, narm 0",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: 3:",
   "frollmeanExact: running in parallel for input length 10, window 3, hasna 0, narm 0",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: 4:",
   "frollmeanExact: running in parallel for input length 10, window 4, hasna 0, narm 0",
-  "frollmean: processing algo 1 took.*",
+  "frollfun: processing fun 0 algo 1 took.*",
   "frollfunR: processing.*took.*"
 ))
 options(datatable.verbose=FALSE)
@@ -839,10 +839,10 @@ options(datatable.verbose=TRUE)
 test(6000.219, frollsum(c(1:2,NA,4:10), rep(4L,10), adaptive=TRUE, algo="exact", na.rm=TRUE), c(rep(NA_real_, 3L), 7, 11, 15, 22, 26, 30, 34), output="re-running with extra care for NAs")
 test(6000.220, frollsum(c(1:2,NA,4:10), rep(4L,10), adaptive=TRUE, algo="exact"), c(rep(NA_real_, 6), 22, 26, 30, 34), output="NAs were handled already, no need to re-run")
 test(6000.221, frollsum(1:3, 2), c(NA, 3, 5), output="frollsumFast: running for input length")
-test(6000.222, frollsum(1:3, 2, align="left"), c(3, 5, NA), output="frollfunR: align")
+test(6000.222, frollsum(1:3, 2, align="left"), c(3, 5, NA), output="frollfun: align")
 test(6000.223, frollsum(c(1,2,NA), 2), c(NA, 3, NA), output="re-running with extra care for NAs")
 test(6000.224, frollsum(c(NA,2,3), 2), c(NA, NA, 5), output="skip non-NA attempt and run with extra care for NAs")
-test(6000.225, frollsum(1:3, c(2,2,2), adaptive=TRUE), c(NA, 3, 5), output="fadaptiverollsumFast: running for input length")
+test(6000.225, frollsum(1:3, c(2,2,2), adaptive=TRUE), c(NA, 3, 5), output="frolladaptivesumFast: running for input length")
 test(6000.226, frollsum(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 5), output="re-running with extra care for NAs")
 options(datatable.verbose=FALSE)
 
@@ -1091,9 +1091,9 @@ test(6010.525, frollapply(c(1,2,3,NA,NA,NA,NA), 3L, mean, na.rm=TRUE), c(NA,NA,2
 test(6010.526, frollapply(numeric(), 3L, sum), numeric())
 test(6010.527, frollapply(1:5, 3L, toString), error="frollapply: results from provided FUN are not of type double")
 options(datatable.verbose=TRUE)
-test(6010.551, frollapply(1:3, 4L, sum), c(NA_real_,NA_real_,NA_real_), output=c("frollapplyR: allocating memory.*","frollapplyR: window width longer than input vector.*","frollapplyR: processing.*took.*"))
+test(6010.551, frollapply(1:3, 4L, sum), c(NA_real_,NA_real_,NA_real_), output=c("frollapplyR: allocating memory.*","frollapply: window width longer than input vector.*","frollapplyR: processing.*took.*"))
 test(6010.552, frollapply(1:5, 3L, sum), c(NA,NA,6,9,12), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*"))
-test(6010.553, frollapply(1:5, 3L, sum, align="center"), c(NA,6,9,12,NA), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: align 0, shift.*","frollapplyR: processing.*took.*"))
+test(6010.553, frollapply(1:5, 3L, sum, align="center"), c(NA,6,9,12,NA), output=c("frollapplyR: allocating memory.*","frollapply: align 0, shift.*","frollapply: took.*","frollapplyR: processing.*took.*"))
 options(datatable.verbose=FALSE)
 ma = function(x, n, na.rm=FALSE) {
   ans = rep(NA_real_, nx<-length(x))
diff --git a/src/data.table.h b/src/data.table.h
index c59c0e55a5..db0c4c5045 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -196,28 +196,29 @@ void initDTthreads();
 int getDTthreads(const int64_t n, const bool throttle);
 void avoid_openmp_hang_within_fork();
 
+typedef enum { // adding rolling functions here and in frollfunR in frollR.c
+  MEAN = 0,
+  SUM = 1,
+  MAX = 2
+} rollfun_t;
 // froll.c
-void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
+void frollfun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose);
 void frollmeanFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
-void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
-void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmaxFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
 void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose);
-void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose);
+void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose);
 
 // frolladaptive.c
-void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
-//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
-void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void frolladaptivefun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void frolladaptivemeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void frolladaptivemeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
+//void frolladaptivemaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
+void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose);
 
 // frollR.c
 SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEXP narm, SEXP hasNA, SEXP adaptive);
diff --git a/src/froll.c b/src/froll.c
index 754daf2cd7..37a391218d 100644
--- a/src/froll.c
+++ b/src/froll.c
@@ -1,23 +1,65 @@
 #include "data.table.h"
 
-/* fast rolling mean - router
- * algo = 0: frollmeanFast
+/* rolling fun - router for fun and algo
+ * early stopping for window bigger than input
+ * handles 'align' in single place for center or left
+ * rfun enum rollfun_t routes to rolling function
+ * algo = 0: fast
  *   adding/removing in/out of sliding window of observations
- * algo = 1: frollmeanExact
- *   recalculate whole mean for each observation, roundoff correction is adjusted, also support for NaN and Inf
+ * algo = 1: exact
+ *   recalculate whole fun for each observation, for mean roundoff correction is adjusted, also support for NaN and Inf
  */
-void frollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
+void frollfun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, int align, double fill, bool narm, int hasna, bool verbose) {
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
-  if (algo==0) {
-    frollmeanFast(x, nx, ans, k, fill, narm, hasna, verbose);
-  } else if (algo==1) {
-    frollmeanExact(x, nx, ans, k, fill, narm, hasna, verbose);
+  if (nx < k) {                      // if window width bigger than input just return vector of fill values
+    if (verbose)
+      snprintf(end(ans->message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
+    // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
+    for (uint64_t i=0; i<nx; i++) {
+      ans->dbl_v[i] = fill;
+    }
+    return;
+  }
+  switch (rfun) {
+  case MEAN :
+    if (algo==0) {
+      frollmeanFast(x, nx, ans, k, fill, narm, hasna, verbose);
+    } else if (algo==1) {
+      frollmeanExact(x, nx, ans, k, fill, narm, hasna, verbose);
+    }
+    break;
+  case SUM :
+    if (algo==0) {
+      frollsumFast(x, nx, ans, k, fill, narm, hasna, verbose);
+    } else if (algo==1) {
+      frollsumExact(x, nx, ans, k, fill, narm, hasna, verbose);
+    }
+    break;
+  case MAX :
+    if (algo==0) {
+      frollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose);
+    } else if (algo==1) {
+      frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
+    }
+    break;
+  default:
+    error(_("Internal error: Unknown rfun value in froll: %d"), rfun); // #nocov
+  }
+  if (align < 1 && ans->status < 3) {
+    int k_ = align==-1 ? k-1 : floor(k/2);       // offset to shift
+    if (verbose)
+      snprintf(end(ans->message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
+    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double)); // apply shift to achieve expected align
+    for (uint64_t i=nx-k_; i<nx; i++) {          // fill from right side
+      ans->dbl_v[i] = fill;
+    }
   }
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
+    snprintf(end(ans->message[0]), 500, _("%s: processing fun %d algo %u took %.3fs\n"), __func__, rfun, algo, omp_get_wtime()-tic);
 }
+
 /* fast rolling mean - fast
  * when no info on NA (hasNA argument) then assume no NAs run faster version
  * rollmean implemented as single pass sliding window for align="right"
@@ -196,19 +238,9 @@ void frollmeanExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
   }
 }
 
-/* fast rolling sum */
-void frollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
-  double tic = 0;
-  if (verbose)
-    tic = omp_get_wtime();
-  if (algo==0) {
-    frollsumFast(x, nx, ans, k, fill, narm, hasna, verbose);
-  } else if (algo==1) {
-    frollsumExact(x, nx, ans, k, fill, narm, hasna, verbose);
-  }
-  if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
-}
+/* fast rolling sum - fast
+ * same as mean fast
+ */
 void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollsumFast", (uint64_t)nx, k, hasna, (int)narm);
@@ -293,6 +325,9 @@ void frollsumFast(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool n
     }
   }
 }
+/* fast rolling sum - exact
+ * same as mean exact
+ */
 void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
     snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", window %d, hasna %d, narm %d\n"), "frollsumExact", (uint64_t)nx, k, hasna, (int)narm);
@@ -360,19 +395,6 @@ void frollsumExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
   }
 }
 
-/* fast rolling max */
-void frollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int k, double fill, bool narm, int hasna, bool verbose) {
-  double tic = 0;
-  if (verbose)
-    tic = omp_get_wtime();
-  if (algo==0) {
-    frollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose);
-  } else if (algo==1) {
-    frollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
-  }
-  if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
-}
 inline void windowmax(double *x, uint64_t o, int k, double *w, uint64_t *iw) {
   for (int i=0; i<k-1; i++) {
     //Rprintf("windowmax iteration %d, offset %d, first x val %f, testing x[o+i-k+1] >= w[0]: x[%d-%d+1] >= w[0]: %f >= %f: %d\n", i, o, x[o], i, k, x[o+i-k+1], w[0], x[o+i-k+1] >= w[0]);
@@ -618,7 +640,16 @@ void frollmaxExact(double *x, uint64_t nx, ans_t *ans, int k, double fill, bool
  * not plain C, not thread safe
  * R eval() allocates
  */
-void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, double fill, SEXP call, SEXP rho, bool verbose) {
+void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, int align, double fill, SEXP call, SEXP rho, bool verbose) {
+  // early stopping for window bigger than input
+  if (nx < k) {
+    if (verbose)
+      Rprintf(_("%s: window width longer than input vector, returning all NA vector\n"), __func__);
+    for (int i=0; i<nx; i++) {
+      ans->dbl_v[i] = fill;
+    }
+    return;
+  }
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
@@ -659,6 +690,16 @@ void frollapply(double *x, int64_t nx, double *w, int k, ans_t *ans, double fill
       UNPROTECT(1); // evali
     }
   }
+  // align
+  if (ans->status < 3 && align < 1) {
+    int k_ = align==-1 ? k-1 : floor(k/2);
+    if (verbose)
+      Rprintf(_("%s: align %d, shift answer by %d\n"), __func__, align, -k_);
+    memmove((char *)ans->dbl_v, (char *)ans->dbl_v + (k_*sizeof(double)), (nx-k_)*sizeof(double));
+    for (int64_t i=nx-k_; i<nx; i++) {
+      ans->dbl_v[i] = fill;
+    }
+  }
   if (verbose)
     Rprintf(_("%s: took %.3fs\n"), __func__, omp_get_wtime()-tic);
 }
diff --git a/src/frollR.c b/src/frollR.c
index 89f9d425e3..bb7d1cf792 100644
--- a/src/frollR.c
+++ b/src/frollR.c
@@ -132,13 +132,13 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
     dx[i] = REAL(VECTOR_ELT(x, i));                             // assign source columns to C pointers
   }
 
-  enum {MEAN, SUM, MAX} sfun;
+  rollfun_t rfun; // adding fun needs to be here and data.table.h
   if (!strcmp(CHAR(STRING_ELT(fun, 0)), "mean")) {
-    sfun = MEAN;
+    rfun = MEAN;
   } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "sum")) {
-    sfun = SUM;
+    rfun = SUM;
   } else if (!strcmp(CHAR(STRING_ELT(fun, 0)), "max")) {
-    sfun = MAX;
+    rfun = MAX;
   } else {
     error(_("Internal error: invalid %s argument in %s function should have been caught earlier. Please report to the data.table issue tracker."), "fun", "rolling"); // # nocov
   }
@@ -183,53 +183,9 @@ SEXP frollfunR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP algo, SEXP align, SEX
   for (R_len_t i=0; i<nx; i++) {                                // loop over multiple columns
     for (R_len_t j=0; j<nk; j++) {                              // loop over multiple windows
       if (!badaptive) {
-        // early stopping for window bigger than input
-        if (inx[i] < iik[j]) {                      // if window width bigger than input just return vector of fill values
-          if (verbose)
-            snprintf(end(dans[i*nk+j].message[0]), 500, _("%s: window width longer than input vector, returning all NA vector\n"), __func__);
-          // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
-          for (uint64_t ii=0; ii<inx[i]; ii++) {
-            dans[i*nk+j].dbl_v[ii] = dfill;
-          }
-          continue;
-        }
-        switch (sfun) {
-        case MEAN :
-          frollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], dfill, bnarm, ihasna, verbose);
-          break;
-        case SUM :
-          frollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], dfill, bnarm, ihasna, verbose);
-          break;
-        case MAX :
-          frollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], dfill, bnarm, ihasna, verbose);
-          break;
-        default:
-          error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
-        }
-        // handles 'align' in single place for center or left, adaptive align on R level
-        if (ialign < 1 && dans[i*nk+j].status < 3) {
-          int k_ = ialign==-1 ? iik[j]-1 : floor(iik[j]/2);       // offset to shift
-          if (verbose)
-            snprintf(end(dans[i*nk+j].message[0]), 500, _("%s: align %d, shift answer by %d\n"), __func__, ialign, -k_);
-          memmove((char *)dans[i*nk+j].dbl_v, (char *)dans[i*nk+j].dbl_v + (k_*sizeof(double)), (inx[i]-k_)*sizeof(double)); // apply shift to achieve expected align
-          for (uint64_t ii=inx[i]-k_; ii<inx[i]; ii++) {          // fill from right side
-            dans[i*nk+j].dbl_v[ii] = dfill;
-          }
-        }
+        frollfun(rfun, ialgo, dx[i], inx[i], &dans[i*nk+j], iik[j], ialign, dfill, bnarm, ihasna, verbose);
       } else {
-        switch (sfun) {
-        case MEAN :
-          fadaptiverollmean(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
-          break;
-        case SUM :
-          fadaptiverollsum(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
-          break;
-        case MAX :
-          fadaptiverollmax(ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
-          break;
-        default:
-          error(_("Internal error: Unknown sfun value in froll: %d"), sfun); // #nocov
-        }
+        frolladaptivefun(rfun, ialgo, dx[i], inx[i], &dans[i*nk+j], ikl[j], dfill, bnarm, ihasna, verbose);
       }
     }
   }
@@ -321,28 +277,7 @@ SEXP frollapplyR(SEXP fun, SEXP obj, SEXP k, SEXP fill, SEXP align, SEXP rho) {
     pc = PROTECT(LCONS(fun, LCONS(pw, LCONS(R_DotsSymbol, R_NilValue))));
 
     for (R_len_t i=0; i<nx; i++) {
-      // early stopping for window bigger than input
-      if (inx[i] < ik[j]) {
-        if (verbose)
-          Rprintf(_("%s: window width longer than input vector, returning all NA vector\n"), __func__);
-        for (uint64_t ii=0; ii<inx[i]; ii++) {
-          dans[i*nk+j].dbl_v[ii] = dfill;
-        }
-        continue;
-      }
-
-      frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], dfill, pc, rho, verbose);
-
-      // align
-      if (ialign < 1 && dans[i*nk+j].status < 3) {
-        int k_ = ialign==-1 ? ik[j]-1 : floor(ik[j]/2);
-        if (verbose)
-          Rprintf(_("%s: align %d, shift answer by %d\n"), __func__, ialign, -k_);
-        memmove((char *)dans[i*nk+j].dbl_v, (char *)dans[i*nk+j].dbl_v + (k_*sizeof(double)), (inx[i]-k_)*sizeof(double));
-        for (uint64_t ii=inx[i]-k_; ii<inx[i]; ii++) {
-          dans[i*nk+j].dbl_v[ii] = dfill;
-        }
-      }
+      frollapply(dx[i], inx[i], dw, ik[j], &dans[i*nk+j], ialign, dfill, pc, rho, verbose);
     }
 
     UNPROTECT(2);
diff --git a/src/frolladaptive.c b/src/frolladaptive.c
index f0a40fb9ca..096e3b0e00 100644
--- a/src/frolladaptive.c
+++ b/src/frolladaptive.c
@@ -1,32 +1,53 @@
 #include "data.table.h"
 
-/* fast adaptive rolling mean - router
- * algo = 0: fadaptiverollmeanFast
- *   first pass cumsum based solution, second pass uses cumsum to calculate answer
- * algo = 1: fadaptiverollmeanExact
- *   recalculate whole mean for each observation, roundoff correction is adjusted, also support for NaN and Inf
+/* rolling adaptive fun - router for fun and algo
+ * rfun enum rollfun_t routes to rolling function
+ * algo = 0: fast
+ *   adding/removing in/out of sliding window of observations
+ * algo = 1: exact
+ *   recalculate whole fun for each observation, for mean roundoff correction is adjusted, also support for NaN and Inf
  */
-void fadaptiverollmean(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+void frolladaptivefun(rollfun_t rfun, unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   double tic = 0;
   if (verbose)
     tic = omp_get_wtime();
-  if (algo==0) {
-    fadaptiverollmeanFast(x, nx, ans, k, fill, narm, hasna, verbose);
-  } else if (algo==1) {
-    fadaptiverollmeanExact(x, nx, ans, k, fill, narm, hasna, verbose);
+  switch (rfun) {
+  case MEAN :
+    if (algo==0) {
+      frolladaptivemeanFast(x, nx, ans, k, fill, narm, hasna, verbose);
+    } else if (algo==1) {
+      frolladaptivemeanExact(x, nx, ans, k, fill, narm, hasna, verbose);
+    }
+    break;
+  case SUM :
+    if (algo==0) {
+      frolladaptivesumFast(x, nx, ans, k, fill, narm, hasna, verbose);
+    } else if (algo==1) {
+      frolladaptivesumExact(x, nx, ans, k, fill, narm, hasna, verbose);
+    }
+    break;
+  case MAX :
+    if (algo==0 && verbose) {
+      //frolladaptivemaxFast(x, nx, ans, k, fill, narm, hasna, verbose); // frolladaptivemaxFast does not exists as of now
+      snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1);
+    }
+    frolladaptivemaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
+    break;
+  default:
+    error(_("Internal error: Unknown rfun value in froll: %d"), rfun); // #nocov
   }
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
-  // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
+    snprintf(end(ans->message[0]), 500, _("%s: processing fun %d algo %u took %.3fs\n"), __func__, rfun, algo, omp_get_wtime()-tic);
 }
-/* fast adaptive rolling mean - fast
+
+/* fast rolling adaptive mean - fast
  * when no info on NA (hasNA argument) then assume no NAs run faster version
  * adaptive rollmean implemented as cumsum first pass, then diff cumsum by indexes `i` to `i-k[i]`
  * if NAs detected re-run rollmean implemented as cumsum with NA support
  */
-void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+void frolladaptivemeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmeanFast", (uint64_t)nx, hasna, (int) narm);
+    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivemeanFast", (uint64_t)nx, hasna, (int) narm);
   bool truehasna = hasna>0;                                     // flag to re-run if NAs detected
   long double w = 0.0;
   double *cs = malloc(nx*sizeof(double));                       // cumsum vector, same as double cs[nx] but no segfault
@@ -104,14 +125,14 @@ void fadaptiverollmeanFast(double *x, uint64_t nx, ans_t *ans, int *k, double fi
   } // end of truehasna
   free(cs);
 }
-/* fast adaptive rolling mean exact
+/* fast rolling adaptive mean exact
  * extra nested loop to calculate mean of each obs and error correction
  * requires much more cpu
  * uses multiple cores
  */
-void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+void frolladaptivemeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmeanExact", (uint64_t)nx, hasna, (int) narm);
+    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivemeanExact", (uint64_t)nx, hasna, (int) narm);
   bool truehasna = hasna>0;                                     // flag to re-run if NAs detected
   if (!truehasna || !narm) {                                    // narm=FALSE handled here as NAs properly propagated in exact algo
     #pragma omp parallel for num_threads(getDTthreads(nx, true))
@@ -200,22 +221,12 @@ void fadaptiverollmeanExact(double *x, uint64_t nx, ans_t *ans, int *k, double f
   } // end of truehasna
 }
 
-/* fast adaptive rolling sum */
-void fadaptiverollsum(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
-  double tic = 0;
-  if (verbose)
-    tic = omp_get_wtime();
-  if (algo==0) {
-    fadaptiverollsumFast(x, nx, ans, k, fill, narm, hasna, verbose);
-  } else if (algo==1) {
-    fadaptiverollsumExact(x, nx, ans, k, fill, narm, hasna, verbose);
-  }
-  if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
-}
-void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+/* fast rolling adaptive sum - fast
+ * same as adaptive mean fast
+ */
+void frolladaptivesumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumFast", (uint64_t)nx, hasna, (int) narm);
+    snprintf(end(ans->message[0]), 500, _("%s: running for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivesumFast", (uint64_t)nx, hasna, (int) narm);
   bool truehasna = hasna>0;
   long double w = 0.0;
   double *cs = malloc(nx*sizeof(double));
@@ -293,9 +304,12 @@ void fadaptiverollsumFast(double *x, uint64_t nx, ans_t *ans, int *k, double fil
   }
   free(cs);
 }
-void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+/* fast rolling adaptive sum - exact
+ * same as adaptive mean exact
+ */
+void frolladaptivesumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollsumExact", (uint64_t)nx, hasna, (int) narm);
+    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivesumExact", (uint64_t)nx, hasna, (int) narm);
   bool truehasna = hasna>0;
   if (!truehasna || !narm) {
     #pragma omp parallel for num_threads(getDTthreads(nx, true))
@@ -365,26 +379,12 @@ void fadaptiverollsumExact(double *x, uint64_t nx, ans_t *ans, int *k, double fi
   }
 }
 
-/* fast adaptive rolling max */
-void fadaptiverollmax(unsigned int algo, double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
-  double tic = 0;
-  if (verbose)
-    tic = omp_get_wtime();
-  if (algo==0 && verbose) {
-    //fadaptiverollmaxFast(x, nx, ans, k, fill, narm, hasna, verbose); // fadaptiverollmaxFast does not exists as of now
-    snprintf(end(ans->message[0]), 500, _("%s: algo %u not implemented, fall back to %u\n"), __func__, algo, (unsigned int) 1);
-  }
-  fadaptiverollmaxExact(x, nx, ans, k, fill, narm, hasna, verbose);
-  if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: processing algo %u took %.3fs\n"), __func__, algo, omp_get_wtime()-tic);
-}
-//void fadaptiverollmaxFast(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose); // does not exists as of now
-/* fast adaptive rolling max - exact
+/* fast rolling adaptive max - exact
  * for hasNA=FALSE it will not detect if any NAs were in the input, therefore could produce incorrect result, well documented
  */
-void fadaptiverollmaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
+void frolladaptivemaxExact(double *x, uint64_t nx, ans_t *ans, int *k, double fill, bool narm, int hasna, bool verbose) {
   if (verbose)
-    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "fadaptiverollmaxExact", (uint64_t)nx, hasna, (int) narm);
+    snprintf(end(ans->message[0]), 500, _("%s: running in parallel for input length %"PRIu64", hasna %d, narm %d\n"), "frolladaptivemaxExact", (uint64_t)nx, hasna, (int) narm);
   if (hasna==-1) { // fastest we can get for adaptive max as there is no algo='fast', therefore we drop any NA checks when hasNA=FALSE
     #pragma omp parallel for num_threads(getDTthreads(nx, true))
     for (uint64_t i=0; i<nx; i++) {

From 77a01e731bd289dd1e1e4c3c253eeee29fc94831 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 22 Aug 2022 13:04:58 +0200
Subject: [PATCH 14/16] new functionality unit tests

---
 R/froll.R             |  1 -
 inst/tests/froll.Rraw | 12 ++++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/R/froll.R b/R/froll.R
index be69c6f457..4982d4ab39 100644
--- a/R/froll.R
+++ b/R/froll.R
@@ -8,7 +8,6 @@ froll = function(fun, x, n, fill=NA, algo=c("fast", "exact"), align=c("right", "
     verbose = getOption("datatable.verbose")
     if (verbose)
       cat("froll: adaptive=TRUE && align='left' pre-processing for align='right'\n")
-    ## TODO test atomic x but list of lists n (multiple windows)!
     x = rev2(x)
     n = rev2(n)
     align = "right"
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index 5e350c5397..d054c04cbb 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -564,8 +564,15 @@ if (FALSE) {
 #### adaptive limitations
 test(6000.145, frollmean(1:2, 1:2, adaptive=TRUE, align="right"), c(1, 1.5))
 test(6000.146, frollmean(1:2, 1:2, adaptive=TRUE, align="center"), error="using adaptive TRUE and align 'center' is not implemented")
-##test(6000.147, frollmean(1:2, 1:2, adaptive=TRUE, align="left"), error="using adaptive TRUE and align argument different than 'right' is not implemented") ## support added in #5441, TODO add tests
-test(6000.148, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately")
+test(6000.147, frollmean(list(1:2, 1:3), list(1:2), adaptive=TRUE), error="adaptive rolling function can only process 'x' having equal length of elements, like data.table or data.frame. If you want to call rolling function on list having variable length of elements call it for each field separately")
+
+#### adaptive align - added in #5441
+options(datatable.verbose=TRUE)
+test(6000.148, frollsum(c(1,3,4,2,0), c(3,2,2,3,2), adaptive=TRUE, align="left"), c(8,7,6,NA,NA), output=c("processing from align='right'"))
+options(datatable.verbose=FALSE)
+test(6000.1481, frollsum(c(1,3,4,2,0), list(c(3,2,2,3,2), c(3,3,3,3,3)), adaptive=TRUE, align="left"), list(c(8,7,6,NA,NA), c(8,9,6,NA,NA)))
+test(6000.1482, frollsum(list(c(1,3,4,2,0), c(3,1,4,2,0)), c(3,2,2,3,2), adaptive=TRUE, align="left"), list(c(8,7,6,NA,NA), c(8,5,6,NA,NA)))
+test(6000.1483, frollsum(list(c(1,3,4,2,0), c(3,1,4,2,0)), list(c(3,2,2,3,2), c(3,3,3,3,3)), adaptive=TRUE, align="left"), list(c(8,7,6,NA,NA),c(8,9,6,NA,NA),c(8,5,6,NA,NA),c(8,7,6,NA,NA)))
 
 #### adaptive exact
 fastama = function(x, n, na.rm, fill=NA) {
@@ -1063,6 +1070,7 @@ f = function(x) {
 #test(6010.106, head(frollapply(1:5, 3, f), 3L), c(NA_real_,NA_real_,1), output=c("frollapplyR: allocating memory.*","frollapply: took.*","frollapplyR: processing.*took.*")) # only head 3 is valid, rest is undefined as REAL is applied on logical type, can return garbage or fail with REAL error
 options(datatable.verbose=FALSE)
 #### test coverage
+test(6010.5, frollapply(1:3, c(3,3,3), sum, adaptive=TRUE), error="frollapply does not support 'adaptive' argument")
 test(6010.501, frollapply(1:3, "b", sum), error="n must be integer")
 test(6010.502, frollapply(1:3, 2.5, sum), error="n must be integer")
 test(6010.503, frollapply(1:3, integer(), sum), error="n must be non 0 length")

From 311d12fa6597b2f07eef0448aedfc86aa5817c51 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Thu, 25 Aug 2022 11:52:03 +0200
Subject: [PATCH 15/16] doc further improving

---
 man/froll.Rd | 149 ++++++++++++++++-----------------------------------
 1 file changed, 45 insertions(+), 104 deletions(-)

diff --git a/man/froll.Rd b/man/froll.Rd
index be78b26907..576da269c7 100644
--- a/man/froll.Rd
+++ b/man/froll.Rd
@@ -14,147 +14,88 @@
 \alias{frollapply}
 \title{Rolling functions}
 \description{
-  Fast rolling functions to calculate aggregates on sliding windows. Function name and arguments are experimental.
+  Fast rolling functions to calculate aggregates on sliding windows.
 }
 \usage{
-frollmean(x, n, fill=NA, algo=c("fast", "exact"),
-          align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
-frollsum(x, n, fill=NA, algo=c("fast","exact"),
-         align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
-frollmax(x, n, fill=NA, algo=c("fast","exact"),
-         align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE)
-frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center"), adaptive)
+  frollmean(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
+    na.rm=FALSE, hasNA=NA, adaptive=FALSE)
+  frollsum(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
+    na.rm=FALSE, hasNA=NA, adaptive=FALSE)
+  frollmax(x, n, fill=NA, algo=c("fast","exact"), align=c("right","left","center"),
+    na.rm=FALSE, hasNA=NA, adaptive=FALSE)
+  frollapply(x, n, FUN, \dots, fill=NA, align=c("right","left","center"), adaptive)
 }
 \arguments{
   \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. }
-  \item{n}{ Integer vector giving rolling window size(s). This is the \emph{total} number of included values. Adaptive rolling functions also accept a list of integer vectors. }
+  \item{n}{ Integer vector giving rolling window size(s). This is the \emph{total} number of included values in aggregate function. Adaptive rolling functions also accept a list of integer vectors when applying multiple window sizes. }
   \item{fill}{ Numeric; value to pad by. Defaults to \code{NA}. }
-  \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (but more accurate) algorithm is used. It
-    suffers less from floating point rounding errors by performing an extra pass, and carefully handles all non-finite values.
-    It will use mutiple cores where available. See Details for more information. }
+  \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (in some cases more accurate) algorithm is used. See \emph{Implementation} section below for details. }
   \item{align}{ Character, specifying the "alignment" of the rolling window, defaulting to \code{"right"}. \code{"right"} covers preceding rows (the window \emph{ends} on the current value); \code{"left"} covers following rows (the window \emph{starts} on the current value); \code{"center"} is halfway in between (the window is \emph{centered} on the current value, biased towards \code{"left"} when \code{n} is even). }
-  \item{na.rm}{ Logical, default \code{FALSE}. Should missing values be removed when
-    calculating window? For details on handling other non-finite values, see Details. }
-  \item{hasNA}{ Logical. If it is known that \code{x} contains \code{NA}
-    then setting this to \code{TRUE} will speed up calculation. Defaults to \code{NA}. }
-  \item{adaptive}{ Logical, default \code{FALSE}. Should the rolling function be calculated adaptively? See Details below. }
-  \item{FUN}{ The function to be applied to the rolling window; see Details for restrictions. }
+  \item{na.rm}{ Logical, default \code{FALSE}. Should missing values be removed when calculating window? }
+  \item{hasNA}{ Logical. If it is known that \code{x} contains \code{NA} (or \code{Nan}) then setting this to \code{TRUE} will speed up calculation. Defaults to \code{NA}. See \emph{hasNA argument} section below for details. }
+  \item{adaptive}{ Logical, default \code{FALSE}. Should the rolling function be calculated adaptively? See \emph{Adaptive rolling functions} section below for details. }
+  \item{FUN}{ The function to be applied to the rolling window in \code{frollapply}; See \emph{frollaply} section below for details. }
   \item{\dots}{ Extra arguments passed to \code{FUN} in \code{frollapply}. }
 }
 \details{
-  \code{froll*} functions accept vectors, lists, \code{data.frame}s or
-  \code{data.table}s. They always return a list except when the input is a
-  \code{vector} and \code{length(n)==1}, in which case a \code{vector}
-  is returned, for convenience. Thus, rolling functions can be used
-  conveniently within \code{data.table} syntax.
+  \code{froll*} functions accept vector, list, \code{data.frame} or \code{data.table}. Functions operate on a single vector, when passing a non-atomic input, then function is applied column-by-column, not to a complete set of column at once.
 
-  Argument \code{n} allows multiple values to apply rolling functions on
-  multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list,
-  see \emph{Adaptive rolling functions} section below for details.
+  Argument \code{n} allows multiple values to apply rolling function on multiple window sizes. If \code{adaptive=TRUE}, then \code{n} can be a list to specify multiple window sizes for adaptive rolling computation. See \emph{Adaptive rolling functions} section below for details.
 
-  When multiple columns or multiple windows width are provided, then they
-  are run in parallel. The exception is for \code{algo="exact"}, which runs in
-  parallel already. By default data.table uses only half of available CPUs,
-  see \code{\link{setDTthreads}} for details.
+  When multiple columns and/or multiple windows width are provided, then computation run in parallel (except for \code{frollapply}. The exception is for \code{algo="exact"}, which runs in parallel even for single column and single window width. By default data.table uses only half of available CPUs, see \code{\link{setDTthreads}} for details on how to tune CPU usage.
+
+  Setting \code{options(datatable.verbose=TRUE)} will display various information about how rolling function processed. It will not print information in a real-time but only at the end of the processing.
+}
+\value{
+  A list except when the input is a \code{vector} and \code{length(n)==1}, in which case a \code{vector} is returned, for convenience. Thus, rolling functions can be used conveniently within \code{data.table} syntax.
+}
+\note{
+  Be aware that rolling functions operates on the physical order of input. If the intent is to roll values in a vector by a logical window, for example an hour, or a day, then one has to ensure that there are no gaps in input. For details see \href{https://github.com/Rdatatable/data.table/issues/3241}{issue #3241}.
 }
 \section{\code{hasNA} argument}{
-  \code{hasNA} can be used to speed up processing in cases when it is known
-  if \code{x} contains \code{NA, NaN, +Inf, -Inf}.
+  \code{hasNA} can be used to speed up processing in cases when it is known if \code{x} contains \code{NA} (or \code{NaN}).
   \itemize{
-    \item{ Default \code{hasNA=NA} will use faster NA inaware implementation,
-    but when NAs are detected it will re-run NA aware implementation. }
+    \item{ Default \code{hasNA=NA} will use faster NA inaware implementation, but when NAs are detected it will re-run NA aware implementation. }
     \item{ \code{hasNA=TRUE} will use NA aware implementation straightaway. }
-    \item{ \code{hasNA=FALSE} will use faster NA inaware implementation.
-    Then depending on the rolling function it will either
+    \item{ \code{hasNA=FALSE} will use faster NA inaware implementation. Then depending on the rolling function it will either:
     \itemize{
       \item{ (\emph{mean, sum}) detect NAs, raise warning, re-run NA aware. }
-      \item{ (\emph{max}) not detect NAs and may silently produce incorrect
-      answer. }}
-    Therefore \code{hasNA=FALSE} should be used with care.
+      \item{ (\emph{max}) not detect NAs and may silently produce incorrect answer. }
     }
+    Therefore \code{hasNA=FALSE} should be used with care. }
   }
 }
 \section{Implementation}{
   \itemize{
-    \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and
-  all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. Not all functions
-  have \emph{fast} implementation available. As of now
-  \emph{max} and \code{adaptive=TRUE} does not have, therefore it will
-  automatically fall back to \emph{exact} implementation. \code{datatable.verbose}
-  option can be used to check that. }
-    \item{ \code{algo="exact"} will make rolling functions to use a more
-  computationally-intensive algorithm. For each observation from input vector
-  it will compute a function on a window from scratch (complexity \eqn{O(n^2)}).
-  Depeneding on the function, this algorithm may suffers less from
-  floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}).
-  Algorithm also handles \code{NaN, +Inf, -Inf} consistently to
-  base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA}is \code{FALSE}
-  but NAs are present. In case of some functions (like \emph{mean}), it will additionally
-  make extra pass to perform floating point error correction. Error
-  corrections might not be truly exact on some platforms (like Windows)
-  when using multiple threads. }
+    \item{ \code{algo="fast"} uses \emph{"on-line"} algorithm, and all of \code{NaN, +Inf, -Inf} (Inf are to be changed!!) are treated as \code{NA}. Not all functions have \emph{fast} implementation available. As of now \emph{max} and \code{adaptive=TRUE} does not have, therefore it will automatically fall back to \emph{exact} implementation. \code{datatable.verbose} option can be used to check that. }
+    \item{ \code{algo="exact"} will make rolling functions to use a more computationally-intensive algorithm. For each observation from input vector it will compute a function on a window from scratch (complexity \eqn{O(n^2)}). Depeneding on the function, this algorithm may suffers less from floating point rounding error (the same consideration applies to base \code{\link[base]{mean}}). Algorithm also handles \code{NaN, +Inf, -Inf} consistently to base R, unless - for some functions (e.g. \emph{max}) - \code{hasNA}is \code{FALSE} but NAs are present. In case of some functions (like \emph{mean}), it will additionally make extra pass to perform floating point error correction. Error corrections might not be truly exact on some platforms (like Windows) when using multiple threads. }
   }
 }
 \section{Adaptive rolling functions}{
-  Adaptive rolling functions are a special case where each
-  observation has its own corresponding rolling window width. \code{n}
-  argument must be a list, then each list element must be an integer vector
-  of window sizes corresponding to every single observation in each column;
-  see Examples. Due to the logic or implementation of adaptive rolling
-  functions, the following restrictions apply:
+  Adaptive rolling functions are a special case where each observation has its own corresponding rolling window width. Therefore values passed to \code{n} argument must be series corresponding to observations in \code{x}. If multiple windows is meant to be computed then a list of integer vectors is expected; each list element must be an integer vector of window size corresponding to observations in \code{x}; see Examples. Due to the logic or implementation of adaptive rolling functions, the following restrictions apply
   \itemize{
     \item{ \code{align} does not support \code{"center"}. }
-    \item{ if list of vectors is passed to \code{x}, then all
-      vectors within it must have equal length. }
-    \item{ functionality in not suported in \code{frollapply}. }
+    \item{ if list of vectors is passed to \code{x}, then all vectors within it must have equal length. }
+    \item{ functionality in not suported in \code{frollapply} (to be changed). }
   }
 }
 \section{\code{frollapply}}{
-  \code{frollapply} computes rolling aggregate on arbitrary R functions.
-  \code{adaptive} argument is not supported. The input
-  \code{x} (first argument) to the function \code{FUN}
-  is coerced to \emph{numeric} beforehand and \code{FUN}
-  has to return a scalar \emph{numeric} value. Checks for that are made only
-  during the first iteration when \code{FUN} is evaluated. Edge cases can be
-  found in examples below. Any R function is supported, but it is not optimized
-  using our own C implementation -- hence, for example, using \code{frollapply}
-  to compute a rolling average is inefficient. It is also always single-threaded
-  because there is no thread-safe API to R's C \code{eval}. Nevertheless we've
-  seen the computation speed up vis-a-vis versions implemented in base R.
+  \code{frollapply} computes rolling aggregate on arbitrary R functions. \code{adaptive} argument is not supported (to be changed). The input \code{x} (first argument) to the function \code{FUN} is coerced to \emph{numeric} beforehand(to be changed) and \code{FUN} has to return a scalar \emph{numeric} value (to be changed). Checks for that are made only during the first iteration when \code{FUN} is evaluated. Edge cases can be found in examples below. Any R function is supported, but it is not optimized using our own C implementation -- hence, for example, using \code{frollapply} to compute a rolling average is inefficient. It is also always single-threaded because there is no thread-safe API to R's C \code{eval}. Nevertheless we've seen the computation speed up vis-a-vis versions implemented in base R.
 }
 \section{\code{zoo} package users notice}{
-  Users coming from most popular package for rolling functions
-  \code{zoo} might expect following differences in \code{data.table}
-  implementation.
+  Users coming from most popular package for rolling functions \code{zoo} might expect following differences in \code{data.table} implementation
   \itemize{
-    \item{ rolling function will always return result of the same length
-      as input. }
+    \item{ rolling function will always return result of the same length as input. }
     \item{ \code{fill} defaults to \code{NA}. }
-    \item{ \code{fill} accepts only constant values. It does not support
-      for \emph{na.locf} or other functions. }
+    \item{ \code{fill} accepts only constant values. No support for \emph{na.locf} or other functions. }
     \item{ \code{align} defaults to \code{"right"}. }
-    \item{ \code{na.rm} is respected, and other functions are not needed
-      when input contains \code{NA}. }
-    \item{ integers and logical are always coerced to double. }
-    \item{ when \code{adaptive=FALSE} (default), then \code{n} must be a
-      numeric vector. List is not accepted. }
-    \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of
-      length equal to \code{nrow(x)}, or list of such vectors. }
-    \item{ \code{partial} window feature is not supported, although it can
-      be accomplished by using \code{adaptive=TRUE}, see examples.
-      \code{NA} is always returned for incomplete windows. }
+    \item{ \code{na.rm} is respected, and other functions are not needed when input contains \code{NA}. }
+    \item{ integers and logical are always coerced to double (to be changed for frollapply). }
+    \item{ when \code{adaptive=FALSE} (default), then \code{n} must be a numeric vector. List is not accepted. }
+    \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of length equal to \code{nrow(x)}, or list of such vectors. }
+    \item{ \code{partial} window feature is not supported, although it can be accomplished by using \code{adaptive=TRUE}, see examples (to be changed). \code{NA} is always returned for incomplete windows. }
   }
 }
-\value{
-  A list except when the input is a \code{vector} and
-  \code{length(n)==1} in which case a \code{vector} is returned.
-}
-\note{
-  Be aware that rolling functions operates on the physical order of input.
-  If the intent is to roll values in a vector by a logical window, for
-  example an hour, or a day, one has to ensure that there are no gaps in
-  input. For details see \href{https://github.com/Rdatatable/data.table/issues/3241}{issue #3241}.
-}
 \examples{
 d = as.data.table(list(1:6/2, 3:8/4))
 # rollmean of single vector and single window
@@ -240,7 +181,7 @@ f = function(x) {             ## FUN is not type-stable
 try(frollapply(1:5, 3, f))
 }
 \seealso{
-  \code{\link{shift}}, \code{\link{data.table}}
+  \code{\link{shift}}, \code{\link{data.table}}, \code{\link{setDTthreads}}
 }
 \references{
   \href{https://en.wikipedia.org/wiki/Round-off_error}{Round-off error}

From 2a54cfd1df11d822ff47a24f3fbfeab406ca7932 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Thu, 25 Aug 2022 12:25:34 +0200
Subject: [PATCH 16/16] tests and NEWS

---
 NEWS.md               | 35 +++++++++++++++++++++++++++-
 inst/tests/froll.Rraw | 53 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 8718fc8ffd..f03bdabe78 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -296,7 +296,40 @@
 
 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error.
 
-42. New function `frollmax` has been implemented. It applies `max` over a rolling window. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). Adaptive rolling functions did not have support for `align="left"`, therefore this feature has been added as well. It works for other adaptive rolling functions now. Adaptive `frollmax` has observed to be up to 50 time faster than second fastest solution using `max` and grouping `by=.EACHI`.
+42. Multiple improvements has been added to rolling functions. Request came from @gpierard who needed left aligned, adaptive, rolling max, [#5438](https://github.com/Rdatatable/data.table/issues/5438). There was no `frollmax` function yet. Adaptive rolling functions did not have support for `align="left"`. `frollapply` did not support `adaptive=TRUE`. Available alternatives were base R `mapply` or self-join using `max` and grouping `by=.EACHI`. As a follow up of his request, following features has been added:
+- new function `frollmax`, applies `max` over a rolling window.
+- support for `align="left"` for adaptive rolling function.
+- support for `adaptive=TRUE` in `frollapply`.
+- better support for non-double data types in `frollapply`.
+- better support for `Inf` and `-Inf` support in `algo="fast"` implementation.
+- `partial` argument to trim window width to available observations rather than returning `NA` whenever window is not complete.
+
+For a comprehensive description about all available features see `?froll` manual.
+
+Adaptive `frollmax` has observed to be up to 50 times faster than second fastest solution (data.table self-join + `max` + `by=.EACHI`).
+```r
+set.seed(108)
+setDTthreads(8)
+x = data.table(
+  value = cumsum(rnorm(1e6, 0.1)),
+  end_window = 1:1e6 + sample(50:500, 1e6, TRUE),
+  row = 1:1e6
+)[, "end_window" := pmin(end_window, .N)
+  ][, "len_window" := end_window-row+1L]
+
+baser = function(x) x[, mapply(function(from, to) max(value[from:to]), row, end_window)]
+sj = function(x) x[x, max(value), on=.(row >= row, row <= end_window), by=.EACHI]$V1
+fmax = function(x) x[, frollmax(value, len_window, adaptive=TRUE, align="left", hasNA=FALSE)]
+microbenchmark::microbenchmark(
+  baser(x), sj(x), fmax(x),
+  times=10, check="identical"
+)
+#Unit: milliseconds
+#     expr        min         lq       mean     median         uq      max neval
+# baser(x) 4290.98557 4529.82841 4573.94115 4604.85827 4654.39342 4883.991    10
+#    sj(x) 3600.42771 3752.19359 4118.21755 4235.45856 4329.08728 4884.080    10
+#  fmax(x)   64.48627   73.07978   88.84932   76.64569   82.56115  198.438    10
+```
 
 ## BUG FIXES
 
diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw
index d054c04cbb..60f548fe8f 100644
--- a/inst/tests/froll.Rraw
+++ b/inst/tests/froll.Rraw
@@ -853,6 +853,59 @@ test(6000.225, frollsum(1:3, c(2,2,2), adaptive=TRUE), c(NA, 3, 5), output="frol
 test(6000.226, frollsum(c(NA,2,3), c(2,2,2), adaptive=TRUE), c(NA, NA, 5), output="re-running with extra care for NAs")
 options(datatable.verbose=FALSE)
 
+## frollmax
+options(datatable.verbose=TRUE) ## adaptive frollmax no fast algo
+test(6000.3, frollmax(1:4, c(2,2,2,2), adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1")
+test(6000.3001, frollmax(1:4, c(2,2,2,2), algo="fast", adaptive=TRUE), output="frolladaptivefun: algo 0 not implemented, fall back to 1")
+test(6000.3002, frollmax(1:4, c(2,2,2,2), algo="exact", adaptive=TRUE), notOutput="frolladaptivefun: algo 0 not implemented, fall back to 1")
+options(datatable.verbose=FALSE)
+n = c(3,2,2,4,2,1,4,8)
+x = c(7,2,3,6,3,2,6,6) # no NA
+test(6000.3111, frollmax(x, n, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # hasNA=NA # narm=F
+test(6000.3112, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # narm=T
+test(6000.3121, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # hasNA=F
+test(6000.3122, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense")
+test(6000.3131, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7)) # hasNA=T
+test(6000.3132, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,3,7,6,2,6,7))
+x = c(7,2,NA,6,3,NA,6,6) # NA
+test(6000.3211, frollmax(x, n, adaptive=TRUE), c(NA,7,NA,NA,6,NA,NA,NA))
+test(6000.3212, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7))
+test(6000.3221, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) ## expected incorrect results, see manual hasNA section for details, added in #5441
+test(6000.3222, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense")
+test(6000.3231, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,NA,NA,6,NA,NA,NA))
+test(6000.3232, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7))
+x = rep(NA_real_, 8) # all NA
+test(6000.3241, frollmax(x, n, adaptive=TRUE), rep(NA_real_, 8))
+test(6000.3242, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA, rep(-Inf, 7)))
+test(6000.3251, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA, rep(-Inf, 7)))
+test(6000.3252, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense")
+test(6000.3261, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), rep(NA_real_, 8))
+test(6000.3262, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA, rep(-Inf, 7)))
+x = c(NA,NaN,NA,NaN,NaN,NaN,NA,NA) # all NaN/NA
+test(6000.3271, frollmax(x, n, adaptive=TRUE), c(NA,NA,NA,NA,NaN,NaN,NA,NA))
+test(6000.3272, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf))
+test(6000.3281, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf)) ## expected incorrect results, see manual hasNA section for details, added in #5441
+test(6000.3282, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense")
+test(6000.3291, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,NA,NA,NA,NaN,NaN,NA,NA))
+test(6000.3292, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf))
+x = c(7,2,NA,6,3,Inf,6,6) # Inf
+test(6000.3311, frollmax(x, n, adaptive=TRUE), c(NA,7,NA,NA,6,Inf,Inf,NA))
+test(6000.3312, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,Inf,Inf,Inf))
+test(6000.3321, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,2,7,6,Inf,Inf,Inf)) ## expected incorrect results, see manual hasNA section for details, added in #5441
+test(6000.3322, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense")
+test(6000.3331, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,NA,NA,6,Inf,Inf,NA))
+test(6000.3332, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,Inf,Inf,Inf))
+x = c(7,2,-Inf,6,3,NA,6,6) # -Inf
+test(6000.3341, frollmax(x, n, adaptive=TRUE), c(NA,7,2,7,6,NA,NA,NA))
+test(6000.3342, frollmax(x, n, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7))
+test(6000.3351, frollmax(x, n, hasNA=FALSE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7)) ## expected incorrect results, see manual hasNA section for details, added in #5441
+test(6000.3352, frollmax(x, n, hasNA=FALSE, na.rm=TRUE, adaptive=TRUE), error="does not make sense")
+test(6000.3361, frollmax(x, n, hasNA=TRUE, adaptive=TRUE), c(NA,7,2,7,6,NA,NA,NA))
+test(6000.3362, frollmax(x, n, hasNA=TRUE, na.rm=TRUE, adaptive=TRUE), c(NA,7,2,7,6,-Inf,6,7))
+
+#TODO frollmax dev
+#TODO frollmax tests
+
 ## validation
 
 set.seed(108)