From 23c7b3b15c6f1da1f9250d9555713903afa57e82 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 29 Nov 2025 12:25:31 +0300 Subject: [PATCH 01/17] Use the experimental resizable vectors API Thanks to Luke Tierney for introducing the API and helping with the migration. --- inst/tests/tests.Rraw | 3 ++- src/assign.c | 30 +++++++++++++++--------------- src/dogroups.c | 7 ++++--- src/freadR.c | 21 ++++++++++----------- src/frollapply.c | 17 ++--------------- src/subset.c | 10 ++++------ 6 files changed, 37 insertions(+), 51 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2f7e47b456..c71aee2ff8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19453,7 +19453,8 @@ test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in df = data.frame(a=1:3) setDT(df) attr(df, "att") = 1 -test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned") +##test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned") +test(2291.1, set(df, NULL, "new", "new"), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first") # ns-qualified bysub error, #6493 DT = data.table(a = 1) diff --git a/src/assign.c b/src/assign.c index eee00cc0f5..c6d9c86a4e 100644 --- a/src/assign.c +++ b/src/assign.c @@ -2,7 +2,6 @@ static void finalizer(SEXP p) { - SEXP x; R_len_t n, l, tl; if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov p = R_ExternalPtrTag(p); @@ -16,9 +15,12 @@ static void finalizer(SEXP p) // already, so nothing to do (but almost never the case). return; } +#ifdef DODO + SEXP x; x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector - // INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015) + // INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Millar, Jul 2015) SETLENGTH(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated). +#endif UNPROTECT(1); return; } @@ -69,7 +71,7 @@ happens too late after GC has already released the memory, and ii) copies by bas [<- in write.table just before test 894) allocate at length LENGTH but copy the TRUELENGTH over. If the finalizer sets LENGTH to TRUELENGTH, that's a fail as it wasn't really allocated at TRUELENGTH when R did the copy. -Karl Miller suggested an ENVSXP so that restoring LENGTH in finalizer should work. This is the +Karl Millar suggested an ENVSXP so that restoring LENGTH in finalizer should work. This is the closest I got to getting it to pass all tests : SEXP env = PROTECT(allocSExp(ENVSXP)); @@ -151,7 +153,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) int protecti=0; - SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? + SEXP newdt = PROTECT(R_allocResizableVector(VECSXP, n)); protecti++; // to do, use growVector here? SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It @@ -169,7 +171,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) setAttrib(newdt, sym_sorted, duplicate(sorted)); SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; - SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++; + SEXP newnames = PROTECT(R_allocResizableVector(STRSXP, n)); protecti++; const int l = isNull(cols) ? LENGTH(dt) : length(cols); if (isNull(cols)) { for (int i=0; i 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64); const bool nrowChanged = (allocNrow != dtnrows); if (typeChanged || nrowChanged) { - SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) : growVector(col, allocNrow); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 - + SEXP thiscol = typeChanged ? R_allocResizableVector(typeSxp[type[i]], allocNrow) : growVector(col, allocNrow); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 + SET_VECTOR_ELT(DT, resi, thiscol); if (type[i] == CT_INT64) { SEXP tt = PROTECT(ScalarString(char_integer64)); @@ -534,7 +535,6 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size setAttrib(thiscol, sym_tzone, ScalarString(char_UTC)); // see news for v1.13.0 } - SET_TRUELENGTH(thiscol, allocNrow); DTbytes += RTYPE_SIZEOF(thiscol) * allocNrow; } resi++; @@ -551,9 +551,7 @@ void setFinalNrow(size_t nrow) return; const int ncol = LENGTH(DT); for (int i = 0; i < ncol; i++) { - SETLENGTH(VECTOR_ELT(DT, i), nrow); - SET_TRUELENGTH(VECTOR_ELT(DT, i), dtnrows); - SET_GROWABLE_BIT(VECTOR_ELT(DT, i)); // #3292 + R_resizeVector(VECTOR_ELT(DT, i), nrow); } } R_FlushConsole(); // # 2481. Just a convenient place; nothing per se to do with setFinalNrow() @@ -567,8 +565,9 @@ void dropFilledCols(int* dropArg, int ndelete) SET_VECTOR_ELT(DT, dropFill[i], R_NilValue); SET_STRING_ELT(colNamesSxp, dropFill[i], NA_STRING); } - SETLENGTH(DT, ndt - ndelete); - SETLENGTH(colNamesSxp, ndt - ndelete); + R_resizeVector(DT, ndt - ndelete); + R_resizeVector(colNamesSxp, ndt - ndelete); + setAttrib(DT, R_NamesSymbol, colNamesSxp); // reinstall after resize } void pushBuffer(ThreadLocalFreadParsingContext *ctx) diff --git a/src/frollapply.c b/src/frollapply.c index 1e04059c9b..33eece0d2d 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -50,7 +50,7 @@ SEXP memcpyVectoradaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { const size_t oi = INTEGER_RO(offset)[0]; const int nrow = INTEGER_RO(size)[oi - 1]; const size_t o = oi - nrow; // oi should always be bigger than nrow because we filter out incomplete window using ansMask - SETLENGTH(dest, nrow); // must be before memcpy_sexp because attempt to set index 1/1 in SET_STRING_ELT test 6010.150 + R_resizeVector(dest, nrow); // must be before memcpy_sexp because attempt to set index 1/1 in SET_STRING_ELT test 6010.150 if (nrow) { // support k[i]==0 memcpy_sexp(dest, o, src, nrow); } @@ -65,7 +65,7 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { const int ncol = LENGTH(dest); for (int i = 0; i < ncol; i++) { SEXP d = VECTOR_ELT(dest, i); - SETLENGTH(d, nrow); + R_resizeVector(d, nrow); if (nrow) { // support k[i]==0 memcpy_sexp(d, o, VECTOR_ELT(src, i), nrow); } @@ -76,18 +76,5 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // needed in adaptive=TRUE SEXP setgrowable(SEXP x) { - if (!isNewList(x)) { - SET_GROWABLE_BIT(x); - SET_TRUELENGTH(x, LENGTH(x)); // important because gc() uses TRUELENGTH to keep counts - } else { - // # nocov start ## does not seem to be reported to codecov most likely due to running in a fork, I manually debugged that it is being called when running froll.Rraw - for (int i = 0; i < LENGTH(x); i++) { - //Rprintf("%d",3); // manual code coverage to confirm it is reached when marking nocov - SEXP col = VECTOR_ELT(x, i); - SET_GROWABLE_BIT(col); - SET_TRUELENGTH(col, LENGTH(col)); - } - // # nocov end - } return x; } diff --git a/src/subset.c b/src/subset.c index 1dd9f5f174..74adcef81a 100644 --- a/src/subset.c +++ b/src/subset.c @@ -297,7 +297,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md } int overAlloc = checkOverAlloc(GetOption1(install("datatable.alloccol"))); - SEXP ans = PROTECT(allocVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. + SEXP ans = PROTECT(R_allocResizableVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. // user-defined and superclass attributes get copied as from v1.12.0 copyMostAttrib(x, ans); @@ -305,8 +305,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md // includes row.names (oddly, given other dims aren't) and "sorted" dealt with below // class is also copied here which retains superclass name in class vector as has been the case for many years; e.g. tests 1228.* for #64 - SET_TRUELENGTH(ans, LENGTH(ans)); - SETLENGTH(ans, LENGTH(cols)); + R_resizeVector(ans, LENGTH(cols)); int ansn; if (isNull(rows)) { ansn = nrow; @@ -329,9 +328,8 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md subsetVectorRaw(target, source, rows, anyNA); // parallel within column } } - SEXP tmp = PROTECT(allocVector(STRSXP, LENGTH(cols)+overAlloc)); nprotect++; - SET_TRUELENGTH(tmp, LENGTH(tmp)); - SETLENGTH(tmp, LENGTH(cols)); + SEXP tmp = PROTECT(R_allocResizableVector(STRSXP, LENGTH(cols)+overAlloc)); nprotect++; + R_resizeVector(tmp, LENGTH(cols)); setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, getAttrib(x, R_NamesSymbol), cols, /*anyNA=*/false); From c3e8f0c46f76dacb7f9da60c7ea2d5523fd64d88 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 22 Nov 2025 15:19:39 +0300 Subject: [PATCH 02/17] Backport the resizable API Make sure to set the GROWABLE_BIT on the resizable vectors to avoid problems when they are duplicated or garbage-collected. --- src/data.table.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/data.table.h b/src/data.table.h index a299f91f85..88ffc5440c 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -87,6 +87,25 @@ # endif #endif +// TODO(R>=4.6.0): remove the SVN revision check +#if R_VERSION < R_Version(4, 6, 0) || R_SVN_REVISION < 89077 + static inline SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen) { + SEXP ret = allocVector(type, maxlen); + SET_TRUELENGTH(ret, maxlen); + SET_GROWABLE_BIT(ret); + return ret; + } +# define R_allocResizableVector(type, maxlen) R_allocResizableVector_(type, maxlen) + static inline SEXP R_duplicateAsResizable_(SEXP x) { + SEXP ret = duplicate(x); + SET_TRUELENGTH(ret, xlength(ret)); + SET_GROWABLE_BIT(ret); + return ret; + } +# define R_duplicateAsResizable(x) R_duplicateAsResizable_(x) +# define R_resizeVector(x, newlen) SETLENGTH(x, newlen) +#endif + // init.c extern SEXP char_integer64; extern SEXP char_ITime; From 04437b67745582035f99e6137531c5e96a235a80 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 22 Nov 2025 10:26:56 +0300 Subject: [PATCH 03/17] test 2291.1: misleading TRUELENGTH now impossible Now that data.table objects have the GROWABLE_BIT set, R will reset TRUELENGTH when duplicating them, causing our code to take a different branch. --- inst/tests/tests.Rraw | 1 - 1 file changed, 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c71aee2ff8..158d0988e1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19453,7 +19453,6 @@ test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in df = data.frame(a=1:3) setDT(df) attr(df, "att") = 1 -##test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned") test(2291.1, set(df, NULL, "new", "new"), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first") # ns-qualified bysub error, #6493 From 2fa772822a39fbfb95a0c7bfda549a97c1c768d7 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 22 Nov 2025 10:18:41 +0300 Subject: [PATCH 04/17] Drop the finalizer Now that (1) we depend on R >= 3.4 and (2) data.table objects have the GROWABLE_BIT set, there is no need to adjust allocated memory counts by hand. --- src/assign.c | 61 ++-------------------------------------------------- 1 file changed, 2 insertions(+), 59 deletions(-) diff --git a/src/assign.c b/src/assign.c index c6d9c86a4e..3b0f865549 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,30 +1,5 @@ #include "data.table.h" -static void finalizer(SEXP p) -{ - R_len_t n, l, tl; - if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov - p = R_ExternalPtrTag(p); - if (!isString(p)) internal_error(__func__, "ExternalPtr doesn't see names in tag"); // # nocov - l = LENGTH(p); - tl = TRUELENGTH(p); - if (l<0 || tl Date: Sat, 22 Nov 2025 14:14:15 +0300 Subject: [PATCH 05/17] frollapply(adaptive=TRUE): resizable temporaries Since adaptive application of rolling functions requires us to resize the argument to match the window size, make sure to allocate it as such. --- R/frollapply.R | 28 ++++++---------------------- src/data.table.h | 2 +- src/frollapply.c | 12 ++++++++++-- src/init.c | 2 +- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/R/frollapply.R b/R/frollapply.R index ee9d785bcd..f20863c7ab 100644 --- a/R/frollapply.R +++ b/R/frollapply.R @@ -297,38 +297,22 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," tight = function(i, dest, src, n) FUN(.Call(CmemcpyDT, dest, src, i, n), ...) } } else { - #has.growable = base::getRversion() >= "3.4.0" - ## this is now always TRUE - ## we keep this branch, it may be useful when getting rid of SET_GROWABLE_BIT and SETLENGTH #6180 - has.growable = TRUE - cpy = if (has.growable) function(x) .Call(Csetgrowable, copy(x)) else copy + cpy = function(x) .Call(CcopyAsGrowable, x, by.column) ansMask = function(len, n) { mask = seq_len(len) >= n mask[is.na(mask)] = FALSE ## test 6010.206 mask } if (by.column) { - allocWindow = function(x, n) x[seq_len(max(n, na.rm=TRUE))] - if (has.growable) { - tight = function(i, dest, src, n) FUN(.Call(CmemcpyVectoradaptive, dest, src, i, n), ...) # CmemcpyVectoradaptive handles k[i]==0 - } else { - tight = function(i, dest, src, n) {stopf("internal error: has.growable should be TRUE, implement support for n==0"); FUN(src[(i-n[i]+1L):i], ...)} # nocov - } + allocWindow = function(x, n) cpy(x[seq_len(max(n, na.rm=TRUE))]) + tight = function(i, dest, src, n) FUN(.Call(CmemcpyVectoradaptive, dest, src, i, n), ...) # CmemcpyVectoradaptive handles k[i]==0 } else { if (!list.df) { - allocWindow = function(x, n) x[seq_len(max(n, na.rm=TRUE)), , drop=FALSE] - } else { - allocWindow = function(x, n) lapply(x, `[`, seq_len(max(n))) - } - if (has.growable) { - tight = function(i, dest, src, n) FUN(.Call(CmemcpyDTadaptive, dest, src, i, n), ...) # CmemcpyDTadaptive handles k[i]==0 + allocWindow = function(x, n) cpy(x[seq_len(max(n, na.rm=TRUE)), , drop=FALSE]) } else { - if (!list.df) { # nocov - tight = function(i, dest, src, n) {stopf("internal error: has.growable should be TRUE, implement support for n==0"); FUN(src[(i-n[i]+1L):i, , drop=FALSE], ...)} # nocov - } else { - tight = function(i, dest, src, n) {stopf("internal error: has.growable should be TRUE, implement support for n==0"); FUN(lapply(src, `[`, (i-n[i]+1L):i), ...)} # nocov - } + allocWindow = function(x, n) cpy(lapply(x, `[`, seq_len(max(n)))) } + tight = function(i, dest, src, n) FUN(.Call(CmemcpyDTadaptive, dest, src, i, n), ...) # CmemcpyDTadaptive handles k[i]==0 } } ## prepare templates for errors and warnings diff --git a/src/data.table.h b/src/data.table.h index 88ffc5440c..2dc8d81343 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -301,7 +301,7 @@ SEXP memcpyVector(SEXP dest, SEXP src, SEXP offset, SEXP size); SEXP memcpyDT(SEXP dest, SEXP src, SEXP offset, SEXP size); SEXP memcpyVectoradaptive(SEXP dest, SEXP src, SEXP offset, SEXP size); SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size); -SEXP setgrowable(SEXP x); +SEXP copyAsGrowable(SEXP x, SEXP by_column); // nafill.c void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose); diff --git a/src/frollapply.c b/src/frollapply.c index 33eece0d2d..a31990f41a 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -75,6 +75,14 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // # nocov end // needed in adaptive=TRUE -SEXP setgrowable(SEXP x) { - return x; +SEXP copyAsGrowable(SEXP x, SEXP by_column) { + if (LOGICAL_RO(by_column)[0]) + return R_duplicateAsResizable(x); + + SEXP ret = PROTECT(shallow_duplicate(x)); + R_xlen_t n = xlength(ret); + for (R_xlen_t i = 0; i < n; ++i) + SET_VECTOR_ELT(ret, i, R_duplicateAsResizable(VECTOR_ELT(ret, i))); + UNPROTECT(1); + return ret; } diff --git a/src/init.c b/src/init.c index ef81a7a0e0..6ec8afc54a 100644 --- a/src/init.c +++ b/src/init.c @@ -158,7 +158,7 @@ R_CallMethodDef callMethods[] = { {"CmemcpyDT", (DL_FUNC)&memcpyDT, -1}, {"CmemcpyVectoradaptive", (DL_FUNC)&memcpyVectoradaptive, -1}, {"CmemcpyDTadaptive", (DL_FUNC)&memcpyDTadaptive, -1}, -{"Csetgrowable", (DL_FUNC)&setgrowable, -1}, +{"CcopyAsGrowable", (DL_FUNC)©AsGrowable, -1}, {"Cfrolladapt", (DL_FUNC)&frolladapt, -1}, {NULL, NULL, 0} }; From be921b1247b7c9adcfab896a3b164984e12d922b Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 4 Dec 2025 17:17:49 +0300 Subject: [PATCH 06/17] Drop remaining uses of TRUELENGTH from assign.c - Don't SET_TRUELENGTH by hand. All of our resizable vectors now have the GROWABLE_BIT set, so when they are duplicated, TRUELENGTH is reset to 0. - Use a combination of R_isResizable and R_maxLength to replace other uses of TRUELENGTH. --- src/assign.c | 20 +++++++------------- src/data.table.h | 2 ++ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/assign.c b/src/assign.c index 3b0f865549..3a7c34da25 100644 --- a/src/assign.c +++ b/src/assign.c @@ -71,15 +71,9 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) { tag = R_ExternalPtrTag(v); if (!(isNull(tag) || isString(tag))) internal_error(__func__, ".internal.selfref tag is neither NULL nor a character vector"); // # nocov names = getAttrib(x, R_NamesSymbol); - if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734 - SET_TRUELENGTH(names, LENGTH(names)); - // R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated - // because R copies the original vector's tl over despite allocating length. prot = R_ExternalPtrProtected(v); if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")). return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r - if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x)) - SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated return checkNames ? names==tag : x==R_ExternalPtrAddr(prot); } @@ -201,7 +195,7 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose) // if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl) // internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov - tl = TRUELENGTH(dt); + tl = R_maxLength(dt); // R <= 2.13.2 and we didn't catch uninitialized tl somehow if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov if (tl>0 && tloldncol+10000L) warning(_("truelength (%d) is greater than 10,000 items over-allocated (length = %d). See ?truelength. If you didn't set the datatable.alloccol option very large, please report to data.table issue tracker including the result of sessionInfo()."),oldtncol, oldncol); @@ -463,9 +457,9 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) error(_("It appears that at some earlier point, names of this data.table have been reassigned. Please ensure to use setnames() rather than names<- or colnames<-. Otherwise, please report to data.table issue tracker.")); // # nocov // Can growVector at this point easily enough, but it shouldn't happen in first place so leave it as // strong error message for now. - else if (TRUELENGTH(names) != oldtncol) + else if (R_maxLength(names) != oldtncol) // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768, PRId64 didn't work - internal_error(__func__, "selfrefnames is ok but tl names [%lld] != tl [%d]", (long long)TRUELENGTH(names), oldtncol); // # nocov + internal_error(__func__, "selfrefnames is ok but maxLength(names) [%lld] != maxLength(dt) [%d]", (long long)R_maxLength(names), oldtncol); // # nocov if (!selfrefok(dt, verbose)) // #6410 setDT(dt) and subsequent attr<- can lead to invalid selfref error(_("It appears that at some earlier point, attributes of this data.table have been reassigned. Please use setattr(DT, name, value) rather than attr(DT, name) <- value. If that doesn't apply to you, please report your case to the data.table issue tracker.")); R_resizeVector(dt, oldncol+LENGTH(newcolnames)); diff --git a/src/data.table.h b/src/data.table.h index 2dc8d81343..ad6d343a3a 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -104,6 +104,8 @@ } # define R_duplicateAsResizable(x) R_duplicateAsResizable_(x) # define R_resizeVector(x, newlen) SETLENGTH(x, newlen) +# define R_maxLength(x) TRUELENGTH(x) +# define R_isResizable(x) ((bool)(IS_GROWABLE(x))) #endif // init.c From c1c4f769b13d83e529c64db07ca8adb82b861a40 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 4 Dec 2025 17:20:02 +0300 Subject: [PATCH 07/17] Drop test for TRUELENGTH from init.c --- src/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/init.c b/src/init.c index 6ec8afc54a..ecdc64e39d 100644 --- a/src/init.c +++ b/src/init.c @@ -212,8 +212,6 @@ void attribute_visible R_init_data_table(DllInfo *info) SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error(_("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s"), LENGTH(tmp), msg); - // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768 - if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)TRUELENGTH(tmp), msg); UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. From 87315a4e31eaaeb3493b8cba549d87bda22a683d Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 6 Dec 2025 21:29:36 +0300 Subject: [PATCH 08/17] Better backport of R_isResizable() --- src/data.table.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/data.table.h b/src/data.table.h index ad6d343a3a..54dd9fce83 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -105,7 +105,11 @@ # define R_duplicateAsResizable(x) R_duplicateAsResizable_(x) # define R_resizeVector(x, newlen) SETLENGTH(x, newlen) # define R_maxLength(x) TRUELENGTH(x) -# define R_isResizable(x) ((bool)(IS_GROWABLE(x))) +# define R_isResizable(x) R_isResizable_(x) + static inline bool R_isResizable_(SEXP x) { + // IS_GROWABLE also checks for TRUELENGTH < XLENGTH + return (LEVELS(x) & 0x20) && TRUELENGTH(x); + } #endif // init.c From 2dd68e8b08ea110537c4bcbb19c823f75c5ef582 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 6 Dec 2025 21:34:05 +0300 Subject: [PATCH 09/17] Placate rchk --- src/subset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/subset.c b/src/subset.c index 74adcef81a..15ccee0de4 100644 --- a/src/subset.c +++ b/src/subset.c @@ -343,7 +343,8 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md // but maintain key if ordered subset SEXP key = getAttrib(x, sym_sorted); if (length(key)) { - SEXP in = PROTECT(chin(key, getAttrib(ans,R_NamesSymbol))); nprotect++; + SEXP innames = PROTECT(getAttrib(ans,R_NamesSymbol)); nprotect++; + SEXP in = PROTECT(chin(key, innames)); nprotect++; int i = 0; while(i Date: Mon, 8 Dec 2025 16:46:57 +0300 Subject: [PATCH 10/17] copyAsGrowable: don't crash on 0-len argument --- src/frollapply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frollapply.c b/src/frollapply.c index a31990f41a..69615f9ce3 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -76,7 +76,7 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // needed in adaptive=TRUE SEXP copyAsGrowable(SEXP x, SEXP by_column) { - if (LOGICAL_RO(by_column)[0]) + if (asLogical(by_column)) return R_duplicateAsResizable(x); SEXP ret = PROTECT(shallow_duplicate(x)); From 40b92d1aa1deb796c4b6bf80098579951bc0c050 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 8 Dec 2025 17:12:35 +0300 Subject: [PATCH 11/17] Move the resizable allocation functions to utils.c --- src/data.table.h | 14 ++------------ src/utils.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 54dd9fce83..34a2717b34 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -89,19 +89,7 @@ // TODO(R>=4.6.0): remove the SVN revision check #if R_VERSION < R_Version(4, 6, 0) || R_SVN_REVISION < 89077 - static inline SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen) { - SEXP ret = allocVector(type, maxlen); - SET_TRUELENGTH(ret, maxlen); - SET_GROWABLE_BIT(ret); - return ret; - } # define R_allocResizableVector(type, maxlen) R_allocResizableVector_(type, maxlen) - static inline SEXP R_duplicateAsResizable_(SEXP x) { - SEXP ret = duplicate(x); - SET_TRUELENGTH(ret, xlength(ret)); - SET_GROWABLE_BIT(ret); - return ret; - } # define R_duplicateAsResizable(x) R_duplicateAsResizable_(x) # define R_resizeVector(x, newlen) SETLENGTH(x, newlen) # define R_maxLength(x) TRUELENGTH(x) @@ -347,6 +335,8 @@ bool perhapsDataTable(SEXP x); SEXP perhapsDataTableR(SEXP x); SEXP frev(SEXP x, SEXP copyArg); NORET void internal_error(const char *call_name, const char *format, ...); +SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen); +SEXP R_duplicateAsResizable_(SEXP x); // types.c char *end(char *start); diff --git a/src/utils.c b/src/utils.c index b159fa0e60..745495dc52 100644 --- a/src/utils.c +++ b/src/utils.c @@ -659,3 +659,17 @@ void internal_error(const char *call_name, const char *format, ...) { error("%s %s: %s. %s", _("Internal error in"), call_name, buff, _("Please report to the data.table issues tracker.")); } + +SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen) { + SEXP ret = allocVector(type, maxlen); + SET_TRUELENGTH(ret, maxlen); + SET_GROWABLE_BIT(ret); + return ret; +} + +SEXP R_duplicateAsResizable_(SEXP x) { + SEXP ret = duplicate(x); + SET_TRUELENGTH(ret, xlength(ret)); + SET_GROWABLE_BIT(ret); + return ret; +} From af91fe934d75e1cc5cf4d53d0068052053fa16a9 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 8 Dec 2025 17:18:40 +0300 Subject: [PATCH 12/17] duplicateAsResizable: refuse ALTREP objects --- src/utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils.c b/src/utils.c index 745495dc52..168cba5d37 100644 --- a/src/utils.c +++ b/src/utils.c @@ -668,6 +668,7 @@ SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen) { } SEXP R_duplicateAsResizable_(SEXP x) { + if (ALTREP(x)) internal_error(__func__, "Cannot duplicate an ALTREP object as resizable"); // # nocov SEXP ret = duplicate(x); SET_TRUELENGTH(ret, xlength(ret)); SET_GROWABLE_BIT(ret); From 5caf9cc80bc805dae3f2baefbb34978ab58c7949 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 8 Dec 2025 17:23:33 +0300 Subject: [PATCH 13/17] maxLength: return xlength if non-resizable That's what the function does in R-devel. --- src/data.table.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 34a2717b34..6850f9aa05 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -92,10 +92,13 @@ # define R_allocResizableVector(type, maxlen) R_allocResizableVector_(type, maxlen) # define R_duplicateAsResizable(x) R_duplicateAsResizable_(x) # define R_resizeVector(x, newlen) SETLENGTH(x, newlen) -# define R_maxLength(x) TRUELENGTH(x) +# define R_maxLength(x) R_maxLength_(x) + static inline R_xlen_t R_maxLength_(SEXP x) { + return IS_GROWABLE(x) ? TRUELENGTH(x) : XLENGTH(x); + } # define R_isResizable(x) R_isResizable_(x) static inline bool R_isResizable_(SEXP x) { - // IS_GROWABLE also checks for TRUELENGTH < XLENGTH + // IS_GROWABLE also checks for XLENGTH < TRUELENGTH return (LEVELS(x) & 0x20) && TRUELENGTH(x); } #endif From 14c114809e9bae5c8a7fcdf3cf84c224b4c0ebfd Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 9 Dec 2025 23:23:41 +0100 Subject: [PATCH 14/17] adjust to previous code --- R/frollapply.R | 2 +- src/data.table.h | 2 +- src/frollapply.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/frollapply.R b/R/frollapply.R index f20863c7ab..f8c5924e9c 100644 --- a/R/frollapply.R +++ b/R/frollapply.R @@ -297,7 +297,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right"," tight = function(i, dest, src, n) FUN(.Call(CmemcpyDT, dest, src, i, n), ...) } } else { - cpy = function(x) .Call(CcopyAsGrowable, x, by.column) + cpy = function(x) .Call(CcopyAsGrowable, x) ansMask = function(len, n) { mask = seq_len(len) >= n mask[is.na(mask)] = FALSE ## test 6010.206 diff --git a/src/data.table.h b/src/data.table.h index 6850f9aa05..8c9bf7bbff 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -298,7 +298,7 @@ SEXP memcpyVector(SEXP dest, SEXP src, SEXP offset, SEXP size); SEXP memcpyDT(SEXP dest, SEXP src, SEXP offset, SEXP size); SEXP memcpyVectoradaptive(SEXP dest, SEXP src, SEXP offset, SEXP size); SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size); -SEXP copyAsGrowable(SEXP x, SEXP by_column); +SEXP copyAsGrowable(SEXP x); // nafill.c void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, bool nan_is_na, ans_t *ans, bool verbose); diff --git a/src/frollapply.c b/src/frollapply.c index 69615f9ce3..b423c7a491 100644 --- a/src/frollapply.c +++ b/src/frollapply.c @@ -75,14 +75,15 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) { // # nocov end // needed in adaptive=TRUE -SEXP copyAsGrowable(SEXP x, SEXP by_column) { - if (asLogical(by_column)) +SEXP copyAsGrowable(SEXP x) { + if (!isNewList(x)) return R_duplicateAsResizable(x); SEXP ret = PROTECT(shallow_duplicate(x)); R_xlen_t n = xlength(ret); for (R_xlen_t i = 0; i < n; ++i) SET_VECTOR_ELT(ret, i, R_duplicateAsResizable(VECTOR_ELT(ret, i))); + UNPROTECT(1); return ret; } From 3b9a5727d1e95e1fd0f082dceb260d5d0b2fdb3d Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 10 Dec 2025 14:09:21 +0300 Subject: [PATCH 15/17] Safety checks in R_resizeVector() backport --- src/data.table.h | 10 +++++++--- src/utils.c | 10 ++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 8c9bf7bbff..ebfa1232c6 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -89,18 +89,19 @@ // TODO(R>=4.6.0): remove the SVN revision check #if R_VERSION < R_Version(4, 6, 0) || R_SVN_REVISION < 89077 +# define BACKPORT_RESIZABLE_API # define R_allocResizableVector(type, maxlen) R_allocResizableVector_(type, maxlen) # define R_duplicateAsResizable(x) R_duplicateAsResizable_(x) -# define R_resizeVector(x, newlen) SETLENGTH(x, newlen) # define R_maxLength(x) R_maxLength_(x) static inline R_xlen_t R_maxLength_(SEXP x) { return IS_GROWABLE(x) ? TRUELENGTH(x) : XLENGTH(x); } # define R_isResizable(x) R_isResizable_(x) static inline bool R_isResizable_(SEXP x) { - // IS_GROWABLE also checks for XLENGTH < TRUELENGTH - return (LEVELS(x) & 0x20) && TRUELENGTH(x); + // IS_GROWABLE checks for XLENGTH < TRUELENGTH instead + return (LEVELS(x) & 0x20) && XLENGTH(x) <= TRUELENGTH(x); } +# define R_resizeVector(x, newlen) R_resizeVector_(x, newlen) #endif // init.c @@ -338,8 +339,11 @@ bool perhapsDataTable(SEXP x); SEXP perhapsDataTableR(SEXP x); SEXP frev(SEXP x, SEXP copyArg); NORET void internal_error(const char *call_name, const char *format, ...); +#ifdef BACKPORT_RESIZABLE_API SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen); SEXP R_duplicateAsResizable_(SEXP x); +void R_resizeVector_(SEXP x, R_xlen_t newlen); +#endif // types.c char *end(char *start); diff --git a/src/utils.c b/src/utils.c index 168cba5d37..815fcca65c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -660,6 +660,7 @@ void internal_error(const char *call_name, const char *format, ...) { error("%s %s: %s. %s", _("Internal error in"), call_name, buff, _("Please report to the data.table issues tracker.")); } +#ifdef BACKPORT_RESIZABLE_API SEXP R_allocResizableVector_(SEXPTYPE type, R_xlen_t maxlen) { SEXP ret = allocVector(type, maxlen); SET_TRUELENGTH(ret, maxlen); @@ -674,3 +675,12 @@ SEXP R_duplicateAsResizable_(SEXP x) { SET_GROWABLE_BIT(ret); return ret; } + +void R_resizeVector_(SEXP x, R_xlen_t newlen) { + if (!R_isResizable(x)) + internal_error(__func__, "attempt to resize a non-resizable vector"); + if (newlen > XTRUELENGTH(x)) + internal_error(__func__, "newlen=%g exceeds maxlen=%g", (double)newlen, (double)R_maxLength(x)); + SETLENGTH(x, newlen); +} +#endif From 9d3eee1042b51aad103030bef45d12ce17e0bae6 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 11 Dec 2025 17:49:23 +0300 Subject: [PATCH 16/17] fix comment --- src/subset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/subset.c b/src/subset.c index 15ccee0de4..b51a33f597 100644 --- a/src/subset.c +++ b/src/subset.c @@ -297,7 +297,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md } int overAlloc = checkOverAlloc(GetOption1(install("datatable.alloccol"))); - SEXP ans = PROTECT(R_allocResizableVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. + SEXP ans = PROTECT(R_allocResizableVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here // user-defined and superclass attributes get copied as from v1.12.0 copyMostAttrib(x, ans); From 1e351d3aa0dff7f8f7c614c288e963456940c79f Mon Sep 17 00:00:00 2001 From: Ivan K Date: Fri, 12 Dec 2025 12:23:39 +0300 Subject: [PATCH 17/17] Mark internal errors as # nocov --- src/utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils.c b/src/utils.c index 815fcca65c..a99e2cee35 100644 --- a/src/utils.c +++ b/src/utils.c @@ -678,9 +678,9 @@ SEXP R_duplicateAsResizable_(SEXP x) { void R_resizeVector_(SEXP x, R_xlen_t newlen) { if (!R_isResizable(x)) - internal_error(__func__, "attempt to resize a non-resizable vector"); + internal_error(__func__, "attempt to resize a non-resizable vector"); // # nocov if (newlen > XTRUELENGTH(x)) - internal_error(__func__, "newlen=%g exceeds maxlen=%g", (double)newlen, (double)R_maxLength(x)); + internal_error(__func__, "newlen=%g exceeds maxlen=%g", (double)newlen, (double)R_maxLength(x)); // # nocov SETLENGTH(x, newlen); } #endif