diff --git a/NEWS.md b/NEWS.md
index 3face7519b..524e4e69d6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -338,6 +338,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
 
 19. Ellipsis elements like `..1` are correctly excluded when searching for variables in "up-a-level" syntax inside `[`, [#5460](https://github.com/Rdatatable/data.table/issues/5460). Thanks @ggrothendieck for the report and @MichaelChirico for the fix.
 
+20. `forderv` could segfault on keys with long runs of identical bytes (e.g., many duplicate columns) because the single-group branch tail-recursed radix-by-radix until the C stack ran out, [#4300](https://github.com/Rdatatable/data.table/issues/4300). This is a major problem since sorting is extensively used in `data.table`. Thanks @quantitative-technologies for the report and @ben-schwen for the fix.
+
 ### NOTES
 
 1. The following in-progress deprecations have proceeded:
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 609977b991..18c28a8178 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21823,4 +21823,15 @@ test(2342.3, options = c(datatable.week = "sequential"), week(as.IDate(c("2019-1
 test(2342.4, options = c(datatable.week = "sequential"), week(as.IDate(c("2020-12-31","2021-01-01"))), c(53L,1L))
 test(2342.5, options = c(datatable.week = "sequential"), week(as.IDate("2021-01-06") + 0:6), c(1L,1L,2L,2L,2L,2L,2L))
 test(2342.6, options = c(datatable.week = "sequential"), week(as.IDate(c("2016-02-27","2016-02-28","2016-02-29","2016-03-01","2016-03-02"))), c(9L,9L,9L,9L,9L))
-test(2342.7, options = c(datatable.week = "default"), week(as.IDate("1970-01-07")), 2L, warning = "The default behavior of week() is changing")
\ No newline at end of file
+test(2342.7, options = c(datatable.week = "default"), week(as.IDate("1970-01-07")), 2L, warning = "The default behavior of week() is changing")
+
+# forderv should not segfault on long single group keys due to recursion #4300
+N = 1e4
+set.seed(1)
+idx = sort(sample(10, 20, TRUE))
+x = matrix(rnorm(N), nrow=10)
+DT = as.data.table(x[idx,])
+DT[, V1000 := 20:1]
+test(2343.1, forderv(DT, by=names(DT), sort=FALSE, retGrp=TRUE), forderv(DT, by=c("V1", "V1000"), sort=FALSE, retGrp=TRUE))
+x = c(rep(0, 7e5), 1e6)
+test(2343.2, forderv(list(x)), integer(0))
diff --git a/src/forder.c b/src/forder.c
index 302a14f2ce..a4564497b9 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -437,7 +437,7 @@ uint64_t dtwiddle(double x) //const void *p, int i)
   STOP(_("Unknown non-finite value; not NA, NaN, -Inf or +Inf"));  // # nocov
 }
 
-void radix_r(const int from, const int to, const int radix);
+void radix_r(const int from, const int to, int radix);
 
 /*
   OpenMP is used here to parallelize multiple operations that come together to
@@ -901,7 +901,8 @@ static bool sort_ugrp(uint8_t *x, const int n)
   return skip;
 }
 
-void radix_r(const int from, const int to, const int radix) {
+void radix_r(const int from, const int to, int radix) {
+  for (;;) {
   TBEG();
   const int my_n = to-from+1;
   if (my_n==1) {  // minor TODO: batch up the 1's instead in caller (and that's only needed when retgrp anyway)
@@ -1025,6 +1026,11 @@ void radix_r(const int from, const int to, const int radix) {
     TEND(9)
     if (radix+1==nradix || ngrp==my_n) {  // ngrp==my_n => unique groups all size 1 and we can stop recursing now
       push(my_gs, ngrp);
+    } else if (ngrp==1) {  // ngrp == 1 => all same byte at this radix; go to next radix with same splits #4300
+      // no need to push since gs stays the same
+      free(my_gs);
+      radix++;
+      continue;
     } else {
       for (int i=0, f=from; i<ngrp; i++) {
         radix_r(f, f+my_gs[i]-1, radix+1);
@@ -1128,6 +1134,11 @@ void radix_r(const int from, const int to, const int radix) {
     if (radix+1==nradix) {
       // aside: cannot be all size 1 (a saving used in my_n<=256 case above) because my_n>256 and ngrp<=256
       push(my_gs, ngrp);
+    } else if (ngrp==1) {  // ngrp == 1 => all same byte at this radix; go to next radix with same splits #4300
+      // no need to push since gs stays the same
+      free(my_gs);
+      radix++;
+      continue;
     } else {
       // this single thread will now descend and resolve all groups, now that the groups are close in cache
       for (int i=0, my_from=from; i<ngrp; i++) {
@@ -1313,6 +1324,16 @@ void radix_r(const int from, const int to, const int radix) {
     push(my_gs, ngrp);
     TEND(23)
   }
+  else if (ngrp==1) {
+    // no need to push since gs stays the same
+    free(my_gs);
+    free(counts);
+    free(starts);
+    free(ugrps);
+    free(ngrps);
+    radix++;
+    continue;
+  }
   else {
     // TODO: explicitly repeat parallel batch for any skew bins
     bool anyBig = false;
@@ -1364,7 +1385,8 @@ void radix_r(const int from, const int to, const int radix) {
   free(ugrps);
   free(ngrps);
   TEND(26)
-}
+  return;
+}}
 
 
 SEXP issorted(SEXP x, SEXP by)