From 78271bcc7b891a138f04d1e33bcb514028f18f93 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 14:33:24 +0100 Subject: [PATCH 01/12] Remove blocksoutputbuffer --- Modules/zlibmodule.c | 417 ++++++++++++------------------------------- 1 file changed, 117 insertions(+), 300 deletions(-) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 1cdfd01320288b..8ed4ad0f327989 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -14,160 +14,6 @@ #error "At least zlib version 1.2.2.1 is required" #endif -// Blocks output buffer wrappers -#include "pycore_blocks_output_buffer.h" - -#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX - #error "The maximum block size accepted by zlib is UINT32_MAX." -#endif - -/* On success, return value >= 0 - On failure, return -1 */ -static inline Py_ssize_t -OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length, - Bytef **next_out, uint32_t *avail_out) -{ - Py_ssize_t allocated; - - allocated = _BlocksOutputBuffer_InitAndGrow( - buffer, max_length, (void**) next_out); - *avail_out = (uint32_t) allocated; - return allocated; -} - -/* On success, return value >= 0 - On failure, return -1 */ -static inline Py_ssize_t -OutputBuffer_Grow(_BlocksOutputBuffer *buffer, - Bytef **next_out, uint32_t *avail_out) -{ - Py_ssize_t allocated; - - allocated = _BlocksOutputBuffer_Grow( - buffer, (void**) next_out, (Py_ssize_t) *avail_out); - *avail_out = (uint32_t) allocated; - return allocated; -} - -static inline Py_ssize_t -OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out) -{ - return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out); -} - -static inline PyObject * -OutputBuffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out) -{ - return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out); -} - -static inline void -OutputBuffer_OnError(_BlocksOutputBuffer *buffer) -{ - _BlocksOutputBuffer_OnError(buffer); -} - -/* The max buffer size accepted by zlib is UINT32_MAX, the initial buffer size - `init_size` may > it in 64-bit build. These wrapper functions maintain an - UINT32_MAX sliding window for the first block: - 1. OutputBuffer_WindowInitWithSize() - 2. OutputBuffer_WindowGrow() - 3. OutputBuffer_WindowFinish() - 4. OutputBuffer_WindowOnError() - - ==== is the sliding window: - 1. ====------ - ^ next_posi, left_bytes is 6 - 2. ----====-- - ^ next_posi, left_bytes is 2 - 3. --------== - ^ next_posi, left_bytes is 0 */ -typedef struct { - Py_ssize_t left_bytes; - Bytef *next_posi; -} _Uint32Window; - -/* Initialize the buffer with an initial buffer size. - - On success, return value >= 0 - On failure, return value < 0 */ -static inline Py_ssize_t -OutputBuffer_WindowInitWithSize(_BlocksOutputBuffer *buffer, _Uint32Window *window, - Py_ssize_t init_size, - Bytef **next_out, uint32_t *avail_out) -{ - Py_ssize_t allocated = _BlocksOutputBuffer_InitWithSize( - buffer, init_size, (void**) next_out); - - if (allocated >= 0) { - // the UINT32_MAX sliding window - Py_ssize_t window_size = Py_MIN((size_t)allocated, UINT32_MAX); - *avail_out = (uint32_t) window_size; - - window->left_bytes = allocated - window_size; - window->next_posi = *next_out + window_size; - } - return allocated; -} - -/* Grow the buffer. - - On success, return value >= 0 - On failure, return value < 0 */ -static inline Py_ssize_t -OutputBuffer_WindowGrow(_BlocksOutputBuffer *buffer, _Uint32Window *window, - Bytef **next_out, uint32_t *avail_out) -{ - Py_ssize_t allocated; - - /* ensure no gaps in the data. - if inlined, this check could be optimized away.*/ - if (*avail_out != 0) { - PyErr_SetString(PyExc_SystemError, - "*avail_out != 0 in OutputBuffer_WindowGrow()."); - return -1; - } - - // slide the UINT32_MAX sliding window - if (window->left_bytes > 0) { - Py_ssize_t window_size = Py_MIN((size_t)window->left_bytes, UINT32_MAX); - - *next_out = window->next_posi; - *avail_out = (uint32_t) window_size; - - window->left_bytes -= window_size; - window->next_posi += window_size; - - return window_size; - } - assert(window->left_bytes == 0); - - // only the first block may > UINT32_MAX - allocated = _BlocksOutputBuffer_Grow( - buffer, (void**) next_out, (Py_ssize_t) *avail_out); - *avail_out = (uint32_t) allocated; - return allocated; -} - -/* Finish the buffer. - - On success, return a bytes object - On failure, return NULL */ -static inline PyObject * -OutputBuffer_WindowFinish(_BlocksOutputBuffer *buffer, _Uint32Window *window, - uint32_t avail_out) -{ - Py_ssize_t real_avail_out = (Py_ssize_t) avail_out + window->left_bytes; - return _BlocksOutputBuffer_Finish(buffer, real_avail_out); -} - -static inline void -OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window) -{ - _BlocksOutputBuffer_OnError(buffer); -} - - #define ENTER_ZLIB(obj) do { \ if (!PyThread_acquire_lock((obj)->lock, 0)) { \ Py_BEGIN_ALLOW_THREADS \ @@ -306,6 +152,56 @@ arrange_input_buffer(z_stream *zst, Py_ssize_t *remains) *remains -= zst->avail_in; } +static Py_ssize_t +arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer, + Py_ssize_t length, + Py_ssize_t max_length) +{ + Py_ssize_t occupied; + + if (*buffer == NULL) { + if (!(*buffer = PyBytes_FromStringAndSize(NULL, length))) + return -1; + occupied = 0; + } + else { + occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer); + + if (length == occupied) { + Py_ssize_t new_length; + assert(length <= max_length); + /* can not scale the buffer over max_length */ + if (length == max_length) + return -2; + if (length <= (max_length >> 1)) + new_length = length << 1; + else + new_length = max_length; + if (_PyBytes_Resize(buffer, new_length) < 0) + return -1; + length = new_length; + } + } + + zst->avail_out = (uInt)Py_MIN((size_t)(length - occupied), UINT_MAX); + zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied; + + return length; +} + +static Py_ssize_t +arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length) +{ + Py_ssize_t ret; + + ret = arrange_output_buffer_with_maximum(zst, buffer, length, + PY_SSIZE_T_MAX); + if (ret == -2) + PyErr_NoMemory(); + + return ret; +} + /*[clinic input] zlib.compress @@ -324,20 +220,16 @@ static PyObject * zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) /*[clinic end generated code: output=46bd152fadd66df2 input=c4d06ee5782a7e3f]*/ { - PyObject *return_value; + PyObject *return_value = NULL; + Py_ssize_t obuflen = DEF_BUF_SIZE; int flush; z_stream zst; - _BlocksOutputBuffer buffer = {.list = NULL}; zlibstate *state = get_zlib_state(module); Byte *ibuf = data->buf; Py_ssize_t ibuflen = data->len; - if (OutputBuffer_InitAndGrow(&buffer, -1, &zst.next_out, &zst.avail_out) < 0) { - goto error; - } - zst.opaque = NULL; zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; @@ -366,11 +258,10 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; do { - if (zst.avail_out == 0) { - if (OutputBuffer_Grow(&buffer, &zst.next_out, &zst.avail_out) < 0) { - deflateEnd(&zst); - goto error; - } + obuflen = arrange_output_buffer(&zst, &return_value, obuflen); + if (obuflen < 0) { + deflateEnd(&zst); + goto error; } Py_BEGIN_ALLOW_THREADS @@ -391,16 +282,15 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) err = deflateEnd(&zst); if (err == Z_OK) { - return_value = OutputBuffer_Finish(&buffer, zst.avail_out); - if (return_value == NULL) { + if (_PyBytes_Resize(&return_value, zst.next_out - + (Byte *)PyBytes_AS_STRING(return_value)) < 0) goto error; - } return return_value; } else zlib_error(state, zst, err, "while finishing compression"); error: - OutputBuffer_OnError(&buffer); + Py_XDECREF(return_value); return NULL; } @@ -423,13 +313,11 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, Py_ssize_t bufsize) /*[clinic end generated code: output=77c7e35111dc8c42 input=a9ac17beff1f893f]*/ { - PyObject *return_value; + PyObject *return_value = NULL; Byte *ibuf; Py_ssize_t ibuflen; int err, flush; z_stream zst; - _BlocksOutputBuffer buffer = {.list = NULL}; - _Uint32Window window; // output buffer's UINT32_MAX sliding window zlibstate *state = get_zlib_state(module); @@ -440,10 +328,6 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, bufsize = 1; } - if (OutputBuffer_WindowInitWithSize(&buffer, &window, bufsize, - &zst.next_out, &zst.avail_out) < 0) { - goto error; - } ibuf = data->buf; ibuflen = data->len; @@ -473,12 +357,10 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; do { - if (zst.avail_out == 0) { - if (OutputBuffer_WindowGrow(&buffer, &window, - &zst.next_out, &zst.avail_out) < 0) { - inflateEnd(&zst); - goto error; - } + bufsize = arrange_output_buffer(&zst, &return_value, bufsize); + if (bufsize < 0) { + inflateEnd(&zst); + goto error; } Py_BEGIN_ALLOW_THREADS @@ -518,13 +400,14 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, goto error; } - return_value = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out); - if (return_value != NULL) { - return return_value; - } + if (_PyBytes_Resize(&return_value, zst.next_out - + (Byte *)PyBytes_AS_STRING(return_value)) < 0) + goto error; + + return return_value; error: - OutputBuffer_WindowOnError(&buffer, &window); + Py_XDECREF(return_value); return NULL; } @@ -748,9 +631,9 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls, Py_buffer *data) /*[clinic end generated code: output=6731b3f0ff357ca6 input=04d00f65ab01d260]*/ { - PyObject *return_value; + PyObject *return_value = NULL; int err; - _BlocksOutputBuffer buffer = {.list = NULL}; + Py_ssize_t obuflen = DEF_BUF_SIZE; zlibstate *state = PyType_GetModuleState(cls); ENTER_ZLIB(self); @@ -758,18 +641,13 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls, self->zst.next_in = data->buf; Py_ssize_t ibuflen = data->len; - if (OutputBuffer_InitAndGrow(&buffer, -1, &self->zst.next_out, &self->zst.avail_out) < 0) { - goto error; - } - do { arrange_input_buffer(&self->zst, &ibuflen); do { - if (self->zst.avail_out == 0) { - if (OutputBuffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) { - goto error; - } + obuflen = arrange_output_buffer(&self->zst, &return_value, obuflen); + if (obuflen < 0) { + goto error; } Py_BEGIN_ALLOW_THREADS @@ -786,14 +664,12 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls, } while (ibuflen != 0); - return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out); - if (return_value != NULL) { + if (_PyBytes_Resize(&return_value, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(return_value)) == 0) goto success; - } error: - OutputBuffer_OnError(&buffer); - return_value = NULL; + Py_CLEAR(return_value); success: LEAVE_ZLIB(self); return return_value; @@ -870,8 +746,9 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, { int err = Z_OK; Py_ssize_t ibuflen; - PyObject *return_value; - _BlocksOutputBuffer buffer = {.list = NULL}; + Py_ssize_t obuflen = DEF_BUF_SIZE; + PyObject *return_value = NULL; + Py_ssize_t hard_limit; PyObject *module = PyType_GetModule(cls); if (module == NULL) @@ -881,30 +758,30 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, if (max_length < 0) { PyErr_SetString(PyExc_ValueError, "max_length must be non-negative"); return NULL; - } else if (max_length == 0) { - max_length = -1; - } + } else if (max_length == 0) + hard_limit = PY_SSIZE_T_MAX; + else + hard_limit = max_length; ENTER_ZLIB(self); self->zst.next_in = data->buf; ibuflen = data->len; - if (OutputBuffer_InitAndGrow(&buffer, max_length, &self->zst.next_out, &self->zst.avail_out) < 0) { - goto abort; - } - do { arrange_input_buffer(&self->zst, &ibuflen); do { - if (self->zst.avail_out == 0) { - if (OutputBuffer_GetDataSize(&buffer, self->zst.avail_out) == max_length) { + obuflen = arrange_output_buffer_with_maximum(&self->zst, &return_value, + obuflen, hard_limit); + if (obuflen == -2) { + if (max_length > 0) { goto save; } - if (OutputBuffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) { - goto abort; - } + PyErr_NoMemory(); + } + if (obuflen < 0) { + goto abort; } Py_BEGIN_ALLOW_THREADS @@ -948,14 +825,12 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, goto abort; } - return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out); - if (return_value != NULL) { + if (_PyBytes_Resize(&return_value, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(return_value)) == 0) goto success; - } abort: - OutputBuffer_OnError(&buffer); - return_value = NULL; + Py_CLEAR(return_value); success: LEAVE_ZLIB(self); return return_value; @@ -980,8 +855,8 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) /*[clinic end generated code: output=c7efd13efd62add2 input=286146e29442eb6c]*/ { int err; - PyObject *return_value; - _BlocksOutputBuffer buffer = {.list = NULL}; + Py_ssize_t length = DEF_BUF_SIZE; + PyObject *return_value = NULL; zlibstate *state = PyType_GetModuleState(cls); /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in @@ -994,15 +869,11 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) self->zst.avail_in = 0; - if (OutputBuffer_InitAndGrow(&buffer, -1, &self->zst.next_out, &self->zst.avail_out) < 0) { - goto error; - } - do { - if (self->zst.avail_out == 0) { - if (OutputBuffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) { - goto error; - } + length = arrange_output_buffer(&self->zst, &return_value, length); + if (length < 0) { + Py_CLEAR(return_value); + goto error; } Py_BEGIN_ALLOW_THREADS @@ -1037,15 +908,11 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) goto error; } - return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out); - if (return_value != NULL) { - goto success; - } + if (_PyBytes_Resize(&return_value, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(return_value)) < 0) + Py_CLEAR(return_value); -error: - OutputBuffer_OnError(&buffer); - return_value = NULL; -success: + error: LEAVE_ZLIB(self); return return_value; } @@ -1241,10 +1108,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, { int err, flush; Py_buffer data; - PyObject *return_value; + PyObject *return_value = NULL; Py_ssize_t ibuflen; - _BlocksOutputBuffer buffer = {.list = NULL}; - _Uint32Window window; // output buffer's UINT32_MAX sliding window PyObject *module = PyType_GetModule(cls); if (module == NULL) { @@ -1268,21 +1133,15 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, self->zst.next_in = data.buf; ibuflen = data.len; - if (OutputBuffer_WindowInitWithSize(&buffer, &window, length, - &self->zst.next_out, &self->zst.avail_out) < 0) { - goto abort; - } do { arrange_input_buffer(&self->zst, &ibuflen); flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; do { - if (self->zst.avail_out == 0) { - if (OutputBuffer_WindowGrow(&buffer, &window, - &self->zst.next_out, &self->zst.avail_out) < 0) { - goto abort; - } + length = arrange_output_buffer(&self->zst, &return_value, length); + if (length < 0) { + goto abort; } Py_BEGIN_ALLOW_THREADS @@ -1318,14 +1177,12 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, } } - return_value = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out); - if (return_value != NULL) { + if (_PyBytes_Resize(&return_value, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(return_value)) == 0) goto success; - } abort: - OutputBuffer_WindowOnError(&buffer, &window); - return_value = NULL; + Py_CLEAR(return_value); success: PyBuffer_Release(&data); LEAVE_ZLIB(self); @@ -1394,45 +1251,6 @@ set_inflate_zdict_ZlibDecompressor(zlibstate *state, ZlibDecompressor *self) return 0; } -static Py_ssize_t -arrange_output_buffer_with_maximum(uint32_t *avail_out, - uint8_t **next_out, - PyObject **buffer, - Py_ssize_t length, - Py_ssize_t max_length) -{ - Py_ssize_t occupied; - - if (*buffer == NULL) { - if (!(*buffer = PyBytes_FromStringAndSize(NULL, length))) - return -1; - occupied = 0; - } - else { - occupied = *next_out - (uint8_t *)PyBytes_AS_STRING(*buffer); - - if (length == occupied) { - Py_ssize_t new_length; - assert(length <= max_length); - /* can not scale the buffer over max_length */ - if (length == max_length) - return -2; - if (length <= (max_length >> 1)) - new_length = length << 1; - else - new_length = max_length; - if (_PyBytes_Resize(buffer, new_length) < 0) - return -1; - length = new_length; - } - } - - *avail_out = (uint32_t)Py_MIN((size_t)(length - occupied), UINT32_MAX); - *next_out = (uint8_t *)PyBytes_AS_STRING(*buffer) + occupied; - - return length; -} - /* Decompress data of length self->avail_in_real in self->state.next_in. The output buffer is allocated dynamically and returned. If the max_length is of sufficiently low size, max_length is allocated immediately. At most @@ -1475,11 +1293,10 @@ decompress_buf(ZlibDecompressor *self, Py_ssize_t max_length) arrange_input_buffer(&(self->zst), &(self->avail_in_real)); do { - obuflen = arrange_output_buffer_with_maximum(&(self->zst.avail_out), - &(self->zst.next_out), - &return_value, - obuflen, - hard_limit); + obuflen = arrange_output_buffer_with_maximum(&self->zst, + &return_value, + obuflen, + hard_limit); if (obuflen == -1){ PyErr_SetString(PyExc_MemoryError, "Insufficient memory for buffer allocation"); From f838c1d61a6107f9843411e472cdbd5b8e8dddf2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 16:37:24 +0100 Subject: [PATCH 02/12] Fix error regarding maximum length --- Modules/zlibmodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 8ed4ad0f327989..db7b40c284cfad 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -763,6 +763,9 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, else hard_limit = max_length; + if (max_length && obuflen > max_length) + obuflen = max_length; + ENTER_ZLIB(self); self->zst.next_in = data->buf; From 8793f1a9c5e0e2ca0eabf9be519331bb8f25fb1d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 06:49:14 +0100 Subject: [PATCH 03/12] Add missing braces --- Modules/zlibmodule.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index db7b40c284cfad..919658313073b0 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -283,8 +283,9 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) err = deflateEnd(&zst); if (err == Z_OK) { if (_PyBytes_Resize(&return_value, zst.next_out - - (Byte *)PyBytes_AS_STRING(return_value)) < 0) + (Byte *)PyBytes_AS_STRING(return_value)) < 0) { goto error; + } return return_value; } else @@ -401,9 +402,9 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, } if (_PyBytes_Resize(&return_value, zst.next_out - - (Byte *)PyBytes_AS_STRING(return_value)) < 0) + (Byte *)PyBytes_AS_STRING(return_value)) < 0) { goto error; - + } return return_value; error: @@ -665,9 +666,9 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls, } while (ibuflen != 0); if (_PyBytes_Resize(&return_value, self->zst.next_out - - (Byte *)PyBytes_AS_STRING(return_value)) == 0) + (Byte *)PyBytes_AS_STRING(return_value)) == 0) { goto success; - + } error: Py_CLEAR(return_value); success: @@ -829,9 +830,9 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls, } if (_PyBytes_Resize(&return_value, self->zst.next_out - - (Byte *)PyBytes_AS_STRING(return_value)) == 0) + (Byte *)PyBytes_AS_STRING(return_value)) == 0) { goto success; - + } abort: Py_CLEAR(return_value); success: @@ -912,9 +913,9 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) } if (_PyBytes_Resize(&return_value, self->zst.next_out - - (Byte *)PyBytes_AS_STRING(return_value)) < 0) + (Byte *)PyBytes_AS_STRING(return_value)) < 0) { Py_CLEAR(return_value); - + } error: LEAVE_ZLIB(self); return return_value; @@ -1181,9 +1182,9 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls, } if (_PyBytes_Resize(&return_value, self->zst.next_out - - (Byte *)PyBytes_AS_STRING(return_value)) == 0) + (Byte *)PyBytes_AS_STRING(return_value)) == 0) { goto success; - + } abort: Py_CLEAR(return_value); success: From cf7c24478652846b4cbeb1a9470b78944dafb100 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 06:50:51 +0100 Subject: [PATCH 04/12] Set return value properly to NULL in case of error --- Modules/zlibmodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 919658313073b0..27c8fce09de144 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -886,6 +886,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) if (err == Z_STREAM_ERROR) { zlib_error(state, self->zst, err, "while flushing"); + Py_CLEAR(return_value); goto error; } } while (self->zst.avail_out == 0); @@ -898,6 +899,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) err = deflateEnd(&self->zst); if (err != Z_OK) { zlib_error(state, self->zst, err, "while finishing compression"); + Py_CLEAR(return_value); goto error; } else @@ -909,6 +911,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode) */ } else if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(state, self->zst, err, "while flushing"); + Py_CLEAR(return_value); goto error; } From 723ca68eba016c9620d080a7de365df0a91f4c34 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 07:12:38 +0100 Subject: [PATCH 05/12] Remove blocksbuffer from bz2module --- Modules/_bz2module.c | 146 ++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 79 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 9304c13fbed5fc..772ef98a747e4f 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -8,60 +8,6 @@ #include #include -// Blocks output buffer wrappers -#include "pycore_blocks_output_buffer.h" - -#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX - #error "The maximum block size accepted by libbzip2 is UINT32_MAX." -#endif - -/* On success, return value >= 0 - On failure, return -1 */ -static inline Py_ssize_t -OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length, - char **next_out, uint32_t *avail_out) -{ - Py_ssize_t allocated; - - allocated = _BlocksOutputBuffer_InitAndGrow( - buffer, max_length, (void**) next_out); - *avail_out = (uint32_t) allocated; - return allocated; -} - -/* On success, return value >= 0 - On failure, return -1 */ -static inline Py_ssize_t -OutputBuffer_Grow(_BlocksOutputBuffer *buffer, - char **next_out, uint32_t *avail_out) -{ - Py_ssize_t allocated; - - allocated = _BlocksOutputBuffer_Grow( - buffer, (void**) next_out, (Py_ssize_t) *avail_out); - *avail_out = (uint32_t) allocated; - return allocated; -} - -static inline Py_ssize_t -OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out) -{ - return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out); -} - -static inline PyObject * -OutputBuffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out) -{ - return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out); -} - -static inline void -OutputBuffer_OnError(_BlocksOutputBuffer *buffer) -{ - _BlocksOutputBuffer_OnError(buffer); -} - - #ifndef BZ_CONFIG_ERROR #define BZ2_bzCompress bzCompress #define BZ2_bzCompressInit bzCompressInit @@ -156,21 +102,51 @@ catch_bz2_error(int bzerror) } +#if BUFSIZ < 8192 +#define INITIAL_BUFFER_SIZE 8192 +#else +#define INITIAL_BUFFER_SIZE BUFSIZ +#endif + +static int +grow_buffer(PyObject **buf, Py_ssize_t max_length) +{ + /* Expand the buffer by an amount proportional to the current size, + giving us amortized linear-time behavior. Use a less-than-double + growth factor to avoid excessive allocation. */ + size_t size = PyBytes_GET_SIZE(*buf); + size_t new_size = size + (size >> 3) + 6; + + if (max_length > 0 && new_size > (size_t) max_length) + new_size = (size_t) max_length; + + if (new_size > size) { + return _PyBytes_Resize(buf, new_size); + } else { /* overflow */ + PyErr_SetString(PyExc_OverflowError, + "Unable to allocate buffer - output too large"); + return -1; + } +} + /* BZ2Compressor class. */ static PyObject * compress(BZ2Compressor *c, char *data, size_t len, int action) { - PyObject *result; - _BlocksOutputBuffer buffer = {.list = NULL}; + size_t data_size = 0; - if (OutputBuffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) { - goto error; + PyObject *result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (result == NULL) { + return NULL; } c->bzs.next_in = data; c->bzs.avail_in = 0; + c->bzs.next_out = PyBytes_AS_STRING(result); + c->bzs.avail_out = INITIAL_BUFFER_SIZE; for (;;) { + char *this_out; int bzerror; /* On a 64-bit system, len might not fit in avail_in (an unsigned int). @@ -185,14 +161,20 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) break; if (c->bzs.avail_out == 0) { - if (OutputBuffer_Grow(&buffer, &c->bzs.next_out, &c->bzs.avail_out) < 0) { - goto error; + size_t buffer_left = PyBytes_GET_SIZE(result) - data_size; + if (buffer_left == 0) { + if (grow_buffer(&result, -1) < 0) + goto error; + c->bzs.next_out = PyBytes_AS_STRING(result) + data_size; + buffer_left = PyBytes_GET_SIZE(result) - data_size; } + c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX); } - + this_out = c->bzs.next_out; Py_BEGIN_ALLOW_THREADS bzerror = BZ2_bzCompress(&c->bzs, action); Py_END_ALLOW_THREADS + data_size += c->bzs.next_out - this_out; if (catch_bz2_error(bzerror)) goto error; @@ -201,14 +183,12 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) if (action == BZ_FINISH && bzerror == BZ_STREAM_END) break; } - - result = OutputBuffer_Finish(&buffer, c->bzs.avail_out); - if (result != NULL) { - return result; + if (_PyBytes_Resize(&result, data_size) < 0) { + goto error; } - + return result; error: - OutputBuffer_OnError(&buffer); + Py_XDECREF(result); return NULL; } @@ -425,20 +405,27 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) /* data_size is strictly positive, but because we repeatedly have to compare against max_length and PyBytes_GET_SIZE we declare it as signed */ + Py_ssize_t data_size = 0; PyObject *result; - _BlocksOutputBuffer buffer = {.list = NULL}; bz_stream *bzs = &d->bzs; + if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + else + result = PyBytes_FromStringAndSize(NULL, max_length); + if (result == NULL) + return NULL; - if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) { - goto error; - } + bzs->next_out = PyBytes_AS_STRING(result); for (;;) { int bzret; + size_t avail; /* On a 64-bit system, buffer length might not fit in avail_out, so we do decompression in chunks of no more than UINT_MAX bytes each. Note that the expression for `avail` is guaranteed to be positive, so the cast is safe. */ + avail = (size_t) (PyBytes_GET_SIZE(result) - data_size); + bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX); bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX); d->bzs_avail_in_real -= bzs->avail_in; @@ -446,6 +433,7 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) bzret = BZ2_bzDecompress(bzs); Py_END_ALLOW_THREADS + data_size = bzs->next_out - PyBytes_AS_STRING(result); d->bzs_avail_in_real += bzs->avail_in; if (catch_bz2_error(bzret)) @@ -456,22 +444,22 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) } else if (d->bzs_avail_in_real == 0) { break; } else if (bzs->avail_out == 0) { - if (OutputBuffer_GetDataSize(&buffer, bzs->avail_out) == max_length) { + if (data_size == max_length) break; - } - if (OutputBuffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) { + if (data_size == PyBytes_GET_SIZE(result) && + grow_buffer(&result, max_length) == -1) goto error; - } + bzs->next_out = PyBytes_AS_STRING(result) + data_size; } } - result = OutputBuffer_Finish(&buffer, bzs->avail_out); - if (result != NULL) { - return result; + if (_PyBytes_Resize(&result, data_size) == -1) { + goto error; } + return result; error: - OutputBuffer_OnError(&buffer); + Py_XDECREF(result); return NULL; } From 66199e4daf498c1465dbddc547208196baae4025 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 07:17:11 +0100 Subject: [PATCH 06/12] Remove blocks output buffer --- .../internal/pycore_blocks_output_buffer.h | 317 ------------------ 1 file changed, 317 deletions(-) delete mode 100644 Include/internal/pycore_blocks_output_buffer.h diff --git a/Include/internal/pycore_blocks_output_buffer.h b/Include/internal/pycore_blocks_output_buffer.h deleted file mode 100644 index 28cf6fba4eeba2..00000000000000 --- a/Include/internal/pycore_blocks_output_buffer.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - _BlocksOutputBuffer is used to maintain an output buffer - that has unpredictable size. Suitable for compression/decompression - API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out: - - stream->next_out: point to the next output position. - stream->avail_out: the number of available bytes left in the buffer. - - It maintains a list of bytes object, so there is no overhead of resizing - the buffer. - - Usage: - - 1, Initialize the struct instance like this: - _BlocksOutputBuffer buffer = {.list = NULL}; - Set .list to NULL for _BlocksOutputBuffer_OnError() - - 2, Initialize the buffer use one of these functions: - _BlocksOutputBuffer_InitAndGrow() - _BlocksOutputBuffer_InitWithSize() - - 3, If (avail_out == 0), grow the buffer: - _BlocksOutputBuffer_Grow() - - 4, Get the current outputted data size: - _BlocksOutputBuffer_GetDataSize() - - 5, Finish the buffer, and return a bytes object: - _BlocksOutputBuffer_Finish() - - 6, Clean up the buffer when an error occurred: - _BlocksOutputBuffer_OnError() -*/ - -#ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H -#define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H -#ifdef __cplusplus -extern "C" { -#endif - -#include "Python.h" - -typedef struct { - // List of bytes objects - PyObject *list; - // Number of whole allocated size - Py_ssize_t allocated; - // Max length of the buffer, negative number means unlimited length. - Py_ssize_t max_length; -} _BlocksOutputBuffer; - -static const char unable_allocate_msg[] = "Unable to allocate output buffer."; - -/* In 32-bit build, the max block size should <= INT32_MAX. */ -#define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024) - -/* Block size sequence */ -#define KB (1024) -#define MB (1024*1024) -static const Py_ssize_t BUFFER_BLOCK_SIZE[] = - { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, - 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, - OUTPUT_BUFFER_MAX_BLOCK_SIZE }; -#undef KB -#undef MB - -/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole - allocated size growth step is: - 1 32 KB +32 KB - 2 96 KB +64 KB - 3 352 KB +256 KB - 4 1.34 MB +1 MB - 5 5.34 MB +4 MB - 6 13.34 MB +8 MB - 7 29.34 MB +16 MB - 8 45.34 MB +16 MB - 9 77.34 MB +32 MB - 10 109.34 MB +32 MB - 11 141.34 MB +32 MB - 12 173.34 MB +32 MB - 13 237.34 MB +64 MB - 14 301.34 MB +64 MB - 15 429.34 MB +128 MB - 16 557.34 MB +128 MB - 17 813.34 MB +256 MB - 18 1069.34 MB +256 MB - 19 1325.34 MB +256 MB - 20 1581.34 MB +256 MB - 21 1837.34 MB +256 MB - 22 2093.34 MB +256 MB - ... -*/ - -/* Initialize the buffer, and grow the buffer. - - max_length: Max length of the buffer, -1 for unlimited length. - - On success, return allocated size (>=0) - On failure, return -1 -*/ -static inline Py_ssize_t -_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, - const Py_ssize_t max_length, - void **next_out) -{ - PyObject *b; - Py_ssize_t block_size; - - // ensure .list was set to NULL - assert(buffer->list == NULL); - - // get block size - if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) { - block_size = max_length; - } else { - block_size = BUFFER_BLOCK_SIZE[0]; - } - - // the first block - b = PyBytes_FromStringAndSize(NULL, block_size); - if (b == NULL) { - return -1; - } - - // create the list - buffer->list = PyList_New(1); - if (buffer->list == NULL) { - Py_DECREF(b); - return -1; - } - PyList_SET_ITEM(buffer->list, 0, b); - - // set variables - buffer->allocated = block_size; - buffer->max_length = max_length; - - *next_out = PyBytes_AS_STRING(b); - return block_size; -} - -/* Initialize the buffer, with an initial size. - - Check block size limit in the outer wrapper function. For example, some libs - accept UINT32_MAX as the maximum block size, then init_size should <= it. - - On success, return allocated size (>=0) - On failure, return -1 -*/ -static inline Py_ssize_t -_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, - const Py_ssize_t init_size, - void **next_out) -{ - PyObject *b; - - // ensure .list was set to NULL - assert(buffer->list == NULL); - - // the first block - b = PyBytes_FromStringAndSize(NULL, init_size); - if (b == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - - // create the list - buffer->list = PyList_New(1); - if (buffer->list == NULL) { - Py_DECREF(b); - return -1; - } - PyList_SET_ITEM(buffer->list, 0, b); - - // set variables - buffer->allocated = init_size; - buffer->max_length = -1; - - *next_out = PyBytes_AS_STRING(b); - return init_size; -} - -/* Grow the buffer. The avail_out must be 0, please check it before calling. - - On success, return allocated size (>=0) - On failure, return -1 -*/ -static inline Py_ssize_t -_BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer, - void **next_out, - const Py_ssize_t avail_out) -{ - PyObject *b; - const Py_ssize_t list_len = Py_SIZE(buffer->list); - Py_ssize_t block_size; - - // ensure no gaps in the data - if (avail_out != 0) { - PyErr_SetString(PyExc_SystemError, - "avail_out is non-zero in _BlocksOutputBuffer_Grow()."); - return -1; - } - - // get block size - if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) { - block_size = BUFFER_BLOCK_SIZE[list_len]; - } else { - block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; - } - - // check max_length - if (buffer->max_length >= 0) { - // if (rest == 0), should not grow the buffer. - Py_ssize_t rest = buffer->max_length - buffer->allocated; - assert(rest > 0); - - // block_size of the last block - if (block_size > rest) { - block_size = rest; - } - } - - // check buffer->allocated overflow - if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - - // create the block - b = PyBytes_FromStringAndSize(NULL, block_size); - if (b == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return -1; - } - if (PyList_Append(buffer->list, b) < 0) { - Py_DECREF(b); - return -1; - } - Py_DECREF(b); - - // set variables - buffer->allocated += block_size; - - *next_out = PyBytes_AS_STRING(b); - return block_size; -} - -/* Return the current outputted data size. */ -static inline Py_ssize_t -_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, - const Py_ssize_t avail_out) -{ - return buffer->allocated - avail_out; -} - -/* Finish the buffer. - - Return a bytes object on success - Return NULL on failure -*/ -static inline PyObject * -_BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer, - const Py_ssize_t avail_out) -{ - PyObject *result, *block; - const Py_ssize_t list_len = Py_SIZE(buffer->list); - - // fast path for single block - if ((list_len == 1 && avail_out == 0) || - (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out)) - { - block = PyList_GET_ITEM(buffer->list, 0); - Py_INCREF(block); - - Py_CLEAR(buffer->list); - return block; - } - - // final bytes object - result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out); - if (result == NULL) { - PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); - return NULL; - } - - // memory copy - if (list_len > 0) { - char *posi = PyBytes_AS_STRING(result); - - // blocks except the last one - Py_ssize_t i = 0; - for (; i < list_len-1; i++) { - block = PyList_GET_ITEM(buffer->list, i); - memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block)); - posi += Py_SIZE(block); - } - // the last block - block = PyList_GET_ITEM(buffer->list, i); - memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out); - } else { - assert(Py_SIZE(result) == 0); - } - - Py_CLEAR(buffer->list); - return result; -} - -/* Clean up the buffer when an error occurred. */ -static inline void -_BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer) -{ - Py_CLEAR(buffer->list); -} - -#ifdef __cplusplus -} -#endif -#endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */ \ No newline at end of file From 852bae059f8b0fd087154dab618c4ff8fc47f962 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 07:26:36 +0100 Subject: [PATCH 07/12] More consistent bracing style --- Modules/_bz2module.c | 47 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 772ef98a747e4f..cc0811ab019f50 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -117,8 +117,9 @@ grow_buffer(PyObject **buf, Py_ssize_t max_length) size_t size = PyBytes_GET_SIZE(*buf); size_t new_size = size + (size >> 3) + 6; - if (max_length > 0 && new_size > (size_t) max_length) + if (max_length > 0 && new_size > (size_t) max_length) { new_size = (size_t) max_length; + } if (new_size > size) { return _PyBytes_Resize(buf, new_size); @@ -145,7 +146,7 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) c->bzs.next_out = PyBytes_AS_STRING(result); c->bzs.avail_out = INITIAL_BUFFER_SIZE; - for (;;) { + while (1) { char *this_out; int bzerror; @@ -157,8 +158,9 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) } /* In regular compression mode, stop when input data is exhausted. */ - if (action == BZ_RUN && c->bzs.avail_in == 0) + if (action == BZ_RUN && c->bzs.avail_in == 0) { break; + } if (c->bzs.avail_out == 0) { size_t buffer_left = PyBytes_GET_SIZE(result) - data_size; @@ -176,8 +178,9 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) Py_END_ALLOW_THREADS data_size += c->bzs.next_out - this_out; - if (catch_bz2_error(bzerror)) + if (catch_bz2_error(bzerror)) { goto error; + } /* In flushing mode, stop when all buffered data has been flushed. */ if (action == BZ_FINISH && bzerror == BZ_STREAM_END) @@ -260,10 +263,12 @@ _bz2_BZ2Compressor_flush_impl(BZ2Compressor *self) static void* BZ2_Malloc(void* ctx, int items, int size) { - if (items < 0 || size < 0) + if (items < 0 || size < 0) { return NULL; - if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size) + } + if (size != 0 && (size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size) { return NULL; + } /* PyMem_Malloc() cannot be used: compress() and decompress() release the GIL */ return PyMem_RawMalloc((size_t)items * (size_t)size); @@ -299,9 +304,9 @@ _bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel) self->bzs.bzalloc = BZ2_Malloc; self->bzs.bzfree = BZ2_Free; bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); - if (catch_bz2_error(bzerror)) + if (catch_bz2_error(bzerror)) { goto error; - + } return 0; error: @@ -408,16 +413,17 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) Py_ssize_t data_size = 0; PyObject *result; bz_stream *bzs = &d->bzs; - if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) + if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) { result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); - else + } else { result = PyBytes_FromStringAndSize(NULL, max_length); - if (result == NULL) + } + if (result == NULL) { return NULL; - + } bzs->next_out = PyBytes_AS_STRING(result); - for (;;) { + while (1) { int bzret; size_t avail; /* On a 64-bit system, buffer length might not fit in avail_out, so we @@ -436,8 +442,9 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) data_size = bzs->next_out - PyBytes_AS_STRING(result); d->bzs_avail_in_real += bzs->avail_in; - if (catch_bz2_error(bzret)) + if (catch_bz2_error(bzret)) { goto error; + } if (bzret == BZ_STREAM_END) { d->eof = 1; break; @@ -527,8 +534,9 @@ decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length) if (d->bzs_avail_in_real > 0) { Py_XSETREF(d->unused_data, PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real)); - if (d->unused_data == NULL) + if (d->unused_data == NULL) { goto error; + } } } else if (d->bzs_avail_in_real == 0) { @@ -604,10 +612,11 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, PyObject *result = NULL; ACQUIRE_LOCK(self); - if (self->eof) + if (self->eof) { PyErr_SetString(PyExc_EOFError, "End of stream already reached"); - else + } else { result = decompress(self, data->buf, data->len, max_length); + } RELEASE_LOCK(self); return result; } @@ -634,9 +643,9 @@ _bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self) self->input_buffer = NULL; self->input_buffer_size = 0; Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0)); - if (self->unused_data == NULL) + if (self->unused_data == NULL) { goto error; - + } bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); if (catch_bz2_error(bzerror)) goto error; From a1eff84cf03c3fb2eb3e51bb80d8f986389c5178 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 07:26:54 +0100 Subject: [PATCH 08/12] Grow bz2 buffer more aggresively --- Modules/_bz2module.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index cc0811ab019f50..a3bd0e4a7e5794 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -111,11 +111,12 @@ catch_bz2_error(int bzerror) static int grow_buffer(PyObject **buf, Py_ssize_t max_length) { - /* Expand the buffer by an amount proportional to the current size, - giving us amortized linear-time behavior. Use a less-than-double - growth factor to avoid excessive allocation. */ + /* Expand the buffer by doubling it. Previously this was done by a growth + factor but this results in more calls to _PyBytes_Resize for larger + buffers. Since resizing is costly, doubling the buffer is more + efficient while the cost of overallocation is small on small buffers. */ size_t size = PyBytes_GET_SIZE(*buf); - size_t new_size = size + (size >> 3) + 6; + size_t new_size = size << 1; if (max_length > 0 && new_size > (size_t) max_length) { new_size = (size_t) max_length; From b98450f6202e4dead520a20a6d8371c382caab9d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 08:55:01 +0100 Subject: [PATCH 09/12] Start on lzma module --- Modules/_lzmamodule.c | 101 ++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 68 deletions(-) diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index b572d8cd909fd1..79fcbea3bb2cc6 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -15,60 +15,6 @@ #include -// Blocks output buffer wrappers -#include "pycore_blocks_output_buffer.h" - -#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX - #error "The maximum block size accepted by liblzma is SIZE_MAX." -#endif - -/* On success, return value >= 0 - On failure, return -1 */ -static inline Py_ssize_t -OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length, - uint8_t **next_out, size_t *avail_out) -{ - Py_ssize_t allocated; - - allocated = _BlocksOutputBuffer_InitAndGrow( - buffer, max_length, (void**) next_out); - *avail_out = (size_t) allocated; - return allocated; -} - -/* On success, return value >= 0 - On failure, return -1 */ -static inline Py_ssize_t -OutputBuffer_Grow(_BlocksOutputBuffer *buffer, - uint8_t **next_out, size_t *avail_out) -{ - Py_ssize_t allocated; - - allocated = _BlocksOutputBuffer_Grow( - buffer, (void**) next_out, (Py_ssize_t) *avail_out); - *avail_out = (size_t) allocated; - return allocated; -} - -static inline Py_ssize_t -OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out) -{ - return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out); -} - -static inline PyObject * -OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out) -{ - return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out); -} - -static inline void -OutputBuffer_OnError(_BlocksOutputBuffer *buffer) -{ - _BlocksOutputBuffer_OnError(buffer); -} - - #define ACQUIRE_LOCK(obj) do { \ if (!PyThread_acquire_lock((obj)->lock, 0)) { \ Py_BEGIN_ALLOW_THREADS \ @@ -183,6 +129,25 @@ PyLzma_Free(void *opaque, void *ptr) } +#if BUFSIZ < 8192 +#define INITIAL_BUFFER_SIZE 8192 +#else +#define INITIAL_BUFFER_SIZE BUFSIZ +#endif + +static int +grow_buffer(PyObject **buf, Py_ssize_t max_length) +{ + Py_ssize_t size = PyBytes_GET_SIZE(*buf); + Py_ssize_t newsize = size << 1; + + if (max_length > 0 && newsize > max_length) { + newsize = max_length; + } + return _PyBytes_Resize(buf, newsize); +} + + /* Some custom type conversions for PyArg_ParseTupleAndKeywords(), since the predefined conversion specifiers do not suit our needs: @@ -546,16 +511,15 @@ class lzma_filter_converter(CConverter): static PyObject * compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) { - PyObject *result; - _BlocksOutputBuffer buffer = {.list = NULL}; - _lzma_state *state = PyType_GetModuleState(Py_TYPE(c)); - assert(state != NULL); - - if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) { - goto error; + Py_ssize_t data_size = 0; + PyObject * result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (result == NULL) { + return NULL; } c->lzs.next_in = data; c->lzs.avail_in = len; + c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); + c->lzs.avail_out = PyBytes_GET_SIZE(result); for (;;) { lzma_ret lzret; @@ -563,7 +527,7 @@ compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) Py_BEGIN_ALLOW_THREADS lzret = lzma_code(&c->lzs, action); Py_END_ALLOW_THREADS - + data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result); if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) { lzret = LZMA_OK; /* That wasn't a real error */ } @@ -574,19 +538,20 @@ compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) { break; } else if (c->lzs.avail_out == 0) { - if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) { + if (grow_buffer(&result, -1) == -1) { goto error; } + c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; + c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; } } - result = OutputBuffer_Finish(&buffer, c->lzs.avail_out); - if (result != NULL) { - return result; + if (_PyBytes_Resize(&result, data_size) == -1) { + goto error; } - + return result; error: - OutputBuffer_OnError(&buffer); + Py_XDECREF(result); return NULL; } From 6162715f960d0073b126bbd7f0dde702b0211141 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 09:32:54 +0100 Subject: [PATCH 10/12] Finish _lzmamodule --- Modules/_lzmamodule.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index 79fcbea3bb2cc6..bb5dba7fb871cd 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -512,6 +512,8 @@ static PyObject * compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) { Py_ssize_t data_size = 0; + _lzma_state *state = PyType_GetModuleState(Py_TYPE(c)); + assert(state != NULL); PyObject * result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); if (result == NULL) { return NULL; @@ -894,15 +896,21 @@ static PyType_Spec lzma_compressor_type_spec = { static PyObject* decompress_buf(Decompressor *d, Py_ssize_t max_length) { + Py_ssize_t data_size = 0; PyObject *result; lzma_stream *lzs = &d->lzs; - _BlocksOutputBuffer buffer = {.list = NULL}; _lzma_state *state = PyType_GetModuleState(Py_TYPE(d)); assert(state != NULL); - if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) { - goto error; - } + if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + else + result = PyBytes_FromStringAndSize(NULL, max_length); + if (result == NULL) + return NULL; + + lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result); + lzs->avail_out = PyBytes_GET_SIZE(result); for (;;) { lzma_ret lzret; @@ -910,6 +918,7 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) Py_BEGIN_ALLOW_THREADS lzret = lzma_code(lzs, LZMA_RUN); Py_END_ALLOW_THREADS + data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result); if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) { lzret = LZMA_OK; /* That wasn't a real error */ @@ -928,24 +937,24 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) Maybe lzs's internal state still have a few bytes can be output, grow the output buffer and continue if max_lengh < 0. */ - if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) { + if (data_size == max_length) break; - } - if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) { + if (grow_buffer(&result, max_length) == -1) goto error; - } + lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; + lzs->avail_out = PyBytes_GET_SIZE(result) - data_size; } else if (lzs->avail_in == 0) { break; } } - result = OutputBuffer_Finish(&buffer, lzs->avail_out); - if (result != NULL) { - return result; + if (_PyBytes_Resize(&result, data_size) == -1) { + goto error; } + return result; error: - OutputBuffer_OnError(&buffer); + Py_XDECREF(result); return NULL; } From 38bef68cae936d2ef03e58712cbed523c33d9c28 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 09:43:08 +0100 Subject: [PATCH 11/12] Code style changes --- Modules/_lzmamodule.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index bb5dba7fb871cd..007351968b7ccb 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -314,9 +314,10 @@ lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr) } id_obj = PyMapping_GetItemString(spec, "id"); if (id_obj == NULL) { - if (PyErr_ExceptionMatches(PyExc_KeyError)) + if (PyErr_ExceptionMatches(PyExc_KeyError)) { PyErr_SetString(PyExc_ValueError, "Filter specifier must have an \"id\" entry"); + } return 0; } f->id = PyLong_AsUnsignedLongLong(id_obj); @@ -523,7 +524,7 @@ compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); c->lzs.avail_out = PyBytes_GET_SIZE(result); - for (;;) { + while(1) { lzma_ret lzret; Py_BEGIN_ALLOW_THREADS @@ -902,17 +903,19 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) _lzma_state *state = PyType_GetModuleState(Py_TYPE(d)); assert(state != NULL); - if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) + if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) { result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); - else + } else { result = PyBytes_FromStringAndSize(NULL, max_length); - if (result == NULL) + } + if (result == NULL) { return NULL; + } lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result); lzs->avail_out = PyBytes_GET_SIZE(result); - for (;;) { + while(1) { lzma_ret lzret; Py_BEGIN_ALLOW_THREADS @@ -937,10 +940,12 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length) Maybe lzs's internal state still have a few bytes can be output, grow the output buffer and continue if max_lengh < 0. */ - if (data_size == max_length) + if (data_size == max_length) { break; - if (grow_buffer(&result, max_length) == -1) + } + if (grow_buffer(&result, max_length) == -1) { goto error; + } lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; lzs->avail_out = PyBytes_GET_SIZE(result) - data_size; } else if (lzs->avail_in == 0) { @@ -1111,10 +1116,11 @@ _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data, PyObject *result = NULL; ACQUIRE_LOCK(self); - if (self->eof) + if (self->eof) { PyErr_SetString(PyExc_EOFError, "Already at end of stream"); - else + } else { result = decompress(self, data->buf, data->len, max_length); + } RELEASE_LOCK(self); return result; } @@ -1267,9 +1273,7 @@ _lzma_LZMADecompressor___init___impl(Decompressor *self, int format, static void Decompressor_dealloc(Decompressor *self) { - if(self->input_buffer != NULL) - PyMem_Free(self->input_buffer); - + PyMem_Free(self->input_buffer); lzma_end(&self->lzs); Py_CLEAR(self->unused_data); if (self->lock != NULL) { From 52945c60d23d9037847fac588f255a2819f8ad4a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Jan 2023 10:21:04 +0100 Subject: [PATCH 12/12] Add blurb --- .../Library/2023-01-24-10-20-52.gh-issue-101260.bsNSeT.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-01-24-10-20-52.gh-issue-101260.bsNSeT.rst diff --git a/Misc/NEWS.d/next/Library/2023-01-24-10-20-52.gh-issue-101260.bsNSeT.rst b/Misc/NEWS.d/next/Library/2023-01-24-10-20-52.gh-issue-101260.bsNSeT.rst new file mode 100644 index 00000000000000..77a40614d87bea --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-01-24-10-20-52.gh-issue-101260.bsNSeT.rst @@ -0,0 +1,5 @@ +In Python 3.10 a change was made that enhanced the performance of the zlib, +bz2 and lzma modules when operating on large (multiple megabytes) in-memory +buffers. However this came at the cost of a performance degradation for +smaller buffers. Since smaller buffers are used more often, particularly +during streaming, the change was reverted.