Skip to content
Closed
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
699e616
bpo-36346: Prepare for removing the legacy Unicode C API.
serhiy-storchaka Oct 26, 2018
60b89c9
Fix winreg.SetValue().
serhiy-storchaka Mar 19, 2019
f386b63
Clean up some ifdefs in _testcapimodule.
serhiy-storchaka Mar 19, 2019
11e0e0c
Make path_cleanup() paranoidally safer.
serhiy-storchaka Mar 19, 2019
236f608
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 19, 2019
8750d48
Fix os.scandir().
serhiy-storchaka Mar 20, 2019
431e71f
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 20, 2019
545c7a9
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 28, 2019
3c1ab31
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 13, 2020
54b0561
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 15, 2020
2c62d96
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 29, 2020
6d89775
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 30, 2020
3b5294a
Silence compiler warnings.
serhiy-storchaka Jun 30, 2020
e7898fa
Fix PyUnicode_IsIdentifier for the cache-less build.
serhiy-storchaka Jun 30, 2020
fd641c6
Silence compiler warnings on Windows.
serhiy-storchaka Jun 30, 2020
a79d935
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 30, 2020
ed14aa9
Fix compiler warning in _testcapi.
serhiy-storchaka Jun 30, 2020
c5eb102
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 1, 2020
d529224
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 1, 2020
6102b4b
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 5, 2020
74695b3
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 5, 2020
97b5228
Use HAVE_UNICODE_WCHAR_CACHE instead of USE_UNICODE_WCHAR_CACHE in _P…
serhiy-storchaka Jul 5, 2020
d6ba6b7
Set HAVE_UNICODE_WCHAR_CACHE and USE_UNICODE_WCHAR_CACHE only if they…
serhiy-storchaka Jul 9, 2020
0da4146
Remove Py_UNICODE_MATCH.
serhiy-storchaka Jul 9, 2020
51365fb
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 10, 2020
e425908
Fix unterminated #if.
serhiy-storchaka Jul 10, 2020
8a9259b
Reset arraymodule.c.
serhiy-storchaka Jul 10, 2020
76ba4b6
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 10, 2020
16ac7fd
Temporary disable the wchar_t cache by default.
serhiy-storchaka Jul 10, 2020
5e90bf8
Fix unicode_result().
serhiy-storchaka Jul 10, 2020
09675ab
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 10, 2020
6915ce3
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Apr 25, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

/* --- Internal Unicode Operations ---------------------------------------- */

#define HAVE_UNICODE_WCHAR_CACHE 1
#define USE_UNICODE_WCHAR_CACHE 1

/* Since splitting on whitespace is an important use case, and
Expand Down Expand Up @@ -74,6 +75,16 @@ Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))

#if HAVE_UNICODE_WCHAR_CACHE
/* Check if substring matches string at the given offset.  The offset must
   be valid, and the substring must not be empty.

   The expansion is a single boolean expression made of three tests joined
   with &&: the first code unit at the offset equals the first code unit of
   the substring, the last code unit of the candidate window equals the last
   code unit of the substring, and finally memcmp() over
   (substring)->wstr_length * sizeof(Py_UNICODE) bytes reports equality.
   The two single-unit comparisons come first so that a mismatch can be
   rejected before memcmp() is reached.

   Every argument is expanded more than once, so do not pass expressions
   that have side effects.

   NOTE(review): the expansion reads both ->wstr and ->wstr_length through
   the same pointer -- confirm that the type callers pass exposes both
   members directly. */

#define Py_UNICODE_MATCH(string, offset, substring) \
((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
#endif /* HAVE_UNICODE_WCHAR_CACHE */

/* --- Unicode Type ------------------------------------------------------- */

/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
Expand Down Expand Up @@ -213,7 +224,9 @@ typedef struct {
4 bytes (see issue #19537 on m68k). */
unsigned int :24;
} state;
#if HAVE_UNICODE_WCHAR_CACHE
wchar_t *wstr; /* wchar_t representation (null-terminated) */
#endif /* HAVE_UNICODE_WCHAR_CACHE */
} PyASCIIObject;

/* Non-ASCII strings allocated through PyUnicode_New use the
Expand All @@ -224,8 +237,10 @@ typedef struct {
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
* terminating \0. */
char *utf8; /* UTF-8 representation (null-terminated) */
#if HAVE_UNICODE_WCHAR_CACHE
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
* surrogates count as two code points. */
#endif /* HAVE_UNICODE_WCHAR_CACHE */
} PyCompactUnicodeObject;

/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
Expand All @@ -246,6 +261,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
int check_content);

/* Fast access macros */
#if HAVE_UNICODE_WCHAR_CACHE

/* Returns the deprecated Py_UNICODE representation's size in code units
(this includes surrogate pairs as 2 units).
Expand Down Expand Up @@ -280,6 +296,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
#define PyUnicode_AS_DATA(op) \
((const char *)(PyUnicode_AS_UNICODE(op)))

#endif /* HAVE_UNICODE_WCHAR_CACHE */

/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */

Expand Down Expand Up @@ -440,13 +457,15 @@ enum PyUnicode_Kind {
(0xffffU) : \
(0x10ffffU)))))

#if HAVE_UNICODE_WCHAR_CACHE
/* Return the length recorded for the wstr representation of op.

   For a compact ASCII object (PyUnicode_IS_COMPACT_ASCII(op) is true) the
   value is the `length` member of PyASCIIObject; for every other object it
   is the `wstr_length` member of PyCompactUnicodeObject.

   op is cast without any type check, so the caller must pass a Unicode
   object. */
Py_DEPRECATED(3.3)
static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
    return PyUnicode_IS_COMPACT_ASCII(op) ?
            ((PyASCIIObject*)op)->length :
            ((PyCompactUnicodeObject*)op)->wstr_length;
}
/* The argument is parenthesized before the cast so that a compound
   expression is converted as a whole instead of the cast binding to its
   first operand only. */
#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)(op))
#endif /* HAVE_UNICODE_WCHAR_CACHE */

/* === Public API ========================================================= */

Expand Down
8 changes: 8 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
from .testresult import get_test_runner


# unicode_legacy_string is taken from the _testcapi module when that module
# can be imported; otherwise the name is bound to None so that code below can
# test for its presence without raising ImportError.
try:
from _testcapi import unicode_legacy_string
except ImportError:
unicode_legacy_string = None

__all__ = [
# globals
"PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast",
Expand Down Expand Up @@ -426,6 +431,9 @@ def requires_lzma(reason='requires lzma'):
lzma = None
return unittest.skipUnless(lzma, reason)

# Test decorator: the decorated test is skipped, with the reason given below,
# unless unicode_legacy_string is truthy.
requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string,
'requires legacy Unicode C API')

is_jython = sys.platform.startswith('java')

is_android = hasattr(sys, 'getandroidapilevel')
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,9 @@ def test_writerows_errors(self):
self.assertRaises(OSError, writer.writerows, BadIterable())

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_writerows_legacy_strings(self):
import _testcapi

c = _testcapi.unicode_legacy_string('a')
with TemporaryFile("w+", newline='') as fileobj:
writer = csv.writer(fileobj)
Expand Down
5 changes: 4 additions & 1 deletion Lib/test/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
import numbers
import locale
from test.support import (run_unittest, run_doctest, is_resource_enabled,
requires_IEEE_754, requires_docstrings)
requires_IEEE_754, requires_docstrings,
requires_legacy_unicode_capi)
from test.support import (TestFailed,
run_with_locale, cpython_only)
from test.support.import_helper import import_fresh_module
Expand Down Expand Up @@ -582,6 +583,7 @@ def test_explicit_from_string(self):
self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")

@cpython_only
@requires_legacy_unicode_capi
def test_from_legacy_strings(self):
import _testcapi
Decimal = self.decimal.Decimal
Expand Down Expand Up @@ -2817,6 +2819,7 @@ def test_none_args(self):
Overflow])

@cpython_only
@requires_legacy_unicode_capi
def test_from_legacy_strings(self):
import _testcapi
c = self.decimal.Context()
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_getargs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@ def test_et_hash(self):
buf = bytearray()
self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf)

@support.requires_legacy_unicode_capi
def test_u(self):
from _testcapi import getargs_u
self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
Expand All @@ -984,6 +985,7 @@ def test_u(self):
self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
self.assertRaises(TypeError, getargs_u, None)

@support.requires_legacy_unicode_capi
def test_u_hash(self):
from _testcapi import getargs_u_hash
self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
Expand All @@ -993,6 +995,7 @@ def test_u_hash(self):
self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
self.assertRaises(TypeError, getargs_u_hash, None)

@support.requires_legacy_unicode_capi
def test_Z(self):
from _testcapi import getargs_Z
self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
Expand All @@ -1002,6 +1005,7 @@ def test_Z(self):
self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
self.assertIsNone(getargs_Z(None))

@support.requires_legacy_unicode_capi
def test_Z_hash(self):
from _testcapi import getargs_Z_hash
self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,7 @@ def test_isidentifier(self):
self.assertFalse("0".isidentifier())

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_isidentifier_legacy(self):
import _testcapi
u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
Expand Down Expand Up @@ -2348,6 +2349,7 @@ def test_getnewargs(self):
self.assertEqual(len(args), 1)

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_resize(self):
from _testcapi import getargs_u
for length in range(1, 100, 7):
Expand Down Expand Up @@ -2918,6 +2920,7 @@ def test_copycharacters(self):
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_encode_decimal(self):
from _testcapi import unicode_encodedecimal
self.assertEqual(unicode_encodedecimal('123'),
Expand All @@ -2934,6 +2937,7 @@ def test_encode_decimal(self):
unicode_encodedecimal, "123\u20ac", "replace")

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
self.assertEqual(transform_decimal('123'),
Expand Down
33 changes: 14 additions & 19 deletions Modules/_ctypes/_ctypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1366,8 +1366,6 @@ WCharArray_get_value(CDataObject *self, void *Py_UNUSED(ignored))
static int
WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored))
{
Py_ssize_t result = 0;

if (value == NULL) {
PyErr_SetString(PyExc_TypeError,
"can't delete attribute");
Expand All @@ -1378,29 +1376,24 @@ WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored
"unicode string expected instead of %s instance",
Py_TYPE(value)->tp_name);
return -1;
} else
Py_INCREF(value);
}

Py_ssize_t size = self->b_size / sizeof(wchar_t);
Py_ssize_t len = PyUnicode_AsWideChar(value, NULL, 0);
if (len < 0) {
return -1;
}
// PyUnicode_AsWideChar() returns number of wchars including trailing null byte,
// when it is called with NULL.
if (((size_t)len-1) > self->b_size/sizeof(wchar_t)) {
assert(len > 0);
if (len - 1 > size) {
PyErr_SetString(PyExc_ValueError, "string too long");
result = -1;
goto done;
}
result = PyUnicode_AsWideChar(value,
(wchar_t *)self->b_ptr,
self->b_size/sizeof(wchar_t));
if (result >= 0 && (size_t)result < self->b_size/sizeof(wchar_t))
((wchar_t *)self->b_ptr)[result] = (wchar_t)0;
done:
Py_DECREF(value);

return result >= 0 ? 0 : -1;
return -1;
}
if (PyUnicode_AsWideChar(value, (wchar_t *)self->b_ptr, size) < 0) {
return -1;
}
return 0;
}

static PyGetSetDef WCharArray_getsets[] = {
Expand Down Expand Up @@ -3484,10 +3477,12 @@ _validate_paramflags(PyTypeObject *type, PyObject *paramflags)
for (i = 0; i < len; ++i) {
PyObject *item = PyTuple_GET_ITEM(paramflags, i);
int flag;
char *name;
PyObject *name = Py_None;
PyObject *defval;
PyObject *typ;
if (!PyArg_ParseTuple(item, "i|ZO", &flag, &name, &defval)) {
if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) ||
!(name == Py_None || PyUnicode_Check(name)))
{
PyErr_SetString(PyExc_TypeError,
"paramflags must be a sequence of (int [,string [,value]]) tuples");
return 0;
Expand Down
10 changes: 8 additions & 2 deletions Modules/_ctypes/callproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1300,7 +1300,6 @@ module. load_flags are as defined for LoadLibraryEx in the\n\
Windows API.\n";
static PyObject *load_library(PyObject *self, PyObject *args)
{
const WCHAR *name;
PyObject *nameobj;
int load_flags = 0;
HMODULE hMod;
Expand All @@ -1309,7 +1308,11 @@ static PyObject *load_library(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U|i:LoadLibrary", &nameobj, &load_flags))
return NULL;

name = _PyUnicode_AsUnicode(nameobj);
#if USE_UNICODE_WCHAR_CACHE
const WCHAR *name = _PyUnicode_AsUnicode(nameobj);
#else /* USE_UNICODE_WCHAR_CACHE */
WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL);
#endif /* USE_UNICODE_WCHAR_CACHE */
if (!name)
return NULL;

Expand All @@ -1325,6 +1328,9 @@ static PyObject *load_library(PyObject *self, PyObject *args)
err = hMod ? 0 : GetLastError();
Py_END_ALLOW_THREADS

#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(name);
#endif /* USE_UNICODE_WCHAR_CACHE */
if (err == ERROR_MOD_NOT_FOUND) {
PyErr_Format(PyExc_FileNotFoundError,
("Could not find module '%.500S' (or one of its "
Expand Down
7 changes: 2 additions & 5 deletions Modules/_ctypes/cfield.c
Original file line number Diff line number Diff line change
Expand Up @@ -1220,11 +1220,8 @@ U_set(void *ptr, PyObject *value, Py_ssize_t length)
"string too long (%zd, maximum length %zd)",
size, length);
return NULL;
} else if (size < length-1)
/* copy terminating NUL character if there is space */
size += 1;

if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, size) == -1) {
}
if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, length) == -1) {
return NULL;
}

Expand Down
12 changes: 12 additions & 0 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,14 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
return -1;
}
#if USE_UNICODE_WCHAR_CACHE
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
widename = PyUnicode_AsUnicode(stringobj);
_Py_COMP_DIAG_POP
#else /* USE_UNICODE_WCHAR_CACHE */
widename = PyUnicode_AsWideCharString(stringobj, NULL);
#endif /* USE_UNICODE_WCHAR_CACHE */
if (widename == NULL)
return -1;
#else
Expand Down Expand Up @@ -491,6 +498,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
internal_close(self);

done:
#ifdef MS_WINDOWS
#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(widename);
#endif /* USE_UNICODE_WCHAR_CACHE */
#endif
Py_CLEAR(stringobj);
return ret;
}
Expand Down
Loading