python · serhiy-storchaka · Oct 26, 2018 · Mar 19, 2019 · Mar 19, 2019 · Mar 19, 2019
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
@@ -11,6 +11,7 @@
 
 /* --- Internal Unicode Operations ---------------------------------------- */
 
+#define HAVE_UNICODE_WCHAR_CACHE 1
 #define USE_UNICODE_WCHAR_CACHE 1
 
 /* Since splitting on whitespace is an important use case, and
@@ -74,6 +75,16 @@ Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
 /* low surrogate = bottom 10 bits added to DC00 */
 #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
 
+#if HAVE_UNICODE_WCHAR_CACHE
+/* Check if substring matches at given offset.  The offset must be valid and
+   the substring non-empty.  Arguments are evaluated more than once. */
+
+#define Py_UNICODE_MATCH(string, offset, substring) \
+    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
+     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
+     !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
+#endif /* HAVE_UNICODE_WCHAR_CACHE */
+
 /* --- Unicode Type ------------------------------------------------------- */
 
 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
@@ -213,7 +224,9 @@ typedef struct {
            4 bytes (see issue #19537 on m68k). */
         unsigned int :24;
     } state;
+#if HAVE_UNICODE_WCHAR_CACHE
     wchar_t *wstr;              /* wchar_t representation (null-terminated) */
+#endif /* HAVE_UNICODE_WCHAR_CACHE */
 } PyASCIIObject;
 
 /* Non-ASCII strings allocated through PyUnicode_New use the
@@ -224,8 +237,10 @@ typedef struct {
     Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                  * terminating \0. */
     char *utf8;                 /* UTF-8 representation (null-terminated) */
+#if HAVE_UNICODE_WCHAR_CACHE
     Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
                                  * surrogates count as two code points. */
+#endif /* HAVE_UNICODE_WCHAR_CACHE */
 } PyCompactUnicodeObject;
 
 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
@@ -246,6 +261,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
     int check_content);
 
 /* Fast access macros */
+#if HAVE_UNICODE_WCHAR_CACHE
 
 /* Returns the deprecated Py_UNICODE representation's size in code units
    (this includes surrogate pairs as 2 units).
@@ -280,6 +296,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
 #define PyUnicode_AS_DATA(op) \
     ((const char *)(PyUnicode_AS_UNICODE(op)))
 
+#endif /* HAVE_UNICODE_WCHAR_CACHE */
 
 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
 
@@ -440,13 +457,15 @@ enum PyUnicode_Kind {
         (0xffffU) :                                                     \
         (0x10ffffU)))))
 
+#if HAVE_UNICODE_WCHAR_CACHE
 Py_DEPRECATED(3.3)
 static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
     return PyUnicode_IS_COMPACT_ASCII(op) ?
             ((PyASCIIObject*)op)->length :
             ((PyCompactUnicodeObject*)op)->wstr_length;
 }
 #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
+#endif /* HAVE_UNICODE_WCHAR_CACHE */
 
 /* === Public API ========================================================= */
 

diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
@@ -36,6 +36,11 @@
 from .testresult import get_test_runner
 
 
+try:
+    from _testcapi import unicode_legacy_string
+except ImportError:
+    unicode_legacy_string = None  # falsy, so skipUnless() skips legacy tests
+
 __all__ = [
     # globals
     "PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast",
@@ -426,6 +431,9 @@ def requires_lzma(reason='requires lzma'):
         lzma = None
     return unittest.skipUnless(lzma, reason)
 
+requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string,
+                        'requires legacy Unicode C API')
+
 is_jython = sys.platform.startswith('java')
 
 is_android = hasattr(sys, 'getandroidapilevel')

diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
@@ -250,9 +250,9 @@ def test_writerows_errors(self):
             self.assertRaises(OSError, writer.writerows, BadIterable())
 
     @support.cpython_only
+    @support.requires_legacy_unicode_capi
     def test_writerows_legacy_strings(self):
         import _testcapi
-
         c = _testcapi.unicode_legacy_string('a')
         with TemporaryFile("w+", newline='') as fileobj:
             writer = csv.writer(fileobj)

diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py
@@ -33,7 +33,8 @@
 import numbers
 import locale
 from test.support import (run_unittest, run_doctest, is_resource_enabled,
-                          requires_IEEE_754, requires_docstrings)
+                          requires_IEEE_754, requires_docstrings,
+                          requires_legacy_unicode_capi)
 from test.support import (TestFailed,
                           run_with_locale, cpython_only)
 from test.support.import_helper import import_fresh_module
@@ -582,6 +583,7 @@ def test_explicit_from_string(self):
             self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")
 
     @cpython_only
+    @requires_legacy_unicode_capi
     def test_from_legacy_strings(self):
         import _testcapi
         Decimal = self.decimal.Decimal
@@ -2817,6 +2819,7 @@ def test_none_args(self):
                                               Overflow])
 
     @cpython_only
+    @requires_legacy_unicode_capi
     def test_from_legacy_strings(self):
         import _testcapi
         c = self.decimal.Context()

diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py
@@ -975,6 +975,7 @@ def test_et_hash(self):
         buf = bytearray()
         self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf)
 
+    @support.requires_legacy_unicode_capi
     def test_u(self):
         from _testcapi import getargs_u
         self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
@@ -984,6 +985,7 @@ def test_u(self):
         self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
         self.assertRaises(TypeError, getargs_u, None)
 
+    @support.requires_legacy_unicode_capi
     def test_u_hash(self):
         from _testcapi import getargs_u_hash
         self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
@@ -993,6 +995,7 @@ def test_u_hash(self):
         self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
         self.assertRaises(TypeError, getargs_u_hash, None)
 
+    @support.requires_legacy_unicode_capi
     def test_Z(self):
         from _testcapi import getargs_Z
         self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
@@ -1002,6 +1005,7 @@ def test_Z(self):
         self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
         self.assertIsNone(getargs_Z(None))
 
+    @support.requires_legacy_unicode_capi
     def test_Z_hash(self):
         from _testcapi import getargs_Z_hash
         self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
@@ -721,6 +721,7 @@ def test_isidentifier(self):
         self.assertFalse("0".isidentifier())
 
     @support.cpython_only
+    @support.requires_legacy_unicode_capi
     def test_isidentifier_legacy(self):
         import _testcapi
         u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
@@ -2348,6 +2349,7 @@ def test_getnewargs(self):
         self.assertEqual(len(args), 1)
 
     @support.cpython_only
+    @support.requires_legacy_unicode_capi
     def test_resize(self):
         from _testcapi import getargs_u
         for length in range(1, 100, 7):
@@ -2918,6 +2920,7 @@ def test_copycharacters(self):
         self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
 
     @support.cpython_only
+    @support.requires_legacy_unicode_capi
     def test_encode_decimal(self):
         from _testcapi import unicode_encodedecimal
         self.assertEqual(unicode_encodedecimal('123'),
@@ -2934,6 +2937,7 @@ def test_encode_decimal(self):
             unicode_encodedecimal, "123\u20ac", "replace")
 
     @support.cpython_only
+    @support.requires_legacy_unicode_capi
     def test_transform_decimal(self):
         from _testcapi import unicode_transformdecimaltoascii as transform_decimal
         self.assertEqual(transform_decimal('123'),

diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c
@@ -1366,8 +1366,6 @@ WCharArray_get_value(CDataObject *self, void *Py_UNUSED(ignored))
 static int
 WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored))
 {
-    Py_ssize_t result = 0;
-
     if (value == NULL) {
         PyErr_SetString(PyExc_TypeError,
                         "can't delete attribute");
@@ -1378,29 +1376,24 @@ WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored
                         "unicode string expected instead of %s instance",
                         Py_TYPE(value)->tp_name);
         return -1;
-    } else
-        Py_INCREF(value);
+    }
 
+    Py_ssize_t size = self->b_size / sizeof(wchar_t);
     Py_ssize_t len = PyUnicode_AsWideChar(value, NULL, 0);
     if (len < 0) {
         return -1;
     }
     // PyUnicode_AsWideChar() returns number of wchars including trailing null byte,
     // when it is called with NULL.
-    if (((size_t)len-1) > self->b_size/sizeof(wchar_t)) {
+    assert(len > 0);
+    if (len - 1 > size) {
         PyErr_SetString(PyExc_ValueError, "string too long");
-        result = -1;
-        goto done;
-    }
-    result = PyUnicode_AsWideChar(value,
-                                  (wchar_t *)self->b_ptr,
-                                  self->b_size/sizeof(wchar_t));
-    if (result >= 0 && (size_t)result < self->b_size/sizeof(wchar_t))
-        ((wchar_t *)self->b_ptr)[result] = (wchar_t)0;
-  done:
-    Py_DECREF(value);
-
-    return result >= 0 ? 0 : -1;
+        return -1;
+    }
+    if (PyUnicode_AsWideChar(value, (wchar_t *)self->b_ptr, size) < 0) {
+        return -1;
+    }
+    return 0;
 }
 
 static PyGetSetDef WCharArray_getsets[] = {
@@ -3484,10 +3477,12 @@ _validate_paramflags(PyTypeObject *type, PyObject *paramflags)
     for (i = 0; i < len; ++i) {
         PyObject *item = PyTuple_GET_ITEM(paramflags, i);
         int flag;
-        char *name;
+        PyObject *name = Py_None;
         PyObject *defval;
         PyObject *typ;
-        if (!PyArg_ParseTuple(item, "i|ZO", &flag, &name, &defval)) {
+        if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) ||
+            !(name == Py_None || PyUnicode_Check(name)))
+        {
             PyErr_SetString(PyExc_TypeError,
                    "paramflags must be a sequence of (int [,string [,value]]) tuples");
             return 0;

diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c
@@ -1300,7 +1300,6 @@ module. load_flags are as defined for LoadLibraryEx in the\n\
 Windows API.\n";
 static PyObject *load_library(PyObject *self, PyObject *args)
 {
-    const WCHAR *name;
     PyObject *nameobj;
     int load_flags = 0;
     HMODULE hMod;
@@ -1309,7 +1308,11 @@ static PyObject *load_library(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "U|i:LoadLibrary", &nameobj, &load_flags))
         return NULL;
 
-    name = _PyUnicode_AsUnicode(nameobj);
+#if USE_UNICODE_WCHAR_CACHE
+    const WCHAR *name = _PyUnicode_AsUnicode(nameobj);
+#else /* !USE_UNICODE_WCHAR_CACHE */
+    WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL); /* freed below */
+#endif /* USE_UNICODE_WCHAR_CACHE */
     if (!name)
         return NULL;
 
@@ -1325,6 +1328,9 @@ static PyObject *load_library(PyObject *self, PyObject *args)
     err = hMod ? 0 : GetLastError();
     Py_END_ALLOW_THREADS
 
+#if !USE_UNICODE_WCHAR_CACHE
+    PyMem_Free(name);
+#endif /* !USE_UNICODE_WCHAR_CACHE */
     if (err == ERROR_MOD_NOT_FOUND) {
         PyErr_Format(PyExc_FileNotFoundError,
                      ("Could not find module '%.500S' (or one of its "

diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c
@@ -1220,11 +1220,8 @@ U_set(void *ptr, PyObject *value, Py_ssize_t length)
                      "string too long (%zd, maximum length %zd)",
                      size, length);
         return NULL;
-    } else if (size < length-1)
-        /* copy terminating NUL character if there is space */
-        size += 1;
-
-    if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, size) == -1) {
+    }
+    if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, length) == -1) {
         return NULL;
     }
 

diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
@@ -270,7 +270,14 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
         if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
             return -1;
         }
+#if USE_UNICODE_WCHAR_CACHE
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
         widename = PyUnicode_AsUnicode(stringobj);
+_Py_COMP_DIAG_POP
+#else /* USE_UNICODE_WCHAR_CACHE */
+        widename = PyUnicode_AsWideCharString(stringobj, NULL);
+#endif /* USE_UNICODE_WCHAR_CACHE */
         if (widename == NULL)
             return -1;
 #else
@@ -491,6 +498,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
         internal_close(self);
 
  done:
+#ifdef MS_WINDOWS
+#if !USE_UNICODE_WCHAR_CACHE
+    PyMem_Free(widename); /* from PyUnicode_AsWideCharString() */
+#endif /* !USE_UNICODE_WCHAR_CACHE */
+#endif /* MS_WINDOWS */
     Py_CLEAR(stringobj);
     return ret;
 }