@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
5151 char sort_keys ;
5252 char skipkeys ;
5353 int allow_nan ;
54- PyCFunction fast_encode ;
54+ int ( * fast_encode )( PyUnicodeWriter * , PyObject * ) ;
5555} PyEncoderObject ;
5656
5757#define PyEncoderObject_CAST (op ) ((PyEncoderObject *)(op))
@@ -102,8 +102,8 @@ static PyObject *
102102_encoded_const (PyObject * obj );
103103static void
104104raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end );
105- static PyObject *
106- encoder_encode_string (PyEncoderObject * s , PyObject * obj );
105+ static int
106+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj );
107107static PyObject *
108108encoder_encode_float (PyEncoderObject * s , PyObject * obj );
109109
@@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr)
303303 return rval ;
304304}
305305
306+ // Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
// The output is wrapped in double quotes. Characters for which S_CHAR() is
// true are copied through in runs; every other character is replaced by the
// escape sequence produced by ascii_escape_unichar().
// Returns the first non-zero status reported by a PyUnicodeWriter_* call,
// otherwise the status of the final closing-quote write.
307+ static int
308+ write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
309+ {
310+ Py_ssize_t i ;
311+ Py_ssize_t input_chars ;
312+ Py_ssize_t chars ;
313+ Py_ssize_t copy_len = 0 ; // length of the pending run of unescaped characters ending just before index i
314+ const void * input ;
315+ int kind ;
316+ int ret ;
317+ unsigned char buf [12 ]; // scratch for one escape sequence; NOTE(review): presumably 12 is the longest output of ascii_escape_unichar -- confirm
318+
319+ input_chars = PyUnicode_GET_LENGTH (pystr );
320+ input = PyUnicode_DATA (pystr );
321+ kind = PyUnicode_KIND (pystr );
322+
// Opening quote.
323+ ret = PyUnicodeWriter_WriteChar (writer , '"' );
324+ if (ret ) return ret ;
325+
326+ for (i = 0 ; i < input_chars ; i ++ ) {
327+ Py_UCS4 c = PyUnicode_READ (kind , input , i );
328+ if (S_CHAR (c )) {
329+ copy_len ++ ; // no escape needed: extend the pending run instead of writing now
330+ }
331+ else {
332+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i ); // flush the pending run [i - copy_len, i) before the escape
333+ if (ret ) return ret ;
334+ copy_len = 0 ;
335+
336+ chars = ascii_escape_unichar (c , buf , 0 ); // defined elsewhere; presumably fills buf from offset 0 and returns the number of bytes written
337+ ret = PyUnicodeWriter_WriteUTF8 (writer , (const char * )buf , chars );
338+ if (ret ) return ret ;
339+ }
340+ }
341+
342+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i ); // i == input_chars here: flush the run left over after the last escape
343+ if (ret ) return ret ;
344+
// Closing quote; its status is the function result.
345+ return PyUnicodeWriter_WriteChar (writer , '"' );
346+ }
347+
348+ // Take a PyUnicode pystr and write an escaped string to writer.
// The output is wrapped in double quotes. Only code points <= 0x1f, the
// backslash and the double quote are replaced by an escape sequence from
// ascii_escape_unichar(); all other characters are copied through in runs.
// Returns the first non-zero status reported by a PyUnicodeWriter_* call,
// otherwise the status of the final closing-quote write.
349+ static int
350+ write_escaped_unicode (PyUnicodeWriter * writer , PyObject * pystr )
351+ {
352+ Py_ssize_t i ;
353+ Py_ssize_t input_chars ;
354+ Py_ssize_t chars ;
355+ Py_ssize_t copy_len = 0 ; // length of the pending run of unescaped characters ending just before index i
356+ const void * input ;
357+ int kind ;
358+ int ret ;
359+ unsigned char buf [12 ]; // scratch for one escape sequence; NOTE(review): size presumably matches the longest output of ascii_escape_unichar -- confirm
360+
361+ input_chars = PyUnicode_GET_LENGTH (pystr );
362+ input = PyUnicode_DATA (pystr );
363+ kind = PyUnicode_KIND (pystr );
364+
// Opening quote.
365+ ret = PyUnicodeWriter_WriteChar (writer , '"' );
366+ if (ret ) return ret ;
367+
368+ for (i = 0 ; i < input_chars ; i ++ ) {
369+ Py_UCS4 c = PyUnicode_READ (kind , input , i );
370+ if (c <= 0x1f || c == '\\' || c == '"' ) {
371+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i ); // flush the pending run [i - copy_len, i) before the escape
372+ if (ret ) return ret ;
373+ copy_len = 0 ;
374+
375+ chars = ascii_escape_unichar (c , buf , 0 ); // defined elsewhere; presumably fills buf from offset 0 and returns the number of bytes written
376+ ret = PyUnicodeWriter_WriteUTF8 (writer , (const char * )buf , chars );
377+ if (ret ) return ret ;
378+ }
379+ else {
380+ copy_len ++ ; // no escape needed: extend the pending run instead of writing now
381+ }
382+ }
383+
384+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i ); // i == input_chars here: flush the run left over after the last escape
385+ if (ret ) return ret ;
// Closing quote; its status is the function result.
386+ return PyUnicodeWriter_WriteChar (writer , '"' );
387+ }
388+
306389static void
307390raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end )
308391{
@@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12551338
12561339 if (PyCFunction_Check (s -> encoder )) {
12571340 PyCFunction f = PyCFunction_GetFunction (s -> encoder );
1258- if (f == py_encode_basestring_ascii || f == py_encode_basestring ) {
1259- s -> fast_encode = f ;
1341+ if (f == py_encode_basestring_ascii ){
1342+ s -> fast_encode = write_escaped_ascii ;
1343+ }
1344+ else if (f == py_encode_basestring ) {
1345+ s -> fast_encode = write_escaped_unicode ;
12601346 }
12611347 }
12621348
@@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
14371523 return PyFloat_Type .tp_repr (obj );
14381524}
14391525
1440- static PyObject *
1441- encoder_encode_string ( PyEncoderObject * s , PyObject * obj )
1526+ static int
1527+ _steal_accumulate ( PyUnicodeWriter * writer , PyObject * stolen )
14421528{
1443- /* Return the JSON representation of a string */
1444- PyObject * encoded ;
1529+ /* Append stolen and then decrement its reference count */
1530+ int rval = PyUnicodeWriter_WriteStr (writer , stolen );
1531+ Py_DECREF (stolen ); // released whether or not the write succeeded
1532+ return rval ;
1533+ }
14451534
/* Write the JSON representation of the string obj into writer.
 * Uses s->fast_encode when it is set; otherwise calls s->encoder(obj) and
 * appends its result. Returns the fast encoder's status, -1 when the encoder
 * call fails or returns a non-string (TypeError is set in the latter case),
 * or the status of the final append. */
1535+ static int
1536+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj )
1537+ {
14461538 if (s -> fast_encode ) {
1447- return s -> fast_encode (NULL , obj );
1539+ return s -> fast_encode (writer , obj ); // fast path: escape straight into writer, no intermediate string object
1540+ }
1541+
1542+ /* Slow path: call the configured encoder and append the string it returns */
1543+ PyObject * encoded = PyObject_CallOneArg (s -> encoder , obj );
1544+ if (encoded == NULL ) {
1545+ return -1 ;
14481546 }
1449- encoded = PyObject_CallOneArg (s -> encoder , obj );
14501547 if (encoded != NULL && !PyUnicode_Check (encoded )) { // NOTE(review): on the new side encoded is already known non-NULL here, so the first test is redundant
14511548 PyErr_Format (PyExc_TypeError ,
14521549 "encoder() must return a string, not %.80s" ,
14531550 Py_TYPE (encoded )-> tp_name );
14541551 Py_DECREF (encoded );
1455- return NULL ;
1552+ return -1 ;
14561553 }
1457- return encoded ;
1458- }
1459-
1460- static int
1461- _steal_accumulate (PyUnicodeWriter * writer , PyObject * stolen )
1462- {
1463- /* Append stolen and then decrement its reference count */
1464- int rval = PyUnicodeWriter_WriteStr (writer , stolen );
1465- Py_DECREF (stolen );
1466- return rval ;
1554+ return _steal_accumulate (writer , encoded ); // hands the reference to encoded over; the helper DECREFs it
14671555}
14681556
14691557static int
@@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14851573 return PyUnicodeWriter_WriteUTF8 (writer , "false" , 5 );
14861574 }
14871575 else if (PyUnicode_Check (obj )) {
1488- PyObject * encoded = encoder_encode_string (s , obj );
1489- if (encoded == NULL )
1490- return -1 ;
1491- return _steal_accumulate (writer , encoded );
1576+ return encoder_write_string (s , writer , obj );
14921577 }
14931578 else if (PyLong_Check (obj )) {
14941579 if (PyLong_CheckExact (obj )) {
@@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
15771662 PyObject * item_separator )
15781663{
15791664 PyObject * keystr = NULL ;
1580- PyObject * encoded ;
1665+ int rv ;
15811666
15821667 if (PyUnicode_Check (key )) {
15831668 keystr = Py_NewRef (key );
@@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
16171702 }
16181703 }
16191704
1620- encoded = encoder_encode_string ( s , keystr );
1705+ rv = encoder_write_string ( s , writer , keystr );
16211706 Py_DECREF (keystr );
1622- if (encoded == NULL ) {
1623- return -1 ;
1707+ if (rv != 0 ) {
1708+ return rv ;
16241709 }
16251710
1626- if (_steal_accumulate (writer , encoded ) < 0 ) {
1627- return -1 ;
1628- }
16291711 if (PyUnicodeWriter_WriteStr (writer , s -> key_separator ) < 0 ) {
16301712 return -1 ;
16311713 }
0 commit comments