From 29ae371a88703c5cce699f302967be3a38c9d25f Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 4 Sep 2018 23:19:28 +0100 Subject: [PATCH] =?UTF-8?q?Update=20SimpleJSON=203.13.2=20(6ffddbe)=20?= =?UTF-8?q?=E2=86=92=203.16.0=20(e2a54f7).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 1 + lib/simplejson/__init__.py | 2 +- lib/simplejson/_speedups.c | 292 +++++++++++++++++-------------------- lib/simplejson/compat.py | 22 +-- lib/simplejson/decoder.py | 18 +-- lib/simplejson/encoder.py | 80 ++++++---- 6 files changed, 202 insertions(+), 213 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6ea69be9..3e10f7af 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,7 @@ * Update scandir module 1.6 (c3592ee) to 1.9.0 (9ab3d1f) * Add urllib3 release 1.23 (7c216f4) * Change if old scandir binary module is installed, fallback to slow Python module and inform user to upgrade binary +* Update SimpleJSON 3.13.2 (6ffddbe) to 3.16.0 (e2a54f7) [develop changelog] diff --git a/lib/simplejson/__init__.py b/lib/simplejson/__init__.py index adca4531..0556a7a8 100644 --- a/lib/simplejson/__init__.py +++ b/lib/simplejson/__init__.py @@ -98,7 +98,7 @@ Using simplejson.tool from the shell to validate and pretty-print:: Expecting property name: line 1 column 3 (char 2) """ from __future__ import absolute_import -__version__ = '3.13.2' +__version__ = '3.16.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', diff --git a/lib/simplejson/_speedups.c b/lib/simplejson/_speedups.c index f3524435..e7101288 100644 --- a/lib/simplejson/_speedups.c +++ b/lib/simplejson/_speedups.c @@ -5,36 +5,24 @@ #if PY_MAJOR_VERSION >= 3 #define PyInt_FromSsize_t PyLong_FromSsize_t #define PyInt_AsSsize_t PyLong_AsSsize_t -#define PyString_Check PyBytes_Check -#define PyString_GET_SIZE PyBytes_GET_SIZE -#define PyString_AS_STRING PyBytes_AS_STRING -#define PyString_FromStringAndSize PyBytes_FromStringAndSize #define PyInt_Check(obj) 0 #define PyInt_CheckExact(obj) 0 #define JSON_UNICHR Py_UCS4 #define JSON_InternFromString PyUnicode_InternFromString -#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE -#define JSON_ASCII_Check PyUnicode_Check -#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 -#define PyInt_Type PyLong_Type -#define PyInt_FromString PyLong_FromString +#define PyString_GET_SIZE PyUnicode_GET_LENGTH #define PY2_UNUSED #define PY3_UNUSED UNUSED -#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127) #else /* PY_MAJOR_VERSION >= 3 */ #define PY2_UNUSED UNUSED #define PY3_UNUSED +#define PyBytes_Check PyString_Check #define PyUnicode_READY(obj) 0 #define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) #define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) #define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) -#define PyUnicode_GetLength PyUnicode_GET_SIZE +#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE #define JSON_UNICHR Py_UNICODE -#define JSON_ASCII_Check PyString_Check -#define JSON_ASCII_AS_STRING PyString_AS_STRING #define JSON_InternFromString PyString_InternFromString -#define JSON_Intern_GET_SIZE PyString_GET_SIZE -#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0) #endif /* PY_MAJOR_VERSION < 3 */ #if PY_VERSION_HEX < 0x02070000 @@ -84,6 +72,14 @@ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exce #define JSON_ALLOW_NAN 1 #define JSON_IGNORE_NAN 2 +static PyObject *JSON_Infinity = NULL; +static PyObject *JSON_NegInfinity = NULL; +static PyObject *JSON_NaN = NULL; +static PyObject *JSON_EmptyUnicode = NULL; +#if PY_MAJOR_VERSION < 3 +static PyObject *JSON_EmptyStr = NULL; +#endif + static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; @@ -188,8 +184,6 @@ join_list_unicode(PyObject *lst); static PyObject * JSON_ParseEncoding(PyObject *encoding); static PyObject * -JSON_UnicodeFromChar(JSON_UNICHR c); -static PyObject * maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj); static Py_ssize_t ascii_char_size(JSON_UNICHR c); @@ -318,7 +312,7 @@ JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) #if PY_MAJOR_VERSION >= 3 assert(PyUnicode_Check(unicode)); #else /* PY_MAJOR_VERSION >= 3 */ - assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); + assert(PyString_Check(unicode) || PyUnicode_Check(unicode)); #endif /* PY_MAJOR_VERSION < 3 */ if (PyList_Append(acc->small_strings, unicode)) @@ -368,19 +362,6 @@ IS_DIGIT(JSON_UNICHR c) return c >= '0' && c <= '9'; } -static PyObject * -JSON_UnicodeFromChar(JSON_UNICHR c) -{ -#if PY_MAJOR_VERSION >= 3 - PyObject *rval = PyUnicode_New(1, c); - if (rval) - PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); - return rval; -#else /* PY_MAJOR_VERSION >= 3 */ - return PyUnicode_FromUnicode(&c, 1); -#endif /* PY_MAJOR_VERSION < 3 */ -} - static PyObject * maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj) { @@ -466,7 +447,7 @@ ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) case '\r': output[chars++] = 'r'; break; case '\t': output[chars++] = 't'; break; default: -#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) if (c >= 0x10000) { /* UTF-16 surrogate pair */ JSON_UNICHR v = c - 0x10000; @@ -505,7 +486,7 @@ ascii_char_size(JSON_UNICHR c) c == '\t') { return 2; } -#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) else if (c >= 0x10000U) { return 2 * MIN_EXPANSION; } @@ -520,20 +501,14 @@ ascii_escape_unicode(PyObject *pystr) { /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; + Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr); + Py_ssize_t output_size = 2; Py_ssize_t chars; - PY2_UNUSED int kind; - void *data; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *data = PyUnicode_DATA(pystr); PyObject *rval; char *output; - if (PyUnicode_READY(pystr)) - return NULL; - - kind = PyUnicode_KIND(pystr); - data = PyUnicode_DATA(pystr); - input_chars = PyUnicode_GetLength(pystr); output_size = 2; for (i = 0; i < input_chars; i++) { output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); @@ -568,7 +543,7 @@ static PyObject * ascii_escape_str(PyObject *pystr) { PyObject *rval; - PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + PyObject *input = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(pystr), PyBytes_GET_SIZE(pystr), NULL); if (input == NULL) return NULL; rval = ascii_escape_unicode(input); @@ -634,21 +609,23 @@ encoder_stringify_key(PyEncoderObject *s, PyObject *key) Py_INCREF(key); return key; } - else if (PyString_Check(key)) { #if PY_MAJOR_VERSION >= 3 - const char *encoding = JSON_ASCII_AS_STRING(s->encoding); + else if (PyBytes_Check(key) && s->encoding != NULL) { + const char *encoding = PyUnicode_AsUTF8(s->encoding); if (encoding == NULL) return NULL; return PyUnicode_Decode( - PyString_AS_STRING(key), - PyString_GET_SIZE(key), + PyBytes_AS_STRING(key), + PyBytes_GET_SIZE(key), encoding, NULL); + } #else /* PY_MAJOR_VERSION >= 3 */ + else if (PyString_Check(key)) { Py_INCREF(key); return key; -#endif /* PY_MAJOR_VERSION < 3 */ } +#endif /* PY_MAJOR_VERSION < 3 */ else if (PyFloat_Check(key)) { return encoder_encode_float(s, key); } @@ -676,7 +653,7 @@ encoder_stringify_key(PyEncoderObject *s, PyObject *key) else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { return PyObject_Str(key); } - else if (s->skipkeys) { + if (s->skipkeys) { Py_INCREF(Py_None); return Py_None; } @@ -799,18 +776,7 @@ static PyObject * join_list_unicode(PyObject *lst) { /* return u''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = JSON_NewEmptyUnicode(); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); + return PyUnicode_Join(JSON_EmptyUnicode, lst); } #if PY_MAJOR_VERSION >= 3 @@ -822,12 +788,7 @@ join_list_string(PyObject *lst) /* return ''.join(lst) */ static PyObject *joinfn = NULL; if (joinfn == NULL) { - PyObject *ustr = PyString_FromStringAndSize(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); + joinfn = PyObject_GetAttrString(JSON_EmptyStr, "join"); if (joinfn == NULL) return NULL; } @@ -932,17 +893,6 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s /* Pick up this chunk if it's not zero length */ if (next != end) { APPEND_OLD_CHUNK -#if PY_MAJOR_VERSION >= 3 - if (!has_unicode) { - chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); - } - else { - chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); - } - if (chunk == NULL) { - goto bail; - } -#else /* PY_MAJOR_VERSION >= 3 */ strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; @@ -957,7 +907,6 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s else { chunk = strchunk; } -#endif /* PY_MAJOR_VERSION < 3 */ } next++; if (c == '"') { @@ -1015,7 +964,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } -#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) +#if defined(Py_UNICODE_WIDE) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { @@ -1050,20 +999,14 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } } } -#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ +#endif /* Py_UNICODE_WIDE */ } if (c > 0x7f) { has_unicode = 1; } APPEND_OLD_CHUNK -#if PY_MAJOR_VERSION >= 3 - chunk = JSON_UnicodeFromChar(c); - if (chunk == NULL) { - goto bail; - } -#else /* PY_MAJOR_VERSION >= 3 */ if (has_unicode) { - chunk = JSON_UnicodeFromChar(c); + chunk = PyUnicode_FromOrdinal(c); if (chunk == NULL) { goto bail; } @@ -1075,14 +1018,15 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } -#endif } if (chunks == NULL) { if (chunk != NULL) rval = chunk; - else - rval = JSON_NewEmptyUnicode(); + else { + rval = JSON_EmptyStr; + Py_INCREF(rval); + } } else { APPEND_OLD_CHUNK @@ -1118,7 +1062,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Py_ssize_t begin = end - 1; Py_ssize_t next = begin; PY2_UNUSED int kind = PyUnicode_KIND(pystr); - Py_ssize_t len = PyUnicode_GetLength(pystr); + Py_ssize_t len = PyUnicode_GET_LENGTH(pystr); void *buf = PyUnicode_DATA(pystr); PyObject *chunks = NULL; PyObject *chunk = NULL; @@ -1256,7 +1200,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next #endif } APPEND_OLD_CHUNK - chunk = JSON_UnicodeFromChar(c); + chunk = PyUnicode_FromOrdinal(c); if (chunk == NULL) { goto bail; } @@ -1265,8 +1209,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next if (chunks == NULL) { if (chunk != NULL) rval = chunk; - else - rval = JSON_NewEmptyUnicode(); + else { + rval = JSON_EmptyUnicode; + Py_INCREF(rval); + } } else { APPEND_OLD_CHUNK @@ -1314,6 +1260,8 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) encoding = DEFAULT_ENCODING; } if (PyUnicode_Check(pystr)) { + if (PyUnicode_READY(pystr)) + return NULL; rval = scanstring_unicode(pystr, end, strict, &next_end); } #if PY_MAJOR_VERSION < 3 @@ -1343,10 +1291,12 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) { /* Return an ASCII-only JSON representation of a Python string */ /* METH_O */ - if (PyString_Check(pystr)) { + if (PyBytes_Check(pystr)) { return ascii_escape_str(pystr); } else if (PyUnicode_Check(pystr)) { + if (PyUnicode_READY(pystr)) + return NULL; return ascii_escape_unicode(pystr); } else { @@ -1419,7 +1369,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyObject *item; PyObject *key = NULL; PyObject *val = NULL; - char *encoding = JSON_ASCII_AS_STRING(s->encoding); + char *encoding = PyString_AS_STRING(s->encoding); int has_pairs_hook = (s->pairs_hook != Py_None); int did_parse = 0; Py_ssize_t next_idx; @@ -1573,7 +1523,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss Returns a new PyObject (usually a dict, but object_hook can change that) */ void *str = PyUnicode_DATA(pystr); - Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1; PY2_UNUSED int kind = PyUnicode_KIND(pystr); PyObject *rval = NULL; PyObject *pairs = NULL; @@ -1818,7 +1768,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi */ PY2_UNUSED int kind = PyUnicode_KIND(pystr); void *str = PyUnicode_DATA(pystr); - Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; @@ -1887,10 +1837,10 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +_parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON constant from PyString pystr. - constant is the constant string that was found + constant is the Python string that was found ("NaN", "Infinity", "-Infinity"). idx is the index of the first character of the constant *next_idx_ptr is a return-by-reference index to the first character after @@ -1898,17 +1848,11 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * Returns the result of parse_constant */ - PyObject *cstr; PyObject *rval; - /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = JSON_InternFromString(constant); - if (cstr == NULL) - return NULL; /* rval = parse_constant(constant) */ - rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += JSON_Intern_GET_SIZE(cstr); - Py_DECREF(cstr); + rval = PyObject_CallFunctionObjArgs(s->parse_constant, constant, NULL); + idx += PyString_GET_SIZE(constant); *next_idx_ptr = idx; return rval; } @@ -2033,7 +1977,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ */ PY2_UNUSED int kind = PyUnicode_KIND(pystr); void *str = PyUnicode_DATA(pystr); - Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1; Py_ssize_t idx = start; int is_float = 0; JSON_UNICHR c; @@ -2153,7 +2097,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case '"': /* string */ rval = scanstring_str(pystr, idx + 1, - JSON_ASCII_AS_STRING(s->encoding), + PyString_AS_STRING(s->encoding), s->strict, next_idx_ptr); break; @@ -2206,7 +2150,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - rval = _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); } else fallthrough = 1; @@ -2214,7 +2158,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2222,7 +2166,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2250,7 +2194,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ */ PY2_UNUSED int kind = PyUnicode_KIND(pystr); void *str = PyUnicode_DATA(pystr); - Py_ssize_t length = PyUnicode_GetLength(pystr); + Py_ssize_t length = PyUnicode_GET_LENGTH(pystr); PyObject *rval = NULL; int fallthrough = 0; if (idx < 0 || idx >= length) { @@ -2325,7 +2269,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && PyUnicode_READ(kind, str, idx + 2) == 'N') { - rval = _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); } else fallthrough = 1; @@ -2340,7 +2284,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyUnicode_READ(kind, str, idx + 5) == 'i' && PyUnicode_READ(kind, str, idx + 6) == 't' && PyUnicode_READ(kind, str, idx + 7) == 'y') { - rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2356,7 +2300,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyUnicode_READ(kind, str, idx + 6) == 'i' && PyUnicode_READ(kind, str, idx + 7) == 't' && PyUnicode_READ(kind, str, idx + 8) == 'y') { - rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2386,6 +2330,8 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; if (PyUnicode_Check(pystr)) { + if (PyUnicode_READY(pystr)) + return NULL; rval = scan_once_unicode(s, pystr, idx, &next_idx); } #if PY_MAJOR_VERSION < 3 @@ -2406,18 +2352,24 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * JSON_ParseEncoding(PyObject *encoding) { - if (encoding == NULL) - return NULL; if (encoding == Py_None) return JSON_InternFromString(DEFAULT_ENCODING); -#if PY_MAJOR_VERSION < 3 - if (PyUnicode_Check(encoding)) - return PyUnicode_AsEncodedString(encoding, NULL, NULL); -#endif - if (JSON_ASCII_Check(encoding)) { +#if PY_MAJOR_VERSION >= 3 + if (PyUnicode_Check(encoding)) { + if (PyUnicode_AsUTF8(encoding) == NULL) { + return NULL; + } Py_INCREF(encoding); return encoding; } +#else /* PY_MAJOR_VERSION >= 3 */ + if (PyString_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif /* PY_MAJOR_VERSION >= 3 */ PyErr_SetString(PyExc_TypeError, "encoding must be a string"); return NULL; } @@ -2444,8 +2396,9 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto bail; } - /* JSON_ASCII_AS_STRING is used on encoding */ encoding = PyObject_GetAttrString(ctx, "encoding"); + if (encoding == NULL) + goto bail; s->encoding = JSON_ParseEncoding(encoding); Py_XDECREF(encoding); if (s->encoding == NULL) @@ -2578,11 +2531,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->defaultfn = defaultfn; Py_INCREF(encoder); s->encoder = encoder; - s->encoding = JSON_ParseEncoding(encoding); - if (s->encoding == NULL) - goto bail; - if (JSON_ASCII_AS_STRING(s->encoding) == NULL) - goto bail; +#if PY_MAJOR_VERSION >= 3 + if (encoding == Py_None) { + s->encoding = NULL; + } + else +#endif /* PY_MAJOR_VERSION >= 3 */ + { + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + goto bail; + } Py_INCREF(indent); s->indent = indent; Py_INCREF(key_separator); @@ -2768,28 +2727,16 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) } /* JSON_ALLOW_NAN is set */ else if (i > 0) { - static PyObject *sInfinity = NULL; - if (sInfinity == NULL) - sInfinity = JSON_InternFromString("Infinity"); - if (sInfinity) - Py_INCREF(sInfinity); - return sInfinity; + Py_INCREF(JSON_Infinity); + return JSON_Infinity; } else if (i < 0) { - static PyObject *sNegInfinity = NULL; - if (sNegInfinity == NULL) - sNegInfinity = JSON_InternFromString("-Infinity"); - if (sNegInfinity) - Py_INCREF(sNegInfinity); - return sNegInfinity; + Py_INCREF(JSON_NegInfinity); + return JSON_NegInfinity; } else { - static PyObject *sNaN = NULL; - if (sNaN == NULL) - sNaN = JSON_InternFromString("NaN"); - if (sNaN) - Py_INCREF(sNaN); - return sNaN; + Py_INCREF(JSON_NaN); + return JSON_NaN; } } /* Use a better float format here? */ @@ -2821,7 +2768,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); if (encoded != NULL && #if PY_MAJOR_VERSION < 3 - !JSON_ASCII_Check(encoded) && + !PyString_Check(encoded) && #endif /* PY_MAJOR_VERSION < 3 */ !PyUnicode_Check(encoded)) { @@ -2854,18 +2801,13 @@ encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ss if (cstr != NULL) rv = _steal_accumulate(rval, cstr); } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) + else if ((PyBytes_Check(obj) && s->encoding != NULL) || + PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded != NULL) rv = _steal_accumulate(rval, encoded); } - else if (is_raw_json(obj)) - { - PyObject *encoded = PyObject_GetAttrString(obj, "encoded_json"); - if (encoded != NULL) - rv = _steal_accumulate(rval, encoded); - } else if (PyInt_Check(obj) || PyLong_Check(obj)) { PyObject *encoded; if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) { @@ -2933,6 +2875,12 @@ encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ss if (encoded != NULL) rv = _steal_accumulate(rval, encoded); } + else if (is_raw_json(obj)) + { + PyObject *encoded = PyObject_GetAttrString(obj, "encoded_json"); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } else { PyObject *ident = NULL; PyObject *newobj; @@ -3366,6 +3314,32 @@ import_dependency(char *module_name, char *attr_name) return rval; } +static int +init_constants(void) +{ + JSON_NaN = JSON_InternFromString("NaN"); + if (JSON_NaN == NULL) + return 0; + JSON_Infinity = JSON_InternFromString("Infinity"); + if (JSON_Infinity == NULL) + return 0; + JSON_NegInfinity = JSON_InternFromString("-Infinity"); + if (JSON_NegInfinity == NULL) + return 0; +#if PY_MAJOR_VERSION >= 3 + JSON_EmptyUnicode = PyUnicode_New(0, 127); +#else /* PY_MAJOR_VERSION >= 3 */ + JSON_EmptyStr = PyString_FromString(""); + if (JSON_EmptyStr == NULL) + return 0; + JSON_EmptyUnicode = PyUnicode_FromUnicode(NULL, 0); +#endif /* PY_MAJOR_VERSION >= 3 */ + if (JSON_EmptyUnicode == NULL) + return 0; + + return 1; +} + static PyObject * moduleinit(void) { @@ -3374,6 +3348,8 @@ moduleinit(void) return NULL; if (PyType_Ready(&PyEncoderType) < 0) return NULL; + if (!init_constants()) + return NULL; #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&moduledef); diff --git a/lib/simplejson/compat.py b/lib/simplejson/compat.py index 6f945ccc..5fc14128 100644 --- a/lib/simplejson/compat.py +++ b/lib/simplejson/compat.py @@ -5,10 +5,11 @@ if sys.version_info[0] < 3: PY3 = False def b(s): return s - def u(s): - return unicode(s, 'unicode_escape') - import cStringIO as StringIO - StringIO = BytesIO = StringIO.StringIO + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO text_type = unicode binary_type = str string_types = (basestring,) @@ -21,20 +22,13 @@ else: from importlib import reload as reload_module else: from imp import reload as reload_module - import codecs def b(s): - return codecs.latin_1_encode(s)[0] - def u(s): - return s - import io - StringIO = io.StringIO - BytesIO = io.BytesIO + return bytes(s, 'latin1') + from io import StringIO, BytesIO text_type = str binary_type = bytes string_types = (str,) integer_types = (int,) - - def unichr(s): - return u(chr(s)) + unichr = chr long_type = integer_types[-1] diff --git a/lib/simplejson/decoder.py b/lib/simplejson/decoder.py index e0b55a4f..7f0b0568 100644 --- a/lib/simplejson/decoder.py +++ b/lib/simplejson/decoder.py @@ -4,7 +4,7 @@ from __future__ import absolute_import import re import sys import struct -from .compat import u, text_type, binary_type, PY3, unichr +from .compat import PY3, unichr from .scanner import make_scanner, JSONDecodeError def _import_c_scanstring(): @@ -40,14 +40,14 @@ _CONSTANTS = { STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u('"'), '\\': u('\u005c'), '/': u('/'), - 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', } DEFAULT_ENCODING = "utf-8" def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. @@ -71,8 +71,8 @@ def py_scanstring(s, end, encoding=None, strict=True, content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not _PY3 and not isinstance(content, text_type): - content = text_type(content, encoding) + if not _PY3 and not isinstance(content, unicode): + content = unicode(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -365,8 +365,8 @@ class JSONDecoder(object): instance containing a JSON document) """ - if _PY3 and isinstance(s, binary_type): - s = s.decode(self.encoding) + if _PY3 and isinstance(s, bytes): + s = str(s, self.encoding) obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): @@ -388,7 +388,7 @@ class JSONDecoder(object): # Ensure that raw_decode bails on negative indexes, the regex # would otherwise mask this behavior. #98 raise JSONDecodeError('Expecting value', s, idx) - if _PY3 and not isinstance(s, text_type): + if _PY3 and not isinstance(s, str): raise TypeError("Input string must be text, not bytes") # strip UTF-8 bom if len(s) > idx: diff --git a/lib/simplejson/encoder.py b/lib/simplejson/encoder.py index 831527b7..7ea172e7 100644 --- a/lib/simplejson/encoder.py +++ b/lib/simplejson/encoder.py @@ -5,7 +5,7 @@ import re from operator import itemgetter # Do not import Decimal directly to avoid reload issues import decimal -from .compat import u, unichr, binary_type, text_type, string_types, integer_types, PY3 +from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 def _import_speedups(): try: from . import _speedups @@ -17,10 +17,7 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from .decoder import PosInf from .raw_json import RawJSON -#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') -# This is required because u() will mangle the string and ur'' isn't valid -# python3 syntax -ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') +ESCAPE = re.compile(r'[\x00-\x1f\\"]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -35,24 +32,27 @@ ESCAPE_DCT = { for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -for i in [0x2028, 0x2029]: - ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) FLOAT_REPR = repr -def encode_basestring(s, _PY3=PY3, _q=u('"')): +def encode_basestring(s, _PY3=PY3, _q=u'"'): """Return a JSON representation of a Python string """ if _PY3: - if isinstance(s, binary_type): - s = s.decode('utf-8') - if type(s) is not text_type: - s = text_type.__str__(s) + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) else: if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - if type(s) not in string_types: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise if isinstance(s, str): s = str.__str__(s) else: @@ -67,14 +67,19 @@ def py_encode_basestring_ascii(s, _PY3=PY3): """ if _PY3: - if isinstance(s, binary_type): - s = s.decode('utf-8') - if type(s) is not text_type: - s = text_type.__str__(s) + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) else: if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - if type(s) not in string_types: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise if isinstance(s, str): s = str.__str__(s) else: @@ -279,7 +284,7 @@ class JSONEncoder(object): if isinstance(o, binary_type): _encoding = self.encoding if (_encoding is not None and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + o = text_type(o, _encoding) if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) @@ -314,10 +319,10 @@ class JSONEncoder(object): _encoder = encode_basestring_ascii else: _encoder = encode_basestring - if self.encoding != 'utf-8': + if self.encoding != 'utf-8' and self.encoding is not None: def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): if isinstance(o, binary_type): - o = o.decode(_encoding) + o = text_type(o, _encoding) return _orig_encoder(o) def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, @@ -382,6 +387,11 @@ class JSONEncoderForHTML(JSONEncoder): characters &, < and > should be escaped. They cannot be escaped with the usual entities (e.g. &) because they are not expanded within