From 79e26ed290669f9a5a5bd2cc0d77fc477a5e266d Mon Sep 17 00:00:00 2001 From: JackDandy Date: Sat, 13 Jun 2015 23:09:10 +0100 Subject: [PATCH] Update SimpleJSON library 2.0.9 to 3.7.3 (0bcdf20). --- CHANGES.md | 1 + lib/simplejson/__init__.py | 416 ++++-- lib/simplejson/_speedups.c | 2250 +++++++++++++++++++++++--------- lib/simplejson/compat.py | 46 + lib/simplejson/decoder.py | 270 ++-- lib/simplejson/encoder.py | 438 +++++-- lib/simplejson/ordered_dict.py | 119 ++ lib/simplejson/scanner.py | 86 +- lib/simplejson/tool.py | 42 + 9 files changed, 2754 insertions(+), 914 deletions(-) create mode 100644 lib/simplejson/compat.py create mode 100644 lib/simplejson/ordered_dict.py create mode 100644 lib/simplejson/tool.py diff --git a/CHANGES.md b/CHANGES.md index 7510d567..06776e81 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -40,6 +40,7 @@ * Change to consolidate scene exceptions and name cache code * Change check_url function to use requests instead of httplib library * Update Six compatibility library 1.5.2 to 1.9.0 (8a545f4) +* Update SimpleJSON library 2.0.9 to 3.7.3 (0bcdf20) [develop changelog] * Update Requests library 2.7.0 (ab1f493) to 2.7.0 (8b5e457) diff --git a/lib/simplejson/__init__.py b/lib/simplejson/__init__.py index d5b4d399..b8d50978 100644 --- a/lib/simplejson/__init__.py +++ b/lib/simplejson/__init__.py @@ -14,15 +14,15 @@ Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +31,14 @@ Encoding basic Python object hierarchies:: Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) { "4": 5, "6": 7 @@ -52,7 +52,7 @@ Decoding JSON:: True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -68,8 +68,8 @@ Specializing JSON object decoding:: >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) - >>> import decimal - >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') True Specializing JSON object encoding:: @@ -95,18 +95,38 @@ Using simplejson.tool from the shell to validate and pretty-print:: "json": "obj" } $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) + Expecting property name: line 1 column 3 (char 2) """ -__version__ = '2.0.9' +from __future__ import absolute_import +__version__ = '3.7.3' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'OrderedDict', 'simple_first', ] __author__ = 'Bob Ippolito ' -from decoder import JSONDecoder -from encoder import JSONEncoder +from decimal import Decimal + +from .scanner import JSONDecodeError +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + from . import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from ._speedups import make_encoder + return make_encoder + except ImportError: + return None _default_encoder = JSONEncoder( skipkeys=False, @@ -117,56 +137,117 @@ _default_encoder = JSONEncoder( separators=None, encoding='utf-8', default=None, + use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). - If ``skipkeys`` is true then ``dict`` keys that are not basic types + If *skipkeys* is true then ``dict`` keys that are not basic types (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` + If *ensure_ascii* is false, then the some chunks written to ``fp`` may be ``unicode`` instances, subject to normal Python ``str`` to ``unicode`` coercion rules. Unless ``fp.write()`` explicitly understands ``unicode`` (as in ``codecs.getwriter()``) this is likely to cause an error. - If ``check_circular`` is false, then the circular reference check + If *check_circular* is false, then the circular reference check for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). - If ``allow_nan`` is false, then it will be a ``ValueError`` to + If *allow_nan* is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + If *indent* is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + *encoding* is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. + + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -174,7 +255,16 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, + **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -182,8 +272,11 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types @@ -203,30 +296,77 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. + If ``indent`` is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. ``encoding`` is the character encoding for str instances, default is UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precendence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. """ # cached encoder - if (not skipkeys and ensure_ascii and + if ( + not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -234,85 +374,191 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) -_default_decoder = JSONDecoder(encoding=None, object_hook=None) +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. - If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as :class:`unicode`. - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and not kw): + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. + """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/lib/simplejson/_speedups.c b/lib/simplejson/_speedups.c index 23b5f4a6..bc1648ae 100644 --- a/lib/simplejson/_speedups.c +++ b/lib/simplejson/_speedups.c @@ -1,18 +1,85 @@ +/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */ #include "Python.h" #include "structmember.h" -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_FromSsize_t PyLong_FromSsize_t +#define PyInt_AsSsize_t PyLong_AsSsize_t +#define PyString_Check PyBytes_Check +#define PyString_GET_SIZE PyBytes_GET_SIZE +#define PyString_AS_STRING PyBytes_AS_STRING +#define PyString_FromStringAndSize PyBytes_FromStringAndSize +#define PyInt_Check(obj) 0 +#define PyInt_CheckExact(obj) 0 +#define JSON_UNICHR Py_UCS4 +#define JSON_InternFromString PyUnicode_InternFromString +#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE +#define JSON_ASCII_Check PyUnicode_Check +#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8 +#define PyInt_Type PyLong_Type +#define PyInt_FromString PyLong_FromString +#define PY2_UNUSED +#define PY3_UNUSED UNUSED +#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127) +#else /* PY_MAJOR_VERSION >= 3 */ +#define PY2_UNUSED UNUSED +#define PY3_UNUSED +#define PyUnicode_READY(obj) 0 +#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE)) +#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj))) +#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)]) +#define PyUnicode_GetLength PyUnicode_GET_SIZE +#define JSON_UNICHR Py_UNICODE +#define JSON_ASCII_Check PyString_Check +#define JSON_ASCII_AS_STRING PyString_AS_STRING +#define JSON_InternFromString PyString_InternFromString +#define JSON_Intern_GET_SIZE PyString_GET_SIZE +#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0) +#endif /* PY_MAJOR_VERSION < 3 */ + +#if PY_VERSION_HEX < 0x02070000 +#if !defined(PyOS_string_to_double) +#define PyOS_string_to_double json_PyOS_string_to_double +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); +static double +json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) +{ + double x; + assert(endptr == NULL); + assert(overflow_exception == NULL); + PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) + x = PyOS_ascii_atof(s); + PyFPE_END_PROTECT(x) + return x; +} +#endif +#endif /* PY_VERSION_HEX < 0x02070000 */ + +#if PY_VERSION_HEX < 0x02060000 +#if !defined(Py_TYPE) #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +#if !defined(Py_SIZE) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) +#endif +#if !defined(PyVarObject_HEAD_INIT) +#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif +#endif /* PY_VERSION_HEX < 0x02060000 */ + +#if PY_VERSION_HEX < 0x02050000 +#if !defined(PY_SSIZE_T_MIN) typedef int Py_ssize_t; #define PY_SSIZE_T_MAX INT_MAX #define PY_SSIZE_T_MIN INT_MIN #define PyInt_FromSsize_t PyInt_FromLong #define PyInt_AsSsize_t PyInt_AsLong #endif -#ifndef Py_IS_FINITE +#if !defined(Py_IS_FINITE) #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) #endif +#endif /* PY_VERSION_HEX < 0x02050000 */ #ifdef __GNUC__ #define UNUSED __attribute__((__unused__)) @@ -27,23 +94,55 @@ typedef int Py_ssize_t; #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) +#define JSON_ALLOW_NAN 1 +#define JSON_IGNORE_NAN 2 + static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; +typedef struct { + PyObject *large_strings; /* A list of previously accumulated large strings */ + PyObject *small_strings; /* Pending small strings */ +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + +#define ERR_EXPECTING_VALUE "Expecting value" +#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'" +#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'" +#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'" +#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes" +#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'" +#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter" +#define ERR_STRING_UNTERMINATED "Unterminated string starting at" +#define ERR_STRING_CONTROL "Invalid control character %r at" +#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r" +#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence" + typedef struct _PyScannerObject { PyObject_HEAD PyObject *encoding; PyObject *strict; PyObject *object_hook; + PyObject *pairs_hook; PyObject *parse_float; PyObject *parse_int; PyObject *parse_constant; + PyObject *memo; } PyScannerObject; static PyMemberDef scanner_members[] = { {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, @@ -59,35 +158,73 @@ typedef struct _PyEncoderObject { PyObject *key_separator; PyObject *item_separator; PyObject *sort_keys; - PyObject *skipkeys; + PyObject *key_memo; + PyObject *encoding; + PyObject *Decimal; + PyObject *skipkeys_bool; + int skipkeys; int fast_encode; - int allow_nan; + /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */ + int allow_or_ignore_nan; + int use_decimal; + int namedtuple_as_object; + int tuple_as_array; + PyObject *max_long_size; + PyObject *min_long_size; + PyObject *item_sort_key; + PyObject *item_sort_kw; + int for_json; } PyEncoderObject; static PyMemberDef encoder_members[] = { {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, + {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"}, {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, + /* Python 2.5 does not support T_BOOl */ + {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"}, + {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, + {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"}, + {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"}, + {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"}, {NULL} }; +static PyObject * +join_list_unicode(PyObject *lst); +static PyObject * +JSON_ParseEncoding(PyObject *encoding); +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c); +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj); static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +ascii_char_size(JSON_UNICHR c); +static Py_ssize_t +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars); static PyObject * ascii_escape_unicode(PyObject *pystr); static PyObject * ascii_escape_str(PyObject *pystr); static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); +#if PY_MAJOR_VERSION < 3 +static PyObject * +join_list_string(PyObject *lst); static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * +scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); +static PyObject * +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); +#endif +static PyObject * +scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr); +static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); @@ -107,14 +244,16 @@ static void encoder_dealloc(PyObject *self); static int encoder_clear(PyObject *self); -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); static PyObject * -_encoded_const(PyObject *const); +encoder_stringify_key(PyEncoderObject *s, PyObject *key); +static int +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); +static int +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level); +static int +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); +static PyObject * +_encoded_const(PyObject *obj); static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); static PyObject * @@ -125,25 +264,185 @@ static PyObject * _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); +static int +_is_namedtuple(PyObject *obj); +static int +_has_for_json_hook(PyObject *obj); +static PyObject * +moduleinit(void); #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) #define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) + +static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large_strings = NULL; + acc->small_strings = PyList_New(0); + if (acc->small_strings == NULL) + return -1; + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large_strings == NULL) { + acc->large_strings = PyList_New(0); + if (acc->large_strings == NULL) + return -1; + } +#if PY_MAJOR_VERSION >= 3 + joined = join_list_unicode(acc->small_strings); +#else /* PY_MAJOR_VERSION >= 3 */ + joined = join_list_string(acc->small_strings); +#endif /* PY_MAJOR_VERSION < 3 */ + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large_strings, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small_strings, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small_strings); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. + */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small_strings); + if (ret) { + Py_CLEAR(acc->large_strings); + return NULL; + } + res = acc->large_strings; + acc->large_strings = NULL; + if (res == NULL) + return PyList_New(0); + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small_strings); + Py_CLEAR(acc->large_strings); +} + +static int +IS_DIGIT(JSON_UNICHR c) +{ + return c >= '0' && c <= '9'; +} + +static PyObject * +JSON_UnicodeFromChar(JSON_UNICHR c) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *rval = PyUnicode_New(1, c); + if (rval) + PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c); + return rval; +#else /* PY_MAJOR_VERSION >= 3 */ + return PyUnicode_FromUnicode(&c, 1); +#endif /* PY_MAJOR_VERSION < 3 */ +} + +static PyObject * +maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj) +{ + if (s->max_long_size != Py_None && s->min_long_size != Py_None) { + if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) || + PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) { +#if PY_MAJOR_VERSION >= 3 + PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded); #else -#define MAX_EXPANSION MIN_EXPANSION + PyObject* quoted = PyString_FromFormat("\"%s\"", + PyString_AsString(encoded)); #endif + Py_DECREF(encoded); + encoded = quoted; + } + } + + return encoded; +} + +static int +_is_namedtuple(PyObject *obj) +{ + int rval = 0; + PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict"); + if (_asdict == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(_asdict); + Py_DECREF(_asdict); + return rval; +} + +static int +_has_for_json_hook(PyObject *obj) +{ + int rval = 0; + PyObject *for_json = PyObject_GetAttrString(obj, "for_json"); + if (for_json == NULL) { + PyErr_Clear(); + return 0; + } + rval = PyCallable_Check(for_json); + Py_DECREF(for_json); + return rval; +} static int _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) { /* PyObject to Py_ssize_t converter */ *size_ptr = PyInt_AsSsize_t(o); - if (*size_ptr == -1 && PyErr_Occurred()); - return 1; - return 0; + if (*size_ptr == -1 && PyErr_Occurred()) + return 0; + return 1; } static PyObject * @@ -154,44 +453,74 @@ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) } static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) +ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars) { /* Escape unicode code point c to ASCII escape sequences in char *output. output must have at least 12 bytes unused to accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); + if (S_CHAR(c)) { + output[chars++] = (char)c; + } + else { + output[chars++] = '\\'; + switch (c) { + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + JSON_UNICHR v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; + output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; + output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif output[chars++] = 'u'; output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; + } } return chars; } +static Py_ssize_t +ascii_char_size(JSON_UNICHR c) +{ + if (S_CHAR(c)) { + return 1; + } + else if (c == '\\' || + c == '"' || + c == '\b' || + c == '\f' || + c == '\n' || + c == '\r' || + c == '\t') { + return 2; + } +#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3 + else if (c >= 0x10000U) { + return 2 * MIN_EXPANSION; + } +#endif + else { + return MIN_EXPANSION; + } +} + static PyObject * ascii_escape_unicode(PyObject *pystr) { @@ -199,57 +528,62 @@ ascii_escape_unicode(PyObject *pystr) Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; - Py_ssize_t max_output_size; Py_ssize_t chars; + PY2_UNUSED int kind; + void *data; PyObject *rval; char *output; - Py_UNICODE *input_unicode; - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); + if (PyUnicode_READY(pystr)) + return NULL; - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); + kind = PyUnicode_KIND(pystr); + data = PyUnicode_DATA(pystr); + input_chars = PyUnicode_GetLength(pystr); + output_size = 2; + for (i = 0; i < input_chars; i++) { + output_size += ascii_char_size(PyUnicode_READ(kind, data, i)); + } +#if PY_MAJOR_VERSION >= 3 + rval = PyUnicode_New(output_size, 127); + if (rval == NULL) { + return NULL; + } + assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND); + output = (char *)PyUnicode_DATA(rval); +#else rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } output = PyString_AS_STRING(rval); +#endif chars = 0; output[chars++] = '"'; for (i = 0; i < input_chars; i++) { - Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } + chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars); } output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } + assert(chars == output_size); return rval; } +#if PY_MAJOR_VERSION >= 3 + +static PyObject * +ascii_escape_str(PyObject *pystr) +{ + PyObject *rval; + PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL); + if (input == NULL) + return NULL; + rval = ascii_escape_unicode(input); + Py_DECREF(input); + return rval; +} + +#else /* PY_MAJOR_VERSION >= 3 */ + static PyObject * ascii_escape_str(PyObject *pystr) { @@ -264,98 +598,208 @@ ascii_escape_str(PyObject *pystr) input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); + output_size = 2; /* Fast path for a string that's already ASCII */ for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } + JSON_UNICHR c = (JSON_UNICHR)input_str[i]; + if (c > 0x7f) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; } - break; + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; } + output_size += ascii_char_size(c); } - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - } rval = PyString_FromStringAndSize(NULL, output_size); if (rval == NULL) { return NULL; } + chars = 0; output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); - - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars); } output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } + assert(chars == output_size); return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ + +static PyObject * +encoder_stringify_key(PyEncoderObject *s, PyObject *key) +{ + if (PyUnicode_Check(key)) { + Py_INCREF(key); + return key; + } + else if (PyString_Check(key)) { +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_Decode( + PyString_AS_STRING(key), + PyString_GET_SIZE(key), + JSON_ASCII_AS_STRING(s->encoding), + NULL); +#else /* PY_MAJOR_VERSION >= 3 */ + Py_INCREF(key); + return key; +#endif /* PY_MAJOR_VERSION < 3 */ + } + else if (PyFloat_Check(key)) { + return encoder_encode_float(s, key); + } + else if (key == Py_True || key == Py_False || key == Py_None) { + /* This must come before the PyInt_Check because + True and False are also 1 and 0.*/ + return _encoded_const(key); + } + else if (PyInt_Check(key) || PyLong_Check(key)) { + if (!(PyInt_CheckExact(key) || PyLong_CheckExact(key))) { + /* See #118, do not trust custom str/repr */ + PyObject *res; + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, key, NULL); + if (tmp == NULL) { + return NULL; + } + res = PyObject_Str(tmp); + Py_DECREF(tmp); + return res; + } + else { + return PyObject_Str(key); + } + } + else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) { + return PyObject_Str(key); + } + else if (s->skipkeys) { + Py_INCREF(Py_None); + return Py_None; + } + PyErr_SetString(PyExc_TypeError, "keys must be a string"); + return NULL; +} + +static PyObject * +encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct) +{ + PyObject *items; + PyObject *iter = NULL; + PyObject *lst = NULL; + PyObject *item = NULL; + PyObject *kstr = NULL; + static PyObject *sortfun = NULL; + static PyObject *sortargs = NULL; + + if (sortargs == NULL) { + sortargs = PyTuple_New(0); + if (sortargs == NULL) + return NULL; + } + + if (PyDict_CheckExact(dct)) + items = PyDict_Items(dct); + else + items = PyMapping_Items(dct); + if (items == NULL) + return NULL; + iter = PyObject_GetIter(items); + Py_DECREF(items); + if (iter == NULL) + return NULL; + if (s->item_sort_kw == Py_None) + return iter; + lst = PyList_New(0); + if (lst == NULL) + goto bail; + while ((item = PyIter_Next(iter))) { + PyObject *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + goto bail; + } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(key)) { + /* item can be added as-is */ + } +#endif /* PY_MAJOR_VERSION < 3 */ + else if (PyUnicode_Check(key)) { + /* item can be added as-is */ + } + else { + PyObject *tpl; + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(kstr); + continue; + } + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + tpl = PyTuple_Pack(2, kstr, value); + if (tpl == NULL) + goto bail; + Py_CLEAR(kstr); + Py_DECREF(item); + item = tpl; + } + if (PyList_Append(lst, item)) + goto bail; + Py_DECREF(item); + } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; + sortfun = PyObject_GetAttrString(lst, "sort"); + if (sortfun == NULL) + goto bail; + if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw)) + goto bail; + Py_CLEAR(sortfun); + iter = PyObject_GetIter(lst); + Py_CLEAR(lst); + return iter; +bail: + Py_XDECREF(sortfun); + Py_XDECREF(kstr); + Py_XDECREF(item); + Py_XDECREF(lst); + Py_XDECREF(iter); + return NULL; +} static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) { - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ - static PyObject *errmsg_fn = NULL; - PyObject *pymsg; - if (errmsg_fn == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) + /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ + static PyObject *JSONDecodeError = NULL; + PyObject *exc; + if (JSONDecodeError == NULL) { + PyObject *scanner = PyImport_ImportModule("simplejson.scanner"); + if (scanner == NULL) return; - errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); - Py_DECREF(decoder); - if (errmsg_fn == NULL) + JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError"); + Py_DECREF(scanner); + if (JSONDecodeError == NULL) return; } - pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); - if (pymsg) { - PyErr_SetObject(PyExc_ValueError, pymsg); - Py_DECREF(pymsg); + exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); + if (exc) { + PyErr_SetObject(JSONDecodeError, exc); + Py_DECREF(exc); } } @@ -365,7 +809,7 @@ join_list_unicode(PyObject *lst) /* return u''.join(lst) */ static PyObject *joinfn = NULL; if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); + PyObject *ustr = JSON_NewEmptyUnicode(); if (ustr == NULL) return NULL; @@ -377,6 +821,9 @@ join_list_unicode(PyObject *lst) return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#if PY_MAJOR_VERSION >= 3 +#define join_list_string join_list_unicode +#else /* PY_MAJOR_VERSION >= 3 */ static PyObject * join_list_string(PyObject *lst) { @@ -394,9 +841,11 @@ join_list_string(PyObject *lst) } return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { +_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) +{ /* return (rval, idx) tuple, stealing reference to rval */ PyObject *tpl; PyObject *pyidx; @@ -404,6 +853,7 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { steal a reference to rval, returns (rval, idx) */ if (rval == NULL) { + assert(PyErr_Occurred()); return NULL; } pyidx = PyInt_FromSsize_t(idx); @@ -422,6 +872,21 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { return tpl; } +#define APPEND_OLD_CHUNK \ + if (chunk != NULL) { \ + if (chunks == NULL) { \ + chunks = PyList_New(0); \ + if (chunks == NULL) { \ + goto bail; \ + } \ + } \ + if (PyList_Append(chunks, chunk)) { \ + goto bail; \ + } \ + Py_CLEAR(chunk); \ + } + +#if PY_MAJOR_VERSION < 3 static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) { @@ -440,25 +905,28 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s Py_ssize_t next = begin; int has_unicode = 0; char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { + PyObject *chunks = NULL; + PyObject *chunk = NULL; + PyObject *strchunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } - if (end < 0 || len <= end) { + else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } while (1) { /* Find the end of the string or the next escape */ Py_UNICODE c = 0; - PyObject *chunk = NULL; for (next = end; next < len; next++) { c = (unsigned char)buf[next]; if (c == '"' || c == '\\') { break; } else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); + raise_errmsg(ERR_STRING_CONTROL, pystr, next); goto bail; } else if (c > 0x7f) { @@ -466,12 +934,24 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s } } if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } /* Pick up this chunk if it's not zero length */ if (next != end) { - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + if (!has_unicode) { + chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL); + } + else { + chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL); + } + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ + strchunk = PyString_FromStringAndSize(&buf[end], next - end); if (strchunk == NULL) { goto bail; } @@ -485,11 +965,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s else { chunk = strchunk; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); +#endif /* PY_MAJOR_VERSION < 3 */ } next++; if (c == '"') { @@ -497,7 +973,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s break; } if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } c = buf[next]; @@ -516,7 +992,7 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s default: c = 0; } if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); goto bail; } } @@ -525,12 +1001,12 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s next++; end = next + 4; if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); goto bail; } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = (JSON_UNICHR)buf[next]; c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -543,28 +1019,21 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s case 'F': c |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } -#ifdef Py_UNICODE_WIDE +#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { + if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') { + JSON_UNICHR c2 = 0; + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + c2 <<= 4; + JSON_UNICHR digit = buf[next]; + switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c2 |= (digit - '0'); break; @@ -575,27 +1044,34 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s case 'F': c2 |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif +#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */ } if (c > 0x7f) { has_unicode = 1; } + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION >= 3 + chunk = JSON_UnicodeFromChar(c); + if (chunk == NULL) { + goto bail; + } +#else /* PY_MAJOR_VERSION >= 3 */ if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); + chunk = JSON_UnicodeFromChar(c); if (chunk == NULL) { goto bail; } @@ -607,26 +1083,33 @@ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s goto bail; } } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); +#endif } - rval = join_list_string(chunks); - if (rval == NULL) { - goto bail; + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = JSON_NewEmptyUnicode(); } - Py_CLEAR(chunks); + else { + APPEND_OLD_CHUNK + rval = join_list_string(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); + } + *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; + Py_XDECREF(chunk); Py_XDECREF(chunks); return NULL; } - +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) @@ -640,47 +1123,50 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next Return value is a new PyUnicode */ PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); Py_ssize_t begin = end - 1; Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + Py_ssize_t len = PyUnicode_GetLength(pystr); + void *buf = PyUnicode_DATA(pystr); + PyObject *chunks = NULL; + PyObject *chunk = NULL; + + if (len == end) { + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } - if (end < 0 || len <= end) { + else if (end < 0 || len < end) { PyErr_SetString(PyExc_ValueError, "end is out of bounds"); goto bail; } while (1) { /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; + JSON_UNICHR c = 0; for (next = end; next < len; next++) { - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c == '"' || c == '\\') { break; } else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); + raise_errmsg(ERR_STRING_CONTROL, pystr, next); goto bail; } } if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } /* Pick up this chunk if it's not zero length */ if (next != end) { - chunk = PyUnicode_FromUnicode(&buf[end], next - end); + APPEND_OLD_CHUNK +#if PY_MAJOR_VERSION < 3 + chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end); +#else + chunk = PyUnicode_Substring(pystr, end, next); +#endif if (chunk == NULL) { goto bail; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); } next++; if (c == '"') { @@ -688,10 +1174,10 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next break; } if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); + raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin); goto bail; } - c = buf[next]; + c = PyUnicode_READ(kind, buf, next); if (c != 'u') { /* Non-unicode backslash escapes */ end = next + 1; @@ -707,7 +1193,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next default: c = 0; } if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); + raise_errmsg(ERR_STRING_ESC1, pystr, end - 2); goto bail; } } @@ -716,12 +1202,12 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next next++; end = next + 4; if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); + raise_errmsg(ERR_STRING_ESC4, pystr, next - 1); goto bail; } /* Decode 4 hex digits */ for (; next < end; next++) { - Py_UNICODE digit = buf[next]; + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); c <<= 4; switch (digit) { case '0': case '1': case '2': case '3': case '4': @@ -734,28 +1220,23 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next case 'F': c |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; } } -#ifdef Py_UNICODE_WIDE +#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE) /* Surrogate pair */ if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { + JSON_UNICHR c2 = 0; + if (end + 6 < len && + PyUnicode_READ(kind, buf, next) == '\\' && + PyUnicode_READ(kind, buf, next + 1) == 'u') { + end += 6; + /* Decode 4 hex digits */ + for (next += 2; next < end; next++) { + JSON_UNICHR digit = PyUnicode_READ(kind, buf, next); + c2 <<= 4; + switch (digit) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c2 |= (digit - '0'); break; @@ -766,42 +1247,48 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next case 'F': c2 |= (digit - 'A' + 10); break; default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); + raise_errmsg(ERR_STRING_ESC4, pystr, end - 5); goto bail; + } + } + if ((c2 & 0xfc00) != 0xdc00) { + /* not a low surrogate, rewind */ + end -= 6; + next = end; + } + else { + c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); } } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; } #endif } - chunk = PyUnicode_FromUnicode(&c, 1); + APPEND_OLD_CHUNK + chunk = JSON_UnicodeFromChar(c); if (chunk == NULL) { goto bail; } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); } - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; + if (chunks == NULL) { + if (chunk != NULL) + rval = chunk; + else + rval = JSON_NewEmptyUnicode(); + } + else { + APPEND_OLD_CHUNK + rval = join_list_unicode(chunks); + if (rval == NULL) { + goto bail; + } + Py_CLEAR(chunks); } - Py_DECREF(chunks); *next_end_ptr = end; return rval; bail: *next_end_ptr = -1; + Py_XDECREF(chunk); Py_XDECREF(chunks); return NULL; } @@ -834,12 +1321,16 @@ py_scanstring(PyObject* self UNUSED, PyObject *args) if (encoding == NULL) { encoding = DEFAULT_ENCODING; } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scanstring_unicode(pystr, end, strict, &next_end); } +#if PY_MAJOR_VERSION < 3 + /* Using a bytes input is unsupported for scanning in Python 3. + It is coerced to str in the decoder before it gets here. */ + else if (PyString_Check(pystr)) { + rval = scanstring_str(pystr, end, encoding, strict, &next_end); + } +#endif else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", @@ -891,9 +1382,11 @@ scanner_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->encoding); Py_VISIT(s->strict); Py_VISIT(s->object_hook); + Py_VISIT(s->pairs_hook); Py_VISIT(s->parse_float); Py_VISIT(s->parse_int); Py_VISIT(s->parse_constant); + Py_VISIT(s->memo); return 0; } @@ -906,52 +1399,83 @@ scanner_clear(PyObject *self) Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); + Py_CLEAR(s->memo); return 0; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyString pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after the closing curly brace. - Returns a new PyObject (usually a dict, but object_hook can change that) + Returns a new PyObject (usually a dict, but object_hook or + object_pairs_hook can change that) */ char *str = PyString_AS_STRING(pystr); Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = PyDict_New(); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; PyObject *key = NULL; PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); + char *encoding = JSON_ASCII_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; Py_ssize_t next_idx; - if (rval == NULL) - return NULL; + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } /* skip whitespace after { */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* only loop if the object is non-empty */ if (idx <= end_idx && str[idx] != '}') { + int trailing_delimiter = 0; while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + /* read key */ if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); goto bail; } key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); if (key == NULL) goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } idx = next_idx; /* skip whitespace between key and : delimiter, read :, skip whitespace */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); goto bail; } idx++; @@ -962,36 +1486,70 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if (val == NULL) goto bail; - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } idx = next_idx; /* skip whitespace before } or , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; /* bail if the object is closed or we didn't get the , delimiter */ + did_parse = 1; if (idx > end_idx) break; if (str[idx] == '}') { break; } else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be '}' */ if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } goto bail; } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); @@ -1004,14 +1562,17 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ *next_idx_ptr = idx + 1; return rval; bail: + Py_XDECREF(rval); Py_XDECREF(key); Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(pairs); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON object from PyUnicode pystr. idx is the index of the first character after the opening curly brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1019,78 +1580,141 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss Returns a new PyObject (usually a dict, but object_hook can change that) */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyDict_New(); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + PyObject *rval = NULL; + PyObject *pairs = NULL; + PyObject *item; PyObject *key = NULL; + PyObject *val = NULL; int strict = PyObject_IsTrue(s->strict); + int has_pairs_hook = (s->pairs_hook != Py_None); + int did_parse = 0; Py_ssize_t next_idx; - if (rval == NULL) - return NULL; + + if (has_pairs_hook) { + pairs = PyList_New(0); + if (pairs == NULL) + return NULL; + } + else { + rval = PyDict_New(); + if (rval == NULL) + return NULL; + } /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') { + int trailing_delimiter = 0; while (idx <= end_idx) { + PyObject *memokey; + trailing_delimiter = 0; + /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); + if (PyUnicode_READ(kind, str, idx) != '"') { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); goto bail; } key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); if (key == NULL) goto bail; + memokey = PyDict_GetItem(s->memo, key); + if (memokey != NULL) { + Py_INCREF(memokey); + Py_DECREF(key); + key = memokey; + } + else { + if (PyDict_SetItem(s->memo, key, key) < 0) + goto bail; + } idx = next_idx; - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); + /* skip whitespace between key and : delimiter, read :, skip + whitespace */ + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') { + raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx); goto bail; } idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); if (val == NULL) goto bail; - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); + if (has_pairs_hook) { + item = PyTuple_Pack(2, key, val); + if (item == NULL) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); + } + else { + if (PyDict_SetItem(rval, key, val) < 0) + goto bail; + Py_CLEAR(key); + Py_CLEAR(val); + } idx = next_idx; /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; - /* bail if the object is closed or we didn't get the , delimiter */ + /* bail if the object is closed or we didn't get the , + delimiter */ + did_parse = 1; if (idx > end_idx) break; - if (str[idx] == '}') { + if (PyUnicode_READ(kind, str, idx) == '}') { break; } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') { + if (did_parse) { + raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx); + } goto bail; } + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); @@ -1103,14 +1727,17 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss *next_idx_ptr = idx + 1; return rval; bail: + Py_XDECREF(rval); Py_XDECREF(key); Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(pairs); return NULL; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1131,12 +1758,14 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t /* only loop if the array is non-empty */ if (idx <= end_idx && str[idx] != ']') { + int trailing_delimiter = 0; while (idx <= end_idx) { - + trailing_delimiter = 0; /* read any JSON term and de-tuplefy the (rval, idx) */ val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) + if (val == NULL) { goto bail; + } if (PyList_Append(rval, val) == -1) goto bail; @@ -1153,19 +1782,28 @@ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t break; } else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be ']' */ if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } goto bail; } *next_idx_ptr = idx + 1; @@ -1175,9 +1813,11 @@ bail: Py_DECREF(rval); return NULL; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON array from PyString pystr. idx is the index of the first character after the opening brace. *next_idx_ptr is a return-by-reference index to the first character after @@ -1185,8 +1825,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi Returns a new PyList */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; PyObject *val = NULL; PyObject *rval = PyList_New(0); Py_ssize_t next_idx; @@ -1194,16 +1835,18 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi return NULL; /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { + if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') { + int trailing_delimiter = 0; while (idx <= end_idx) { - + trailing_delimiter = 0; /* read any JSON term */ val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) + if (val == NULL) { goto bail; + } if (PyList_Append(rval, val) == -1) goto bail; @@ -1212,27 +1855,36 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi idx = next_idx; /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; /* bail if the array is closed or we didn't get the , delimiter */ if (idx > end_idx) break; - if (str[idx] == ']') { + if (PyUnicode_READ(kind, str, idx) == ']') { break; } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); + else if (PyUnicode_READ(kind, str, idx) != ',') { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); goto bail; } idx++; /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; + while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++; + trailing_delimiter = 1; + } + if (trailing_delimiter) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + goto bail; } } /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); + if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') { + if (PyList_GET_SIZE(rval)) { + raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx); + } else { + raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx); + } goto bail; } *next_idx_ptr = idx + 1; @@ -1244,7 +1896,8 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { +_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON constant from PyString pystr. constant is the constant string that was found ("NaN", "Infinity", "-Infinity"). @@ -1257,20 +1910,22 @@ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * PyObject *cstr; PyObject *rval; /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); + cstr = JSON_InternFromString(constant); if (cstr == NULL) return NULL; /* rval = parse_constant(constant) */ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); + idx += JSON_Intern_GET_SIZE(cstr); Py_DECREF(cstr); *next_idx_ptr = idx; return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyString pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1289,11 +1944,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz /* read a sign if it's there, make sure it's not the end of the string */ if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } + idx++; } /* read as many integer digits as we find as long as it doesn't start with 0 */ @@ -1307,7 +1962,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz } /* no integer digits, error */ else { - PyErr_SetNone(PyExc_StopIteration); + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } @@ -1350,7 +2005,12 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { - rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); + /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ + double d = PyOS_string_to_double(PyString_AS_STRING(numstr), + NULL, NULL); + if (d == -1.0 && PyErr_Occurred()) + return NULL; + rval = PyFloat_FromDouble(d); } } else { @@ -1366,9 +2026,11 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz *next_idx_ptr = idx; return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { +_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) +{ /* Read a JSON number from PyUnicode pystr. idx is the index of the first character of the number *next_idx_ptr is a return-by-reference index to the first character after @@ -1378,57 +2040,68 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ PyInt, PyLong, or PyFloat. May return other types if parse_int or parse_float are set */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; Py_ssize_t idx = start; int is_float = 0; + JSON_UNICHR c; PyObject *rval; PyObject *numstr; /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); + if (PyUnicode_READ(kind, str, idx) == '-') { + if (idx >= end_idx) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } + idx++; } /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { + c = PyUnicode_READ(kind, str, idx); + if (c == '0') { + /* if it starts with 0 we only expect one integer digit */ idx++; } - /* no integer digits, error */ + else if (IS_DIGIT(c)) { + idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) { + idx++; + } + } else { - PyErr_SetNone(PyExc_StopIteration); + /* no integer digits, error */ + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { + if (idx < end_idx && + PyUnicode_READ(kind, str, idx) == '.' && + IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) { is_float = 1; idx += 2; - while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == 'e' || + PyUnicode_READ(kind, str, idx) == 'E')) { Py_ssize_t e_start = idx; idx++; /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; + if (idx < end_idx && + (PyUnicode_READ(kind, str, idx) == '-' || + PyUnicode_READ(kind, str, idx) == '+')) idx++; /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; + while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++; /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { + if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) { is_float = 1; } else { @@ -1437,7 +2110,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ } /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); +#if PY_MAJOR_VERSION >= 3 + numstr = PyUnicode_Substring(pystr, start, idx); +#else + numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start); +#endif if (numstr == NULL) return NULL; if (is_float) { @@ -1446,7 +2123,11 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); } else { +#if PY_MAJOR_VERSION >= 3 + rval = PyFloat_FromString(numstr); +#else rval = PyFloat_FromString(numstr, NULL); +#endif } } else { @@ -1458,6 +2139,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ return rval; } +#if PY_MAJOR_VERSION < 3 static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { @@ -1470,69 +2152,100 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n */ char *str = PyString_AS_STRING(pystr); Py_ssize_t length = PyString_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } switch (str[idx]) { case '"': /* string */ - return scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), + rval = scanstring_str(pystr, idx + 1, + JSON_ASCII_AS_STRING(s->encoding), PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; + rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; case '[': /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; + rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; case 'n': /* null */ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } + else + fallthrough = 1; break; case 't': /* true */ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } + else + fallthrough = 1; break; case 'f': /* false */ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } + else + fallthrough = 1; break; case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } + else + fallthrough = 1; break; case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_str(s, pystr, idx, next_idx_ptr); + return rval; } +#endif /* PY_MAJOR_VERSION < 3 */ + static PyObject * scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) @@ -1544,69 +2257,126 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ Returns a new PyObject representation of the term. */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); + PY2_UNUSED int kind = PyUnicode_KIND(pystr); + void *str = PyUnicode_DATA(pystr); + Py_ssize_t length = PyUnicode_GetLength(pystr); + PyObject *rval = NULL; + int fallthrough = 0; + if (idx < 0 || idx >= length) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); return NULL; } - switch (str[idx]) { + switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ - return scanstring_unicode(pystr, idx + 1, + rval = scanstring_unicode(pystr, idx + 1, PyObject_IsTrue(s->strict), next_idx_ptr); + break; case '{': /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) + return NULL; + rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; case '[': /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) + return NULL; + rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); + break; case 'n': /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'u' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 'l') { Py_INCREF(Py_None); *next_idx_ptr = idx + 4; - return Py_None; + rval = Py_None; } + else + fallthrough = 1; break; case 't': /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { + if ((idx + 3 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'r' && + PyUnicode_READ(kind, str, idx + 2) == 'u' && + PyUnicode_READ(kind, str, idx + 3) == 'e') { Py_INCREF(Py_True); *next_idx_ptr = idx + 4; - return Py_True; + rval = Py_True; } + else + fallthrough = 1; break; case 'f': /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { + if ((idx + 4 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'l' && + PyUnicode_READ(kind, str, idx + 3) == 's' && + PyUnicode_READ(kind, str, idx + 4) == 'e') { Py_INCREF(Py_False); *next_idx_ptr = idx + 5; - return Py_False; + rval = Py_False; } + else + fallthrough = 1; break; case 'N': /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); + if ((idx + 2 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'a' && + PyUnicode_READ(kind, str, idx + 2) == 'N') { + rval = _parse_constant(s, "NaN", idx, next_idx_ptr); } + else + fallthrough = 1; break; case 'I': /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); + if ((idx + 7 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'n' && + PyUnicode_READ(kind, str, idx + 2) == 'f' && + PyUnicode_READ(kind, str, idx + 3) == 'i' && + PyUnicode_READ(kind, str, idx + 4) == 'n' && + PyUnicode_READ(kind, str, idx + 5) == 'i' && + PyUnicode_READ(kind, str, idx + 6) == 't' && + PyUnicode_READ(kind, str, idx + 7) == 'y') { + rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; case '-': /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); + if ((idx + 8 < length) && + PyUnicode_READ(kind, str, idx + 1) == 'I' && + PyUnicode_READ(kind, str, idx + 2) == 'n' && + PyUnicode_READ(kind, str, idx + 3) == 'f' && + PyUnicode_READ(kind, str, idx + 4) == 'i' && + PyUnicode_READ(kind, str, idx + 5) == 'n' && + PyUnicode_READ(kind, str, idx + 6) == 'i' && + PyUnicode_READ(kind, str, idx + 7) == 't' && + PyUnicode_READ(kind, str, idx + 8) == 'y') { + rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); } + else + fallthrough = 1; break; + default: + fallthrough = 1; } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); + if (fallthrough) + rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); + return rval; } static PyObject * @@ -1624,18 +2394,21 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) return NULL; - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { + if (PyUnicode_Check(pystr)) { rval = scan_once_unicode(s, pystr, idx, &next_idx); } +#if PY_MAJOR_VERSION < 3 + else if (PyString_Check(pystr)) { + rval = scan_once_str(s, pystr, idx, &next_idx); + } +#endif /* PY_MAJOR_VERSION < 3 */ else { PyErr_Format(PyExc_TypeError, "first argument must be a string, not %.80s", Py_TYPE(pystr)->tp_name); return NULL; } + PyDict_Clear(s->memo); return _build_rval_index_tuple(rval, next_idx); } @@ -1648,6 +2421,7 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->encoding = NULL; s->strict = NULL; s->object_hook = NULL; + s->pairs_hook = NULL; s->parse_float = NULL; s->parse_int = NULL; s->parse_constant = NULL; @@ -1655,6 +2429,25 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)s; } +static PyObject * +JSON_ParseEncoding(PyObject *encoding) +{ + if (encoding == NULL) + return NULL; + if (encoding == Py_None) + return JSON_InternFromString(DEFAULT_ENCODING); +#if PY_MAJOR_VERSION < 3 + if (PyUnicode_Check(encoding)) + return PyUnicode_AsEncodedString(encoding, NULL, NULL); +#endif + if (JSON_ASCII_Check(encoding)) { + Py_INCREF(encoding); + return encoding; + } + PyErr_SetString(PyExc_TypeError, "encoding must be a string"); + return NULL; +} + static int scanner_init(PyObject *self, PyObject *args, PyObject *kwds) { @@ -1662,6 +2455,7 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) PyObject *ctx; static char *kwlist[] = {"context", NULL}; PyScannerObject *s; + PyObject *encoding; assert(PyScanner_Check(self)); s = (PyScannerObject *)self; @@ -1669,18 +2463,17 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) return -1; - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); + if (s->memo == NULL) { + s->memo = PyDict_New(); + if (s->memo == NULL) + goto bail; } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) + + /* JSON_ASCII_AS_STRING is used on encoding */ + encoding = PyObject_GetAttrString(ctx, "encoding"); + s->encoding = JSON_ParseEncoding(encoding); + Py_XDECREF(encoding); + if (s->encoding == NULL) goto bail; /* All of these will fail "gracefully" so we don't need to verify them */ @@ -1690,6 +2483,9 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); if (s->object_hook == NULL) goto bail; + s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->pairs_hook == NULL) + goto bail; s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); if (s->parse_float == NULL) goto bail; @@ -1706,6 +2502,7 @@ bail: Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); @@ -1716,8 +2513,7 @@ PyDoc_STRVAR(scanner_doc, "JSON scanner object"); static PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Scanner", /* tp_name */ sizeof(PyScannerObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -1767,11 +2563,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->markers = NULL; s->defaultfn = NULL; s->encoder = NULL; + s->encoding = NULL; s->indent = NULL; s->key_separator = NULL; s->item_separator = NULL; + s->key_memo = NULL; s->sort_keys = NULL; - s->skipkeys = NULL; + s->item_sort_key = NULL; + s->item_sort_kw = NULL; + s->Decimal = NULL; + s->max_long_size = NULL; + s->min_long_size = NULL; } return (PyObject *)s; } @@ -1780,28 +2582,138 @@ static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; + static char *kwlist[] = { + "markers", + "default", + "encoder", + "indent", + "key_separator", + "item_separator", + "sort_keys", + "skipkeys", + "allow_nan", + "key_memo", + "use_decimal", + "namedtuple_as_object", + "tuple_as_array", + "int_as_string_bitcount", + "item_sort_key", + "encoding", + "for_json", + "ignore_nan", + "Decimal", + NULL}; PyEncoderObject *s; - PyObject *allow_nan; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo; + PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array; + PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json; + PyObject *ignore_nan, *Decimal; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, - &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, + &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, + &namedtuple_as_object, &tuple_as_array, + &int_as_string_bitcount, &item_sort_key, &encoding, &for_json, + &ignore_nan, &Decimal)) return -1; - Py_INCREF(s->markers); - Py_INCREF(s->defaultfn); - Py_INCREF(s->encoder); - Py_INCREF(s->indent); - Py_INCREF(s->key_separator); - Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); + Py_INCREF(markers); + s->markers = markers; + Py_INCREF(defaultfn); + s->defaultfn = defaultfn; + Py_INCREF(encoder); + s->encoder = encoder; + s->encoding = JSON_ParseEncoding(encoding); + if (s->encoding == NULL) + return -1; + Py_INCREF(indent); + s->indent = indent; + Py_INCREF(key_separator); + s->key_separator = key_separator; + Py_INCREF(item_separator); + s->item_separator = item_separator; + Py_INCREF(skipkeys); + s->skipkeys_bool = skipkeys; + s->skipkeys = PyObject_IsTrue(skipkeys); + Py_INCREF(key_memo); + s->key_memo = key_memo; s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); + s->allow_or_ignore_nan = ( + (PyObject_IsTrue(ignore_nan) ? JSON_IGNORE_NAN : 0) | + (PyObject_IsTrue(allow_nan) ? JSON_ALLOW_NAN : 0)); + s->use_decimal = PyObject_IsTrue(use_decimal); + s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); + s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) { + static const unsigned int long_long_bitsize = SIZEOF_LONG_LONG * 8; + int int_as_string_bitcount_val = (int)PyLong_AsLong(int_as_string_bitcount); + if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < long_long_bitsize) { + s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << int_as_string_bitcount_val); + s->min_long_size = PyLong_FromLongLong(-1LL << int_as_string_bitcount_val); + if (s->min_long_size == NULL || s->max_long_size == NULL) { + return -1; + } + } + else { + PyErr_Format(PyExc_TypeError, + "int_as_string_bitcount (%d) must be greater than 0 and less than the number of bits of a `long long` type (%u bits)", + int_as_string_bitcount_val, long_long_bitsize); + return -1; + } + } + else if (int_as_string_bitcount == Py_None) { + Py_INCREF(Py_None); + s->max_long_size = Py_None; + Py_INCREF(Py_None); + s->min_long_size = Py_None; + } + else { + PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer"); + return -1; + } + if (item_sort_key != Py_None) { + if (!PyCallable_Check(item_sort_key)) { + PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable"); + return -1; + } + } + else if (PyObject_IsTrue(sort_keys)) { + static PyObject *itemgetter0 = NULL; + if (!itemgetter0) { + PyObject *operator = PyImport_ImportModule("operator"); + if (!operator) + return -1; + itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0); + Py_DECREF(operator); + } + item_sort_key = itemgetter0; + if (!item_sort_key) + return -1; + } + if (item_sort_key == Py_None) { + Py_INCREF(Py_None); + s->item_sort_kw = Py_None; + } + else { + s->item_sort_kw = PyDict_New(); + if (s->item_sort_kw == NULL) + return -1; + if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key)) + return -1; + } + Py_INCREF(sort_keys); + s->sort_keys = sort_keys; + Py_INCREF(item_sort_key); + s->item_sort_key = item_sort_key; + Py_INCREF(Decimal); + s->Decimal = Decimal; + s->for_json = PyObject_IsTrue(for_json); + return 0; } @@ -1811,22 +2723,21 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds) /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; - PyObject *rval; Py_ssize_t indent_level; PyEncoderObject *s; + JSON_Accu rval; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; - rval = PyList_New(0); - if (rval == NULL) + if (JSON_Accu_Init(&rval)) return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); + if (encoder_listencode_obj(s, &rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); return NULL; } - return rval; + return JSON_Accu_FinishAsList(&rval); } static PyObject * @@ -1836,7 +2747,7 @@ _encoded_const(PyObject *obj) if (obj == Py_None) { static PyObject *s_null = NULL; if (s_null == NULL) { - s_null = PyString_InternFromString("null"); + s_null = JSON_InternFromString("null"); } Py_INCREF(s_null); return s_null; @@ -1844,7 +2755,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_True) { static PyObject *s_true = NULL; if (s_true == NULL) { - s_true = PyString_InternFromString("true"); + s_true = JSON_InternFromString("true"); } Py_INCREF(s_true); return s_true; @@ -1852,7 +2763,7 @@ _encoded_const(PyObject *obj) else if (obj == Py_False) { static PyObject *s_false = NULL; if (s_false == NULL) { - s_false = PyString_InternFromString("false"); + s_false = JSON_InternFromString("false"); } Py_INCREF(s_false); return s_false; @@ -1869,22 +2780,54 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) /* Return the JSON representation of a PyFloat */ double i = PyFloat_AS_DOUBLE(obj); if (!Py_IS_FINITE(i)) { - if (!s->allow_nan) { + if (!s->allow_or_ignore_nan) { PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); return NULL; } - if (i > 0) { - return PyString_FromString("Infinity"); + if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) { + return _encoded_const(Py_None); + } + /* JSON_ALLOW_NAN is set */ + else if (i > 0) { + static PyObject *sInfinity = NULL; + if (sInfinity == NULL) + sInfinity = JSON_InternFromString("Infinity"); + if (sInfinity) + Py_INCREF(sInfinity); + return sInfinity; } else if (i < 0) { - return PyString_FromString("-Infinity"); + static PyObject *sNegInfinity = NULL; + if (sNegInfinity == NULL) + sNegInfinity = JSON_InternFromString("-Infinity"); + if (sNegInfinity) + Py_INCREF(sNegInfinity); + return sNegInfinity; } else { - return PyString_FromString("NaN"); + static PyObject *sNaN = NULL; + if (sNaN == NULL) + sNaN = JSON_InternFromString("NaN"); + if (sNaN) + Py_INCREF(sNaN); + return sNaN; } } /* Use a better float format here? */ - return PyObject_Repr(obj); + if (PyFloat_CheckExact(obj)) { + return PyObject_Repr(obj); + } + else { + /* See #118, do not trust custom str/repr */ + PyObject *res; + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyFloat_Type, obj, NULL); + if (tmp == NULL) { + return NULL; + } + res = PyObject_Repr(tmp); + Py_DECREF(tmp); + return res; + } } static PyObject * @@ -1898,116 +2841,169 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_list_append(PyObject *lst, PyObject *stolen) +_steal_accumulate(JSON_Accu *accu, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); + int rval = JSON_Accu_Accumulate(accu, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; + int rv = -1; + do { + if (obj == Py_None || obj == Py_True || obj == Py_False) { + PyObject *cstr = _encoded_const(obj); + if (cstr != NULL) + rv = _steal_accumulate(rval, cstr); + } + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + PyObject *encoded = encoder_encode_string(s, obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else if (PyInt_Check(obj) || PyLong_Check(obj)) { + PyObject *encoded; + if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) { + encoded = PyObject_Str(obj); } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; + else { + /* See #118, do not trust custom str/repr */ + PyObject *tmp = PyObject_CallFunctionObjArgs((PyObject *)&PyLong_Type, obj, NULL); + if (tmp == NULL) { + encoded = NULL; + } + else { + encoded = PyObject_Str(tmp); + Py_DECREF(tmp); + } + } + if (encoded != NULL) { + encoded = maybe_quote_bigint(s, encoded, obj); + if (encoded == NULL) + break; + rv = _steal_accumulate(rval, encoded); } } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; + else if (PyFloat_Check(obj)) { + PyObject *encoded = encoder_encode_float(s, obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; + else if (s->for_json && _has_for_json_hook(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "for_json", NULL); + if (newobj != NULL) { + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { + else if (s->namedtuple_as_object && _is_namedtuple(obj)) { + PyObject *newobj; + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallMethod(obj, "_asdict", NULL); + if (newobj != NULL) { + rv = encoder_listencode_dict(s, rval, newobj, indent_level); + Py_DECREF(newobj); + } + Py_LeaveRecursiveCall(); + } + else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + rv = encoder_listencode_list(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); + } + else if (PyDict_Check(obj)) { + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + rv = encoder_listencode_dict(s, rval, obj, indent_level); + Py_LeaveRecursiveCall(); + } + else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { + PyObject *encoded = PyObject_Str(obj); + if (encoded != NULL) + rv = _steal_accumulate(rval, encoded); + } + else { + PyObject *ident = NULL; + PyObject *newobj; + if (s->markers != Py_None) { + int has_key; + ident = PyLong_FromVoidPtr(obj); + if (ident == NULL) + break; + has_key = PyDict_Contains(s->markers, ident); + if (has_key) { + if (has_key != -1) + PyErr_SetString(PyExc_ValueError, "Circular reference detected"); + Py_DECREF(ident); + break; + } + if (PyDict_SetItem(s->markers, ident, obj)) { + Py_DECREF(ident); + break; + } + } + if (Py_EnterRecursiveCall(" while encoding a JSON object")) + return rv; + newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); + if (newobj == NULL) { + Py_XDECREF(ident); + Py_LeaveRecursiveCall(); + break; + } + rv = encoder_listencode_obj(s, rval, newobj, indent_level); + Py_LeaveRecursiveCall(); + Py_DECREF(newobj); + if (rv) { + Py_XDECREF(ident); + rv = -1; + } + else if (ident != NULL) { + if (PyDict_DelItem(s->markers, ident)) { + Py_XDECREF(ident); + rv = -1; + } Py_XDECREF(ident); - return -1; } - Py_XDECREF(ident); } - return rv; - } + } while (0); + return rv; } static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) { - /* Encode Python dict dct a JSON term, rval is a PyList */ + /* Encode Python dict dct a JSON term */ static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; PyObject *kstr = NULL; PyObject *ident = NULL; - PyObject *key, *value; - Py_ssize_t pos; - int skipkeys; + PyObject *iter = NULL; + PyObject *item = NULL; + PyObject *items = NULL; + PyObject *encoded = NULL; Py_ssize_t idx; if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); + open_dict = JSON_InternFromString("{"); + close_dict = JSON_InternFromString("}"); + empty_dict = JSON_InternFromString("{}"); if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) return -1; } if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); + return JSON_Accu_Accumulate(rval, empty_dict); if (s->markers != Py_None) { int has_key; @@ -2025,75 +3021,77 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } } - if (PyList_Append(rval, open_dict)) + if (JSON_Accu_Accumulate(rval, open_dict)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level += 1; /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + newline_indent = '\n' + (_indent * _current_indent_level) separator = _item_separator + newline_indent buf += newline_indent */ } - /* TODO: C speedup not implemented for sort_keys */ + iter = encoder_dict_iteritems(s, dct); + if (iter == NULL) + goto bail; - pos = 0; - skipkeys = PyObject_IsTrue(s->skipkeys); idx = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { - PyObject *encoded; - - if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (skipkeys) { - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); + while ((item = PyIter_Next(iter))) { + PyObject *encoded, *key, *value; + if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); goto bail; } + key = PyTuple_GET_ITEM(item, 0); + if (key == NULL) + goto bail; + value = PyTuple_GET_ITEM(item, 1); + if (value == NULL) + goto bail; + encoded = PyDict_GetItem(s->key_memo, key); + if (encoded != NULL) { + Py_INCREF(encoded); + } else { + kstr = encoder_stringify_key(s, key); + if (kstr == NULL) + goto bail; + else if (kstr == Py_None) { + /* skipkeys */ + Py_DECREF(item); + Py_DECREF(kstr); + continue; + } + } if (idx) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } - - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; - if (PyList_Append(rval, encoded)) { - Py_DECREF(encoded); + if (encoded == NULL) { + encoded = encoder_encode_string(s, kstr); + Py_CLEAR(kstr); + if (encoded == NULL) + goto bail; + if (PyDict_SetItem(s->key_memo, key, encoded)) + goto bail; + } + if (JSON_Accu_Accumulate(rval, encoded)) { goto bail; } - Py_DECREF(encoded); - if (PyList_Append(rval, s->key_separator)) + Py_CLEAR(encoded); + if (JSON_Accu_Accumulate(rval, s->key_separator)) goto bail; if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; + Py_CLEAR(item); idx += 1; } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) goto bail; @@ -2103,14 +3101,18 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss /* TODO: DOES NOT RUN */ indent_level -= 1; /* - yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_dict)) + if (JSON_Accu_Accumulate(rval, close_dict)) goto bail; return 0; bail: + Py_XDECREF(encoded); + Py_XDECREF(items); + Py_XDECREF(item); + Py_XDECREF(iter); Py_XDECREF(kstr); Py_XDECREF(ident); return -1; @@ -2118,34 +3120,31 @@ bail: static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) { - /* Encode Python list seq to a JSON term, rval is a PyList */ + /* Encode Python list seq to a JSON term */ static PyObject *open_array = NULL; static PyObject *close_array = NULL; static PyObject *empty_array = NULL; PyObject *ident = NULL; - PyObject *s_fast = NULL; - Py_ssize_t num_items; - PyObject **seq_items; - Py_ssize_t i; + PyObject *iter = NULL; + PyObject *obj = NULL; + int is_true; + int i = 0; if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); + open_array = JSON_InternFromString("["); + close_array = JSON_InternFromString("]"); + empty_array = JSON_InternFromString("[]"); if (open_array == NULL || close_array == NULL || empty_array == NULL) return -1; } ident = NULL; - s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); - if (s_fast == NULL) + is_true = PyObject_IsTrue(seq); + if (is_true == -1) return -1; - num_items = PySequence_Fast_GET_SIZE(s_fast); - if (num_items == 0) { - Py_DECREF(s_fast); - return PyList_Append(rval, empty_array); - } + else if (is_true == 0) + return JSON_Accu_Accumulate(rval, empty_array); if (s->markers != Py_None) { int has_key; @@ -2163,27 +3162,34 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss } } - seq_items = PySequence_Fast_ITEMS(s_fast); - if (PyList_Append(rval, open_array)) + iter = PyObject_GetIter(seq); + if (iter == NULL) + goto bail; + + if (JSON_Accu_Accumulate(rval, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ indent_level += 1; /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + newline_indent = '\n' + (_indent * _current_indent_level) separator = _item_separator + newline_indent buf += newline_indent */ } - for (i = 0; i < num_items; i++) { - PyObject *obj = seq_items[i]; + while ((obj = PyIter_Next(iter))) { if (i) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoder_listencode_obj(s, rval, obj, indent_level)) goto bail; + i++; + Py_CLEAR(obj); } + Py_CLEAR(iter); + if (PyErr_Occurred()) + goto bail; if (ident != NULL) { if (PyDict_DelItem(s->markers, ident)) goto bail; @@ -2193,17 +3199,17 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss /* TODO: DOES NOT RUN */ indent_level -= 1; /* - yield '\n' + (' ' * (_indent * _current_indent_level)) + yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_array)) + if (JSON_Accu_Accumulate(rval, close_array)) goto bail; - Py_DECREF(s_fast); return 0; bail: + Py_XDECREF(obj); + Py_XDECREF(iter); Py_XDECREF(ident); - Py_DECREF(s_fast); return -1; } @@ -2224,11 +3230,17 @@ encoder_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->markers); Py_VISIT(s->defaultfn); Py_VISIT(s->encoder); + Py_VISIT(s->encoding); Py_VISIT(s->indent); Py_VISIT(s->key_separator); Py_VISIT(s->item_separator); + Py_VISIT(s->key_memo); Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); + Py_VISIT(s->item_sort_kw); + Py_VISIT(s->item_sort_key); + Py_VISIT(s->max_long_size); + Py_VISIT(s->min_long_size); + Py_VISIT(s->Decimal); return 0; } @@ -2242,11 +3254,18 @@ encoder_clear(PyObject *self) Py_CLEAR(s->markers); Py_CLEAR(s->defaultfn); Py_CLEAR(s->encoder); + Py_CLEAR(s->encoding); Py_CLEAR(s->indent); Py_CLEAR(s->key_separator); Py_CLEAR(s->item_separator); + Py_CLEAR(s->key_memo); + Py_CLEAR(s->skipkeys_bool); Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); + Py_CLEAR(s->item_sort_kw); + Py_CLEAR(s->item_sort_key); + Py_CLEAR(s->max_long_size); + Py_CLEAR(s->min_long_size); + Py_CLEAR(s->Decimal); return 0; } @@ -2254,8 +3273,7 @@ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") static PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ + PyVarObject_HEAD_INIT(NULL, 0) "simplejson._speedups.Encoder", /* tp_name */ sizeof(PyEncoderObject), /* tp_basicsize */ 0, /* tp_itemsize */ @@ -2311,19 +3329,53 @@ static PyMethodDef speedups_methods[] = { PyDoc_STRVAR(module_doc, "simplejson speedups\n"); -void -init_speedups(void) +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_speedups", /* m_name */ + module_doc, /* m_doc */ + -1, /* m_size */ + speedups_methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear*/ + NULL, /* m_free */ +}; +#endif + +static PyObject * +moduleinit(void) { PyObject *m; PyScannerType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyScannerType) < 0) - return; + return NULL; PyEncoderType.tp_new = PyType_GenericNew; if (PyType_Ready(&PyEncoderType) < 0) - return; + return NULL; + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else m = Py_InitModule3("_speedups", speedups_methods, module_doc); +#endif Py_INCREF((PyObject*)&PyScannerType); PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); Py_INCREF((PyObject*)&PyEncoderType); PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); + return m; } + +#if PY_MAJOR_VERSION >= 3 +PyMODINIT_FUNC +PyInit__speedups(void) +{ + return moduleinit(); +} +#else +void +init_speedups(void) +{ + moduleinit(); +} +#endif diff --git a/lib/simplejson/compat.py b/lib/simplejson/compat.py new file mode 100644 index 00000000..a0af4a1c --- /dev/null +++ b/lib/simplejson/compat.py @@ -0,0 +1,46 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/lib/simplejson/decoder.py b/lib/simplejson/decoder.py index dd57ddee..545e6587 100644 --- a/lib/simplejson/decoder.py +++ b/lib/simplejson/decoder.py @@ -1,21 +1,30 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner, JSONDecodeError -from lib.simplejson.scanner import make_scanner -try: - from lib.simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + _BYTES = fromhex('7FF80000000000007FF0000000000000') + # The struct module in Python 2.4 would get frexp() out of range here + # when an endian is specified in the format string. Fixed in Python 2.5+ if sys.byteorder != 'big': _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] nan, inf = struct.unpack('dd', _BYTES) @@ -23,31 +32,6 @@ def _floatconstants(): NaN, PosInf, NegInf = _floatconstants() - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -56,19 +40,21 @@ _CONSTANTS = { STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), } DEFAULT_ENCODING = "utf-8" -def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError on attempt to decode an invalid string. If strict is False then literal control characters are allowed in the string. - + Returns a tuple of the decoded string and the index of the character in s after the end quote.""" if encoding is None: @@ -79,14 +65,14 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU while 1: chunk = _m(s, end) if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) + if not _PY3 and not isinstance(content, text_type): + content = text_type(content, encoding) _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows @@ -94,49 +80,57 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid control character %r at" + raise JSONDecodeError(msg, s, end) else: _append(terminator) continue try: esc = s[end] except IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid \\X escape sequence %r" + raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, end)) - uni = int(esc, 16) + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise ValueError(errmsg(msg, s, end)) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise ValueError(errmsg(msg, s, end)) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 char = unichr(uni) - end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -145,8 +139,15 @@ scanstring = c_scanstring or py_scanstring WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} +def JSONObject(state, encoding, strict, scan_once, object_hook, + object_pairs_hook, memo=None, + _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + pairs = [] # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -157,19 +158,28 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) + key = memo_get(key, key) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 @@ -181,11 +191,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value + value, end = scan_once(s, end) + pairs.append((key, value)) try: nextchar = s[end] @@ -199,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE if nextchar == '}': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -214,13 +221,20 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE end += 1 if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end - 1)) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -229,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + value, end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -244,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end)) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -268,7 +281,7 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | unicode | + | string | str, unicode | +---------------+-------------------+ | number (int) | int, long | +---------------+-------------------+ @@ -287,37 +300,56 @@ class JSONDecoder(object): """ def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. + strings of other encodings should be passed in as :class:`unicode`. - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ @@ -325,30 +357,44 @@ class JSONDecoder(object): self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring + self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + if _PY3 and isinstance(s, binary_type): + s = s.decode(self.encoding) + obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) + raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning - with a JSON document) and return a 2-tuple of the Python + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. This can be used to decode a JSON document from a string that may have extraneous data at the end. """ - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") - return obj, end + if idx < 0: + # Ensure that raw_decode bails on negative indexes, the regex + # would otherwise mask this behavior. #98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/lib/simplejson/encoder.py b/lib/simplejson/encoder.py index 15c35f7a..d240438e 100644 --- a/lib/simplejson/encoder.py +++ b/lib/simplejson/encoder.py @@ -1,17 +1,25 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re +from operator import itemgetter +# Do not import Decimal directly to avoid reload issues +import decimal +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() -try: - from lib.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from lib.simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None +from simplejson.decoder import PosInf -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +# This is required because u() will mangle the string and ur'' isn't valid +# python3 syntax +ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -26,26 +34,36 @@ ESCAPE_DCT = { for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) -# Assume this produces an infinity on all machines (probably not guaranteed) -INFINITY = float('1e66666') FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u('"')): """Return a JSON representation of a Python string """ + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') def replace(match): s = match.group(0) try: @@ -65,7 +83,8 @@ def py_encode_basestring_ascii(s): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) class JSONEncoder(object): """Extensible JSON encoder for Python data structures. @@ -75,7 +94,7 @@ class JSONEncoder(object): +-------------------+---------------+ | Python | JSON | +===================+===============+ - | dict | object | + | dict, namedtuple | object | +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ @@ -98,9 +117,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -125,14 +149,17 @@ class JSONEncoder(object): sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. An indent level of 0 will only insert newlines. - None is the most compact representation. + If indent is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable @@ -142,6 +169,37 @@ class JSONEncoder(object): transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + If use_decimal is true (not the default), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + + If namedtuple_as_object is true (the default), objects with + ``_asdict()`` methods will be encoded as JSON objects. + + If tuple_as_array is true (the default), tuple (and subclasses) will + be encoded as JSON arrays. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. + """ self.skipkeys = skipkeys @@ -149,9 +207,21 @@ class JSONEncoder(object): self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' if default is not None: self.default = default self.encoding = encoding @@ -179,17 +249,17 @@ class JSONEncoder(object): def encode(self, o): """Return a JSON string representation of a Python data structure. + >>> from simplejson import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -200,7 +270,10 @@ class JSONEncoder(object): chunks = self.iterencode(o, _one_shot=True) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) - return ''.join(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string @@ -222,13 +295,15 @@ class JSONEncoder(object): _encoder = encode_basestring if self.encoding != 'utf-8': def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): + if isinstance(o, binary_type): o = o.decode(_encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. if o != o: text = 'NaN' @@ -237,44 +312,123 @@ class JSONEncoder(object): elif o == _neginf: text = '-Infinity' else: + if type(o) != float: + # See #118, do not trust custom str/repr + o = float(o) return _repr(o) - if not allow_nan: + if ignore_nan: + text = 'null' + elif not allow_nan: raise ValueError( "Out of range float values are not JSON compliant: " + repr(o)) return text - - if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) + if (_one_shot and c_make_encoder is not None + and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) + self.skipkeys, self.allow_nan, key_memo, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, decimal.Decimal) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) + self.skipkeys, _one_shot, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + Decimal=decimal.Decimal) + try: + return _iterencode(o, 0) + finally: + key_memo.clear() -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + +class JSONEncoderForHTML(JSONEncoder): + """An encoder that produces JSON safe to embed in HTML. + + To embed JSON content in, say, a script tag on a web page, the + characters &, < and > should be escaped. They cannot be escaped + with the usual entities (e.g. &) because they are not expanded + within