Merge pull request #411 from JackDandy/feature/UpdateSimpleJSON

Update SimpleJSON library 2.0.9 to 3.7.3 (0bcdf20).
This commit is contained in:
JackDandy 2015-06-13 23:46:48 +01:00
commit 78c2ed237c
9 changed files with 2754 additions and 914 deletions

View file

@ -40,6 +40,7 @@
* Change to consolidate scene exceptions and name cache code * Change to consolidate scene exceptions and name cache code
* Change check_url function to use requests instead of httplib library * Change check_url function to use requests instead of httplib library
* Update Six compatibility library 1.5.2 to 1.9.0 (8a545f4) * Update Six compatibility library 1.5.2 to 1.9.0 (8a545f4)
* Update SimpleJSON library 2.0.9 to 3.7.3 (0bcdf20)
[develop changelog] [develop changelog]
* Update Requests library 2.7.0 (ab1f493) to 2.7.0 (8b5e457) * Update Requests library 2.7.0 (ab1f493) to 2.7.0 (8b5e457)

View file

@ -14,15 +14,15 @@ Encoding basic Python object hierarchies::
>>> import simplejson as json >>> import simplejson as json
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
'["foo", {"bar": ["baz", null, 1.0, 2]}]' '["foo", {"bar": ["baz", null, 1.0, 2]}]'
>>> print json.dumps("\"foo\bar") >>> print(json.dumps("\"foo\bar"))
"\"foo\bar" "\"foo\bar"
>>> print json.dumps(u'\u1234') >>> print(json.dumps(u'\u1234'))
"\u1234" "\u1234"
>>> print json.dumps('\\') >>> print(json.dumps('\\'))
"\\" "\\"
>>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
{"a": 0, "b": 0, "c": 0} {"a": 0, "b": 0, "c": 0}
>>> from StringIO import StringIO >>> from simplejson.compat import StringIO
>>> io = StringIO() >>> io = StringIO()
>>> json.dump(['streaming API'], io) >>> json.dump(['streaming API'], io)
>>> io.getvalue() >>> io.getvalue()
@ -31,14 +31,14 @@ Encoding basic Python object hierarchies::
Compact encoding:: Compact encoding::
>>> import simplejson as json >>> import simplejson as json
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) >>> obj = [1,2,3,{'4': 5, '6': 7}]
>>> json.dumps(obj, separators=(',',':'), sort_keys=True)
'[1,2,3,{"4":5,"6":7}]' '[1,2,3,{"4":5,"6":7}]'
Pretty printing:: Pretty printing::
>>> import simplejson as json >>> import simplejson as json
>>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' '))
>>> print '\n'.join([l.rstrip() for l in s.splitlines()])
{ {
"4": 5, "4": 5,
"6": 7 "6": 7
@ -52,7 +52,7 @@ Decoding JSON::
True True
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True True
>>> from StringIO import StringIO >>> from simplejson.compat import StringIO
>>> io = StringIO('["streaming API"]') >>> io = StringIO('["streaming API"]')
>>> json.load(io)[0] == 'streaming API' >>> json.load(io)[0] == 'streaming API'
True True
@ -68,8 +68,8 @@ Specializing JSON object decoding::
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex) ... object_hook=as_complex)
(1+2j) (1+2j)
>>> import decimal >>> from decimal import Decimal
>>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
True True
Specializing JSON object encoding:: Specializing JSON object encoding::
@ -95,18 +95,38 @@ Using simplejson.tool from the shell to validate and pretty-print::
"json": "obj" "json": "obj"
} }
$ echo '{ 1.2:3.4}' | python -m simplejson.tool $ echo '{ 1.2:3.4}' | python -m simplejson.tool
Expecting property name: line 1 column 2 (char 2) Expecting property name: line 1 column 3 (char 2)
""" """
__version__ = '2.0.9' from __future__ import absolute_import
__version__ = '3.7.3'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
'OrderedDict', 'simple_first',
] ]
__author__ = 'Bob Ippolito <bob@redivi.com>' __author__ = 'Bob Ippolito <bob@redivi.com>'
from decoder import JSONDecoder from decimal import Decimal
from encoder import JSONEncoder
from .scanner import JSONDecodeError
from .decoder import JSONDecoder
from .encoder import JSONEncoder, JSONEncoderForHTML
def _import_OrderedDict():
    """Return the ``OrderedDict`` class this package should expose.

    ``collections.OrderedDict`` is used when the ``collections`` module
    provides it; otherwise the ``ordered_dict`` module bundled with this
    package supplies the class.
    """
    import collections
    try:
        return collections.OrderedDict
    except AttributeError:
        # ``collections`` has no OrderedDict attribute here, so fall back
        # to the implementation shipped inside the package.
        from . import ordered_dict
        return ordered_dict.OrderedDict


OrderedDict = _import_OrderedDict()
def _import_c_make_encoder():
    """Return ``make_encoder`` from the ``_speedups`` module, or ``None``.

    ``None`` is returned when importing ``_speedups`` raises
    ``ImportError``, which lets callers fall back to other code paths.
    """
    try:
        from ._speedups import make_encoder
    except ImportError:
        return None
    return make_encoder
_default_encoder = JSONEncoder( _default_encoder = JSONEncoder(
skipkeys=False, skipkeys=False,
@ -117,56 +137,117 @@ _default_encoder = JSONEncoder(
separators=None, separators=None,
encoding='utf-8', encoding='utf-8',
default=None, default=None,
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
int_as_string_bitcount=None,
) )
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, **kw): encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object). ``.write()``-supporting file-like object).
If ``skipkeys`` is true then ``dict`` keys that are not basic types If *skipkeys* is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``. will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the some chunks written to ``fp`` If *ensure_ascii* is false, then the some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error. to cause an error.
If ``check_circular`` is false, then the circular reference check If *check_circular* is false, then the circular reference check
for container types will be skipped and a circular reference will for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse). result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the JSON specification, instead of using the in strict compliance of the original JSON specification, instead of using
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
*ignore_nan* for ECMA-262 compliant behavior.
If ``indent`` is a non-negative integer, then JSON array elements and object If *indent* is a string, then JSON array elements and object members
members will be pretty-printed with that indent level. An indent level will be pretty-printed with a newline followed by that string repeated
of 0 will only insert newlines. ``None`` is the most compact representation. for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple If specified, *separators* should be an
then it will be used instead of the default ``(', ', ': ')`` separators. ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
``(',', ':')`` is the most compact JSON representation. if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
compact JSON representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``encoding`` is the character encoding for str instances, default is UTF-8. *encoding* is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version *default(obj)* is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError. of obj or raise ``TypeError``. The default simply raises ``TypeError``.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
If *namedtuple_as_object* is true (default: ``True``),
:class:`tuple` subclasses with ``_asdict()`` methods will be encoded
as JSON objects.
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise. Note that this is still a
lossy operation that will not round-trip correctly and should be used
sparingly.
If *int_as_string_bitcount* is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precedence over
*sort_keys*.
If *sort_keys* is true (default: ``False``), the output of dictionaries
will be sorted by item.
If *for_json* is true (default: ``False``), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
``null`` in compliance with the ECMA-262 specification. If true, this will
override *allow_nan*.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with ``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg. the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead
of subclassing whenever possible.
""" """
# cached encoder # cached encoder
if (not skipkeys and ensure_ascii and if (not skipkeys and ensure_ascii and
check_circular and allow_nan and check_circular and allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw): encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not sort_keys
and not item_sort_key and not for_json
and not ignore_nan and int_as_string_bitcount is None
and not kw
):
iterable = _default_encoder.iterencode(obj) iterable = _default_encoder.iterencode(obj)
else: else:
if cls is None: if cls is None:
@ -174,7 +255,16 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent, check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding, separators=separators, encoding=encoding,
default=default, **kw).iterencode(obj) default=default, use_decimal=use_decimal,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
bigint_as_string=bigint_as_string,
sort_keys=sort_keys,
item_sort_key=item_sort_key,
for_json=for_json,
ignore_nan=ignore_nan,
int_as_string_bitcount=int_as_string_bitcount,
**kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at # could accelerate with writelines in some versions of Python, at
# a debuggability cost # a debuggability cost
for chunk in iterable: for chunk in iterable:
@ -182,8 +272,11 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, **kw): encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``. """Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is false then ``dict`` keys that are not basic types If ``skipkeys`` is false then ``dict`` keys that are not basic types
@ -203,30 +296,77 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
strict compliance of the JSON specification, instead of using the strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and If ``indent`` is a string, then JSON array elements and object members
object members will be pretty-printed with that indent level. An indent will be pretty-printed with a newline followed by that string repeated
level of 0 will only insert newlines. ``None`` is the most compact for each level of nesting. ``None`` (the default) selects the most compact
representation. representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple If specified, ``separators`` should be an
then it will be used instead of the default ``(', ', ': ')`` separators. ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
``(',', ':')`` is the most compact JSON representation. if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
compact JSON representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``encoding`` is the character encoding for str instances, default is UTF-8. ``encoding`` is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version ``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError. of obj or raise TypeError. The default simply raises TypeError.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
If *namedtuple_as_object* is true (default: ``True``),
:class:`tuple` subclasses with ``_asdict()`` methods will be encoded
as JSON objects.
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *bigint_as_string* is true (not the default), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise.
If *int_as_string_bitcount* is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precedence over
*sort_keys*.
If *sort_keys* is true (default: ``False``), the output of dictionaries
will be sorted by item.
If *for_json* is true (default: ``False``), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
``null`` in compliance with the ECMA-262 specification. If true, this will
override *allow_nan*.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with ``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg. the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
whenever possible.
""" """
# cached encoder # cached encoder
if (not skipkeys and ensure_ascii and if (
not skipkeys and ensure_ascii and
check_circular and allow_nan and check_circular and allow_nan and
cls is None and indent is None and separators is None and cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw): encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not sort_keys
and not item_sort_key and not for_json
and not ignore_nan and int_as_string_bitcount is None
and not kw
):
return _default_encoder.encode(obj) return _default_encoder.encode(obj)
if cls is None: if cls is None:
cls = JSONEncoder cls = JSONEncoder
@ -234,85 +374,191 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
skipkeys=skipkeys, ensure_ascii=ensure_ascii, skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent, check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, encoding=encoding, default=default, separators=separators, encoding=encoding, default=default,
use_decimal=use_decimal,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
bigint_as_string=bigint_as_string,
sort_keys=sort_keys,
item_sort_key=item_sort_key,
for_json=for_json,
ignore_nan=ignore_nan,
int_as_string_bitcount=int_as_string_bitcount,
**kw).encode(obj) **kw).encode(obj)
_default_decoder = JSONDecoder(encoding=None, object_hook=None) _default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw): parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
**kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object. a JSON document) to a Python object.
If the contents of ``fp`` is encoded with an ASCII based encoding other *encoding* determines the encoding used to interpret any
than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must :class:`str` objects decoded by this instance (``'utf-8'`` by
be specified. Encodings that are not ASCII based (such as UCS-2) are default). It has no effect when decoding :class:`unicode` objects.
not allowed, and should be wrapped with
``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
object and passed to ``loads()``
``object_hook`` is an optional function that will be called with the Note that currently only encodings that are a superset of ASCII work,
result of any object literal decode (a ``dict``). The return value of strings of other encodings should be passed in as :class:`unicode`.
``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting). *object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with
the result of any object literal decode with an ordered list of pairs.
The return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
""" """
return loads(fp.read(), return loads(fp.read(),
encoding=encoding, cls=cls, object_hook=object_hook, encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int, parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, **kw) parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
use_decimal=use_decimal, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw): parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object. document) to a Python object.
If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding *encoding* determines the encoding used to interpret any
other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name :class:`str` objects decoded by this instance (``'utf-8'`` by
must be specified. Encodings that are not ASCII based (such as UCS-2) default). It has no effect when decoding :class:`unicode` objects.
are not allowed and should be decoded to ``unicode`` first.
``object_hook`` is an optional function that will be called with the Note that currently only encodings that are a superset of ASCII work,
result of any object literal decode (a ``dict``). The return value of strings of other encodings should be passed in as :class:`unicode`.
``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting).
``parse_float``, if specified, will be called with the string *object_hook*, if specified, will be called with the result of every
of every JSON float to be decoded. By default this is equivalent to JSON object decoded and its return value will be used in place of the
float(num_str). This can be used to use another datatype or parser given :class:`dict`. This can be used to provide custom
for JSON floats (e.g. decimal.Decimal). deserializations (e.g. to support JSON-RPC class hinting).
``parse_int``, if specified, will be called with the string *object_pairs_hook* is an optional function that will be called with
of every JSON int to be decoded. By default this is equivalent to the result of any object literal decode with an ordered list of pairs.
int(num_str). This can be used to use another datatype or parser The return value of *object_pairs_hook* will be used instead of the
for JSON integers (e.g. float). :class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
``parse_constant``, if specified, will be called with one of the *parse_float*, if specified, will be called with the string of every
following strings: -Infinity, Infinity, NaN, null, true, false. JSON float to be decoded. By default, this is equivalent to
This can be used to raise an exception if invalid JSON numbers ``float(num_str)``. This can be used to use another datatype or parser
are encountered. for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
""" """
if (cls is None and encoding is None and object_hook is None and if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and parse_int is None and parse_float is None and
parse_constant is None and not kw): parse_constant is None and object_pairs_hook is None
and not use_decimal and not kw):
return _default_decoder.decode(s) return _default_decoder.decode(s)
if cls is None: if cls is None:
cls = JSONDecoder cls = JSONDecoder
if object_hook is not None: if object_hook is not None:
kw['object_hook'] = object_hook kw['object_hook'] = object_hook
if object_pairs_hook is not None:
kw['object_pairs_hook'] = object_pairs_hook
if parse_float is not None: if parse_float is not None:
kw['parse_float'] = parse_float kw['parse_float'] = parse_float
if parse_int is not None: if parse_int is not None:
kw['parse_int'] = parse_int kw['parse_int'] = parse_int
if parse_constant is not None: if parse_constant is not None:
kw['parse_constant'] = parse_constant kw['parse_constant'] = parse_constant
if use_decimal:
if parse_float is not None:
raise TypeError("use_decimal=True implies parse_float=Decimal")
kw['parse_float'] = Decimal
return cls(encoding=encoding, **kw).decode(s) return cls(encoding=encoding, **kw).decode(s)
def _toggle_speedups(enabled):
    """Point the decoder, encoder and scanner hooks at C or Python code.

    When *enabled* is true, each hook is set to its ``c_*`` implementation
    if that name is truthy and to the ``py_*`` implementation otherwise.
    When *enabled* is false, the ``py_*`` implementations are always used
    and ``enc.c_make_encoder`` is cleared.

    The module-level ``_default_decoder`` and ``_default_encoder`` are then
    rebuilt (presumably so the cached instances bind to the newly selected
    implementations -- confirm against the JSONDecoder/JSONEncoder code).
    """
    global _default_decoder, _default_encoder

    from . import decoder as dec
    from . import encoder as enc
    from . import scanner as scan

    c_make_encoder = _import_c_make_encoder()
    if enabled:
        dec.scanstring = dec.c_scanstring or dec.py_scanstring
        enc.c_make_encoder = c_make_encoder
        enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
                                       enc.py_encode_basestring_ascii)
        scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
    else:
        dec.scanstring = dec.py_scanstring
        enc.c_make_encoder = None
        enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
        scan.make_scanner = scan.py_make_scanner
    # The decoder module keeps its own reference to make_scanner.
    dec.make_scanner = scan.make_scanner

    _default_decoder = JSONDecoder(
        encoding=None,
        object_hook=None,
        object_pairs_hook=None,
    )
    # Spell out every option so the rebuilt encoder matches the module-level
    # cached encoder and the fast-path conditions in dump()/dumps().
    _default_encoder = JSONEncoder(
        skipkeys=False,
        ensure_ascii=True,
        check_circular=True,
        allow_nan=True,
        indent=None,
        separators=None,
        encoding='utf-8',
        default=None,
        use_decimal=True,
        namedtuple_as_object=True,
        tuple_as_array=True,
        bigint_as_string=False,
        item_sort_key=None,
        for_json=False,
        ignore_nan=False,
        int_as_string_bitcount=None,
    )
def simple_first(kv):
    """Helper function to pass to item_sort_key to sort simple
    elements to the top, then container elements.

    *kv* is a ``(key, value)`` pair. The returned sort key is
    ``(is_container, key)``, where ``is_container`` is true when the value
    is a ``list``, ``dict`` or ``tuple``; ``False`` sorts before ``True``,
    so non-container values come first and ties are ordered by key.
    """
    return (isinstance(kv[1], (list, dict, tuple)), kv[0])

File diff suppressed because it is too large Load diff

46
lib/simplejson/compat.py Normal file
View file

@ -0,0 +1,46 @@
"""Python 3 compatibility shims
"""
import sys

# Each branch defines the same set of names so the rest of the package can
# import them without checking the interpreter version itself.
if sys.version_info[0] < 3:
    PY3 = False

    def b(s):
        """Return *s* unchanged."""
        return s

    def u(s):
        """Decode *s* to ``unicode`` using the ``unicode_escape`` codec."""
        return unicode(s, 'unicode_escape')

    import cStringIO as StringIO
    StringIO = BytesIO = StringIO.StringIO
    text_type = unicode
    binary_type = str
    string_types = (basestring,)
    integer_types = (int, long)
    unichr = unichr
    reload_module = reload

    def fromhex(s):
        """Decode the hexadecimal string *s* with the ``hex`` codec."""
        return s.decode('hex')
else:
    PY3 = True
    if sys.version_info[:2] >= (3, 4):
        from importlib import reload as reload_module
    else:
        from imp import reload as reload_module
    import codecs

    def b(s):
        """Encode the text *s* to bytes using latin-1."""
        return codecs.latin_1_encode(s)[0]

    def u(s):
        """Return *s* unchanged."""
        return s

    import io
    StringIO = io.StringIO
    BytesIO = io.BytesIO
    text_type = str
    binary_type = bytes
    string_types = (str,)
    integer_types = (int,)

    def unichr(s):
        """Return the one-character string for the code point *s*."""
        return u(chr(s))

    def fromhex(s):
        """Decode the hexadecimal string *s* to bytes."""
        return bytes.fromhex(s)

# Last entry of integer_types: ``long`` in the first branch, ``int`` in the
# second.
long_type = integer_types[-1]

View file

@ -1,21 +1,30 @@
"""Implementation of JSONDecoder """Implementation of JSONDecoder
""" """
from __future__ import absolute_import
import re import re
import sys import sys
import struct import struct
from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
from .scanner import make_scanner, JSONDecodeError
from lib.simplejson.scanner import make_scanner def _import_c_scanstring():
try: try:
from lib.simplejson._speedups import scanstring as c_scanstring from ._speedups import scanstring
except ImportError: return scanstring
c_scanstring = None except ImportError:
return None
c_scanstring = _import_c_scanstring()
# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder'] __all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants(): def _floatconstants():
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex') _BYTES = fromhex('7FF80000000000007FF0000000000000')
# The struct module in Python 2.4 would get frexp() out of range here
# when an endian is specified in the format string. Fixed in Python 2.5+
if sys.byteorder != 'big': if sys.byteorder != 'big':
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
nan, inf = struct.unpack('dd', _BYTES) nan, inf = struct.unpack('dd', _BYTES)
@ -23,31 +32,6 @@ def _floatconstants():
NaN, PosInf, NegInf = _floatconstants() NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of offset *pos* in *doc*.

    *doc* is the text being scanned and *pos* a 0-based character offset
    into it.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No newline precedes pos, so the 0-based offset is shifted by one
        # to give the same 1-based column the multi-line branch produces.
        colno = pos + 1
    else:
        # Distance from the newline that starts this line; the first
        # character after that newline is column 1.
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Return *msg* followed by the location of *pos* in *doc*.

    With only *pos*, the result names its line, column and character
    offset. When *end* is also given, the result names the line and column
    of both offsets and the ``pos - end`` character range.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = { _CONSTANTS = {
'-Infinity': NegInf, '-Infinity': NegInf,
'Infinity': PosInf, 'Infinity': PosInf,
@ -56,19 +40,21 @@ _CONSTANTS = {
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = { BACKSLASH = {
'"': u'"', '\\': u'\\', '/': u'/', '"': u('"'), '\\': u('\u005c'), '/': u('/'),
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
} }
DEFAULT_ENCODING = "utf-8" DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
_PY3=PY3, _maxunicode=sys.maxunicode):
"""Scan the string s for a JSON string. End is the index of the """Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string. character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError Unescapes all valid JSON string escape sequences and raises ValueError
on attempt to decode an invalid string. If strict is False then literal on attempt to decode an invalid string. If strict is False then literal
control characters are allowed in the string. control characters are allowed in the string.
Returns a tuple of the decoded string and the index of the character in s Returns a tuple of the decoded string and the index of the character in s
after the end quote.""" after the end quote."""
if encoding is None: if encoding is None:
@ -79,14 +65,14 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU
while 1: while 1:
chunk = _m(s, end) chunk = _m(s, end)
if chunk is None: if chunk is None:
raise ValueError( raise JSONDecodeError(
errmsg("Unterminated string starting at", s, begin)) "Unterminated string starting at", s, begin)
end = chunk.end() end = chunk.end()
content, terminator = chunk.groups() content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters # Content contains zero or more unescaped string characters
if content: if content:
if not isinstance(content, unicode): if not _PY3 and not isinstance(content, text_type):
content = unicode(content, encoding) content = text_type(content, encoding)
_append(content) _append(content)
# Terminator is the end of string, a literal control character, # Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows # or a backslash denoting that an escape sequence follows
@ -94,49 +80,57 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU
break break
elif terminator != '\\': elif terminator != '\\':
if strict: if strict:
msg = "Invalid control character %r at" % (terminator,) msg = "Invalid control character %r at"
#msg = "Invalid control character {0!r} at".format(terminator) raise JSONDecodeError(msg, s, end)
raise ValueError(errmsg(msg, s, end))
else: else:
_append(terminator) _append(terminator)
continue continue
try: try:
esc = s[end] esc = s[end]
except IndexError: except IndexError:
raise ValueError( raise JSONDecodeError(
errmsg("Unterminated string starting at", s, begin)) "Unterminated string starting at", s, begin)
# If not a unicode escape sequence, must be in the lookup table # If not a unicode escape sequence, must be in the lookup table
if esc != 'u': if esc != 'u':
try: try:
char = _b[esc] char = _b[esc]
except KeyError: except KeyError:
msg = "Invalid \\escape: " + repr(esc) msg = "Invalid \\X escape sequence %r"
raise ValueError(errmsg(msg, s, end)) raise JSONDecodeError(msg, s, end)
end += 1 end += 1
else: else:
# Unicode escape sequence # Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5] esc = s[end + 1:end + 5]
next_end = end + 5 escX = esc[1:2]
if len(esc) != 4: if len(esc) != 4 or escX == 'x' or escX == 'X':
msg = "Invalid \\uXXXX escape" raise JSONDecodeError(msg, s, end - 1)
raise ValueError(errmsg(msg, s, end)) try:
uni = int(esc, 16) uni = int(esc, 16)
except ValueError:
raise JSONDecodeError(msg, s, end - 1)
end += 5
# Check for surrogate pair on UCS-4 systems # Check for surrogate pair on UCS-4 systems
if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: # Note that this will join high/low surrogate pairs
msg = "Invalid \\uXXXX\\uXXXX surrogate pair" # but will also pass unpaired surrogates through
if not s[end + 5:end + 7] == '\\u': if (_maxunicode > 65535 and
raise ValueError(errmsg(msg, s, end)) uni & 0xfc00 == 0xd800 and
esc2 = s[end + 7:end + 11] s[end:end + 2] == '\\u'):
if len(esc2) != 4: esc2 = s[end + 2:end + 6]
raise ValueError(errmsg(msg, s, end)) escX = esc2[1:2]
uni2 = int(esc2, 16) if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) try:
next_end += 6 uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end += 6
char = unichr(uni) char = unichr(uni)
end = next_end
# Append the unescaped character # Append the unescaped character
_append(char) _append(char)
return u''.join(chunks), end return _join(chunks), end
# Use speedup if available # Use speedup if available
@ -145,8 +139,15 @@ scanstring = c_scanstring or py_scanstring
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r' WHITESPACE_STR = ' \t\n\r'
def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): def JSONObject(state, encoding, strict, scan_once, object_hook,
pairs = {} object_pairs_hook, memo=None,
_w=WHITESPACE.match, _ws=WHITESPACE_STR):
(s, end) = state
# Backwards compatibility
if memo is None:
memo = {}
memo_get = memo.setdefault
pairs = []
# Use a slice to prevent IndexError from being raised, the following # Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty # check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
@ -157,19 +158,28 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
# Trivial empty object # Trivial empty object
if nextchar == '}': if nextchar == '}':
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end + 1
pairs = {}
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end + 1 return pairs, end + 1
elif nextchar != '"': elif nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end)) raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
s, end)
end += 1 end += 1
while True: while True:
key, end = scanstring(s, end, encoding, strict) key, end = scanstring(s, end, encoding, strict)
key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where # To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":". # the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
end = _w(s, end).end() end = _w(s, end).end()
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting : delimiter", s, end)) raise JSONDecodeError("Expecting ':' delimiter", s, end)
end += 1 end += 1
@ -181,11 +191,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE
except IndexError: except IndexError:
pass pass
try: value, end = scan_once(s, end)
value, end = scan_once(s, end) pairs.append((key, value))
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
pairs[key] = value
try: try:
nextchar = s[end] nextchar = s[end]
@ -199,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE
if nextchar == '}': if nextchar == '}':
break break
elif nextchar != ',': elif nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
try: try:
nextchar = s[end] nextchar = s[end]
@ -214,13 +221,20 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE
end += 1 end += 1
if nextchar != '"': if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end - 1)) raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
s, end - 1)
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
pairs = dict(pairs)
if object_hook is not None: if object_hook is not None:
pairs = object_hook(pairs) pairs = object_hook(pairs)
return pairs, end return pairs, end
def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
(s, end) = state
values = [] values = []
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
if nextchar in _ws: if nextchar in _ws:
@ -229,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
# Look-ahead for trivial empty array # Look-ahead for trivial empty array
if nextchar == ']': if nextchar == ']':
return values, end + 1 return values, end + 1
elif nextchar == '':
raise JSONDecodeError("Expecting value or ']'", s, end)
_append = values.append _append = values.append
while True: while True:
try: value, end = scan_once(s, end)
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
_append(value) _append(value)
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
if nextchar in _ws: if nextchar in _ws:
@ -244,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']': if nextchar == ']':
break break
elif nextchar != ',': elif nextchar != ',':
raise ValueError(errmsg("Expecting , delimiter", s, end)) raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
try: try:
if s[end] in _ws: if s[end] in _ws:
@ -268,7 +281,7 @@ class JSONDecoder(object):
+---------------+-------------------+ +---------------+-------------------+
| array | list | | array | list |
+---------------+-------------------+ +---------------+-------------------+
| string | unicode | | string | str, unicode |
+---------------+-------------------+ +---------------+-------------------+
| number (int) | int, long | | number (int) | int, long |
+---------------+-------------------+ +---------------+-------------------+
@ -287,37 +300,56 @@ class JSONDecoder(object):
""" """
def __init__(self, encoding=None, object_hook=None, parse_float=None, def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True): parse_int=None, parse_constant=None, strict=True,
"""``encoding`` determines the encoding used to interpret any ``str`` object_pairs_hook=None):
objects decoded by this instance (utf-8 by default). It has no """
effect when decoding ``unicode`` objects. *encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
Note that currently only encodings that are a superset of ASCII work, Note that currently only encodings that are a superset of ASCII work,
strings of other encodings should be passed in as ``unicode``. strings of other encodings should be passed in as :class:`unicode`.
``object_hook``, if specified, will be called with the result *object_hook*, if specified, will be called with the result of every
of every JSON object decoded and its return value will be used in JSON object decoded and its return value will be used in place of the
place of the given ``dict``. This can be used to provide custom given :class:`dict`. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting). deserializations (e.g. to support JSON-RPC class hinting).
``parse_float``, if specified, will be called with the string *object_pairs_hook* is an optional function that will be called with
of every JSON float to be decoded. By default this is equivalent to the result of any object literal decode with an ordered list of pairs.
float(num_str). This can be used to use another datatype or parser The return value of *object_pairs_hook* will be used instead of the
for JSON floats (e.g. decimal.Decimal). :class:`dict`. This feature can be used to implement custom decoders
that rely on the order that the key and value pairs are decoded (for
example, :func:`collections.OrderedDict` will remember the order of
insertion). If *object_hook* is also defined, the *object_pairs_hook*
takes priority.
``parse_int``, if specified, will be called with the string *parse_float*, if specified, will be called with the string of every
of every JSON int to be decoded. By default this is equivalent to JSON float to be decoded. By default, this is equivalent to
int(num_str). This can be used to use another datatype or parser ``float(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. float). for JSON floats (e.g. :class:`decimal.Decimal`).
``parse_constant``, if specified, will be called with one of the *parse_int*, if specified, will be called with the string of every
following strings: -Infinity, Infinity, NaN. JSON int to be decoded. By default, this is equivalent to
This can be used to raise an exception if invalid JSON numbers ``int(num_str)``. This can be used to use another datatype or parser
are encountered. for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of
``True`` means that unescaped control characters are parse errors, if
``False`` then control characters will be allowed in strings.
""" """
if encoding is None:
encoding = DEFAULT_ENCODING
self.encoding = encoding self.encoding = encoding
self.object_hook = object_hook self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float self.parse_float = parse_float or float
self.parse_int = parse_int or int self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__ self.parse_constant = parse_constant or _CONSTANTS.__getitem__
@ -325,30 +357,44 @@ class JSONDecoder(object):
self.parse_object = JSONObject self.parse_object = JSONObject
self.parse_array = JSONArray self.parse_array = JSONArray
self.parse_string = scanstring self.parse_string = scanstring
self.memo = {}
self.scan_once = make_scanner(self) self.scan_once = make_scanner(self)
def decode(self, s, _w=WHITESPACE.match): def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
"""Return the Python representation of ``s`` (a ``str`` or ``unicode`` """Return the Python representation of ``s`` (a ``str`` or ``unicode``
instance containing a JSON document) instance containing a JSON document)
""" """
obj, end = self.raw_decode(s, idx=_w(s, 0).end()) if _PY3 and isinstance(s, binary_type):
s = s.decode(self.encoding)
obj, end = self.raw_decode(s)
end = _w(s, end).end() end = _w(s, end).end()
if end != len(s): if end != len(s):
raise ValueError(errmsg("Extra data", s, end, len(s))) raise JSONDecodeError("Extra data", s, end, len(s))
return obj return obj
def raw_decode(self, s, idx=0): def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
"""Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
with a JSON document) and return a 2-tuple of the Python beginning with a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended. representation and the index in ``s`` where the document ended.
Optionally, ``idx`` can be used to specify an offset in ``s`` where
the JSON document begins.
This can be used to decode a JSON document from a string that may This can be used to decode a JSON document from a string that may
have extraneous data at the end. have extraneous data at the end.
""" """
try: if idx < 0:
obj, end = self.scan_once(s, idx) # Ensure that raw_decode bails on negative indexes, the regex
except StopIteration: # would otherwise mask this behavior. #98
raise ValueError("No JSON object could be decoded") raise JSONDecodeError('Expecting value', s, idx)
return obj, end if _PY3 and not isinstance(s, text_type):
raise TypeError("Input string must be text, not bytes")
# strip UTF-8 bom
if len(s) > idx:
ord0 = ord(s[idx])
if ord0 == 0xfeff:
idx += 1
elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
idx += 3
return self.scan_once(s, idx=_w(s, idx).end())

View file

@ -1,17 +1,25 @@
"""Implementation of JSONEncoder """Implementation of JSONEncoder
""" """
from __future__ import absolute_import
import re import re
from operator import itemgetter
# Do not import Decimal directly to avoid reload issues
import decimal
from .compat import u, unichr, binary_type, string_types, integer_types, PY3
def _import_speedups():
try:
from . import _speedups
return _speedups.encode_basestring_ascii, _speedups.make_encoder
except ImportError:
return None, None
c_encode_basestring_ascii, c_make_encoder = _import_speedups()
try: from simplejson.decoder import PosInf
from lib.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
c_encode_basestring_ascii = None
try:
from lib.simplejson._speedups import make_encoder as c_make_encoder
except ImportError:
c_make_encoder = None
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') #ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]') HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = { ESCAPE_DCT = {
@ -26,26 +34,36 @@ ESCAPE_DCT = {
for i in range(0x20): for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
for i in [0x2028, 0x2029]:
ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))
# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
FLOAT_REPR = repr FLOAT_REPR = repr
def encode_basestring(s): def encode_basestring(s, _PY3=PY3, _q=u('"')):
"""Return a JSON representation of a Python string """Return a JSON representation of a Python string
""" """
if _PY3:
if isinstance(s, binary_type):
s = s.decode('utf-8')
else:
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
def replace(match): def replace(match):
return ESCAPE_DCT[match.group(0)] return ESCAPE_DCT[match.group(0)]
return '"' + ESCAPE.sub(replace, s) + '"' return _q + ESCAPE.sub(replace, s) + _q
def py_encode_basestring_ascii(s): def py_encode_basestring_ascii(s, _PY3=PY3):
"""Return an ASCII-only JSON representation of a Python string """Return an ASCII-only JSON representation of a Python string
""" """
if isinstance(s, str) and HAS_UTF8.search(s) is not None: if _PY3:
s = s.decode('utf-8') if isinstance(s, binary_type):
s = s.decode('utf-8')
else:
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
def replace(match): def replace(match):
s = match.group(0) s = match.group(0)
try: try:
@ -65,7 +83,8 @@ def py_encode_basestring_ascii(s):
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii encode_basestring_ascii = (
c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object): class JSONEncoder(object):
"""Extensible JSON <http://json.org> encoder for Python data structures. """Extensible JSON <http://json.org> encoder for Python data structures.
@ -75,7 +94,7 @@ class JSONEncoder(object):
+-------------------+---------------+ +-------------------+---------------+
| Python | JSON | | Python | JSON |
+===================+===============+ +===================+===============+
| dict | object | | dict, namedtuple | object |
+-------------------+---------------+ +-------------------+---------------+
| list, tuple | array | | list, tuple | array |
+-------------------+---------------+ +-------------------+---------------+
@ -98,9 +117,14 @@ class JSONEncoder(object):
""" """
item_separator = ', ' item_separator = ', '
key_separator = ': ' key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True, def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False, check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None): indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
item_sort_key=None, for_json=False, ignore_nan=False,
int_as_string_bitcount=None):
"""Constructor for JSONEncoder, with sensible defaults. """Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt If skipkeys is false, then it is a TypeError to attempt
@ -125,14 +149,17 @@ class JSONEncoder(object):
sorted by key; this is useful for regression tests to ensure sorted by key; this is useful for regression tests to ensure
that JSON serializations can be compared on a day-to-day basis. that JSON serializations can be compared on a day-to-day basis.
If indent is a non-negative integer, then JSON array If indent is a string, then JSON array elements and object members
elements and object members will be pretty-printed with that will be pretty-printed with a newline followed by that string repeated
indent level. An indent level of 0 will only insert newlines. for each level of nesting. ``None`` (the default) selects the most compact
None is the most compact representation. representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If specified, separators should be a (item_separator, key_separator) If specified, separators should be an (item_separator, key_separator)
tuple. The default is (', ', ': '). To get the most compact JSON tuple. The default is (', ', ': ') if *indent* is ``None`` and
representation you should specify (',', ':') to eliminate whitespace. (',', ': ') otherwise. To get the most compact JSON representation,
you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable that can't otherwise be serialized. It should return a JSON encodable
@ -142,6 +169,37 @@ class JSONEncoder(object):
transformed into unicode using that encoding prior to JSON-encoding. transformed into unicode using that encoding prior to JSON-encoding.
The default is UTF-8. The default is UTF-8.
If use_decimal is true (the default), ``decimal.Decimal`` will
be supported directly by the encoder. For the inverse, decode JSON
with ``parse_float=decimal.Decimal``.
If namedtuple_as_object is true (the default), objects with
``_asdict()`` methods will be encoded as JSON objects.
If tuple_as_array is true (the default), tuple (and subclasses) will
be encoded as JSON arrays.
If bigint_as_string is true (not the default), ints 2**53 and higher
or -2**53 and lower will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise.
If int_as_string_bitcount is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, item_sort_key is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key.
If for_json is true (not the default), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
as ``null`` in compliance with the ECMA-262 specification. If true,
this will override *allow_nan*.
""" """
self.skipkeys = skipkeys self.skipkeys = skipkeys
@ -149,9 +207,21 @@ class JSONEncoder(object):
self.check_circular = check_circular self.check_circular = check_circular
self.allow_nan = allow_nan self.allow_nan = allow_nan
self.sort_keys = sort_keys self.sort_keys = sort_keys
self.use_decimal = use_decimal
self.namedtuple_as_object = namedtuple_as_object
self.tuple_as_array = tuple_as_array
self.bigint_as_string = bigint_as_string
self.item_sort_key = item_sort_key
self.for_json = for_json
self.ignore_nan = ignore_nan
self.int_as_string_bitcount = int_as_string_bitcount
if indent is not None and not isinstance(indent, string_types):
indent = indent * ' '
self.indent = indent self.indent = indent
if separators is not None: if separators is not None:
self.item_separator, self.key_separator = separators self.item_separator, self.key_separator = separators
elif indent is not None:
self.item_separator = ','
if default is not None: if default is not None:
self.default = default self.default = default
self.encoding = encoding self.encoding = encoding
@ -179,17 +249,17 @@ class JSONEncoder(object):
def encode(self, o): def encode(self, o):
"""Return a JSON string representation of a Python data structure. """Return a JSON string representation of a Python data structure.
>>> from simplejson import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]}) >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}' '{"foo": ["bar", "baz"]}'
""" """
# This is for extremely simple cases and benchmarks. # This is for extremely simple cases and benchmarks.
if isinstance(o, basestring): if isinstance(o, binary_type):
if isinstance(o, str): _encoding = self.encoding
_encoding = self.encoding if (_encoding is not None and not (_encoding == 'utf-8')):
if (_encoding is not None o = o.decode(_encoding)
and not (_encoding == 'utf-8')): if isinstance(o, string_types):
o = o.decode(_encoding)
if self.ensure_ascii: if self.ensure_ascii:
return encode_basestring_ascii(o) return encode_basestring_ascii(o)
else: else:
@ -200,7 +270,10 @@ class JSONEncoder(object):
chunks = self.iterencode(o, _one_shot=True) chunks = self.iterencode(o, _one_shot=True)
if not isinstance(chunks, (list, tuple)): if not isinstance(chunks, (list, tuple)):
chunks = list(chunks) chunks = list(chunks)
return ''.join(chunks) if self.ensure_ascii:
return ''.join(chunks)
else:
return u''.join(chunks)
def iterencode(self, o, _one_shot=False): def iterencode(self, o, _one_shot=False):
"""Encode the given object and yield each string """Encode the given object and yield each string
@ -222,13 +295,15 @@ class JSONEncoder(object):
_encoder = encode_basestring _encoder = encode_basestring
if self.encoding != 'utf-8': if self.encoding != 'utf-8':
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
if isinstance(o, str): if isinstance(o, binary_type):
o = o.decode(_encoding) o = o.decode(_encoding)
return _orig_encoder(o) return _orig_encoder(o)
def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
# Check for specials. Note that this type of test is processor- and/or _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
# platform-specific, so do tests which don't depend on the internals. # Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on
# the internals.
if o != o: if o != o:
text = 'NaN' text = 'NaN'
@ -237,44 +312,123 @@ class JSONEncoder(object):
elif o == _neginf: elif o == _neginf:
text = '-Infinity' text = '-Infinity'
else: else:
if type(o) != float:
# See #118, do not trust custom str/repr
o = float(o)
return _repr(o) return _repr(o)
if not allow_nan: if ignore_nan:
text = 'null'
elif not allow_nan:
raise ValueError( raise ValueError(
"Out of range float values are not JSON compliant: " + "Out of range float values are not JSON compliant: " +
repr(o)) repr(o))
return text return text
key_memo = {}
if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
_iterencode = c_make_encoder( _iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent, markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan) self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
self.ignore_nan, decimal.Decimal)
else: else:
_iterencode = _make_iterencode( _iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr, markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys, self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot) self.skipkeys, _one_shot, self.use_decimal,
return _iterencode(o, 0) self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
Decimal=decimal.Decimal)
try:
return _iterencode(o, 0)
finally:
key_memo.clear()
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
class JSONEncoderForHTML(JSONEncoder):
"""An encoder that produces JSON safe to embed in HTML.
To embed JSON content in, say, a script tag on a web page, the
characters &, < and > should be escaped. They cannot be escaped
with the usual entities (e.g. &amp;) because they are not expanded
within <script> tags.
"""
def encode(self, o):
# Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated.
chunks = self.iterencode(o, True)
if self.ensure_ascii:
return ''.join(chunks)
else:
return u''.join(chunks)
def iterencode(self, o, _one_shot=False):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
for chunk in chunks:
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
chunk = chunk.replace('>', '\\u003e')
yield chunk
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,
## HACK: hand-optimized bytecode; turn globals into locals ## HACK: hand-optimized bytecode; turn globals into locals
False=False, _PY3=PY3,
True=True,
ValueError=ValueError, ValueError=ValueError,
basestring=basestring, string_types=string_types,
Decimal=None,
dict=dict, dict=dict,
float=float, float=float,
id=id, id=id,
int=int, integer_types=integer_types,
isinstance=isinstance, isinstance=isinstance,
list=list, list=list,
long=long,
str=str, str=str,
tuple=tuple, tuple=tuple,
): ):
if _use_decimal and Decimal is None:
Decimal = decimal.Decimal
if _item_sort_key and not callable(_item_sort_key):
raise TypeError("item_sort_key must be None or callable")
elif _sort_keys and not _item_sort_key:
_item_sort_key = itemgetter(0)
if (_int_as_string_bitcount is not None and
(_int_as_string_bitcount <= 0 or
not isinstance(_int_as_string_bitcount, integer_types))):
raise TypeError("int_as_string_bitcount must be a positive integer")
def _encode_int(value):
skip_quoting = (
_int_as_string_bitcount is None
or
_int_as_string_bitcount < 1
)
if type(value) not in integer_types:
# See #118, do not trust custom str/repr
value = int(value)
if (
skip_quoting or
(-1 << _int_as_string_bitcount)
< value <
(1 << _int_as_string_bitcount)
):
return str(value)
return '"' + str(value) + '"'
def _iterencode_list(lst, _current_indent_level): def _iterencode_list(lst, _current_indent_level):
if not lst: if not lst:
@ -288,7 +442,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separ
buf = '[' buf = '['
if _indent is not None: if _indent is not None:
_current_indent_level += 1 _current_indent_level += 1
newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) newline_indent = '\n' + (_indent * _current_indent_level)
separator = _item_separator + newline_indent separator = _item_separator + newline_indent
buf += newline_indent buf += newline_indent
else: else:
@ -300,7 +454,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separ
first = False first = False
else: else:
buf = separator buf = separator
if isinstance(value, basestring): if (isinstance(value, string_types) or
(_PY3 and isinstance(value, binary_type))):
yield buf + _encoder(value) yield buf + _encoder(value)
elif value is None: elif value is None:
yield buf + 'null' yield buf + 'null'
@ -308,27 +463,65 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separ
yield buf + 'true' yield buf + 'true'
elif value is False: elif value is False:
yield buf + 'false' yield buf + 'false'
elif isinstance(value, (int, long)): elif isinstance(value, integer_types):
yield buf + str(value) yield buf + _encode_int(value)
elif isinstance(value, float): elif isinstance(value, float):
yield buf + _floatstr(value) yield buf + _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield buf + str(value)
else: else:
yield buf yield buf
if isinstance(value, (list, tuple)): for_json = _for_json and getattr(value, 'for_json', None)
if for_json and callable(for_json):
chunks = _iterencode(for_json(), _current_indent_level)
elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level) chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else: else:
chunks = _iterencode(value, _current_indent_level) _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
if _asdict and callable(_asdict):
chunks = _iterencode_dict(_asdict(),
_current_indent_level)
elif _tuple_as_array and isinstance(value, tuple):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks: for chunk in chunks:
yield chunk yield chunk
if newline_indent is not None: if newline_indent is not None:
_current_indent_level -= 1 _current_indent_level -= 1
yield '\n' + (' ' * (_indent * _current_indent_level)) yield '\n' + (_indent * _current_indent_level)
yield ']' yield ']'
if markers is not None: if markers is not None:
del markers[markerid] del markers[markerid]
def _stringify_key(key):
if isinstance(key, string_types): # pragma: no cover
pass
elif isinstance(key, binary_type):
key = key.decode(_encoding)
elif isinstance(key, float):
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, integer_types):
if type(key) not in integer_types:
# See #118, do not trust custom str/repr
key = int(key)
key = str(key)
elif _use_decimal and isinstance(key, Decimal):
key = str(key)
elif _skipkeys:
key = None
else:
raise TypeError("key " + repr(key) + " is not a string")
return key
def _iterencode_dict(dct, _current_indent_level): def _iterencode_dict(dct, _current_indent_level):
if not dct: if not dct:
yield '{}' yield '{}'
@ -341,44 +534,42 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separ
yield '{' yield '{'
if _indent is not None: if _indent is not None:
_current_indent_level += 1 _current_indent_level += 1
newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) newline_indent = '\n' + (_indent * _current_indent_level)
item_separator = _item_separator + newline_indent item_separator = _item_separator + newline_indent
yield newline_indent yield newline_indent
else: else:
newline_indent = None newline_indent = None
item_separator = _item_separator item_separator = _item_separator
first = True first = True
if _sort_keys: if _PY3:
items = dct.items() iteritems = dct.items()
items.sort(key=lambda kv: kv[0])
else: else:
items = dct.iteritems() iteritems = dct.iteritems()
if _item_sort_key:
items = []
for k, v in dct.items():
if not isinstance(k, string_types):
k = _stringify_key(k)
if k is None:
continue
items.append((k, v))
items.sort(key=_item_sort_key)
else:
items = iteritems
for key, value in items: for key, value in items:
if isinstance(key, basestring): if not (_item_sort_key or isinstance(key, string_types)):
pass key = _stringify_key(key)
# JavaScript is weakly typed for these, so it makes sense to if key is None:
# also allow them. Many encoders seem to do something like this. # _skipkeys must be True
elif isinstance(key, float): continue
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, (int, long)):
key = str(key)
elif _skipkeys:
continue
else:
raise TypeError("key " + repr(key) + " is not a string")
if first: if first:
first = False first = False
else: else:
yield item_separator yield item_separator
yield _encoder(key) yield _encoder(key)
yield _key_separator yield _key_separator
if isinstance(value, basestring): if (isinstance(value, string_types) or
(_PY3 and isinstance(value, binary_type))):
yield _encoder(value) yield _encoder(value)
elif value is None: elif value is None:
yield 'null' yield 'null'
@ -386,28 +577,41 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separ
yield 'true' yield 'true'
elif value is False: elif value is False:
yield 'false' yield 'false'
elif isinstance(value, (int, long)): elif isinstance(value, integer_types):
yield str(value) yield _encode_int(value)
elif isinstance(value, float): elif isinstance(value, float):
yield _floatstr(value) yield _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield str(value)
else: else:
if isinstance(value, (list, tuple)): for_json = _for_json and getattr(value, 'for_json', None)
if for_json and callable(for_json):
chunks = _iterencode(for_json(), _current_indent_level)
elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level) chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else: else:
chunks = _iterencode(value, _current_indent_level) _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
if _asdict and callable(_asdict):
chunks = _iterencode_dict(_asdict(),
_current_indent_level)
elif _tuple_as_array and isinstance(value, tuple):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks: for chunk in chunks:
yield chunk yield chunk
if newline_indent is not None: if newline_indent is not None:
_current_indent_level -= 1 _current_indent_level -= 1
yield '\n' + (' ' * (_indent * _current_indent_level)) yield '\n' + (_indent * _current_indent_level)
yield '}' yield '}'
if markers is not None: if markers is not None:
del markers[markerid] del markers[markerid]
def _iterencode(o, _current_indent_level): def _iterencode(o, _current_indent_level):
if isinstance(o, basestring): if (isinstance(o, string_types) or
(_PY3 and isinstance(o, binary_type))):
yield _encoder(o) yield _encoder(o)
elif o is None: elif o is None:
yield 'null' yield 'null'
@ -415,26 +619,42 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separ
yield 'true' yield 'true'
elif o is False: elif o is False:
yield 'false' yield 'false'
elif isinstance(o, (int, long)): elif isinstance(o, integer_types):
yield str(o) yield _encode_int(o)
elif isinstance(o, float): elif isinstance(o, float):
yield _floatstr(o) yield _floatstr(o)
elif isinstance(o, (list, tuple)):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
elif isinstance(o, dict):
for chunk in _iterencode_dict(o, _current_indent_level):
yield chunk
else: else:
if markers is not None: for_json = _for_json and getattr(o, 'for_json', None)
markerid = id(o) if for_json and callable(for_json):
if markerid in markers: for chunk in _iterencode(for_json(), _current_indent_level):
raise ValueError("Circular reference detected") yield chunk
markers[markerid] = o elif isinstance(o, list):
o = _default(o) for chunk in _iterencode_list(o, _current_indent_level):
for chunk in _iterencode(o, _current_indent_level): yield chunk
yield chunk else:
if markers is not None: _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
del markers[markerid] if _asdict and callable(_asdict):
for chunk in _iterencode_dict(_asdict(),
_current_indent_level):
yield chunk
elif (_tuple_as_array and isinstance(o, tuple)):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
elif isinstance(o, dict):
for chunk in _iterencode_dict(o, _current_indent_level):
yield chunk
elif _use_decimal and isinstance(o, Decimal):
yield str(o)
else:
if markers is not None:
markerid = id(o)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
if markers is not None:
del markers[markerid]
return _iterencode return _iterencode

View file

@ -0,0 +1,119 @@
"""Drop-in replacement for collections.OrderedDict by Raymond Hettinger
http://code.activestate.com/recipes/576693/
"""
from UserDict import DictMixin
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
try:
    all
except NameError:
    # Only reached on interpreters that have no built-in all().
    def all(seq):
        """Return False at the first false element of seq, else True."""
        for item in seq:
            if item:
                continue
            return False
        return True
class OrderedDict(dict, DictMixin):
    """Dictionary that iterates over its keys in insertion order.

    Values live in the underlying ``dict``. Key order is tracked
    separately in a circular doubly linked list of ``[key, prev, next]``
    nodes anchored at a sentinel node (``self.__end``); ``self.__map``
    maps each key to its node.
    """

    def __init__(self, *args, **kwds):
        """Initialise from at most one positional argument plus keywords.

        The arguments are forwarded unchanged to ``update``.
        Raises TypeError when more than one positional argument is given.
        """
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # Probe for the sentinel: the attribute only exists once
            # clear() has run on this instance.
            self.__end
        except AttributeError:
            self.clear()
        self.update(*args, **kwds)

    def clear(self):
        """Remove every item and reset the ordering structures."""
        self.__end = end = []
        end += [None, end, end]         # sentinel node for doubly linked list
        self.__map = {}                 # key --> [key, prev, next]
        dict.clear(self)

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``.

        A key that is not yet present gets a new node linked in just
        before the sentinel, i.e. at the end of the order; an existing
        key keeps its node and therefore its position.
        """
        if key not in self:
            end = self.__end
            curr = end[1]
            # New node [key, prev=curr, next=end] becomes both the old
            # tail's successor and the sentinel's predecessor.
            curr[2] = end[1] = self.__map[key] = [key, curr, end]
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Delete ``key`` from the dict and unlink its node."""
        # The dict deletion runs first, so the linked list is only
        # touched once that call has succeeded.
        dict.__delitem__(self, key)
        key, prev, next = self.__map.pop(key)
        prev[2] = next
        next[1] = prev

    def __iter__(self):
        """Yield the keys by following ``next`` links from the sentinel."""
        end = self.__end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        """Yield the keys by following ``prev`` links from the sentinel."""
        end = self.__end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def popitem(self, last=True):
        """Remove and return a ``(key, value)`` pair.

        The key is the first one produced by ``reversed(self)`` when
        ``last`` is true, otherwise the first one produced by
        ``iter(self)``. Raises KeyError when the dictionary is empty.
        """
        if not self:
            raise KeyError('dictionary is empty')
        # Modified from original to support Python 2.4, see
        # http://code.google.com/p/simplejson/issues/detail?id=53
        if last:
            key = reversed(self).next()
        else:
            key = iter(self).next()
        value = self.pop(key)
        return key, value

    def __reduce__(self):
        """Return ``(class, (items,))``, plus the instance dict if non-empty.

        ``items`` is a list of ``[key, value]`` lists in iteration order.
        """
        items = [[k, self[k]] for k in self]
        # Temporarily remove the ordering structures so they are left
        # out of the copied instance dict, then put them back.
        tmp = self.__map, self.__end
        del self.__map, self.__end
        inst_dict = vars(self).copy()
        self.__map, self.__end = tmp
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def keys(self):
        """Return a list of the keys in iteration order."""
        return list(self)

    # These methods are taken from DictMixin instead of dict --
    # presumably so that they are built on the overridden methods above
    # and follow the tracked key order (TODO confirm against DictMixin).
    setdefault = DictMixin.setdefault
    update = DictMixin.update
    pop = DictMixin.pop
    values = DictMixin.values
    items = DictMixin.items
    iterkeys = DictMixin.iterkeys
    itervalues = DictMixin.itervalues
    iteritems = DictMixin.iteritems

    def __repr__(self):
        """Return ``ClassName()`` when empty, else ``ClassName(<items>)``."""
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, self.items())

    def copy(self):
        """Return a new instance of the same class built from ``self``."""
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        """Return a new instance mapping each key in ``iterable`` to ``value``."""
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        """Compare for equality.

        Against another OrderedDict the comparison is order-sensitive:
        the lengths must match and the items must be pairwise equal.
        Against any other object the result is ``dict.__eq__``'s.
        """
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and \
                   all(p==q for p, q in  zip(self.items(), other.items()))
        return dict.__eq__(self, other)

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        return not self == other

View file

@ -1,17 +1,70 @@
"""JSON token scanner """JSON token scanner
""" """
import re import re
try: def _import_c_make_scanner():
from lib.simplejson._speedups import make_scanner as c_make_scanner try:
except ImportError: from simplejson._speedups import make_scanner
c_make_scanner = None return make_scanner
except ImportError:
return None
c_make_scanner = _import_c_make_scanner()
__all__ = ['make_scanner'] __all__ = ['make_scanner', 'JSONDecodeError']
NUMBER_RE = re.compile( NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL)) (re.VERBOSE | re.MULTILINE | re.DOTALL))
class JSONDecodeError(ValueError):
    """ValueError subclass describing where parsing of a document failed.

    Besides the formatted message, instances carry these attributes:

    msg: The error message without position information
    doc: The JSON document that was being parsed
    pos: The index in doc at which the failure starts
    end: The index in doc at which the failure ends (may be None)
    lineno: The line number matching pos
    colno: The column number matching pos
    endlineno: The line number matching end (may be None)
    endcolno: The column number matching end (may be None)

    """
    # Note that this exception is used from _speedups
    def __init__(self, msg, doc, pos, end=None):
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg, self.doc, self.pos, self.end = msg, doc, pos, end
        self.lineno, self.colno = linecol(doc, pos)
        # The end coordinates stay None unless an end index was given.
        self.endlineno = self.endcolno = None
        if end is not None:
            self.endlineno, self.endcolno = linecol(doc, end)

    def __reduce__(self):
        # Rebuild from the original constructor arguments.
        ctor_args = (self.msg, self.doc, self.pos, self.end)
        return self.__class__, ctor_args
def linecol(doc, pos):
    """Return the 1-based ``(line, column)`` of index ``pos`` in ``doc``."""
    newlines_before = doc.count('\n', 0, pos)
    if newlines_before == 0:
        # Still on the first line: the column is the offset from the start.
        return 1, pos + 1
    # Otherwise the column is the distance from the closest preceding newline.
    last_newline = doc.rindex('\n', 0, pos)
    return newlines_before + 1, pos - last_newline
def errmsg(msg, doc, pos, end=None):
    """Return ``msg`` followed by the location of the error in ``doc``.

    Every ``%r`` in ``msg`` is replaced by the repr of the single
    character of ``doc`` found at ``pos``. When ``end`` is given, the
    location of the end index is reported as well.
    """
    start_line, start_col = linecol(doc, pos)
    msg = msg.replace('%r', repr(doc[pos:pos + 1]))
    if end is not None:
        end_line, end_col = linecol(doc, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            msg, start_line, start_col, end_line, end_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
def py_make_scanner(context): def py_make_scanner(context):
parse_object = context.parse_object parse_object = context.parse_object
parse_array = context.parse_array parse_array = context.parse_array
@ -23,17 +76,21 @@ def py_make_scanner(context):
parse_int = context.parse_int parse_int = context.parse_int
parse_constant = context.parse_constant parse_constant = context.parse_constant
object_hook = context.object_hook object_hook = context.object_hook
object_pairs_hook = context.object_pairs_hook
memo = context.memo
def _scan_once(string, idx): def _scan_once(string, idx):
errmsg = 'Expecting value'
try: try:
nextchar = string[idx] nextchar = string[idx]
except IndexError: except IndexError:
raise StopIteration raise JSONDecodeError(errmsg, string, idx)
if nextchar == '"': if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict) return parse_string(string, idx + 1, encoding, strict)
elif nextchar == '{': elif nextchar == '{':
return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook) return parse_object((string, idx + 1), encoding, strict,
_scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == '[': elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once) return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null': elif nextchar == 'n' and string[idx:idx + 4] == 'null':
@ -58,8 +115,19 @@ def py_make_scanner(context):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9 return parse_constant('-Infinity'), idx + 9
else: else:
raise StopIteration raise JSONDecodeError(errmsg, string, idx)
return _scan_once def scan_once(string, idx):
if idx < 0:
# Ensure the same behavior as the C speedup, otherwise
# this would work for *some* negative string indices due
# to the behavior of __getitem__ for strings. #98
raise JSONDecodeError('Expecting value', string, idx)
try:
return _scan_once(string, idx)
finally:
memo.clear()
return scan_once
make_scanner = c_make_scanner or py_make_scanner make_scanner = c_make_scanner or py_make_scanner

42
lib/simplejson/tool.py Normal file
View file

@ -0,0 +1,42 @@
r"""Command-line tool to validate and pretty-print JSON
Usage::
$ echo '{"json":"obj"}' | python -m simplejson.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
Expecting property name: line 1 column 2 (char 2)
"""
from __future__ import with_statement
import sys
import simplejson as json
def main():
    """Validate a JSON document and write it back out pretty-printed.

    The command line is ``[infile [outfile]]``: with no arguments the
    document is read from stdin and written to stdout, with one argument
    it is read from that file, and with two it is also written to the
    second file. Any other argument count exits with a usage message.

    A document that fails to parse exits with the parser's error as the
    exit message. The output file is opened only after the input has
    been parsed successfully, so a failed run never truncates an
    existing output file and the same path may be given for both input
    and output.
    """
    argc = len(sys.argv)
    if argc not in (1, 2, 3):
        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
    if argc == 1:
        infile = sys.stdin
    else:
        infile = open(sys.argv[1], 'r')
    with infile:
        try:
            obj = json.load(infile,
                            object_pairs_hook=json.OrderedDict,
                            use_decimal=True)
        except ValueError:
            raise SystemExit(sys.exc_info()[1])
    # Opening with mode 'w' truncates the target, so it is done only now
    # that the input has been read completely and closed.
    if argc == 3:
        outfile = open(sys.argv[2], 'w')
    else:
        outfile = sys.stdout
    with outfile:
        json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True)
        outfile.write('\n')
if __name__ == '__main__':
main()