Update cachecontrol library 0.11.5 to 0.12.3 (db54c40).

This commit is contained in:
JackDandy 2017-07-12 01:36:15 +01:00
parent e730f0b84e
commit 85deeec565
14 changed files with 1148 additions and 95 deletions

View file

@ -11,7 +11,7 @@
* Change webserver startup to correctly use xheaders in reverse proxy or load balance set-ups * Change webserver startup to correctly use xheaders in reverse proxy or load balance set-ups
* Update backports_abc 0.4 to 0.5 * Update backports_abc 0.4 to 0.5
* Update Beautiful Soup 4.4.0 (r397) to 4.5.3 (r439) * Update Beautiful Soup 4.4.0 (r397) to 4.5.3 (r439)
* Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776) * Update cachecontrol library 0.11.5 to 0.12.3 (db54c40)
* Update Certifi 2015.11.20.1 (385476b) to 2017.01.23 (9f9dc30) * Update Certifi 2015.11.20.1 (385476b) to 2017.01.23 (9f9dc30)
* Update feedparser library 5.2.0 (8c62940) to 5.2.1 (f1dd1bb) * Update feedparser library 5.2.0 (8c62940) to 5.2.1 (f1dd1bb)
* Update html5lib 0.99999999/1.0b9 (46dae3d) to (1a28d72) * Update html5lib 0.99999999/1.0b9 (46dae3d) to (1a28d72)
@ -68,6 +68,7 @@
* Fix "too many redirects" or "no CSS/JS content" delivered * Fix "too many redirects" or "no CSS/JS content" delivered
* Change restart/shutdown to use updated jQuery * Change restart/shutdown to use updated jQuery
* Remove AlphaReign torrent provider * Remove AlphaReign torrent provider
* Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776)
### 0.12.21 (2017-06-19 23:35:00 UTC) ### 0.12.21 (2017-06-19 23:35:00 UTC)

View file

@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces.
""" """
__author__ = 'Eric Larson' __author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org' __email__ = 'eric@ionrock.org'
__version__ = '0.11.7' __version__ = '0.12.3'
from .wrapper import CacheControl from .wrapper import CacheControl
from .adapter import CacheControlAdapter from .adapter import CacheControlAdapter

View file

@ -16,10 +16,12 @@ class CacheControlAdapter(HTTPAdapter):
controller_class=None, controller_class=None,
serializer=None, serializer=None,
heuristic=None, heuristic=None,
cacheable_methods=None,
*args, **kw): *args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw) super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache() self.cache = cache or DictCache()
self.heuristic = heuristic self.heuristic = heuristic
self.cacheable_methods = cacheable_methods or ('GET',)
controller_factory = controller_class or CacheController controller_factory = controller_class or CacheController
self.controller = controller_factory( self.controller = controller_factory(
@ -28,12 +30,13 @@ class CacheControlAdapter(HTTPAdapter):
serializer=serializer, serializer=serializer,
) )
def send(self, request, **kw): def send(self, request, cacheable_methods=None, **kw):
""" """
Send a request. Use the request information to see if it Send a request. Use the request information to see if it
exists in the cache and cache the response if we need to and can. exists in the cache and cache the response if we need to and can.
""" """
if request.method == 'GET': cacheable = cacheable_methods or self.cacheable_methods
if request.method in cacheable:
cached_response = self.controller.cached_request(request) cached_response = self.controller.cached_request(request)
if cached_response: if cached_response:
return self.build_response(request, cached_response, return self.build_response(request, cached_response,
@ -48,14 +51,16 @@ class CacheControlAdapter(HTTPAdapter):
return resp return resp
def build_response(self, request, response, from_cache=False): def build_response(self, request, response, from_cache=False,
cacheable_methods=None):
""" """
Build a response by making a request or using the cache. Build a response by making a request or using the cache.
This will end up calling send and returning a potentially This will end up calling send and returning a potentially
cached response cached response
""" """
if not from_cache and request.method == 'GET': cacheable = cacheable_methods or self.cacheable_methods
if not from_cache and request.method in cacheable:
# Check for any heuristics that might update headers # Check for any heuristics that might update headers
# before trying to cache. # before trying to cache.
if self.heuristic: if self.heuristic:

View file

@ -5,6 +5,12 @@ from textwrap import dedent
from ..cache import BaseCache from ..cache import BaseCache
from ..controller import CacheController from ..controller import CacheController
try:
FileNotFoundError
except NameError:
# py2.X
FileNotFoundError = OSError
def _secure_open_write(filename, fmode): def _secure_open_write(filename, fmode):
# We only want to write to this file, so open it in write only mode # We only want to write to this file, so open it in write only mode

View file

@ -25,7 +25,7 @@ class RedisCache(object):
if not expires: if not expires:
self.conn.set(key, value) self.conn.set(key, value)
else: else:
expires = expires - datetime.now() expires = expires - datetime.utcnow()
self.conn.setex(key, total_seconds(expires), value) self.conn.setex(key, total_seconds(expires), value)
def delete(self, key): def delete(self, key):

View file

@ -24,6 +24,6 @@ except ImportError:
# Replicate some six behaviour # Replicate some six behaviour
try: try:
text_type = (unicode,) text_type = unicode
except NameError: except NameError:
text_type = (str,) text_type = str

View file

@ -30,10 +30,12 @@ def parse_uri(uri):
class CacheController(object): class CacheController(object):
"""An interface to see if request should cached or not. """An interface to see if request should cached or not.
""" """
def __init__(self, cache=None, cache_etags=True, serializer=None): def __init__(self, cache=None, cache_etags=True, serializer=None,
status_codes=None):
self.cache = cache or DictCache() self.cache = cache or DictCache()
self.cache_etags = cache_etags self.cache_etags = cache_etags
self.serializer = serializer or Serializer() self.serializer = serializer or Serializer()
self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
@classmethod @classmethod
def _urlnorm(cls, uri): def _urlnorm(cls, uri):
@ -60,27 +62,51 @@ class CacheController(object):
return cls._urlnorm(uri) return cls._urlnorm(uri)
def parse_cache_control(self, headers): def parse_cache_control(self, headers):
""" known_directives = {
Parse the cache control headers returning a dictionary with values # https://tools.ietf.org/html/rfc7234#section-5.2
for the different directives. 'max-age': (int, True,),
""" 'max-stale': (int, False,),
'min-fresh': (int, True,),
'no-cache': (None, False,),
'no-store': (None, False,),
'no-transform': (None, False,),
'only-if-cached' : (None, False,),
'must-revalidate': (None, False,),
'public': (None, False,),
'private': (None, False,),
'proxy-revalidate': (None, False,),
's-maxage': (int, True,)
}
cc_headers = headers.get('cache-control',
headers.get('Cache-Control', ''))
retval = {} retval = {}
cc_header = 'cache-control' for cc_directive in cc_headers.split(','):
if 'Cache-Control' in headers: parts = cc_directive.split('=', 1)
cc_header = 'Cache-Control' directive = parts[0].strip()
try:
typ, required = known_directives[directive]
except KeyError:
logger.debug('Ignoring unknown cache-control directive: %s',
directive)
continue
if not typ or not required:
retval[directive] = None
if typ:
try:
retval[directive] = typ(parts[1].strip())
except IndexError:
if required:
logger.debug('Missing value for cache-control '
'directive: %s', directive)
except ValueError:
logger.debug('Invalid value for cache-control directive '
'%s, must be %s', directive, typ.__name__)
if cc_header in headers:
parts = headers[cc_header].split(',')
parts_with_args = [
tuple([x.strip().lower() for x in part.split("=", 1)])
for part in parts if -1 != part.find("=")
]
parts_wo_args = [
(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")
]
retval = dict(parts_with_args + parts_wo_args)
return retval return retval
def cached_request(self, request): def cached_request(self, request):
@ -154,8 +180,8 @@ class CacheController(object):
freshness_lifetime = 0 freshness_lifetime = 0
# Check the max-age pragma in the cache control header # Check the max-age pragma in the cache control header
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): if 'max-age' in resp_cc:
freshness_lifetime = int(resp_cc['max-age']) freshness_lifetime = resp_cc['max-age']
logger.debug('Freshness lifetime from max-age: %i', logger.debug('Freshness lifetime from max-age: %i',
freshness_lifetime) freshness_lifetime)
@ -171,18 +197,12 @@ class CacheController(object):
# Determine if we are setting freshness limit in the # Determine if we are setting freshness limit in the
# request. Note, this overrides what was in the response. # request. Note, this overrides what was in the response.
if 'max-age' in cc: if 'max-age' in cc:
try: freshness_lifetime = cc['max-age']
freshness_lifetime = int(cc['max-age']) logger.debug('Freshness lifetime from request max-age: %i',
logger.debug('Freshness lifetime from request max-age: %i', freshness_lifetime)
freshness_lifetime)
except ValueError:
freshness_lifetime = 0
if 'min-fresh' in cc: if 'min-fresh' in cc:
try: min_fresh = cc['min-fresh']
min_fresh = int(cc['min-fresh'])
except ValueError:
min_fresh = 0
# adjust our current age by our min fresh # adjust our current age by our min fresh
current_age += min_fresh current_age += min_fresh
logger.debug('Adjusted current age from min-fresh: %i', logger.debug('Adjusted current age from min-fresh: %i',
@ -220,7 +240,8 @@ class CacheController(object):
return new_headers return new_headers
def cache_response(self, request, response, body=None): def cache_response(self, request, response, body=None,
status_codes=None):
""" """
Algorithm for caching requests. Algorithm for caching requests.
@ -228,7 +249,7 @@ class CacheController(object):
""" """
# From httplib2: Don't cache 206's since we aren't going to # From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests # handle byte range requests
cacheable_status_codes = [200, 203, 300, 301] cacheable_status_codes = status_codes or self.cacheable_status_codes
if response.status not in cacheable_status_codes: if response.status not in cacheable_status_codes:
logger.debug( logger.debug(
'Status code %s not in %s', 'Status code %s not in %s',
@ -257,10 +278,10 @@ class CacheController(object):
# Delete it from the cache if we happen to have it stored there # Delete it from the cache if we happen to have it stored there
no_store = False no_store = False
if cc.get('no-store'): if 'no-store' in cc:
no_store = True no_store = True
logger.debug('Response header has "no-store"') logger.debug('Response header has "no-store"')
if cc_req.get('no-store'): if 'no-store' in cc_req:
no_store = True no_store = True
logger.debug('Request header has "no-store"') logger.debug('Request header has "no-store"')
if no_store and self.cache.get(cache_url): if no_store and self.cache.get(cache_url):
@ -289,13 +310,12 @@ class CacheController(object):
# the cache. # the cache.
elif 'date' in response_headers: elif 'date' in response_headers:
# cache when there is a max-age > 0 # cache when there is a max-age > 0
if cc and cc.get('max-age'): if 'max-age' in cc and cc['max-age'] > 0:
if cc['max-age'].isdigit() and int(cc['max-age']) > 0: logger.debug('Caching b/c date exists and max-age > 0')
logger.debug('Caching b/c date exists and max-age > 0') self.cache.set(
self.cache.set( cache_url,
cache_url, self.serializer.dumps(request, response, body=body),
self.serializer.dumps(request, response, body=body), )
)
# If the request can expire, it means we should cache it # If the request can expire, it means we should cache it
# in the meantime. # in the meantime.

View file

@ -9,7 +9,7 @@ TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta, date=None): def expire_after(delta, date=None):
date = date or datetime.now() date = date or datetime.utcnow()
return date + delta return date + delta

View file

@ -3,25 +3,12 @@ import io
import json import json
import zlib import zlib
import msgpack
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from .compat import HTTPResponse, pickle, text_type from .compat import HTTPResponse, pickle, text_type
def _b64_encode_bytes(b):
return base64.b64encode(b).decode("ascii")
def _b64_encode_str(s):
return _b64_encode_bytes(s.encode("utf8"))
def _b64_encode(s):
if isinstance(s, text_type):
return _b64_encode_str(s)
return _b64_encode_bytes(s)
def _b64_decode_bytes(b): def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii")) return base64.b64decode(b.encode("ascii"))
@ -50,43 +37,40 @@ class Serializer(object):
# `Serializer.dump`. # `Serializer.dump`.
response._fp = io.BytesIO(body) response._fp = io.BytesIO(body)
# NOTE: This is all a bit weird, but it's really important that on
# Python 2.x these objects are unicode and not str, even when
# they contain only ascii. The problem here is that msgpack
# understands the difference between unicode and bytes and we
# have it set to differentiate between them, however Python 2
# doesn't know the difference. Forcing these to unicode will be
# enough to have msgpack know the difference.
data = { data = {
"response": { u"response": {
"body": _b64_encode_bytes(body), u"body": body,
"headers": dict( u"headers": dict(
(_b64_encode(k), _b64_encode(v)) (text_type(k), text_type(v))
for k, v in response.headers.items() for k, v in response.headers.items()
), ),
"status": response.status, u"status": response.status,
"version": response.version, u"version": response.version,
"reason": _b64_encode_str(response.reason), u"reason": text_type(response.reason),
"strict": response.strict, u"strict": response.strict,
"decode_content": response.decode_content, u"decode_content": response.decode_content,
}, },
} }
# Construct our vary headers # Construct our vary headers
data["vary"] = {} data[u"vary"] = {}
if "vary" in response_headers: if u"vary" in response_headers:
varied_headers = response_headers['vary'].split(',') varied_headers = response_headers[u'vary'].split(',')
for header in varied_headers: for header in varied_headers:
header = header.strip() header = header.strip()
data["vary"][header] = request.headers.get(header, None) header_value = request.headers.get(header, None)
if header_value is not None:
header_value = text_type(header_value)
data[u"vary"][header] = header_value
# Encode our Vary headers to ensure they can be serialized as JSON return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
data["vary"] = dict(
(_b64_encode(k), _b64_encode(v) if v is not None else v)
for k, v in data["vary"].items()
)
return b",".join([
b"cc=2",
zlib.compress(
json.dumps(
data, separators=(",", ":"), sort_keys=True,
).encode("utf8"),
),
])
def loads(self, request, data): def loads(self, request, data):
# Short circuit if we've been given an empty set of data # Short circuit if we've been given an empty set of data
@ -194,3 +178,17 @@ class Serializer(object):
) )
return self.prepare_response(request, cached) return self.prepare_response(request, cached)
def _loads_v3(self, request, data):
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
return
def _loads_v4(self, request, data):
try:
cached = msgpack.loads(data, encoding='utf-8')
except ValueError:
return
return self.prepare_response(request, cached)

View file

@ -6,14 +6,20 @@ def CacheControl(sess,
cache=None, cache=None,
cache_etags=True, cache_etags=True,
serializer=None, serializer=None,
heuristic=None): heuristic=None,
controller_class=None,
adapter_class=None,
cacheable_methods=None):
cache = cache or DictCache() cache = cache or DictCache()
adapter = CacheControlAdapter( adapter_class = adapter_class or CacheControlAdapter
adapter = adapter_class(
cache, cache,
cache_etags=cache_etags, cache_etags=cache_etags,
serializer=serializer, serializer=serializer,
heuristic=heuristic, heuristic=heuristic,
controller_class=controller_class,
cacheable_methods=cacheable_methods
) )
sess.mount('http://', adapter) sess.mount('http://', adapter)
sess.mount('https://', adapter) sess.mount('https://', adapter)

54
lib/msgpack/__init__.py Normal file
View file

@ -0,0 +1,54 @@
# coding: utf-8
from msgpack._version import version
from msgpack.exceptions import *
from collections import namedtuple
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
def __new__(cls, code, data):
if not isinstance(code, int):
raise TypeError("code must be int")
if not isinstance(data, bytes):
raise TypeError("data must be bytes")
if not 0 <= code <= 127:
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
import os
if os.environ.get('MSGPACK_PUREPYTHON'):
from msgpack.fallback import Packer, unpack, unpackb, Unpacker
else:
try:
from msgpack._packer import Packer
from msgpack._unpacker import unpack, unpackb, Unpacker
except ImportError:
from msgpack.fallback import Packer, unpack, unpackb, Unpacker
def pack(o, stream, **kwargs):
"""
Pack object `o` and write it to `stream`
See :class:`Packer` for options.
"""
packer = Packer(**kwargs)
stream.write(packer.pack(o))
def packb(o, **kwargs):
"""
Pack object `o` and return packed bytes
See :class:`Packer` for options.
"""
return Packer(**kwargs).pack(o)
# alias for compatibility to simplejson/marshal/pickle.
load = unpack
loads = unpackb
dump = pack
dumps = packb

1
lib/msgpack/_version.py Normal file
View file

@ -0,0 +1 @@
version = (0, 4, 8)

41
lib/msgpack/exceptions.py Normal file
View file

@ -0,0 +1,41 @@
class UnpackException(Exception):
"""Deprecated. Use Exception instead to catch all exception during unpacking."""
class BufferFull(UnpackException):
pass
class OutOfData(UnpackException):
pass
class UnpackValueError(UnpackException, ValueError):
"""Deprecated. Use ValueError instead."""
class ExtraData(UnpackValueError):
def __init__(self, unpacked, extra):
self.unpacked = unpacked
self.extra = extra
def __str__(self):
return "unpack(b) received extra data."
class PackException(Exception):
"""Deprecated. Use Exception instead to catch all exception during packing."""
class PackValueError(PackException, ValueError):
"""PackValueError is raised when type of input data is supported but it's value is unsupported.
Deprecated. Use ValueError instead.
"""
class PackOverflowError(PackValueError, OverflowError):
"""PackOverflowError is raised when integer value is out of range of msgpack support [-2**31, 2**32).
Deprecated. Use ValueError instead.
"""

921
lib/msgpack/fallback.py Normal file
View file

@ -0,0 +1,921 @@
"""Fallback pure Python implementation of msgpack"""
import sys
import struct
import warnings
if sys.version_info[0] == 3:
PY3 = True
int_types = int
Unicode = str
xrange = range
def dict_iteritems(d):
return d.items()
else:
PY3 = False
int_types = (int, long)
Unicode = unicode
def dict_iteritems(d):
return d.iteritems()
if hasattr(sys, 'pypy_version_info'):
# cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own
# StringBuilder is fastest.
from __pypy__ import newlist_hint
try:
from __pypy__.builders import BytesBuilder as StringBuilder
except ImportError:
from __pypy__.builders import StringBuilder
USING_STRINGBUILDER = True
class StringIO(object):
def __init__(self, s=b''):
if s:
self.builder = StringBuilder(len(s))
self.builder.append(s)
else:
self.builder = StringBuilder()
def write(self, s):
if isinstance(s, memoryview):
s = s.tobytes()
elif isinstance(s, bytearray):
s = bytes(s)
self.builder.append(s)
def getvalue(self):
return self.builder.build()
else:
USING_STRINGBUILDER = False
from io import BytesIO as StringIO
newlist_hint = lambda size: []
from msgpack.exceptions import (
BufferFull,
OutOfData,
UnpackValueError,
PackValueError,
PackOverflowError,
ExtraData)
from msgpack import ExtType
EX_SKIP = 0
EX_CONSTRUCT = 1
EX_READ_ARRAY_HEADER = 2
EX_READ_MAP_HEADER = 3
TYPE_IMMEDIATE = 0
TYPE_ARRAY = 1
TYPE_MAP = 2
TYPE_RAW = 3
TYPE_BIN = 4
TYPE_EXT = 5
DEFAULT_RECURSE_LIMIT = 511
def _check_type_strict(obj, t, type=type, tuple=tuple):
if type(t) is tuple:
return type(obj) in t
else:
return type(obj) is t
def _get_data_from_buffer(obj):
try:
view = memoryview(obj)
except TypeError:
# try to use legacy buffer protocol if 2.7, otherwise re-raise
if not PY3:
view = memoryview(buffer(obj))
warnings.warn("using old buffer interface to unpack %s; "
"this leads to unpacking errors if slicing is used and "
"will be removed in a future version" % type(obj),
RuntimeWarning)
else:
raise
if view.itemsize != 1:
raise ValueError("cannot unpack from multi-byte object")
return view
def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
Raises `ExtraData` when `packed` contains extra bytes.
See :class:`Unpacker` for options.
"""
data = stream.read()
return unpackb(data, **kwargs)
def unpackb(packed, **kwargs):
"""
Unpack an object from `packed`.
Raises `ExtraData` when `packed` contains extra bytes.
See :class:`Unpacker` for options.
"""
unpacker = Unpacker(None, **kwargs)
unpacker.feed(packed)
try:
ret = unpacker._unpack()
except OutOfData:
raise UnpackValueError("Data is not enough.")
if unpacker._got_extradata():
raise ExtraData(ret, unpacker._get_extradata())
return ret
class Unpacker(object):
"""Streaming unpacker.
arguments:
:param file_like:
File-like object having `.read(n)` method.
If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.
:param int read_size:
Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
:param bool use_list:
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
:param callable object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
(See also simplejson)
:param callable object_pairs_hook:
When specified, it should be callable.
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
:param str encoding:
Encoding used for decoding msgpack raw.
If it is None (default), msgpack raw is deserialized to Python bytes.
:param str unicode_errors:
Used for decoding msgpack raw with *encoding*.
(default: `'strict'`)
:param int max_buffer_size:
Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
Raises `BufferFull` exception when it is insufficient.
You should set this parameter when unpacking data from untrusted source.
:param int max_str_len:
Limits max length of str. (default: 2**31-1)
:param int max_bin_len:
Limits max length of bin. (default: 2**31-1)
:param int max_array_len:
Limits max length of array. (default: 2**31-1)
:param int max_map_len:
Limits max length of map. (default: 2**31-1)
example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like)
for o in unpacker:
process(o)
example of streaming deserialize from socket::
unpacker = Unpacker()
while True:
buf = sock.recv(1024**2)
if not buf:
break
unpacker.feed(buf)
for o in unpacker:
process(o)
"""
def __init__(self, file_like=None, read_size=0, use_list=True,
object_hook=None, object_pairs_hook=None, list_hook=None,
encoding=None, unicode_errors='strict', max_buffer_size=0,
ext_hook=ExtType,
max_str_len=2147483647, # 2**32-1
max_bin_len=2147483647,
max_array_len=2147483647,
max_map_len=2147483647,
max_ext_len=2147483647):
if file_like is None:
self._feeding = True
else:
if not callable(file_like.read):
raise TypeError("`file_like.read` must be callable")
self.file_like = file_like
self._feeding = False
#: array of bytes fed.
self._buffer = bytearray()
#: Which position we currently reads
self._buff_i = 0
# When Unpacker is used as an iterable, between the calls to next(),
# the buffer is not "consumed" completely, for efficiency sake.
# Instead, it is done sloppily. To make sure we raise BufferFull at
# the correct moments, we have to keep track of how sloppy we were.
# Furthermore, when the buffer is incomplete (that is: in the case
# we raise an OutOfData) we need to rollback the buffer to the correct
# state, which _buf_checkpoint records.
self._buf_checkpoint = 0
self._max_buffer_size = max_buffer_size or 2**31-1
if read_size > self._max_buffer_size:
raise ValueError("read_size must be smaller than max_buffer_size")
self._read_size = read_size or min(self._max_buffer_size, 16*1024)
self._encoding = encoding
self._unicode_errors = unicode_errors
self._use_list = use_list
self._list_hook = list_hook
self._object_hook = object_hook
self._object_pairs_hook = object_pairs_hook
self._ext_hook = ext_hook
self._max_str_len = max_str_len
self._max_bin_len = max_bin_len
self._max_array_len = max_array_len
self._max_map_len = max_map_len
self._max_ext_len = max_ext_len
self._stream_offset = 0
if list_hook is not None and not callable(list_hook):
raise TypeError('`list_hook` is not callable')
if object_hook is not None and not callable(object_hook):
raise TypeError('`object_hook` is not callable')
if object_pairs_hook is not None and not callable(object_pairs_hook):
raise TypeError('`object_pairs_hook` is not callable')
if object_hook is not None and object_pairs_hook is not None:
raise TypeError("object_pairs_hook and object_hook are mutually "
"exclusive")
if not callable(ext_hook):
raise TypeError("`ext_hook` is not callable")
def feed(self, next_bytes):
assert self._feeding
view = _get_data_from_buffer(next_bytes)
if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size):
raise BufferFull
self._buffer += view
def _consume(self):
""" Gets rid of the used parts of the buffer. """
self._stream_offset += self._buff_i - self._buf_checkpoint
self._buf_checkpoint = self._buff_i
def _got_extradata(self):
return self._buff_i < len(self._buffer)
def _get_extradata(self):
return self._buffer[self._buff_i:]
def read_bytes(self, n):
return self._read(n)
def _read(self, n):
# (int) -> bytearray
self._reserve(n)
i = self._buff_i
self._buff_i = i+n
return self._buffer[i:i+n]
def _reserve(self, n):
remain_bytes = len(self._buffer) - self._buff_i - n
# Fast path: buffer has n bytes already
if remain_bytes >= 0:
return
if self._feeding:
self._buff_i = self._buf_checkpoint
raise OutOfData
# Strip buffer before checkpoint before reading file.
if self._buf_checkpoint > 0:
del self._buffer[:self._buf_checkpoint]
self._buff_i -= self._buf_checkpoint
self._buf_checkpoint = 0
# Read from file
remain_bytes = -remain_bytes
while remain_bytes > 0:
to_read_bytes = max(self._read_size, remain_bytes)
read_data = self.file_like.read(to_read_bytes)
if not read_data:
break
assert isinstance(read_data, bytes)
self._buffer += read_data
remain_bytes -= len(read_data)
if len(self._buffer) < n + self._buff_i:
self._buff_i = 0 # rollback
raise OutOfData
def _read_header(self, execute=EX_CONSTRUCT):
typ = TYPE_IMMEDIATE
n = 0
obj = None
self._reserve(1)
b = self._buffer[self._buff_i]
self._buff_i += 1
if b & 0b10000000 == 0:
obj = b
elif b & 0b11100000 == 0b11100000:
obj = -1 - (b ^ 0xff)
elif b & 0b11100000 == 0b10100000:
n = b & 0b00011111
typ = TYPE_RAW
if n > self._max_str_len:
raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
obj = self._read(n)
elif b & 0b11110000 == 0b10010000:
n = b & 0b00001111
typ = TYPE_ARRAY
if n > self._max_array_len:
raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len)
elif b & 0b11110000 == 0b10000000:
n = b & 0b00001111
typ = TYPE_MAP
if n > self._max_map_len:
raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len)
elif b == 0xc0:
obj = None
elif b == 0xc2:
obj = False
elif b == 0xc3:
obj = True
elif b == 0xc4:
typ = TYPE_BIN
self._reserve(1)
n = self._buffer[self._buff_i]
self._buff_i += 1
if n > self._max_bin_len:
raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
obj = self._read(n)
elif b == 0xc5:
typ = TYPE_BIN
self._reserve(2)
n = struct.unpack_from(">H", self._buffer, self._buff_i)[0]
self._buff_i += 2
if n > self._max_bin_len:
raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
obj = self._read(n)
elif b == 0xc6:
typ = TYPE_BIN
self._reserve(4)
n = struct.unpack_from(">I", self._buffer, self._buff_i)[0]
self._buff_i += 4
if n > self._max_bin_len:
raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
obj = self._read(n)
elif b == 0xc7: # ext 8
typ = TYPE_EXT
self._reserve(2)
L, n = struct.unpack_from('Bb', self._buffer, self._buff_i)
self._buff_i += 2
if L > self._max_ext_len:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
obj = self._read(L)
elif b == 0xc8: # ext 16
typ = TYPE_EXT
self._reserve(3)
L, n = struct.unpack_from('>Hb', self._buffer, self._buff_i)
self._buff_i += 3
if L > self._max_ext_len:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
obj = self._read(L)
elif b == 0xc9: # ext 32
typ = TYPE_EXT
self._reserve(5)
L, n = struct.unpack_from('>Ib', self._buffer, self._buff_i)
self._buff_i += 5
if L > self._max_ext_len:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
obj = self._read(L)
elif b == 0xca:
self._reserve(4)
obj = struct.unpack_from(">f", self._buffer, self._buff_i)[0]
self._buff_i += 4
elif b == 0xcb:
self._reserve(8)
obj = struct.unpack_from(">d", self._buffer, self._buff_i)[0]
self._buff_i += 8
elif b == 0xcc:
self._reserve(1)
obj = self._buffer[self._buff_i]
self._buff_i += 1
elif b == 0xcd:
self._reserve(2)
obj = struct.unpack_from(">H", self._buffer, self._buff_i)[0]
self._buff_i += 2
elif b == 0xce:
self._reserve(4)
obj = struct.unpack_from(">I", self._buffer, self._buff_i)[0]
self._buff_i += 4
elif b == 0xcf:
self._reserve(8)
obj = struct.unpack_from(">Q", self._buffer, self._buff_i)[0]
self._buff_i += 8
elif b == 0xd0:
self._reserve(1)
obj = struct.unpack_from("b", self._buffer, self._buff_i)[0]
self._buff_i += 1
elif b == 0xd1:
self._reserve(2)
obj = struct.unpack_from(">h", self._buffer, self._buff_i)[0]
self._buff_i += 2
elif b == 0xd2:
self._reserve(4)
obj = struct.unpack_from(">i", self._buffer, self._buff_i)[0]
self._buff_i += 4
elif b == 0xd3:
self._reserve(8)
obj = struct.unpack_from(">q", self._buffer, self._buff_i)[0]
self._buff_i += 8
elif b == 0xd4: # fixext 1
typ = TYPE_EXT
if self._max_ext_len < 1:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len))
self._reserve(2)
n, obj = struct.unpack_from("b1s", self._buffer, self._buff_i)
self._buff_i += 2
elif b == 0xd5: # fixext 2
typ = TYPE_EXT
if self._max_ext_len < 2:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len))
self._reserve(3)
n, obj = struct.unpack_from("b2s", self._buffer, self._buff_i)
self._buff_i += 3
elif b == 0xd6: # fixext 4
typ = TYPE_EXT
if self._max_ext_len < 4:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len))
self._reserve(5)
n, obj = struct.unpack_from("b4s", self._buffer, self._buff_i)
self._buff_i += 5
elif b == 0xd7: # fixext 8
typ = TYPE_EXT
if self._max_ext_len < 8:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len))
self._reserve(9)
n, obj = struct.unpack_from("b8s", self._buffer, self._buff_i)
self._buff_i += 9
elif b == 0xd8: # fixext 16
typ = TYPE_EXT
if self._max_ext_len < 16:
raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len))
self._reserve(17)
n, obj = struct.unpack_from("b16s", self._buffer, self._buff_i)
self._buff_i += 17
elif b == 0xd9:
typ = TYPE_RAW
self._reserve(1)
n = self._buffer[self._buff_i]
self._buff_i += 1
if n > self._max_str_len:
raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
obj = self._read(n)
elif b == 0xda:
typ = TYPE_RAW
self._reserve(2)
n, = struct.unpack_from(">H", self._buffer, self._buff_i)
self._buff_i += 2
if n > self._max_str_len:
raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
obj = self._read(n)
elif b == 0xdb:
typ = TYPE_RAW
self._reserve(4)
n, = struct.unpack_from(">I", self._buffer, self._buff_i)
self._buff_i += 4
if n > self._max_str_len:
raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len)
obj = self._read(n)
elif b == 0xdc:
typ = TYPE_ARRAY
self._reserve(2)
n, = struct.unpack_from(">H", self._buffer, self._buff_i)
self._buff_i += 2
if n > self._max_array_len:
raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len)
elif b == 0xdd:
typ = TYPE_ARRAY
self._reserve(4)
n, = struct.unpack_from(">I", self._buffer, self._buff_i)
self._buff_i += 4
if n > self._max_array_len:
raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len)
elif b == 0xde:
self._reserve(2)
n, = struct.unpack_from(">H", self._buffer, self._buff_i)
self._buff_i += 2
if n > self._max_map_len:
raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len)
typ = TYPE_MAP
elif b == 0xdf:
self._reserve(4)
n, = struct.unpack_from(">I", self._buffer, self._buff_i)
self._buff_i += 4
if n > self._max_map_len:
raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len)
typ = TYPE_MAP
else:
raise UnpackValueError("Unknown header: 0x%x" % b)
return typ, n, obj
def _unpack(self, execute=EX_CONSTRUCT):
typ, n, obj = self._read_header(execute)
if execute == EX_READ_ARRAY_HEADER:
if typ != TYPE_ARRAY:
raise UnpackValueError("Expected array")
return n
if execute == EX_READ_MAP_HEADER:
if typ != TYPE_MAP:
raise UnpackValueError("Expected map")
return n
# TODO should we eliminate the recursion?
if typ == TYPE_ARRAY:
if execute == EX_SKIP:
for i in xrange(n):
# TODO check whether we need to call `list_hook`
self._unpack(EX_SKIP)
return
ret = newlist_hint(n)
for i in xrange(n):
ret.append(self._unpack(EX_CONSTRUCT))
if self._list_hook is not None:
ret = self._list_hook(ret)
# TODO is the interaction between `list_hook` and `use_list` ok?
return ret if self._use_list else tuple(ret)
if typ == TYPE_MAP:
if execute == EX_SKIP:
for i in xrange(n):
# TODO check whether we need to call hooks
self._unpack(EX_SKIP)
self._unpack(EX_SKIP)
return
if self._object_pairs_hook is not None:
ret = self._object_pairs_hook(
(self._unpack(EX_CONSTRUCT),
self._unpack(EX_CONSTRUCT))
for _ in xrange(n))
else:
ret = {}
for _ in xrange(n):
key = self._unpack(EX_CONSTRUCT)
ret[key] = self._unpack(EX_CONSTRUCT)
if self._object_hook is not None:
ret = self._object_hook(ret)
return ret
if execute == EX_SKIP:
return
if typ == TYPE_RAW:
if self._encoding is not None:
obj = obj.decode(self._encoding, self._unicode_errors)
else:
obj = bytes(obj)
return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
if typ == TYPE_BIN:
return bytes(obj)
assert typ == TYPE_IMMEDIATE
return obj
def __iter__(self):
return self
def __next__(self):
try:
ret = self._unpack(EX_CONSTRUCT)
self._consume()
return ret
except OutOfData:
self._consume()
raise StopIteration
next = __next__
def skip(self, write_bytes=None):
self._unpack(EX_SKIP)
if write_bytes is not None:
write_bytes(self._buffer[self._buf_checkpoint:self._buff_i])
self._consume()
def unpack(self, write_bytes=None):
ret = self._unpack(EX_CONSTRUCT)
if write_bytes is not None:
write_bytes(self._buffer[self._buf_checkpoint:self._buff_i])
self._consume()
return ret
def read_array_header(self, write_bytes=None):
ret = self._unpack(EX_READ_ARRAY_HEADER)
if write_bytes is not None:
write_bytes(self._buffer[self._buf_checkpoint:self._buff_i])
self._consume()
return ret
def read_map_header(self, write_bytes=None):
ret = self._unpack(EX_READ_MAP_HEADER)
if write_bytes is not None:
write_bytes(self._buffer[self._buf_checkpoint:self._buff_i])
self._consume()
return ret
def tell(self):
return self._stream_offset
class Packer(object):
"""
MessagePack Packer
usage:
packer = Packer()
astream.write(packer.pack(a))
astream.write(packer.pack(b))
Packer's constructor has some keyword arguments:
:param callable default:
Convert user type to builtin type that Packer supports.
See also simplejson's document.
:param str encoding:
Convert unicode to bytes with this encoding. (default: 'utf-8')
:param str unicode_errors:
Error handler for encoding unicode. (default: 'strict')
:param bool use_single_float:
Use single precision float type for float. (default: False)
:param bool autoreset:
Reset buffer after each pack and return its content as `bytes`. (default: True).
If set this to false, use `bytes()` to get content and `.reset()` to clear buffer.
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode.
:param bool strict_types:
If set to true, types will be checked to be exact. Derived classes
from serializeable types will not be serialized and will be
treated as unsupported type and forwarded to default.
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.
"""
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
use_single_float=False, autoreset=True, use_bin_type=False,
strict_types=False):
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._encoding = encoding
self._unicode_errors = unicode_errors
self._buffer = StringIO()
if default is not None:
if not callable(default):
raise TypeError("default must be callable")
self._default = default
def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT,
check=isinstance, check_type_strict=_check_type_strict):
default_used = False
if self._strict_types:
check = check_type_strict
list_types = list
else:
list_types = (list, tuple)
while True:
if nest_limit < 0:
raise PackValueError("recursion limit exceeded")
if obj is None:
return self._buffer.write(b"\xc0")
if check(obj, bool):
if obj:
return self._buffer.write(b"\xc3")
return self._buffer.write(b"\xc2")
if check(obj, int_types):
if 0 <= obj < 0x80:
return self._buffer.write(struct.pack("B", obj))
if -0x20 <= obj < 0:
return self._buffer.write(struct.pack("b", obj))
if 0x80 <= obj <= 0xff:
return self._buffer.write(struct.pack("BB", 0xcc, obj))
if -0x80 <= obj < 0:
return self._buffer.write(struct.pack(">Bb", 0xd0, obj))
if 0xff < obj <= 0xffff:
return self._buffer.write(struct.pack(">BH", 0xcd, obj))
if -0x8000 <= obj < -0x80:
return self._buffer.write(struct.pack(">Bh", 0xd1, obj))
if 0xffff < obj <= 0xffffffff:
return self._buffer.write(struct.pack(">BI", 0xce, obj))
if -0x80000000 <= obj < -0x8000:
return self._buffer.write(struct.pack(">Bi", 0xd2, obj))
if 0xffffffff < obj <= 0xffffffffffffffff:
return self._buffer.write(struct.pack(">BQ", 0xcf, obj))
if -0x8000000000000000 <= obj < -0x80000000:
return self._buffer.write(struct.pack(">Bq", 0xd3, obj))
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = True
continue
raise PackOverflowError("Integer value out of range")
if check(obj, (bytes, bytearray)):
n = len(obj)
if n >= 2**32:
raise PackValueError("%s is too large" % type(obj).__name__)
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, Unicode):
if self._encoding is None:
raise TypeError(
"Can't encode unicode string: "
"no encoding is specified")
obj = obj.encode(self._encoding, self._unicode_errors)
n = len(obj)
if n >= 2**32:
raise PackValueError("String is too large")
self._pack_raw_header(n)
return self._buffer.write(obj)
if check(obj, memoryview):
n = len(obj) * obj.itemsize
if n >= 2**32:
raise PackValueError("Memoryview is too large")
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, float):
if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xca, obj))
return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
if check(obj, ExtType):
code = obj.code
data = obj.data
assert isinstance(code, int)
assert isinstance(data, bytes)
L = len(data)
if L == 1:
self._buffer.write(b'\xd4')
elif L == 2:
self._buffer.write(b'\xd5')
elif L == 4:
self._buffer.write(b'\xd6')
elif L == 8:
self._buffer.write(b'\xd7')
elif L == 16:
self._buffer.write(b'\xd8')
elif L <= 0xff:
self._buffer.write(struct.pack(">BB", 0xc7, L))
elif L <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xc8, L))
else:
self._buffer.write(struct.pack(">BI", 0xc9, L))
self._buffer.write(struct.pack("b", code))
self._buffer.write(data)
return
if check(obj, list_types):
n = len(obj)
self._pack_array_header(n)
for i in xrange(n):
self._pack(obj[i], nest_limit - 1)
return
if check(obj, dict):
return self._pack_map_pairs(len(obj), dict_iteritems(obj),
nest_limit - 1)
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = 1
continue
raise TypeError("Cannot serialize %r" % obj)
def pack(self, obj):
self._pack(obj)
ret = self._buffer.getvalue()
if self._autoreset:
self._buffer = StringIO()
elif USING_STRINGBUILDER:
self._buffer = StringIO(ret)
return ret
def pack_map_pairs(self, pairs):
self._pack_map_pairs(len(pairs), pairs)
ret = self._buffer.getvalue()
if self._autoreset:
self._buffer = StringIO()
elif USING_STRINGBUILDER:
self._buffer = StringIO(ret)
return ret
def pack_array_header(self, n):
if n >= 2**32:
raise PackValueError
self._pack_array_header(n)
ret = self._buffer.getvalue()
if self._autoreset:
self._buffer = StringIO()
elif USING_STRINGBUILDER:
self._buffer = StringIO(ret)
return ret
def pack_map_header(self, n):
if n >= 2**32:
raise PackValueError
self._pack_map_header(n)
ret = self._buffer.getvalue()
if self._autoreset:
self._buffer = StringIO()
elif USING_STRINGBUILDER:
self._buffer = StringIO(ret)
return ret
def pack_ext_type(self, typecode, data):
if not isinstance(typecode, int):
raise TypeError("typecode must have int type.")
if not 0 <= typecode <= 127:
raise ValueError("typecode should be 0-127")
if not isinstance(data, bytes):
raise TypeError("data must have bytes type")
L = len(data)
if L > 0xffffffff:
raise PackValueError("Too large data")
if L == 1:
self._buffer.write(b'\xd4')
elif L == 2:
self._buffer.write(b'\xd5')
elif L == 4:
self._buffer.write(b'\xd6')
elif L == 8:
self._buffer.write(b'\xd7')
elif L == 16:
self._buffer.write(b'\xd8')
elif L <= 0xff:
self._buffer.write(b'\xc7' + struct.pack('B', L))
elif L <= 0xffff:
self._buffer.write(b'\xc8' + struct.pack('>H', L))
else:
self._buffer.write(b'\xc9' + struct.pack('>I', L))
self._buffer.write(struct.pack('B', typecode))
self._buffer.write(data)
def _pack_array_header(self, n):
if n <= 0x0f:
return self._buffer.write(struct.pack('B', 0x90 + n))
if n <= 0xffff:
return self._buffer.write(struct.pack(">BH", 0xdc, n))
if n <= 0xffffffff:
return self._buffer.write(struct.pack(">BI", 0xdd, n))
raise PackValueError("Array is too large")
def _pack_map_header(self, n):
if n <= 0x0f:
return self._buffer.write(struct.pack('B', 0x80 + n))
if n <= 0xffff:
return self._buffer.write(struct.pack(">BH", 0xde, n))
if n <= 0xffffffff:
return self._buffer.write(struct.pack(">BI", 0xdf, n))
raise PackValueError("Dict is too large")
def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
self._pack_map_header(n)
for (k, v) in pairs:
self._pack(k, nest_limit - 1)
self._pack(v, nest_limit - 1)
def _pack_raw_header(self, n):
if n <= 0x1f:
self._buffer.write(struct.pack('B', 0xa0 + n))
elif self._use_bin_type and n <= 0xff:
self._buffer.write(struct.pack('>BB', 0xd9, n))
elif n <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xda, n))
elif n <= 0xffffffff:
self._buffer.write(struct.pack(">BI", 0xdb, n))
else:
raise PackValueError('Raw is too large')
def _pack_bin_header(self, n):
if not self._use_bin_type:
return self._pack_raw_header(n)
elif n <= 0xff:
return self._buffer.write(struct.pack('>BB', 0xc4, n))
elif n <= 0xffff:
return self._buffer.write(struct.pack(">BH", 0xc5, n))
elif n <= 0xffffffff:
return self._buffer.write(struct.pack(">BI", 0xc6, n))
else:
raise PackValueError('Bin is too large')
def bytes(self):
return self._buffer.getvalue()
def reset(self):
self._buffer = StringIO()