2015-04-28 17:32:10 +00:00
|
|
|
import base64
|
2014-04-23 06:24:08 +00:00
|
|
|
import io
|
2015-04-28 17:32:10 +00:00
|
|
|
import json
|
|
|
|
import zlib
|
2014-04-23 06:24:08 +00:00
|
|
|
|
2017-07-12 00:36:15 +00:00
|
|
|
import msgpack
|
2014-04-23 06:24:08 +00:00
|
|
|
from requests.structures import CaseInsensitiveDict
|
|
|
|
|
2016-01-12 00:00:58 +00:00
|
|
|
from .compat import HTTPResponse, pickle, text_type
|
2014-04-23 06:24:08 +00:00
|
|
|
|
|
|
|
|
2015-04-28 17:32:10 +00:00
|
|
|
def _b64_decode_bytes(b):
|
|
|
|
return base64.b64decode(b.encode("ascii"))
|
|
|
|
|
|
|
|
|
|
|
|
def _b64_decode_str(s):
    """Decode base64 text ``s`` and return the payload as a UTF-8 string."""
    raw = _b64_decode_bytes(s)
    return raw.decode("utf8")
|
|
|
|
|
|
|
|
|
2014-04-23 06:24:08 +00:00
|
|
|
class Serializer(object):
    """Serialize cached HTTP responses to bytes and load them back.

    ``dumps`` writes the current format: a ``cc=4`` version tag, a comma and
    a msgpack payload.  ``loads`` reads the version tag and dispatches to the
    matching ``_loads_vN`` method.  Anything that cannot be loaded is
    reported as a cache miss by returning ``None``.
    """

    def dumps(self, request, response, body=None):
        """Serialize ``response`` and the request's Vary values to bytes.

        :param request: the request that produced ``response``; only its
            ``headers`` mapping is read, to record the values of the headers
            named in the response's ``Vary`` header.
        :param response: the response object to store.
        :param body: the raw response body.  When ``None`` the body is read
            from ``response`` itself.
        :returns: ``b"cc=4,"`` followed by the msgpack-encoded payload.
        """
        response_headers = CaseInsensitiveDict(response.headers)

        if body is None:
            body = response.read(decode_content=False)

            # NOTE: 99% sure this is dead code. I'm only leaving it
            #       here b/c I don't have a test yet to prove
            #       it. Basically, before using
            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
            #       this made an effort to reset the file handle. The
            #       `CallbackFileWrapper` short circuits this code by
            #       setting the body as the content is consumed, the
            #       result being a `body` argument is *always* passed
            #       into cache_response, and in turn,
            #       `Serializer.dumps`.
            response._fp = io.BytesIO(body)

        # NOTE: This is all a bit weird, but it's really important that on
        #       Python 2.x these objects are unicode and not str, even when
        #       they contain only ascii. The problem here is that msgpack
        #       understands the difference between unicode and bytes and we
        #       have it set to differentiate between them, however Python 2
        #       doesn't know the difference. Forcing these to unicode will be
        #       enough to have msgpack know the difference.
        data = {
            u"response": {
                u"body": body,
                u"headers": dict(
                    (text_type(k), text_type(v))
                    for k, v in response.headers.items()
                ),
                u"status": response.status,
                u"version": response.version,
                u"reason": text_type(response.reason),
                u"strict": response.strict,
                u"decode_content": response.decode_content,
            },
        }

        # Construct our vary headers
        data[u"vary"] = {}
        if u"vary" in response_headers:
            varied_headers = response_headers[u"vary"].split(",")
            for header in varied_headers:
                # Header names are forced to text for the same reason as the
                # values above (see the NOTE on unicode vs. str).
                header = text_type(header).strip()
                header_value = request.headers.get(header, None)
                if header_value is not None:
                    header_value = text_type(header_value)
                data[u"vary"][header] = header_value

        return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])

    def loads(self, request, data):
        """Load a serialized cache entry, dispatching on its version tag.

        :param request: the current request, forwarded to the version
            specific loader.
        :param data: bytes previously produced by a serializer, or an empty
            value.
        :returns: whatever the matching ``_loads_vN`` method returns, or
            ``None`` when ``data`` is empty or the version is unknown.
        """
        # Short circuit if we've been given an empty set of data
        if not data:
            return

        # Determine what version of the serializer the data was serialized
        # with
        try:
            ver, data = data.split(b",", 1)
        except ValueError:
            ver = b"cc=0"

        # Make sure that our "ver" is actually a version and isn't a false
        # positive from a , being in the data stream.
        if ver[:3] != b"cc=":
            # Put back the comma that ``split`` consumed so the payload is
            # handed on exactly as it was received.
            data = ver + b"," + data
            ver = b"cc=0"

        # Get the version number out of the cc=N
        try:
            ver = ver.split(b"=", 1)[-1].decode("ascii")
        except UnicodeDecodeError:
            # A tag that is not ASCII cannot name any loader; treat the
            # entry as a miss, like any other unknown version.
            return

        # Dispatch to the actual load method for the given version
        # NOTE: the ``try`` covers the call as well as the lookup, so an
        #       AttributeError raised inside a loader is also reported as a
        #       miss.
        try:
            return getattr(self, "_loads_v{0}".format(ver))(request, data)
        except AttributeError:
            # This is a version we don't have a loads function for, so we'll
            # just treat it as a miss and return None
            return

    def prepare_response(self, request, cached):
        """Verify our vary headers match and construct a real urllib3
        HTTPResponse object.

        :param request: the current request; its ``headers`` are compared
            against the values stored under ``cached["vary"]``.
        :param cached: the deserialized cache entry.  Its ``"response"``
            mapping is modified in place (``"body"`` is removed and
            ``"headers"`` is replaced).
        :returns: an ``HTTPResponse`` built from the entry, or ``None`` when
            the entry must not be used for this request.
        """
        vary = cached.get("vary", {})

        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        if "*" in vary:
            return

        # Ensure that the Vary headers for the cached response match our
        # request
        for header, value in vary.items():
            if request.headers.get(header, None) != value:
                return

        body_raw = cached["response"].pop("body")

        # Drop a stored "chunked" transfer-encoding header before the
        # response is rebuilt from the stored body.
        headers = CaseInsensitiveDict(data=cached["response"]["headers"])
        if headers.get("transfer-encoding", "") == "chunked":
            headers.pop("transfer-encoding")

        cached["response"]["headers"] = headers

        try:
            body = io.BytesIO(body_raw)
        except TypeError:
            # This can happen if cachecontrol serialized to v1 format (pickle)
            # using Python 2. A Python 2 str(byte string) will be unpickled as
            # a Python 3 str (unicode string), which will cause the above to
            # fail with:
            #
            #     TypeError: 'str' does not support the buffer interface
            body = io.BytesIO(body_raw.encode("utf8"))

        return HTTPResponse(
            body=body,
            preload_content=False,
            **cached["response"]
        )

    def _loads_v0(self, request, data):
        """Handle legacy (untagged) cache data; always a miss."""
        # The original legacy cache data. This doesn't contain enough
        # information to construct everything we need, so we'll treat this as
        # a miss.
        return

    def _loads_v1(self, request, data):
        """Load a v1 (pickle) entry; returns ``None`` on ``ValueError``."""
        # SECURITY: ``pickle.loads`` can execute arbitrary code embedded in
        # the payload, so v1 entries are only safe to load from a cache
        # store that untrusted parties cannot write to.
        try:
            cached = pickle.loads(data)
        except ValueError:
            return

        return self.prepare_response(request, cached)

    def _loads_v2(self, request, data):
        """Load a v2 entry: zlib-compressed JSON with base64-encoded fields.

        Returns ``None`` when the payload cannot be decompressed or parsed.
        """
        try:
            cached = json.loads(zlib.decompress(data).decode("utf8"))
        except (ValueError, zlib.error):
            return

        # We need to decode the items that we've base64 encoded
        cached["response"]["body"] = _b64_decode_bytes(
            cached["response"]["body"]
        )
        cached["response"]["headers"] = dict(
            (_b64_decode_str(k), _b64_decode_str(v))
            for k, v in cached["response"]["headers"].items()
        )
        cached["response"]["reason"] = _b64_decode_str(
            cached["response"]["reason"],
        )
        cached["vary"] = dict(
            (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
            for k, v in cached["vary"].items()
        )

        return self.prepare_response(request, cached)

    def _loads_v3(self, request, data):
        """Handle v3 entries; always a miss."""
        # Due to Python 2 encoding issues, it's impossible to know for sure
        # exactly how to load v3 entries, thus we'll treat these as a miss so
        # that they get rewritten out as v4 entries.
        return

    def _loads_v4(self, request, data):
        """Load a v4 (msgpack) entry; returns ``None`` on ``ValueError``."""
        try:
            # ``raw=False`` decodes msgpack strings to text.  It replaces the
            # ``encoding`` keyword, which msgpack 1.0 no longer accepts.
            cached = msgpack.loads(data, raw=False)
        except ValueError:
            return

        return self.prepare_response(request, cached)
|