# SickGear/lib/cachecontrol/serialize.py

import base64
import io
import json
import zlib

import msgpack
from requests.structures import CaseInsensitiveDict

from .compat import HTTPResponse, pickle, text_type


def _b64_decode_bytes(b):
    return base64.b64decode(b.encode("ascii"))


def _b64_decode_str(s):
    """Decode the base64 text ``s`` and return the payload as UTF-8 text."""
    raw = _b64_decode_bytes(s)
    return raw.decode("utf8")


class Serializer(object):
    """Serialize HTTP responses to, and restore them from, cache entries.

    An entry is stored as ``cc=<version>,<payload>``. ``dumps`` always
    writes the v4 (msgpack) format; ``loads`` dispatches on the version tag
    to the matching ``_loads_v<version>`` method. Every loader reports a
    cache miss by returning ``None``.
    """

    def dumps(self, request, response, body=None):
        """Return ``response`` serialized as a v4 cache entry (bytes).

        :param request: request that produced ``response``; its ``headers``
            are consulted for each field named in the response's ``Vary``
            header.
        :param response: response to store. Its ``headers``, ``status``,
            ``version``, ``reason``, ``strict`` and ``decode_content``
            attributes are recorded.
        :param body: raw body to store. When ``None`` the body is read from
            ``response`` with ``decode_content=False``.
        :returns: ``b"cc=4,"`` followed by the msgpack-encoded payload.
        """
        response_headers = CaseInsensitiveDict(response.headers)

        if body is None:
            body = response.read(decode_content=False)

            # NOTE: 99% sure this is dead code. I'm only leaving it
            #       here b/c I don't have a test yet to prove
            #       it. Basically, before using
            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
            #       this made an effort to reset the file handle. The
            #       `CallbackFileWrapper` short circuits this code by
            #       setting the body as the content is consumed, the
            #       result being a `body` argument is *always* passed
            #       into cache_response, and in turn,
            #       `Serializer.dump`.
            response._fp = io.BytesIO(body)

        # NOTE: This is all a bit weird, but it's really important that on
        #       Python 2.x these objects are unicode and not str, even when
        #       they contain only ascii. The problem here is that msgpack
        #       understands the difference between unicode and bytes and we
        #       have it set to differentiate between them, however Python 2
        #       doesn't know the difference. Forcing these to unicode will be
        #       enough to have msgpack know the difference.
        data = {
            u"response": {
                u"body": body,
                u"headers": dict(
                    (text_type(k), text_type(v))
                    for k, v in response.headers.items()
                ),
                u"status": response.status,
                u"version": response.version,
                u"reason": text_type(response.reason),
                u"strict": response.strict,
                u"decode_content": response.decode_content,
            },
        }

        # Construct our vary headers
        data[u"vary"] = {}
        if u"vary" in response_headers:
            varied_headers = response_headers[u'vary'].split(',')
            for header in varied_headers:
                # The header name becomes a key of the serialized mapping,
                # so it is forced to unicode for the same reason as the
                # values above.
                header = text_type(header).strip()
                header_value = request.headers.get(header, None)
                if header_value is not None:
                    header_value = text_type(header_value)
                data[u"vary"][header] = header_value

        return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])

    def loads(self, request, data):
        """Restore a response from the serialized cache entry ``data``.

        :param request: the request the cached response is wanted for; it is
            passed through to the version-specific loader.
        :param data: bytes as produced by ``dumps`` (any supported version),
            or an empty/``None`` value.
        :returns: whatever the version-specific loader returns, or ``None``
            when ``data`` is empty, the version tag is unreadable, or no
            loader exists for that version.
        """
        # Short circuit if we've been given an empty set of data
        if not data:
            return None

        # Determine what version of the serializer the data was serialized
        # with
        try:
            ver, payload = data.split(b",", 1)
        except ValueError:
            # No comma at all: there is no version tag, so this is legacy.
            ver, payload = b"cc=0", data

        # Make sure that our "ver" is actually a version and isn't a false
        # positive from a , being in the data stream. In that case the
        # payload is the untouched input; re-joining the two halves would
        # lose the comma that split() consumed.
        if ver[:3] != b"cc=":
            ver, payload = b"cc=0", data

        # Get the version number out of the cc=N
        try:
            ver = ver.split(b"=", 1)[-1].decode("ascii")
        except UnicodeDecodeError:
            # A non-ASCII version tag cannot name any loader: cache miss.
            return None

        # Dispatch to the actual load method for the given version
        try:
            return getattr(self, "_loads_v{0}".format(ver))(request, payload)
        except AttributeError:
            # This is a version we don't have a loads function for, so we'll
            # just treat it as a miss and return None. An AttributeError
            # raised inside a loader ends up here as well and is likewise
            # reported as a miss.
            return None

    def prepare_response(self, request, cached):
        """Verify our vary headers match and construct a real urllib3
        HTTPResponse object.

        :param request: the current request; ``request.headers`` is compared
            against the header values recorded under ``cached["vary"]``.
        :param cached: the deserialized entry, a mapping with a ``response``
            item and an optional ``vary`` item. Its ``response`` mapping is
            modified in place (the body is popped and the headers replaced).
        :returns: an ``HTTPResponse`` built from the cached data, or
            ``None`` when the entry does not apply to ``request``.
        """
        vary = cached.get("vary", {})

        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        if "*" in vary:
            return None

        # Ensure that the Vary headers for the cached response match our
        # request
        for header, value in vary.items():
            if request.headers.get(header, None) != value:
                return None

        body_raw = cached["response"].pop("body")

        # Drop a recorded "chunked" transfer-encoding before the headers are
        # handed to HTTPResponse.
        headers = CaseInsensitiveDict(data=cached['response']['headers'])
        if headers.get('transfer-encoding', '') == 'chunked':
            headers.pop('transfer-encoding')

        cached['response']['headers'] = headers

        try:
            body = io.BytesIO(body_raw)
        except TypeError:
            # This can happen if cachecontrol serialized to v1 format (pickle)
            # using Python 2. A Python 2 str(byte string) will be unpickled as
            # a Python 3 str (unicode string), which will cause the above to
            # fail with:
            #
            #     TypeError: 'str' does not support the buffer interface
            body = io.BytesIO(body_raw.encode('utf8'))

        return HTTPResponse(
            body=body,
            preload_content=False,
            **cached["response"]
        )

    def _loads_v0(self, request, data):
        """Treat legacy (unversioned) entries as a miss."""
        # The original legacy cache data. This doesn't contain enough
        # information to construct everything we need, so we'll treat this as
        # a miss.
        return None

    def _loads_v1(self, request, data):
        """Load a v1 (pickle) entry; return ``None`` if it cannot be read."""
        # SECURITY NOTE(review): unpickling can execute arbitrary code, so
        # this is only safe while the cache backend cannot be written to by
        # an untrusted party. Consider treating v1 entries as a miss.
        try:
            cached = pickle.loads(data)
        except (pickle.UnpicklingError, AttributeError, EOFError,
                ImportError, IndexError, KeyError, TypeError, ValueError):
            # Truncated or corrupt pickle data is a cache miss, not an error.
            return None

        return self.prepare_response(request, cached)

    def _loads_v2(self, request, data):
        """Load a v2 (zlib-compressed JSON) entry; ``None`` if unreadable."""
        try:
            cached = json.loads(zlib.decompress(data).decode("utf8"))
        except (ValueError, zlib.error):
            return None

        # We need to decode the items that we've base64 encoded. An entry
        # that decompresses and parses but does not have the expected shape
        # (missing keys, wrong types, invalid base64) is treated as a miss.
        try:
            response = cached["response"]
            body_b64 = response["body"]
            headers_b64 = response["headers"]
            reason_b64 = response["reason"]
            vary_b64 = cached["vary"]

            response["body"] = _b64_decode_bytes(body_b64)
            response["headers"] = dict(
                (_b64_decode_str(k), _b64_decode_str(v))
                for k, v in headers_b64.items()
            )
            response["reason"] = _b64_decode_str(reason_b64)
            cached["vary"] = dict(
                (_b64_decode_str(k),
                 _b64_decode_str(v) if v is not None else v)
                for k, v in vary_b64.items()
            )
        except (AttributeError, KeyError, TypeError, ValueError):
            return None

        return self.prepare_response(request, cached)

    def _loads_v3(self, request, data):
        """Treat v3 entries as a miss."""
        # Due to Python 2 encoding issues, it's impossible to know for sure
        # exactly how to load v3 entries, thus we'll treat these as a miss so
        # that they get rewritten out as v4 entries.
        return None

    def _loads_v4(self, request, data):
        """Load a v4 (msgpack) entry; return ``None`` if it cannot be read."""
        # NOTE(review): the ``encoding`` keyword is believed to be deprecated
        # in newer msgpack releases in favour of ``raw=False`` -- confirm
        # against the msgpack version this project bundles before changing.
        try:
            cached = msgpack.loads(data, encoding='utf-8')
        except ValueError:
            return None

        return self.prepare_response(request, cached)
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`import base64`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`import io`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`import json`
			`import zlib`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`import msgpack`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`from requests.structures import CaseInsensitiveDict`

Update cachecontrol library 0.11.2 to 0.11.5. 2016-01-12 00:00:58 +00:00			`from .compat import HTTPResponse, pickle, text_type`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00

Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`def _b64_decode_bytes(b):`
			`return base64.b64decode(b.encode("ascii"))`


			`def _b64_decode_str(s):`
			`return _b64_decode_bytes(s).decode("utf8")`


Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`class Serializer(object):`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`def dumps(self, request, response, body=None):`
			`response_headers = CaseInsensitiveDict(response.headers)`

			`if body is None:`
			`body = response.read(decode_content=False)`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00
			`# NOTE: 99% sure this is dead code. I'm only leaving it`
			`# here b/c I don't have a test yet to prove`
			`# it. Basically, before using`
			# `cachecontrol.filewrapper.CallbackFileWrapper`,
			`# this made an effort to reset the file handle. The`
			# `CallbackFileWrapper` short circuits this code by
			`# setting the body as the content is consumed, the`
			# result being a `body` argument is always passed
			`# into cache_response, and in turn,`
			# `Serializer.dump`.
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`response._fp = io.BytesIO(body)`

Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`# NOTE: This is all a bit weird, but it's really important that on`
			`# Python 2.x these objects are unicode and not str, even when`
			`# they contain only ascii. The problem here is that msgpack`
			`# understands the difference between unicode and bytes and we`
			`# have it set to differentiate between them, however Python 2`
			`# doesn't know the difference. Forcing these to unicode will be`
			`# enough to have msgpack know the difference.`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`data = {`
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`u"response": {`
			`u"body": body,`
			`u"headers": dict(`
			`(text_type(k), text_type(v))`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`for k, v in response.headers.items()`
			`),`
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`u"status": response.status,`
			`u"version": response.version,`
			`u"reason": text_type(response.reason),`
			`u"strict": response.strict,`
			`u"decode_content": response.decode_content,`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`},`
			`}`

			`# Construct our vary headers`
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`data[u"vary"] = {}`
			`if u"vary" in response_headers:`
			`varied_headers = response_headers[u'vary'].split(',')`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`for header in varied_headers:`
			`header = header.strip()`
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`header_value = request.headers.get(header, None)`
			`if header_value is not None:`
			`header_value = text_type(header_value)`
			`data[u"vary"][header] = header_value`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00			`return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00
			`def loads(self, request, data):`
			`# Short circuit if we've been given an empty set of data`
			`if not data:`
			`return`

			`# Determine what version of the serializer the data was serialized`
			`# with`
			`try:`
			`ver, data = data.split(b",", 1)`
			`except ValueError:`
			`ver = b"cc=0"`

			`# Make sure that our "ver" is actually a version and isn't a false`
			`# positive from a , being in the data stream.`
			`if ver[:3] != b"cc=":`
			`data = ver + data`
			`ver = b"cc=0"`

			`# Get the version number out of the cc=N`
			`ver = ver.split(b"=", 1)[-1].decode("ascii")`

			`# Dispatch to the actual load method for the given version`
			`try:`
			`return getattr(self, "_loads_v{0}".format(ver))(request, data)`
			`except AttributeError:`
			`# This is a version we don't have a loads function for, so we'll`
			`# just treat it as a miss and return None`
			`return`

Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`def prepare_response(self, request, cached):`
			`"""Verify our vary headers match and construct a real urllib3`
			`HTTPResponse object.`
			`"""`
Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`# Special case the '*' Vary value as it means we cannot actually`
			`# determine if the cached response is suitable for this request.`
			`if "*" in cached.get("vary", {}):`
			`return`

			`# Ensure that the Vary headers for the cached response match our`
			`# request`
			`for header, value in cached.get("vary", {}).items():`
			`if request.headers.get(header, None) != value:`
			`return`

Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`body_raw = cached["response"].pop("body")`

Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776). 2017-01-27 14:21:52 +00:00			`headers = CaseInsensitiveDict(data=cached['response']['headers'])`
			`if headers.get('transfer-encoding', '') == 'chunked':`
			`headers.pop('transfer-encoding')`

			`cached['response']['headers'] = headers`

Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`try:`
			`body = io.BytesIO(body_raw)`
			`except TypeError:`
			`# This can happen if cachecontrol serialized to v1 format (pickle)`
			`# using Python 2. A Python 2 str(byte string) will be unpickled as`
			`# a Python 3 str (unicode string), which will cause the above to`
			`# fail with:`
			`#`
			`# TypeError: 'str' does not support the buffer interface`
			`body = io.BytesIO(body_raw.encode('utf8'))`

Updated our cache code. Updated rsstorrents to not bother using requests sessions. 2014-04-23 06:24:08 +00:00			`return HTTPResponse(`
			`body=body,`
			`preload_content=False,`
			`**cached["response"]`
			`)`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00
			`def _loads_v0(self, request, data):`
			`# The original legacy cache data. This doesn't contain enough`
			`# information to construct everything we need, so we'll treat this as`
			`# a miss.`
			`return`

			`def _loads_v1(self, request, data):`
			`try:`
			`cached = pickle.loads(data)`
			`except ValueError:`
			`return`

			`return self.prepare_response(request, cached)`

			`def _loads_v2(self, request, data):`
			`try:`
			`cached = json.loads(zlib.decompress(data).decode("utf8"))`
Update cachecontrol library 0.11.5 to 0.11.7 (3b3b776). 2017-01-27 14:21:52 +00:00			`except (ValueError, zlib.error):`
Update cachecontrol library 0.9.3 to 0.11.2. 2015-04-28 17:32:10 +00:00			`return`

			`# We need to decode the items that we've base64 encoded`
			`cached["response"]["body"] = _b64_decode_bytes(`
			`cached["response"]["body"]`
			`)`
			`cached["response"]["headers"] = dict(`
			`(_b64_decode_str(k), _b64_decode_str(v))`
			`for k, v in cached["response"]["headers"].items()`
			`)`
			`cached["response"]["reason"] = _b64_decode_str(`
			`cached["response"]["reason"],`
			`)`
			`cached["vary"] = dict(`
			`(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)`
			`for k, v in cached["vary"].items()`
			`)`

			`return self.prepare_response(request, cached)`
Update cachecontrol library 0.11.5 to 0.12.3 (db54c40). 2017-07-12 00:36:15 +00:00
			`def _loads_v3(self, request, data):`
			`# Due to Python 2 encoding issues, it's impossible to know for sure`
			`# exactly how to load v3 entries, thus we'll treat these as a miss so`
			`# that they get rewritten out as v4 entries.`
			`return`

			`def _loads_v4(self, request, data):`
			`try:`
			`cached = msgpack.loads(data, encoding='utf-8')`
			`except ValueError:`
			`return`

			`return self.prepare_response(request, cached)`