Merge pull request #337 from JackDandy/feature/UpdateCachecontrol

Feature/update cachecontrol
This commit is contained in:
JackDandy 2015-04-29 02:13:00 +01:00
commit b0314454f9
18 changed files with 513 additions and 208 deletions

View file

@ -9,6 +9,7 @@
* Update change to suppress HTTPS verification InsecureRequestWarning to updated package as listed in hacks.txt
* Remove listed hacks.txt record for check that SSLv3 is available because issue was addressed by vendor
* Update chardet packages 2.2.1 to 2.3.0 (ff40135)
* Update cachecontrol library 0.9.3 to 0.11.2
* Add ToTV provider
* Fix Backlog scheduler initialization and change backlog frequency from minutes to days
* Change to consolidate and tidy some provider code

View file

@ -2,6 +2,10 @@
Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org'
__version__ = '0.11.2'
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController

View file

@ -1,16 +1,24 @@
from lib.requests.adapters import HTTPAdapter
import functools
from requests.adapters import HTTPAdapter
from .controller import CacheController
from .cache import DictCache
from .filewrapper import CallbackFileWrapper
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, cache=None, cache_etags=True, controller_class=None,
serializer=None, *args, **kw):
def __init__(self, cache=None,
cache_etags=True,
controller_class=None,
serializer=None,
heuristic=None,
*args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache()
self.heuristic = heuristic
controller_factory = controller_class or CacheController
self.controller = controller_factory(
@ -27,10 +35,13 @@ class CacheControlAdapter(HTTPAdapter):
if request.method == 'GET':
cached_response = self.controller.cached_request(request)
if cached_response:
return self.build_response(request, cached_response, from_cache=True)
return self.build_response(request, cached_response,
from_cache=True)
# check for etags and add headers if appropriate
request.headers.update(self.controller.conditional_headers(request))
request.headers.update(
self.controller.conditional_headers(request)
)
resp = super(CacheControlAdapter, self).send(request, **kw)
@ -44,6 +55,8 @@ class CacheControlAdapter(HTTPAdapter):
cached response
"""
if not from_cache and request.method == 'GET':
# apply any expiration heuristics
if response.status == 304:
# We must have sent an ETag request. This could mean
# that we've been expired already or that we simply
@ -56,14 +69,34 @@ class CacheControlAdapter(HTTPAdapter):
if cached_response is not response:
from_cache = True
# We are done with the server response, read a
# possible response body (compliant servers will
# not return one, but we cannot be 100% sure) and
# release the connection back to the pool.
response.read(decode_content=False)
response.release_conn()
response = cached_response
# We always cache the 301 responses
elif response.status == 301:
self.controller.cache_response(request, response)
else:
# try to cache the response
try:
self.controller.cache_response(request, response)
except Exception as e:
# Failed to cache the results
pass
# Check for any heuristics that might update headers
# before trying to cache.
if self.heuristic:
response = self.heuristic.apply(response)
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper(
response._fp,
functools.partial(
self.controller.cache_response,
request,
response,
)
)
resp = super(CacheControlAdapter, self).build_response(
request, response
@ -78,3 +111,7 @@ class CacheControlAdapter(HTTPAdapter):
resp.from_cache = from_cache
return resp
def close(self):
    """Close the cache backend, then let the parent adapter close itself."""
    # The cache is shut down first, before the parent adapter's own close.
    backend = self.cache
    backend.close()
    parent = super(CacheControlAdapter, self)
    parent.close()

View file

@ -1,9 +1,10 @@
"""
The cache object API for implementing caches. The default is just a
dictionary, which in turns means it is not threadsafe for writing.
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from threading import Lock
class BaseCache(object):
def get(self, key):
@ -15,6 +16,10 @@ class BaseCache(object):
def delete(self, key):
    """Remove ``key`` from the cache; concrete caches must override this.

    Raises:
        NotImplementedError: always, in the base class.
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception class:
    # calling it raises TypeError. ``NotImplementedError`` is the exception
    # meant for abstract methods.
    raise NotImplementedError()
def close(self):
    """Release resources held by the cache.

    The base implementation does nothing.
    """
    return None
class DictCache(BaseCache):
def __init__(self, init_dict=None):

View file

@ -3,6 +3,9 @@ import os
from lockfile import FileLock
from ..cache import BaseCache
from ..controller import CacheController
def _secure_open_write(filename, fmode):
# We only want to write to this file, so open it in write only mode
@ -44,22 +47,24 @@ def _secure_open_write(filename, fmode):
raise
class FileCache(object):
class FileCache(BaseCache):
def __init__(self, directory, forever=False, filemode=0o0600,
dirmode=0o0700):
self.directory = directory
self.forever = forever
self.filemode = filemode
if not os.path.isdir(self.directory):
os.makedirs(self.directory, dirmode)
self.dirmode = dirmode
@staticmethod
def encode(x):
return hashlib.sha224(x.encode()).hexdigest()
def _fn(self, name):
return os.path.join(self.directory, self.encode(name))
# NOTE: This method should not change as some may depend on it.
# See: https://github.com/ionrock/cachecontrol/issues/63
hashed = self.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)
def get(self, key):
name = self._fn(key)
@ -71,7 +76,15 @@ class FileCache(object):
def set(self, key, value):
name = self._fn(key)
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(name), self.dirmode)
except (IOError, OSError):
pass
with FileLock(name) as lock:
# Write our actual file
with _secure_open_write(lock.path, self.filemode) as fh:
fh.write(value)
@ -79,3 +92,12 @@ class FileCache(object):
name = self._fn(key)
if not self.forever:
os.remove(name)
def url_to_file_path(url, filecache):
    """Return the path under which ``filecache`` would store ``url``.

    The URL is first turned into a cache key with
    ``CacheController.cache_url`` and the key is then mapped to a path by
    the cache's ``_fn`` method.

    This does not ensure the file exists!
    """
    return filecache._fn(CacheController.cache_url(url))

View file

@ -36,3 +36,6 @@ class RedisCache(object):
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
def close(self):
    """Disconnect the underlying redis connection(s), if possible.

    NOTE(review): assumes ``self.conn`` is a redis-py client. Such clients
    normally expose ``disconnect()`` on their ``connection_pool`` rather
    than on the client itself, so calling ``self.conn.disconnect()``
    unconditionally would raise AttributeError when the adapter is closed.
    Confirm against the redis version in use.
    """
    # Keep the original behaviour when the object does offer disconnect().
    disconnect = getattr(self.conn, 'disconnect', None)
    if disconnect is None:
        # Otherwise fall back to the connection pool, when there is one.
        pool = getattr(self.conn, 'connection_pool', None)
        disconnect = getattr(pool, 'disconnect', None)
    if disconnect is not None:
        disconnect()

View file

@ -4,23 +4,20 @@ except ImportError:
from urlparse import urljoin
try:
import email.utils
parsedate_tz = email.utils.parsedate_tz
except ImportError:
import email.Utils
parsedate_tz = email.Utils.parsedate_tz
try:
import cPickle as pickle
except ImportError:
import pickle
# Handle the case where the requests has been patched to not have urllib3
# bundled as part of it's source.
# Handle the case where the requests module has been patched to not have
# urllib3 bundled as part of its source.
try:
from lib.requests.packages.urllib3.response import HTTPResponse
from requests.packages.urllib3.response import HTTPResponse
except ImportError:
from urllib3.response import HTTPResponse
try:
from requests.packages.urllib3.util import is_fp_closed
except ImportError:
from urllib3.util import is_fp_closed

View file

@ -4,14 +4,14 @@ The httplib2 algorithms ported for use with requests.
import re
import calendar
import time
import datetime
from email.utils import parsedate_tz
from lib.requests.structures import CaseInsensitiveDict
from requests.structures import CaseInsensitiveDict
from .cache import DictCache
from .compat import parsedate_tz
from .serialize import Serializer
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
@ -21,7 +21,7 @@ def parse_uri(uri):
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return groups[1], groups[3], groups[4], groups[6], groups[8]
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
@ -32,26 +32,29 @@ class CacheController(object):
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
def _urlnorm(self, uri):
@classmethod
def _urlnorm(cls, uri):
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
authority = authority.lower()
scheme = scheme.lower()
authority = authority.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
scheme = scheme.lower()
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
def cache_url(self, uri):
return self._urlnorm(uri)
@classmethod
def cache_url(cls, uri):
return cls._urlnorm(uri)
def parse_cache_control(self, headers):
"""
@ -68,13 +71,20 @@ class CacheController(object):
parts = headers[cc_header].split(',')
parts_with_args = [
tuple([x.strip().lower() for x in part.split("=", 1)])
for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")]
for part in parts if -1 != part.find("=")
]
parts_wo_args = [
(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")
]
retval = dict(parts_with_args + parts_wo_args)
return retval
def cached_request(self, request):
"""
Return a cached response if it exists in the cache, otherwise
return False.
"""
cache_url = self.cache_url(request.url)
cc = self.parse_cache_control(request.headers)
@ -95,7 +105,24 @@ class CacheController(object):
if not resp:
return False
# If we have a cached 301, return it immediately. We don't
# need to test our response for other headers b/c it is
# intrinsically "cacheable" as it is Permanent.
# See:
# https://tools.ietf.org/html/rfc7231#section-6.4.2
#
# Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache).
if resp.status == 301:
return resp
headers = CaseInsensitiveDict(resp.headers)
if not headers or 'date' not in headers:
# Without date or etag, the cached response can never be used
# and should be deleted.
if 'etag' not in headers:
self.cache.delete(cache_url)
return False
now = time.time()
date = calendar.timegm(
@ -104,15 +131,19 @@ class CacheController(object):
current_age = max(0, now - date)
# TODO: There is an assumption that the result will be a
# urllib3 response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
# urllib3 response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(headers)
# determine freshness
freshness_lifetime = 0
# Check the max-age pragma in the cache control header
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age'])
# If there isn't a max-age, check for an expires header
elif 'expires' in headers:
expires = parsedate_tz(headers['expires'])
if expires is not None:
@ -163,32 +194,24 @@ class CacheController(object):
return new_headers
def cache_response(self, request, response):
def cache_response(self, request, response, body=None):
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
if response.status not in [200, 203]:
# handle byte range requests
if response.status not in [200, 203, 300, 301]:
return
# Cache Session Params
cache_auto = getattr(request, 'cache_auto', False)
cache_urls = getattr(request, 'cache_urls', [])
cache_max_age = getattr(request, 'cache_max_age', None)
response_headers = CaseInsensitiveDict(response.headers)
# Check if we are wanting to cache responses from specific urls only
cache_url = self.cache_url(request.url)
if len(cache_urls) > 0 and not any(s in cache_url for s in cache_urls):
return
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(response_headers)
cache_url = self.cache_url(request.url)
# Delete it from the cache if we happen to have it stored there
no_store = cc.get('no-store') or cc_req.get('no-store')
if no_store and self.cache.get(cache_url):
@ -196,21 +219,18 @@ class CacheController(object):
# If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in response_headers:
self.cache.set(cache_url, self.serializer.dumps(request, response))
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
# If we want to cache sites not setup with cache headers then add the proper headers and keep the response
elif cache_auto and not cc and response_headers:
headers = {'Cache-Control': 'public,max-age=%d' % int(cache_max_age or 900)}
response.headers.update(headers)
if 'expires' not in response_headers:
if getattr(response_headers, 'expires', None) is None:
expires = datetime.datetime.utcnow() + datetime.timedelta(days=1)
expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
headers = {'Expires': expires}
response.headers.update(headers)
self.cache.set(cache_url, self.serializer.dumps(request, response))
# Add to the cache any 301s. We do this before looking at
# the Date headers.
elif response.status == 301:
self.cache.set(
cache_url,
self.serializer.dumps(request, response)
)
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
@ -219,10 +239,10 @@ class CacheController(object):
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
if int(cc['max-age']) > 0:
if isinstance(cache_max_age, int):
cc['max-age'] = int(cache_max_age)
response.headers['cache-control'] = ''.join(['%s=%s' % (key, value) for (key, value) in cc.items()])
self.cache.set(cache_url, self.serializer.dumps(request, response))
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body=body),
)
# If the request can expire, it means we should cache it
# in the meantime.
@ -230,7 +250,7 @@ class CacheController(object):
if response_headers['expires']:
self.cache.set(
cache_url,
self.serializer.dumps(request, response),
self.serializer.dumps(request, response, body=body),
)
def update_cached_response(self, request, response):
@ -242,14 +262,30 @@ class CacheController(object):
"""
cache_url = self.cache_url(request.url)
cached_response = self.serializer.loads(request, self.cache.get(cache_url))
cached_response = self.serializer.loads(
request,
self.cache.get(cache_url)
)
if not cached_response:
# we didn't have a cached response
return response
# did so lets update our headers
cached_response.headers.update(response.headers)
# Lets update our headers with the headers from the new request:
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
#
# The server isn't supposed to send headers that would make
# the cached body invalid. But... just in case, we'll be sure
# to strip out ones we know that might be problematic due to
# typical assumptions.
excluded_headers = [
"content-length",
]
cached_response.headers.update(
dict((k, v) for k, v in response.headers.items()
if k.lower() not in excluded_headers)
)
# we want a 200 b/c we have content via the cache
cached_response.status = 200

View file

@ -0,0 +1,63 @@
from io import BytesIO
class CallbackFileWrapper(object):
    """
    Small wrapper around a fp object which will tee everything read into a
    buffer, and when that file is closed it will execute a callback with the
    contents of that buffer.

    All attributes are proxied to the underlying file object.

    This class uses members with a double underscore (__) leading prefix so as
    not to accidentally shadow an attribute.
    """

    def __init__(self, fp, callback):
        self.__buf = BytesIO()
        self.__fp = fp
        self.__callback = callback

    def __getattr__(self, name):
        # The vagaries of garbage collection mean that self.__fp is not
        # always set. Looking the mangled private name[0] up through
        # __getattribute__ raises a plain AttributeError when it is missing,
        # which stops things from recursing infinitely through getattr in
        # the case where self.__fp hasn't been set.
        #
        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
        wrapped = self.__getattribute__('_CallbackFileWrapper__fp')
        return getattr(wrapped, name)

    def __is_fp_closed(self):
        """Report whether the wrapped file looks closed/exhausted."""
        try:
            # Objects exposing an inner ``fp`` count as closed once it is None.
            inner = self.__fp.fp
        except AttributeError:
            try:
                # Otherwise fall back to the regular ``closed`` flag.
                return self.__fp.closed
            except AttributeError:
                # We just don't cache it then.
                # TODO: Add some logging here...
                return False
        return inner is None

    def read(self, amt=None):
        """Read from the wrapped file, recording the data in the buffer.

        Once the wrapped file reports closed, the callback (if still set) is
        invoked one time with everything read so far.
        """
        chunk = self.__fp.read(amt)
        self.__buf.write(chunk)

        if not self.__is_fp_closed():
            return chunk

        notify = self.__callback
        if notify:
            notify(self.__buf.getvalue())

        # We assign this to None here, because otherwise we can get into
        # really tricky problems where the CPython interpreter deadlocks
        # because the callback is holding a reference to something which
        # has a __del__ method. Setting this to None breaks the cycle
        # and allows the garbage collector to do its thing normally.
        self.__callback = None
        return chunk

View file

@ -0,0 +1,134 @@
import calendar
import time
from email.utils import formatdate, parsedate, parsedate_tz
from datetime import datetime, timedelta
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
def expire_after(delta, date=None):
    """Return ``date + delta``, defaulting ``date`` to the current UTC time.

    Args:
        delta: a ``timedelta`` to add.
        date: naive ``datetime`` to start from; when omitted the current
            time in UTC is used.

    The default must be UTC rather than local time, because
    ``datetime_to_header`` converts the result with ``calendar.timegm``,
    which interprets the naive value as UTC. A local-time default would
    skew the resulting header by the machine's UTC offset.
    """
    if date is None:
        date = datetime.utcnow()
    return date + delta
def datetime_to_header(dt):
    """Format the naive UTC datetime ``dt`` as an HTTP date header value.

    ``dt`` is interpreted as UTC (``calendar.timegm``). ``usegmt=True``
    makes ``formatdate`` end the value with ``GMT`` instead of ``-0000``.
    HTTP dates require ``GMT``, and ``TIME_FMT`` in this module uses it too.
    """
    return formatdate(calendar.timegm(dt.timetuple()), usegmt=True)
class BaseHeuristic(object):
    """Base class for heuristics that adjust response headers for caching."""

    def warning(self, response):
        """
        Return a valid 1xx warning header value describing the cache
        adjustments.

        The response is provided to allow warnings like 113
        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
        to explicitly say response is over 24 hours old.
        """
        return '110 - "Response is Stale"'

    def update_headers(self, response):
        """Return a dict of headers to merge into the response headers.

        NOTE: This SHOULD always include some Warning header to
              signify that the response was cached by the client, not
              by way of the provided headers.
        """
        return {}

    def apply(self, response):
        """Merge the heuristic's headers (and warning, if any) into
        ``response.headers`` and return the same response object.
        """
        # The warning is computed before the headers are modified.
        notice = self.warning(response)
        extra = self.update_headers(response)
        response.headers.update(extra)

        if notice is None:
            return response

        response.headers.update({'Warning': notice})
        return response
class OneDayCache(BaseHeuristic):
    """
    Cache the response by providing an expires 1 day in the
    future.
    """

    def update_headers(self, response):
        """Return ``expires``/``cache-control`` headers dated one day after
        the response's Date header.

        Returns an empty dict when the response already has an Expires
        header, or when its Date header is missing or cannot be parsed.
        In those cases there is nothing to base the expiry on.
        """
        if 'expires' in response.headers:
            return {}

        # A missing Date header would raise KeyError on lookup.
        if 'date' not in response.headers:
            return {}

        # parsedate() returns None for unparseable input, which would raise
        # TypeError when sliced below.
        parsed = parsedate(response.headers['date'])
        if parsed is None:
            return {}

        expires = expire_after(timedelta(days=1),
                               date=datetime(*parsed[:6]))
        return {
            'expires': datetime_to_header(expires),
            'cache-control': 'public',
        }
class ExpiresAfter(BaseHeuristic):
    """
    Cache **all** requests for a defined time period.
    """

    def __init__(self, **kw):
        """Store the period; keyword arguments are passed to ``timedelta``."""
        self.delta = timedelta(**kw)

    def update_headers(self, response):
        """Return headers that expire the response ``self.delta`` from now."""
        headers = {}
        headers['expires'] = datetime_to_header(expire_after(self.delta))
        headers['cache-control'] = 'public'
        return headers

    def warning(self, response):
        """Return a 110 warning value that mentions the caching period."""
        return (
            '110 - Automatically cached for %s. Response might be stale'
            % self.delta
        )
class LastModified(BaseHeuristic):
    """
    If there is no Expires header already, fall back on Last-Modified
    using the heuristic from
    http://tools.ietf.org/html/rfc7234#section-4.2.2
    to calculate a reasonable value.

    Firefox also does something like this per
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
    http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
    Unlike mozilla we limit this to 24-hr.
    """
    cacheable_by_default_statuses = set([
        200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
    ])

    def update_headers(self, resp):
        """Return an ``expires`` header derived from Last-Modified, or ``{}``.

        No header is produced when the response already has Expires, has a
        Cache-Control other than ``public``, has a status outside
        ``cacheable_by_default_statuses``, lacks Date or Last-Modified (or
        either cannot be parsed), or when the computed freshness lifetime
        has already elapsed.
        """
        headers = resp.headers

        if 'expires' in headers:
            return {}

        if 'cache-control' in headers and headers['cache-control'] != 'public':
            return {}

        if resp.status not in self.cacheable_by_default_statuses:
            return {}

        if 'date' not in headers or 'last-modified' not in headers:
            return {}

        # Parse both headers first and check the results before converting.
        # calendar.timegm(None) raises TypeError, so the None check has to
        # happen before the conversion, not after it.
        parsed_date = parsedate_tz(headers['date'])
        last_modified = parsedate(headers['last-modified'])
        if parsed_date is None or last_modified is None:
            return {}
        date = calendar.timegm(parsed_date)

        now = time.time()
        current_age = max(0, now - date)
        # Freshness is a tenth of the time since last modification,
        # clamped to the range [0, 24 hours].
        delta = date - calendar.timegm(last_modified)
        freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
        if freshness_lifetime <= current_age:
            return {}

        expires = date + freshness_lifetime
        return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}

    def warning(self, resp):
        """Return None so that ``apply`` adds no Warning header."""
        return None

View file

@ -1,56 +0,0 @@
import requests
from requests import models
from requests.packages.urllib3.response import HTTPResponse
__attrs__ = [
'_content',
'status_code',
'headers',
'url',
'history',
'encoding',
'reason',
'cookies',
'elapsed',
]
def response_getstate(self):
# consume everything
if not self._content_consumed:
self.content
state = dict(
(attr, getattr(self, attr, None))
for attr in __attrs__
)
# deal with our raw content b/c we need it for our cookie jar
state['raw_original_response'] = self.raw._original_response
return state
def response_setstate(self, state):
for name, value in state.items():
if name != 'raw_original_response':
setattr(self, name, value)
setattr(self, 'raw', HTTPResponse())
self.raw._original_response = state['raw_original_response']
def make_responses_pickleable():
try:
version_parts = [int(part) for part in requests.__version__.split('.')]
# must be >= 2.2.x
if not version_parts[0] >= 2 or not version_parts[1] >= 2:
models.Response.__getstate__ = response_getstate
models.Response.__setstate__ = response_setstate
except:
raise
pass
make_responses_pickleable()

View file

@ -1,27 +1,59 @@
import base64
import io
import json
import zlib
from requests.structures import CaseInsensitiveDict
from .compat import HTTPResponse, pickle
def _b64_encode_bytes(b):
return base64.b64encode(b).decode("ascii")
def _b64_encode_str(s):
return _b64_encode_bytes(s.encode("utf8"))
def _b64_decode_bytes(b):
return base64.b64decode(b.encode("ascii"))
def _b64_decode_str(s):
return _b64_decode_bytes(s).decode("utf8")
class Serializer(object):
def dumps(self, request, response, body=None):
response_headers = CaseInsensitiveDict(response.headers)
if body is None:
# TODO: Figure out a way to handle this which doesn't break
# streaming
body = response.read(decode_content=False)
# NOTE: 99% sure this is dead code. I'm only leaving it
# here b/c I don't have a test yet to prove
# it. Basically, before using
# `cachecontrol.filewrapper.CallbackFileWrapper`,
# this made an effort to reset the file handle. The
# `CallbackFileWrapper` short circuits this code by
# setting the body as the content is consumed, the
# result being a `body` argument is *always* passed
# into cache_response, and in turn,
# `Serializer.dump`.
response._fp = io.BytesIO(body)
data = {
"response": {
"body": body,
"headers": response.headers,
"body": _b64_encode_bytes(body),
"headers": dict(
(_b64_encode_str(k), _b64_encode_str(v))
for k, v in response.headers.items()
),
"status": response.status,
"version": response.version,
"reason": response.reason,
"reason": _b64_encode_str(response.reason),
"strict": response.strict,
"decode_content": response.decode_content,
},
@ -35,7 +67,20 @@ class Serializer(object):
header = header.strip()
data["vary"][header] = request.headers.get(header, None)
return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)
# Encode our Vary headers to ensure they can be serialized as JSON
data["vary"] = dict(
(_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
for k, v in data["vary"].items()
)
return b",".join([
b"cc=2",
zlib.compress(
json.dumps(
data, separators=(",", ":"), sort_keys=True,
).encode("utf8"),
),
])
def loads(self, request, data):
# Short circuit if we've been given an empty set of data
@ -66,6 +111,40 @@ class Serializer(object):
# just treat it as a miss and return None
return
def prepare_response(self, request, cached):
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
# Special case the '*' Vary value as it means we cannot actually
# determine if the cached response is suitable for this request.
if "*" in cached.get("vary", {}):
return
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return
body_raw = cached["response"].pop("body")
try:
body = io.BytesIO(body_raw)
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
# a Python 3 str (unicode string), which will cause the above to
# fail with:
#
# TypeError: 'str' does not support the buffer interface
body = io.BytesIO(body_raw.encode('utf8'))
return HTTPResponse(
body=body,
preload_content=False,
**cached["response"]
)
def _loads_v0(self, request, data):
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
@ -78,20 +157,28 @@ class Serializer(object):
except ValueError:
return
# Special case the '*' Vary value as it means we cannot actually
# determine if the cached response is suitable for this request.
if "*" in cached.get("vary", {}):
return self.prepare_response(request, cached)
def _loads_v2(self, request, data):
try:
cached = json.loads(zlib.decompress(data).decode("utf8"))
except ValueError:
return
# Ensure that the Vary headers for the cached response match our
# request
for header, value in cached.get("vary", {}).items():
if request.headers.get(header, None) != value:
return
body = io.BytesIO(cached["response"].pop("body"))
return HTTPResponse(
body=body,
preload_content=False,
**cached["response"]
# We need to decode the items that we've base64 encoded
cached["response"]["body"] = _b64_decode_bytes(
cached["response"]["body"]
)
cached["response"]["headers"] = dict(
(_b64_decode_str(k), _b64_decode_str(v))
for k, v in cached["response"]["headers"].items()
)
cached["response"]["reason"] = _b64_decode_str(
cached["response"]["reason"],
)
cached["vary"] = dict(
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
for k, v in cached["vary"].items()
)
return self.prepare_response(request, cached)

View file

@ -1,34 +0,0 @@
from requests.sessions import Session
class CacheControlSession(Session):
def __init__(self):
super(CacheControlSession, self).__init__()
def get(self, *args, **kw):
# auto-cache response
self.cache_auto = False
if kw.get('cache_auto'):
self.cache_auto = kw.pop('cache_auto')
# urls allowed to cache
self.cache_urls = []
if kw.get('cache_urls'):
self.cache_urls = [str(args[0])] + kw.pop('cache_urls')
# timeout for cached responses
self.cache_max_age = None
if kw.get('cache_max_age'):
self.cache_max_age = int(kw.pop('cache_max_age'))
return super(CacheControlSession, self).get(*args, **kw)
def prepare_request(self, *args, **kw):
# get response
req = super(CacheControlSession, self).prepare_request(*args, **kw)
# attach params to request
req.cache_auto = self.cache_auto
req.cache_urls = self.cache_urls
req.cache_max_age = self.cache_max_age
return req

View file

@ -1,14 +1,19 @@
from .adapter import CacheControlAdapter
from .cache import DictCache
from .session import CacheControlSession
def CacheControl(sess=None, cache=None, cache_etags=True, serializer=None):
sess = sess or CacheControlSession()
def CacheControl(sess,
cache=None,
cache_etags=True,
serializer=None,
heuristic=None):
cache = cache or DictCache()
adapter = CacheControlAdapter(
cache,
cache_etags=cache_etags,
serializer=serializer,
heuristic=heuristic,
)
sess.mount('http://', adapter)
sess.mount('https://', adapter)

View file

@ -563,7 +563,7 @@ class Tvdb:
# get response from TVDB
if self.config['cache_enabled']:
session = CacheControl(cache=caches.FileCache(self.config['cache_location']))
session = CacheControl(requests.session(), cache=caches.FileCache(self.config['cache_location']))
if self.config['proxy']:
log().debug("Using proxy for URL: %s" % url)
session.proxies = {
@ -571,7 +571,7 @@ class Tvdb:
"https": self.config['proxy'],
}
resp = session.get(url, cache_auto=True, params=params)
resp = session.get(url, params=params)
else:
resp = requests.get(url, params=params)
except requests.exceptions.HTTPError, e:
@ -836,15 +836,14 @@ class Tvdb:
# Parse show information
log().debug('Getting all series data for %s' % (sid))
seriesInfoEt = self._getetsrc(
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
)
url = self.config['url_epInfo%s' % ('', '_zip')[self.config['useZip']]] % (sid, language)
show_data = self._getetsrc(url, language=getShowInLanguage)
# check and make sure we have data to process and that it contains a series name
if not len(seriesInfoEt) or (isinstance(seriesInfoEt, dict) and 'seriesname' not in seriesInfoEt['series']):
if not len(show_data) or (isinstance(show_data, dict) and 'seriesname' not in show_data['series']):
return False
for k, v in seriesInfoEt['series'].items():
for k, v in show_data['series'].items():
if v is not None:
if k in ['banner', 'fanart', 'poster']:
v = self.config['url_artworkPrefix'] % (v)
@ -865,16 +864,10 @@ class Tvdb:
# Parse episode data
log().debug('Getting all episodes of %s' % (sid))
if self.config['useZip']:
url = self.config['url_epInfo_zip'] % (sid, language)
else:
url = self.config['url_epInfo'] % (sid, language)
epsEt = self._getetsrc(url, language=language)
if 'episode' not in epsEt:
if 'episode' not in show_data:
return False
episodes = epsEt['episode']
episodes = show_data['episode']
if not isinstance(episodes, list):
episodes = [episodes]

View file

@ -404,7 +404,7 @@ class TVRage:
"https": self.config['proxy'],
}
resp = session.get(url.strip(), cache_auto=True, params=params)
resp = session.get(url.strip(), params=params)
else:
resp = requests.get(url.strip(), params=params)

View file

@ -1154,6 +1154,8 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N
"""
# request session
if None is session:
session = requests.session()
cache_dir = sickbeard.CACHE_DIR or _getTempDir()
session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, 'sessions')))
@ -1224,6 +1226,8 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N
def download_file(url, filename, session=None):
# create session
if None is session:
session = requests.session()
cache_dir = sickbeard.CACHE_DIR or _getTempDir()
session = CacheControl(sess=session, cache=caches.FileCache(os.path.join(cache_dir, 'sessions')))

View file

@ -485,6 +485,10 @@ class TVShow(object):
cachedShow = t[self.indexerid]
cachedSeasons = {}
if None is cachedShow:
logger.log(u'No cache showdata to parse from %s' % sickbeard.indexerApi(self.indexer).name)
return scannedEps
for curResult in sqlResults:
deleteEp = False