Replaced our cache handler 'CacheControl' with 'httpcache', as we found the previous one was not stable enough and was causing more issues than it solved.

Added the cache handler to SickBeard itself, so searches and RSS feeds should now run faster.
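The swap replaces the old session-wrapping call with a standard Requests transport adapter, as the diffs below show. A minimal sketch of the two patterns (not part of the diff; the url variable is illustrative):

from lib import requests
from lib.httpcache import CachingHTTPAdapter

# old pattern (removed): wrap the whole session
# sess = CacheControl(requests.Session(), cache_force=True,
#                     cache=FileCache(cache_location))

# new pattern (added): mount a caching adapter for a URL scheme
sess = requests.Session()
sess.mount('http://', CachingHTTPAdapter())
resp = sess.get(url)
sess.close()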
echel0n 2014-03-16 04:06:21 -07:00
parent 437b46f907
commit ce193ffcdb
21 changed files with 474 additions and 590 deletions

13 lib/cachecontrol/__init__.py Deleted file
View file

@ -1,13 +0,0 @@
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
# patch our requests.models.Response to make them pickleable in older
# versions of requests.
import cachecontrol.patch_requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController

70 lib/cachecontrol/adapter.py Deleted file
View file

@ -1,70 +0,0 @@
from requests.adapters import HTTPAdapter
from cachecontrol.controller import CacheController
from cachecontrol.cache import DictCache
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, cache=None, cache_etags=True, cache_force=False, *args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache()
self.controller = CacheController(self.cache, cache_etags=cache_etags, cache_force=cache_force)
def send(self, request, **kw):
"""Send a request. Use the request information to see if it
exists in the cache.
"""
if request.method == 'GET':
cached_response = self.controller.cached_request(
request.url, request.headers
)
if cached_response:
# Cached responses should not have a raw field since
# they *cannot* be created from some stream.
cached_response.raw = None
return cached_response
# check for etags and add headers if appropriate
headers = self.controller.add_headers(request.url)
request.headers.update(headers)
resp = super(CacheControlAdapter, self).send(request, **kw)
return resp
def build_response(self, request, response):
"""Build a response by making a request or using the cache.
This will end up calling send and returning a potentially
cached response
"""
resp = super(CacheControlAdapter, self).build_response(
request, response
)
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Try to store the response if it is a GET
elif request.method == 'GET':
if response.status == 304:
# We must have sent an ETag request. This could mean
# that we've been expired already or that we simply
# have an etag. In either case, we want to try and
# update the cache if that is the case.
resp = self.controller.update_cached_response(
request, response
)
else:
# try to cache the response
self.controller.cache_response(request, resp)
# Give the request a from_cache attr to let people use it
# rather than testing for hasattr.
if not hasattr(resp, 'from_cache'):
resp.from_cache = False
return resp

36 lib/cachecontrol/cache.py Deleted file
View file

@ -1,36 +0,0 @@
"""
The cache object API for implementing caches. The default is just a
dictionary, which in turn means it is not threadsafe for writing.
"""
from threading import Lock
class BaseCache(object):
def get(self, key):
raise NotImplementedError()
def set(self, key, value):
raise NotImplementedError()
def delete(self, key):
raise NotImplementedError()
class DictCache(BaseCache):
def __init__(self, init_dict=None):
self.lock = Lock()
self.data = init_dict or {}
def get(self, key):
return self.data.get(key, None)
def set(self, key, value):
with self.lock:
self.data.update({key: value})
def delete(self, key):
with self.lock:
if key in self.data:
self.data.pop(key)

18 lib/cachecontrol/caches/__init__.py Deleted file
View file

@ -1,18 +0,0 @@
from textwrap import dedent
try:
from cachecontrol.caches.file_cache import FileCache
except ImportError:
notice = dedent('''
NOTE: In order to use the FileCache you must have
lockfile installed. You can install it via pip:
pip install lockfile
''')
print(notice)
try:
import redis
from cachecontrol.caches.redis_cache import RedisCache
except ImportError:
pass

43 lib/cachecontrol/caches/file_cache.py Deleted file
View file

@ -1,43 +0,0 @@
import os
import codecs
from hashlib import md5
try:
from cPickle import load, dump
except ImportError: # Python 3.x
from pickle import load, dump
from lib.lockfile import FileLock
class FileCache(object):
def __init__(self, directory, forever=False):
self.directory = directory
self.forever = forever
if not os.path.isdir(self.directory):
os.mkdir(self.directory)
def encode(self, x):
return md5(x.encode()).hexdigest()
def _fn(self, name):
return os.path.join(self.directory, self.encode(name))
def get(self, key):
name = self._fn(key)
if os.path.exists(name):
return load(codecs.open(name, 'rb'))
def set(self, key, value):
name = self._fn(key)
lock = FileLock(name)
with lock:
with codecs.open(lock.path, 'w+b') as fh:
dump(value, fh)
def delete(self, key):
if not self.forever:
os.remove(self._fn(key))

46 lib/cachecontrol/caches/redis_cache.py Deleted file
View file

@ -1,46 +0,0 @@
from __future__ import division
from datetime import datetime
try:
from cPickle import loads, dumps
except ImportError: # Python 3.x
from pickle import loads, dumps
def total_seconds(td):
"""Python 2.6 compatability"""
if hasattr(td, 'total_seconds'):
return td.total_seconds()
ms = td.microseconds
secs = (td.seconds + td.days * 24 * 3600)
return (ms + secs * 10**6) / 10**6
class RedisCache(object):
def __init__(self, conn):
self.conn = conn
def get(self, key):
val = self.conn.get(key)
if val:
return loads(val)
return None
def set(self, key, value, expires=None):
if not expires:
self.conn.set(key, dumps(value))
else:
expires = expires - datetime.now()
self.conn.setex(key, total_seconds(expires), dumps(value))
def delete(self, key):
self.conn.delete(key)
def clear(self):
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
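A hedged usage sketch for this backend (not part of the diff; connection details are illustrative, and it requires the redis-py package):

import redis
from cachecontrol.caches.redis_cache import RedisCache

cache = RedisCache(redis.Redis(host='localhost', port=6379))
cache.set('key', {'some': 'value'})
cache.get('key')  # {'some': 'value'}, unpickled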

12 lib/cachecontrol/compat.py Deleted file
View file

@ -1,12 +0,0 @@
try:
from urllib.parse import urljoin
except ImportError:
from urlparse import urljoin
try:
import email.utils
parsedate_tz = email.utils.parsedate_tz
except ImportError:
import email.Utils
parsedate_tz = email.Utils.parsedate_tz

256 lib/cachecontrol/controller.py Deleted file
View file

@ -1,256 +0,0 @@
"""
The httplib2 algorithms ported for use with requests.
"""
import re
import calendar
import time
import os
from cachecontrol.cache import DictCache
from cachecontrol.compat import parsedate_tz
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
def parse_uri(uri):
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
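# Illustrative only (not part of the original file): parse_uri splits an
# absolute URI into five RFC 3986 components, e.g.
#   parse_uri("http://example.com/shows?id=1#top")
#   -> ('http', 'example.com', '/shows', 'id=1', 'top')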
class CacheController(object):
"""An interface to see if request should cached or not.
"""
def __init__(self, cache=None, cache_etags=True, cache_force=False):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
self.cache_force = cache_force
def _urlnorm(self, uri):
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
authority = authority.lower()
scheme = scheme.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
scheme = scheme.lower()
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
def cache_url(self, uri):
return self._urlnorm(uri)
def parse_cache_control(self, headers):
"""
Parse the cache control headers returning a dictionary with values
for the different directives.
"""
retval = {}
cc_header = 'cache-control'
if 'Cache-Control' in headers:
cc_header = 'Cache-Control'
if cc_header in headers:
parts = headers[cc_header].split(',')
parts_with_args = [
tuple([x.strip().lower() for x in part.split("=", 1)])
for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")]
retval = dict(parts_with_args + parts_wo_args)
return retval
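# Illustrative only: directives with arguments keep their string values,
# bare directives map to 1, e.g.
#   parse_cache_control({'Cache-Control': 'max-age=300, no-store'})
#   -> {'max-age': '300', 'no-store': 1}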
def cached_request(self, url, headers):
cache_url = self.cache_url(url)
cc = self.parse_cache_control(headers)
# non-caching states
no_cache = 'no-cache' in cc
# max-age values are parsed as strings, so compare against '0'
if cc.get('max-age') == '0':
no_cache = True
# see if it is in the cache anyways
in_cache = self.cache.get(cache_url)
if no_cache or not in_cache:
return False
# It is in the cache, so lets see if it is going to be
# fresh enough
resp = self.cache.get(cache_url)
# Check our Vary header to make sure our request headers match
# up. We don't delete it from the cache, though; we just don't
# return our cached value.
#
# NOTE: Because httplib2 stores raw content, it denotes
# headers that were sent in the original response by
# adding -varied-$name. We don't have to do that b/c we
# are storing the object which has a reference to the
# original request. If that changes, then I'd propose
# using the varied headers in the cache key to avoid the
# situation all together.
if 'vary' in resp.headers:
varied_headers = resp.headers['vary'].replace(' ', '').split(',')
original_headers = resp.request.headers
for header in varied_headers:
# If our headers don't match for the headers listed in
# the vary header, then don't use the cached response
if headers.get(header, None) != original_headers.get(header):
return False
now = time.time()
date = calendar.timegm(
parsedate_tz(resp.headers['date'])
)
current_age = max(0, now - date)
# TODO: There is an assumption that the result will be a
# requests response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(resp.headers)
# determine freshness
freshness_lifetime = 0
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age'])
elif 'expires' in resp.headers:
expires = parsedate_tz(resp.headers['expires'])
if expires is not None:
expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time)
# determine if we are setting freshness limit in the req
if 'max-age' in cc:
try:
freshness_lifetime = int(cc['max-age'])
except ValueError:
freshness_lifetime = 0
if 'min-fresh' in cc:
try:
min_fresh = int(cc['min-fresh'])
except ValueError:
min_fresh = 0
# adjust our current age by our min fresh
current_age += min_fresh
# see how fresh we actually are
fresh = (freshness_lifetime > current_age)
if fresh:
if resp.ok:
# make sure we set the from_cache to true
resp.from_cache = True
return resp
return False
# we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in resp.headers:
self.cache.delete(cache_url)
if 'etag' in resp.headers:
headers['If-None-Match'] = resp.headers['ETag']
if 'last-modified' in resp.headers:
headers['If-Modified-Since'] = resp.headers['Last-Modified']
# return the original handler
return False
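# Illustrative numbers: a cached response whose Date header is 300 seconds
# old and which carries 'max-age=600' yields freshness_lifetime (600) >
# current_age (300), so it is returned with from_cache set to True.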
def add_headers(self, url, resp=None):
resp = self.cache.get(url)
if resp and 'etag' in resp.headers:
return {'If-None-Match': resp.headers['etag']}
return {}
def cache_response(self, request, resp):
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
if resp.status_code not in [200, 203]:
return
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(resp.headers)
cache_url = self.cache_url(request.url)
# Delete it from the cache if we happen to have it stored there
no_store = cc.get('no-store') or cc_req.get('no-store')
if no_store and self.cache.get(cache_url):
self.cache.delete(cache_url)
# If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in resp.headers:
self.cache.set(cache_url, resp)
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif 'date' in resp.headers:
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
if int(cc['max-age']) > 0:
self.cache.set(cache_url, resp)
# If the request can expire, it means we should cache it
# in the meantime.
elif 'expires' in resp.headers:
if resp.headers['expires']:
self.cache.set(cache_url, resp)
# If the request is for our local cache, it means we should cache it
elif self.cache_force:
resp.headers.update({'cache-control': 'max-age=900, private'})
self.cache.set(cache_url, resp)
def update_cached_response(self, request, response):
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
cache_url = self.cache_url(request.url)
resp = self.cache.get(cache_url)
if not resp:
# we didn't have a cached response
return response
# we did, so let's update our headers with the 304's fresh headers
resp.headers.update(response.headers)
# we want a 200 b/c we have content via the cache
resp.status_code = 200
# update the request as it has the if-none-match header + any
# other headers that the server might have updated (ie Date,
# Cache-Control, Expires, etc.)
resp.request = request
# update our cache
self.cache.set(cache_url, resp)
# Let everyone know this was from the cache.
resp.from_cache = True
return resp

56 lib/cachecontrol/patch_requests.py Deleted file
View file

@ -1,56 +0,0 @@
import requests
from requests import models
from requests.packages.urllib3.response import HTTPResponse
__attrs__ = [
'_content',
'status_code',
'headers',
'url',
'history',
'encoding',
'reason',
'cookies',
'elapsed',
]
def response_getstate(self):
# consume everything
if not self._content_consumed:
self.content
state = dict(
(attr, getattr(self, attr, None))
for attr in __attrs__
)
# deal with our raw content b/c we need it for our cookie jar
state['raw_original_response'] = self.raw._original_response
return state
def response_setstate(self, state):
for name, value in state.items():
if name != 'raw_original_response':
setattr(self, name, value)
setattr(self, 'raw', HTTPResponse())
self.raw._original_response = state['raw_original_response']
def make_responses_pickleable():
try:
version_parts = [int(part) for part in requests.__version__.split('.')]
# only patch requests versions older than 2.2.x; newer releases
# pickle Response objects natively
if (version_parts[0], version_parts[1]) < (2, 2):
models.Response.__getstate__ = response_getstate
models.Response.__setstate__ = response_setstate
except Exception:
pass
make_responses_pickleable()

10 lib/cachecontrol/wrapper.py Deleted file
View file

@ -1,10 +0,0 @@
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
def CacheControl(sess, cache=None, cache_etags=True, cache_force=False):
cache = cache or DictCache()
adapter = CacheControlAdapter(cache, cache_etags=cache_etags, cache_force=cache_force)
sess.mount('http://', adapter)
return sess
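For reference (not part of the diff), this wrapper was invoked by handing it a session plus an optional cache backend, as the old tvdb_api/tvrage_api code further down did; the cache path here is illustrative:

from lib import requests
from lib.cachecontrol.wrapper import CacheControl
from lib.cachecontrol.caches.file_cache import FileCache

sess = CacheControl(requests.Session(), cache_force=True,
                    cache=FileCache('/tmp/api_cache'))
resp = sess.get('http://example.com/feed')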

14
lib/httpcache/__init__.py Normal file
View file

@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
"""
__init__.py
~~~~~~~~~~~
Defines the public API to the httpcache module.
"""
__version__ = '0.1.3'
from .cache import HTTPCache
from .adapter import CachingHTTPAdapter
__all__ = ['HTTPCache', 'CachingHTTPAdapter']

55
lib/httpcache/adapter.py Normal file
View file

@ -0,0 +1,55 @@
"""
adapter.py
~~~~~~~~~~
Contains an implementation of an HTTP adapter for Requests that is aware of the
cache contained in this module.
"""
from requests.adapters import HTTPAdapter
from .cache import HTTPCache
class CachingHTTPAdapter(HTTPAdapter):
"""
An HTTP-caching-aware Transport Adapter for Python Requests. The central
portion of the API.
:param capacity: The maximum capacity of the backing cache.
"""
def __init__(self, capacity=50, **kwargs):
super(CachingHTTPAdapter, self).__init__(**kwargs)
#: The HTTP Cache backing the adapter.
self.cache = HTTPCache(capacity=capacity)
def send(self, request, **kwargs):
"""
Sends a PreparedRequest object, respecting RFC 2616's rules about HTTP
caching. Returns a Response object that may have been cached.
:param request: The Requests :class:`PreparedRequest <PreparedRequest>` object to send.
"""
cached_resp = self.cache.retrieve(request)
if cached_resp is not None:
return cached_resp
else:
return super(CachingHTTPAdapter, self).send(request, **kwargs)
def build_response(self, request, response):
"""
Builds a Response object from a urllib3 response. May involve returning
a cached Response.
:param request: The Requests :class:`PreparedRequest <PreparedRequest>` object sent.
:param response: The urllib3 response.
"""
resp = super(CachingHTTPAdapter, self).build_response(request,
response)
if resp.status_code == 304:
resp = self.cache.handle_304(resp)
else:
self.cache.store(resp)
return resp
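A sketch (not part of the diff) of the adapter's behaviour once mounted, assuming a server that sends an explicit expiry such as Cache-Control: max-age; the URL is illustrative:

from lib import requests
from lib.httpcache import CachingHTTPAdapter

sess = requests.Session()
sess.mount('http://', CachingHTTPAdapter(capacity=50))

r1 = sess.get('http://example.com/rss')  # network fetch; build_response() stores it
r2 = sess.get('http://example.com/rss')  # within max-age: send() returns the cached copy

A 304 from a conditional revalidation never reaches the caller as-is: build_response() swaps it for the stored entry via handle_304().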

207
lib/httpcache/cache.py Normal file
View file

@ -0,0 +1,207 @@
# -*- coding: utf-8 -*-
"""
cache.py
~~~~~~~~
Contains the primary cache structure used in http-cache.
"""
from .structures import RecentOrderedDict
from .utils import (parse_date_header, build_date_header,
expires_from_cache_control, url_contains_query)
from datetime import datetime
# RFC 2616 specifies that we can cache 200 OK, 203 Non Authoritative,
# 206 Partial Content, 300 Multiple Choices, 301 Moved Permanently and
# 410 Gone responses. We don't cache 206s at the moment because we
# don't handle Range and Content-Range headers.
CACHEABLE_RCS = (200, 203, 300, 301, 410)
# Cacheable verbs.
CACHEABLE_VERBS = ('GET', 'HEAD', 'OPTIONS')
# Some verbs MUST invalidate the resource in the cache, according to RFC 2616.
# If we send one of these, or any verb we don't recognise, invalidate the
# cache entry for that URL. As it happens, these are also the cacheable
# verbs. That works out well for us.
NON_INVALIDATING_VERBS = CACHEABLE_VERBS
class HTTPCache(object):
"""
The HTTP Cache object. Manages caching of responses according to RFC 2616,
adding necessary headers to HTTP request objects, and returning cached
responses based on server responses.
This object is not expected to be used by most users. It is exposed as part
of the public API for users who feel the need for more control. This API
may change in a minor version increase. Be warned.
:param capacity: (Optional) The maximum capacity of the HTTP cache.
"""
def __init__(self, capacity=50):
#: The maximum capacity of the HTTP cache. When this many cache entries
#: end up in the cache, the oldest entries are removed.
self.capacity = capacity
#: The cache backing store. Cache entries are stored here as key-value
#: pairs. The key is the URL used to retrieve the cached response. The
#: value is a python dict, which stores three objects: the response
#: (keyed off of 'response'), the retrieval or creation date (keyed off
#: of 'creation') and the cache expiry date (keyed off of 'expiry').
#: This last value may be None.
self._cache = RecentOrderedDict()
def store(self, response):
"""
Takes an HTTP response object and stores it in the cache according to
RFC 2616. Returns a boolean value indicating whether the response was
cached or not.
:param response: Requests :class:`Response <Response>` object to cache.
"""
# Define an internal utility function.
def date_header_or_default(header_name, default, response):
try:
date_header = response.headers[header_name]
except KeyError:
value = default
else:
value = parse_date_header(date_header)
return value
if response.status_code not in CACHEABLE_RCS:
return False
if response.request.method not in CACHEABLE_VERBS:
return False
url = response.url
now = datetime.utcnow()
# Get the value of the 'Date' header, if it exists. If it doesn't, just
# use now.
creation = date_header_or_default('Date', now, response)
# Get the value of the 'Cache-Control' header, if it exists.
cc = response.headers.get('Cache-Control', None)
if cc is not None:
expiry = expires_from_cache_control(cc, now)
# If the above returns None, we are explicitly instructed not to
# cache this.
if expiry is None:
return False
# Get the value of the 'Expires' header, if it exists, and if we don't
# have anything from the 'Cache-Control' header.
if cc is None:
expiry = date_header_or_default('Expires', None, response)
# If the expiry date is earlier or the same as the Date header, don't
# cache the response at all.
if expiry is not None and expiry <= creation:
return False
# If there's a query portion of the url and it's a GET, don't cache
# this unless explicitly instructed to.
if expiry is None and response.request.method == 'GET':
if url_contains_query(url):
return False
self._cache[url] = {'response': response,
'creation': creation,
'expiry': expiry}
self.__reduce_cache_count()
return True
def handle_304(self, response):
"""
Given a 304 response, retrieves the cached entry. This unconditionally
returns the cached entry, so it can be used when the 'intelligent'
behaviour of retrieve() is not desired.
Returns None if there is no entry in the cache.
:param response: The 304 response to find the cached entry for. Should be a Requests :class:`Response <Response>`.
"""
try:
cached_response = self._cache[response.url]['response']
except KeyError:
cached_response = None
return cached_response
def retrieve(self, request):
"""
Retrieves a cached response if possible.
If there is a response that can be unconditionally returned (e.g. one
that had a Cache-Control header set), that response is returned. If
there is one that can be conditionally returned (if a 304 is returned),
applies an If-Modified-Since header to the request and returns None.
:param request: The Requests :class:`PreparedRequest <PreparedRequest>` object.
"""
return_response = None
url = request.url
try:
cached_response = self._cache[url]
except KeyError:
return None
if request.method not in NON_INVALIDATING_VERBS:
del self._cache[url]
return None
if cached_response['expiry'] is None:
# We have no explicit expiry time, so we weren't instructed to
# cache. Add an 'If-Modified-Since' header.
creation = cached_response['creation']
header = build_date_header(creation)
request.headers['If-Modified-Since'] = header
else:
# We have an explicit expiry time. If we're earlier than the expiry
# time, return the response.
now = datetime.utcnow()
if now <= cached_response['expiry']:
return_response = cached_response['response']
else:
del self._cache[url]
return return_response
def __reduce_cache_count(self):
"""
Drops the number of entries in the cache to the capacity of the cache.
Walks the backing RecentOrderedDict in order from oldest to youngest.
Deletes cache entries that are either invalid or being speculatively
cached until the number of cache entries drops to the capacity. If this
leaves the cache above capacity, begins deleting the least-used cache
entries that are still valid until the cache has space.
"""
if len(self._cache) <= self.capacity:
return
to_delete = len(self._cache) - self.capacity
keys = list(self._cache.keys())
for key in keys:
if self._cache[key]['expiry'] is None:
del self._cache[key]
to_delete -= 1
if to_delete == 0:
return
keys = list(self._cache.keys())
for i in range(to_delete):
del self._cache[keys[i]]
return
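For the rare callers the docstring above mentions, the cache can also be driven directly; a hedged sketch (not part of the diff; resp stands for a Requests Response obtained elsewhere):

from lib.httpcache import HTTPCache

cache = HTTPCache(capacity=50)
stored = cache.store(resp)             # True only when RFC 2616 permits caching
cached = cache.retrieve(resp.request)  # a Response, or None (possibly after adding
                                       # If-Modified-Since for revalidation)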

10
lib/httpcache/compat.py Normal file
View file

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
"""
compat.py
~~~~~~~~~
Defines cross-platform functions and classes needed to achieve proper
functionality.
"""
pass

59 lib/httpcache/structures.py Normal file
View file

@ -0,0 +1,59 @@
"""
structures.py
~~~~~~~~~~~~~
Defines structures used by the httpcache module.
"""
class RecentOrderedDict(dict):
"""
A custom variant of the dictionary that moves the most recently inserted
_or_ retrieved entry to the end of the enumeration order, so iteration
runs from least to most recently used.
"""
def __init__(self):
self._data = {}
self._order = []
def __setitem__(self, key, value):
if key in self._data:
self._order.remove(key)
self._order.append(key)
self._data[key] = value
def __getitem__(self, key):
value = self._data[key]
self._order.remove(key)
self._order.append(key)
return value
def __delitem__(self, key):
del self._data[key]
self._order.remove(key)
def __iter__(self):
return iter(self._order)
def __len__(self):
return len(self._order)
def __contains__(self, value):
return self._data.__contains__(value)
def items(self):
return [(key, self._data[key]) for key in self._order]
def keys(self):
return self._order
def values(self):
return [self._data[key] for key in self._order]
def clear(self):
self._data = {}
self._order = []
def copy(self):
c = RecentOrderedDict()
c._data = self._data.copy()
c._order = self._order[:]
return c
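Illustrative behaviour (not part of the diff): both insertion and lookup move a key to the most-recent end, so keys() enumerates entries oldest first, which is the order the eviction pass in cache.py relies on:

d = RecentOrderedDict()
d['a'] = 1
d['b'] = 2
d['a']           # lookup refreshes 'a'
print(d.keys())  # ['b', 'a'] -- least recently used first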

97
lib/httpcache/utils.py Normal file
View file

@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
"""
utils.py
~~~~~~~~
Utility functions for use with httpcache.
"""
from datetime import datetime, timedelta
try: # Python 2
from urlparse import urlparse
except ImportError: # Python 3
from urllib.parse import urlparse
RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT"
RFC_850_DT_STR = "%A, %d-%b-%y %H:%M:%S GMT"
def parse_date_header(header):
"""
Given a date header in the form specified by RFC 2616, return a Python
datetime object.
RFC 2616 specifies three possible formats for date/time headers, and
makes it clear that all dates/times should be in UTC/GMT. That is assumed
by this library, which simply does everything in UTC. This currently does
not parse the C asctime() string, because that's effort.
This function does _not_ follow Postel's Law. If a format does not strictly
match the defined strings, this function returns None. This is considered
'safe' behaviour.
"""
try:
dt = datetime.strptime(header, RFC_1123_DT_STR)
except ValueError:
try:
dt = datetime.strptime(header, RFC_850_DT_STR)
except ValueError:
dt = None
except TypeError:
dt = None
return dt
def build_date_header(dt):
"""
Given a Python datetime object, build a Date header value according to
RFC 2616.
RFC 2616 specifies that the RFC 1123 form is to be preferred, so that is
what we use.
"""
return dt.strftime(RFC_1123_DT_STR)
def expires_from_cache_control(header, current_time):
"""
Given a Cache-Control header, builds a Python datetime object corresponding
to the expiry time (in UTC). This function should respect all relevant
Cache-Control directives.
Takes current_time as an argument to ensure that 'max-age=0' generates the
correct behaviour without being special-cased.
Returns None to indicate that a request must not be cached.
"""
# Cache control header values are made of multiple comma separated fields.
# Splitting them like this is probably a bad idea, but I'm going to roll with
# it for now. We'll come back to it.
fields = header.split(', ')
duration = None
for field in fields:
# Right now we don't handle no-cache applied to specific fields. To be
# as 'nice' as possible, treat any no-cache as applying to the whole
# request. Bail early, because there's no reason to stick around.
if field.startswith('no-cache') or field == 'no-store':
return None
if field.startswith('max-age'):
_, duration = field.split('=')
duration = int(duration)
if duration:
interval = timedelta(seconds=int(duration))
return current_time + interval
def url_contains_query(url):
"""
A very stupid function for determining if a URL contains a query string
or not.
"""
if urlparse(url).query:
return True
else:
return False
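Expected outputs for the date and directive helpers, given the implementation above (illustrative, not part of the diff):

from datetime import datetime

now = datetime.utcnow()
expires_from_cache_control('max-age=600', now)  # now + 600 seconds
expires_from_cache_control('no-cache', now)     # None: do not cache
expires_from_cache_control('max-age=0', now)    # None: zero duration is never cached
parse_date_header('Sun, 16 Mar 2014 04:06:21 GMT')  # datetime(2014, 3, 16, 4, 6, 21)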

lib/requests/adapters.py
View file

@ -9,6 +9,7 @@ and maintain connections.
""" """
import socket import socket
import copy
from .models import Response from .models import Response
from .packages.urllib3.poolmanager import PoolManager, proxy_from_url from .packages.urllib3.poolmanager import PoolManager, proxy_from_url
@ -62,7 +63,7 @@ class HTTPAdapter(BaseAdapter):
Usage:: Usage::
>>> import requests >>> import lib.requests
>>> s = requests.Session() >>> s = requests.Session()
>>> a = requests.adapters.HTTPAdapter(max_retries=3) >>> a = requests.adapters.HTTPAdapter(max_retries=3)
>>> s.mount('http://', a) >>> s.mount('http://', a)

lib/tvdb_api/tvdb_api.py
View file

@ -39,19 +39,14 @@ except ImportError:
gzip = None
from lib import requests
-from lib.cachecontrol.wrapper import CacheControl
-from lib.cachecontrol.caches.file_cache import FileCache
from tvdb_ui import BaseUI, ConsoleUI
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound,
tvdb_seasonnotfound, tvdb_episodenotfound, tvdb_attributenotfound)
def log():
return logging.getLogger("tvdb_api")
class ShowContainer(dict):
"""Simple dict that holds a series of Show instances
"""
@ -518,12 +513,15 @@ class Tvdb:
# cacheControl
if self.config['cache_enabled']:
-sess = CacheControl(requests.Session(), cache_force=True, cache=FileCache(self.config['cache_location']))
+from lib.httpcache import CachingHTTPAdapter
+sess = requests.Session()
+sess.mount('http://', CachingHTTPAdapter())
else:
sess = requests.Session()
# get response from TVDB
resp = sess.get(url, params=params)
+sess.close()
except requests.HTTPError, e:
raise tvdb_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
@ -536,7 +534,7 @@ class Tvdb:
except Exception, e:
raise tvdb_error("Unknown exception occured: " + str(e.message) + " while loading URL " + str(url))
-if resp.ok:
+if resp.ok and resp.content:
if 'application/zip' in resp.headers.get("Content-Type", ''):
try:
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
@ -559,11 +557,11 @@ class Tvdb:
try:
# TVDB doesn't sanitize \r (CR) from user input in some fields,
# remove it to avoid errors. Change from SickBeard, from will14m
-return ElementTree.fromstring(src.rstrip("\r"))
+return ElementTree.fromstring(src.rstrip("\r")) if src else None
except SyntaxError:
src = self._loadUrl(url, params=params, language=language)
try:
-return ElementTree.fromstring(src.rstrip("\r"))
+return ElementTree.fromstring(src.rstrip("\r")) if src else None
except SyntaxError, exceptionmsg:
errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
exceptionmsg

lib/tvrage_api/tvrage_api.py
View file

@ -30,10 +30,7 @@ except ImportError:
import xml.etree.ElementTree as ElementTree
from lib.dateutil.parser import parse
from lib import requests
-from lib.cachecontrol.wrapper import CacheControl
-from lib.cachecontrol.caches.file_cache import FileCache
from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
@ -42,7 +39,6 @@ from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
def log():
return logging.getLogger("tvrage_api")
class ShowContainer(dict):
"""Simple dict that holds a series of Show instances
"""
@ -351,12 +347,15 @@ class TVRage:
# cacheControl
if self.config['cache_enabled']:
-sess = CacheControl(requests.Session(), cache_force=True, cache=FileCache(self.config['cache_location']))
+from lib.httpcache import CachingHTTPAdapter
+sess = requests.Session()
+sess.mount('http://', CachingHTTPAdapter())
else:
sess = requests.Session()
# get response from TVRage
resp = sess.get(url, params=params)
+sess.close()
except requests.HTTPError, e:
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
@ -366,7 +365,7 @@ class TVRage:
except requests.Timeout, e:
raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
-return resp.content if resp.ok else None
+return resp.content if resp.ok and resp.content else None
def _getetsrc(self, url, params=None):
"""Loads a URL using caching, returns an ElementTree of the source

sickbeard/helpers.py
View file

@ -35,6 +35,7 @@ import base64
from lib import requests
from httplib import BadStatusLine
from itertools import izip, cycle
+from lib.httpcache import CachingHTTPAdapter
try:
import json
@ -169,6 +170,9 @@ def getURL(url, post_data=None, headers=None, params=None, timeout=None):
Returns a byte-string retrieved from the url provider.
"""
+# Cache Handler
+sess = requests.Session()
+sess.mount('http://', CachingHTTPAdapter())
req_headers = ['User-Agent', USER_AGENT, 'Accept-Encoding', 'gzip,deflate']
if headers:
@ -182,8 +186,8 @@ Returns a byte-string retrieved from the url provider.
url = urlparse.urlunparse(parsed)
it = iter(req_headers)
-sess = requests.session()
resp = sess.get(url, params=params, data=post_data, headers=dict(zip(it, it)))
+sess.close()
except requests.HTTPError, e:
logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
return None
@ -196,7 +200,7 @@ Returns a byte-string retrieved from the url provider.
logger.log(u"Connection timed out " + str(e.message) + " while loading URL " + url, logger.WARNING)
return None
-return resp.content if resp.ok else None
+return resp.content if resp.ok and resp.content else None
def _remove_file_failed(file):
try:
@ -206,8 +210,11 @@ def _remove_file_failed(file):
def download_file(url, filename):
try:
-sess = requests.session()
+# cache handler
+sess = requests.Session()
+sess.mount('http://', CachingHTTPAdapter())
req = sess.get(url, stream=True)
#CHUNK = 16 * 1024
with open(filename, 'wb') as fp:
for chunk in req.iter_content(chunk_size=(16 *1024)):
@ -215,7 +222,7 @@ def download_file(url, filename):
fp.write(chunk)
fp.flush()
fp.close()
-req.close()
+sess.close()
except requests.HTTPError, e:
_remove_file_failed(filename)

View file

@ -11,19 +11,16 @@ from sickbeard.indexers.indexer_api import indexerApi
from sickbeard.indexers.indexer_exceptions import indexer_exception
class APICheck(unittest.TestCase):
-indexer_id = 'Continum'
-indexer = 'TVRage'
+indexer_id = 81189
+indexer = 'Tvdb'
+lang = "en"
# Set our common indexer_api options here
-INDEXER_API_PARMS = {'apikey': 'Uhewg1Rr0o62fvZvUIZt',
-'language': 'en',
-'useZip': True}
-INDEXER_API_PARMS['indexer'] = indexer
+INDEXER_API_PARMS = {'indexer': indexer}
lindexer_api_parms = INDEXER_API_PARMS.copy()
try:
-# showurl = indexerApi(**lindexer_api_parms).config['base_url'] + str(indexer_id) + '/all/en.zip'
+lang_id = indexerApi().config['langabbv_to_id'][lang]
t = indexerApi(cache=True, **lindexer_api_parms)
myEp = t[indexer_id]