diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py deleted file mode 100644 index 693e11f1..00000000 --- a/lib/cachecontrol/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""CacheControl import Interface. - -Make it easy to import from cachecontrol without long namespaces. -""" - -# patch our requests.models.Response to make them pickleable in older -# versions of requests. - -import cachecontrol.patch_requests - -from cachecontrol.wrapper import CacheControl -from cachecontrol.adapter import CacheControlAdapter -from cachecontrol.controller import CacheController diff --git a/lib/cachecontrol/adapter.py b/lib/cachecontrol/adapter.py deleted file mode 100644 index e990746c..00000000 --- a/lib/cachecontrol/adapter.py +++ /dev/null @@ -1,70 +0,0 @@ -from requests.adapters import HTTPAdapter - -from cachecontrol.controller import CacheController -from cachecontrol.cache import DictCache - - -class CacheControlAdapter(HTTPAdapter): - invalidating_methods = set(['PUT', 'DELETE']) - - def __init__(self, cache=None, cache_etags=True, cache_force=False, *args, **kw): - super(CacheControlAdapter, self).__init__(*args, **kw) - self.cache = cache or DictCache() - self.controller = CacheController(self.cache, cache_etags=cache_etags, cache_force=cache_force) - - def send(self, request, **kw): - """Send a request. Use the request information to see if it - exists in the cache. - """ - if request.method == 'GET': - cached_response = self.controller.cached_request( - request.url, request.headers - ) - if cached_response: - # Cached responses should not have a raw field since - # they *cannot* be created from some stream. - cached_response.raw = None - return cached_response - - # check for etags and add headers if appropriate - headers = self.controller.add_headers(request.url) - request.headers.update(headers) - - resp = super(CacheControlAdapter, self).send(request, **kw) - return resp - - def build_response(self, request, response): - """Build a response by making a request or using the cache. - - This will end up calling send and returning a potentially - cached response - """ - resp = super(CacheControlAdapter, self).build_response( - request, response - ) - - # See if we should invalidate the cache. - if request.method in self.invalidating_methods and resp.ok: - cache_url = self.controller.cache_url(request.url) - self.cache.delete(cache_url) - - # Try to store the response if it is a GET - elif request.method == 'GET': - if response.status == 304: - # We must have sent an ETag request. This could mean - # that we've been expired already or that we simply - # have an etag. In either case, we want to try and - # update the cache if that is the case. - resp = self.controller.update_cached_response( - request, response - ) - else: - # try to cache the response - self.controller.cache_response(request, resp) - - # Give the request a from_cache attr to let people use it - # rather than testing for hasattr. - if not hasattr(resp, 'from_cache'): - resp.from_cache = False - - return resp diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py deleted file mode 100644 index feb7d3ed..00000000 --- a/lib/cachecontrol/cache.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -The cache object API for implementing caches. The default is just a -dictionary, which in turns means it is not threadsafe for writing. 
-""" -from threading import Lock - - -class BaseCache(object): - - def get(self, key): - raise NotImplemented() - - def set(self, key, value): - raise NotImplemented() - - def delete(self, key): - raise NotImplemented() - - -class DictCache(BaseCache): - - def __init__(self, init_dict=None): - self.lock = Lock() - self.data = init_dict or {} - - def get(self, key): - return self.data.get(key, None) - - def set(self, key, value): - with self.lock: - self.data.update({key: value}) - - def delete(self, key): - with self.lock: - if key in self.data: - self.data.pop(key) diff --git a/lib/cachecontrol/caches/__init__.py b/lib/cachecontrol/caches/__init__.py deleted file mode 100644 index 5e851b03..00000000 --- a/lib/cachecontrol/caches/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from textwrap import dedent - -try: - from cachecontrol.caches.file_cache import FileCache -except ImportError: - notice = dedent(''' - NOTE: In order to use the FileCache you must have - lockfile installed. You can install it via pip: - pip install lockfile - ''') - print(notice) - - -try: - import redis - from cachecontrol.caches.redis_cache import RedisCache -except ImportError: - pass diff --git a/lib/cachecontrol/caches/file_cache.py b/lib/cachecontrol/caches/file_cache.py deleted file mode 100644 index 3a7d1a4c..00000000 --- a/lib/cachecontrol/caches/file_cache.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import codecs - -from hashlib import md5 - -try: - from pickle import load, dump -except ImportError: - from cPickle import load, dump - -from lib.lockfile import FileLock - - -class FileCache(object): - - def __init__(self, directory, forever=False): - self.directory = directory - self.forever = forever - - if not os.path.isdir(self.directory): - os.mkdir(self.directory) - - def encode(self, x): - return md5(x.encode()).hexdigest() - - def _fn(self, name): - return os.path.join(self.directory, self.encode(name)) - - def get(self, key): - name = self._fn(key) - if os.path.exists(name): - return load(codecs.open(name, 'rb')) - - def set(self, key, value): - name = self._fn(key) - lock = FileLock(name) - with lock: - with codecs.open(lock.path, 'w+b') as fh: - dump(value, fh) - - def delete(self, key): - if not self.forever: - os.remove(self._fn(key)) diff --git a/lib/cachecontrol/caches/redis_cache.py b/lib/cachecontrol/caches/redis_cache.py deleted file mode 100644 index d3814ebc..00000000 --- a/lib/cachecontrol/caches/redis_cache.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import division - -from datetime import datetime - -try: - from cPickle import loads, dumps -except ImportError: # Python 3.x - from pickle import loads, dumps - - -def total_seconds(td): - """Python 2.6 compatability""" - if hasattr(td, 'total_seconds'): - return td.total_seconds() - - ms = td.microseconds - secs = (td.seconds + td.days * 24 * 3600) - return (ms + secs * 10**6) / 10**6 - - -class RedisCache(object): - - def __init__(self, conn): - self.conn = conn - - def get(self, key): - val = self.conn.get(key) - if val: - return loads(val) - return None - - def set(self, key, value, expires=None): - if not expires: - self.conn.set(key, dumps(value)) - else: - expires = expires - datetime.now() - self.conn.setex(key, total_seconds(expires), value) - - def delete(self, key): - self.conn.delete(key) - - def clear(self): - """Helper for clearing all the keys in a database. 
Use with - caution!""" - for key in self.conn.keys(): - self.conn.delete(key) diff --git a/lib/cachecontrol/compat.py b/lib/cachecontrol/compat.py deleted file mode 100644 index 1b6e596e..00000000 --- a/lib/cachecontrol/compat.py +++ /dev/null @@ -1,12 +0,0 @@ -try: - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin - - -try: - import email.utils - parsedate_tz = email.utils.parsedate_tz -except ImportError: - import email.Utils - parsedate_tz = email.Utils.parsedate_tz diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py deleted file mode 100644 index 07ec8b84..00000000 --- a/lib/cachecontrol/controller.py +++ /dev/null @@ -1,256 +0,0 @@ -""" -The httplib2 algorithms ported for use with requests. -""" -import re -import calendar -import time -import os - -from cachecontrol.cache import DictCache -from cachecontrol.compat import parsedate_tz - - -URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") - - -def parse_uri(uri): - """Parses a URI using the regex given in Appendix B of RFC 3986. - - (scheme, authority, path, query, fragment) = parse_uri(uri) - """ - groups = URI.match(uri).groups() - return (groups[1], groups[3], groups[4], groups[6], groups[8]) - - -class CacheController(object): - """An interface to see if request should cached or not. - """ - def __init__(self, cache=None, cache_etags=True, cache_force=False): - self.cache = cache or DictCache() - self.cache_etags = cache_etags - self.cache_force = cache_force - - def _urlnorm(self, uri): - """Normalize the URL to create a safe key for the cache""" - (scheme, authority, path, query, fragment) = parse_uri(uri) - if not scheme or not authority: - raise Exception("Only absolute URIs are allowed. uri = %s" % uri) - authority = authority.lower() - scheme = scheme.lower() - if not path: - path = "/" - - # Could do syntax based normalization of the URI before - # computing the digest. See Section 6.2.2 of Std 66. - request_uri = query and "?".join([path, query]) or path - scheme = scheme.lower() - defrag_uri = scheme + "://" + authority + request_uri - - return defrag_uri - - def cache_url(self, uri): - return self._urlnorm(uri) - - def parse_cache_control(self, headers): - """ - Parse the cache control headers returning a dictionary with values - for the different directives. - """ - retval = {} - - cc_header = 'cache-control' - if 'Cache-Control' in headers: - cc_header = 'Cache-Control' - - if cc_header in headers: - parts = headers[cc_header].split(',') - parts_with_args = [ - tuple([x.strip().lower() for x in part.split("=", 1)]) - for part in parts if -1 != part.find("=")] - parts_wo_args = [(name.strip().lower(), 1) - for name in parts if -1 == name.find("=")] - retval = dict(parts_with_args + parts_wo_args) - return retval - - def cached_request(self, url, headers): - cache_url = self.cache_url(url) - cc = self.parse_cache_control(headers) - - # non-caching states - no_cache = True if 'no-cache' in cc else False - if 'max-age' in cc and cc['max-age'] == 0: - no_cache = True - - # see if it is in the cache anyways - in_cache = self.cache.get(cache_url) - if no_cache or not in_cache: - return False - - # It is in the cache, so lets see if it is going to be - # fresh enough - resp = self.cache.get(cache_url) - - # Check our Vary header to make sure our request headers match - # up. We don't delete it from the though, we just don't return - # our cached value. 
- # - # NOTE: Because httplib2 stores raw content, it denotes - # headers that were sent in the original response by - # adding -varied-$name. We don't have to do that b/c we - # are storing the object which has a reference to the - # original request. If that changes, then I'd propose - # using the varied headers in the cache key to avoid the - # situation all together. - if 'vary' in resp.headers: - varied_headers = resp.headers['vary'].replace(' ', '').split(',') - original_headers = resp.request.headers - for header in varied_headers: - # If our headers don't match for the headers listed in - # the vary header, then don't use the cached response - if headers.get(header, None) != original_headers.get(header): - return False - - now = time.time() - date = calendar.timegm( - parsedate_tz(resp.headers['date']) - ) - current_age = max(0, now - date) - - # TODO: There is an assumption that the result will be a - # requests response object. This may not be best since we - # could probably avoid instantiating or constructing the - # response until we know we need it. - resp_cc = self.parse_cache_control(resp.headers) - - # determine freshness - freshness_lifetime = 0 - if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): - freshness_lifetime = int(resp_cc['max-age']) - elif 'expires' in resp.headers: - expires = parsedate_tz(resp.headers['expires']) - if expires is not None: - expire_time = calendar.timegm(expires) - date - freshness_lifetime = max(0, expire_time) - - # determine if we are setting freshness limit in the req - if 'max-age' in cc: - try: - freshness_lifetime = int(cc['max-age']) - except ValueError: - freshness_lifetime = 0 - - if 'min-fresh' in cc: - try: - min_fresh = int(cc['min-fresh']) - except ValueError: - min_fresh = 0 - # adjust our current age by our min fresh - current_age += min_fresh - - # see how fresh we actually are - fresh = (freshness_lifetime > current_age) - - if fresh: - if resp.ok: - # make sure we set the from_cache to true - resp.from_cache = True - return resp - return False - - # we're not fresh. If we don't have an Etag, clear it out - if 'etag' not in resp.headers: - self.cache.delete(cache_url) - - if 'etag' in resp.headers: - headers['If-None-Match'] = resp.headers['ETag'] - - if 'last-modified' in resp.headers: - headers['If-Modified-Since'] = resp.headers['Last-Modified'] - - # return the original handler - return False - - def add_headers(self, url, resp=None): - resp = self.cache.get(url) - if resp and 'etag' in resp.headers: - return {'If-None-Match': resp.headers['etag']} - return {} - - def cache_response(self, request, resp): - """ - Algorithm for caching requests. - - This assumes a requests Response object. - """ - # From httplib2: Don't cache 206's since we aren't going to - # handle byte range requests - if resp.status_code not in [200, 203]: - return - - cc_req = self.parse_cache_control(request.headers) - cc = self.parse_cache_control(resp.headers) - - cache_url = self.cache_url(request.url) - - # Delete it from the cache if we happen to have it stored there - no_store = cc.get('no-store') or cc_req.get('no-store') - if no_store and self.cache.get(cache_url): - self.cache.delete(cache_url) - - # If we've been given an etag, then keep the response - if self.cache_etags and 'etag' in resp.headers: - self.cache.set(cache_url, resp) - - # Add to the cache if the response headers demand it. If there - # is no date header then we can't do anything about expiring - # the cache. 
- elif 'date' in resp.headers: - # cache when there is a max-age > 0 - if cc and cc.get('max-age'): - if int(cc['max-age']) > 0: - self.cache.set(cache_url, resp) - - # If the request can expire, it means we should cache it - # in the meantime. - elif 'expires' in resp.headers: - if resp.headers['expires']: - self.cache.set(cache_url, resp) - - # If the request is for our local cache, it means we should cache it - elif self.cache_force: - resp.headers.update({'cache-control': 'max-age=900, private'}) - self.cache.set(cache_url, resp) - - def update_cached_response(self, request, response): - """On a 304 we will get a new set of headers that we want to - update our cached value with, assuming we have one. - - This should only ever be called when we've sent an ETag and - gotten a 304 as the response. - """ - cache_url = self.cache_url(request.url) - - resp = self.cache.get(cache_url) - - if not resp: - # we didn't have a cached response - return response - - # did so lets update our headers - resp.headers.update(resp.headers) - - # we want a 200 b/c we have content via the cache - request.status_code = 200 - - # update the request as it has the if-none-match header + any - # other headers that the server might have updated (ie Date, - # Cache-Control, Expires, etc.) - resp.request = request - - # update our cache - self.cache.set(cache_url, resp) - - # Let everyone know this was from the cache. - resp.from_cache = True - - return resp diff --git a/lib/cachecontrol/patch_requests.py b/lib/cachecontrol/patch_requests.py deleted file mode 100644 index cad60e17..00000000 --- a/lib/cachecontrol/patch_requests.py +++ /dev/null @@ -1,56 +0,0 @@ -import requests - -from requests import models -from requests.packages.urllib3.response import HTTPResponse - -__attrs__ = [ - '_content', - 'status_code', - 'headers', - 'url', - 'history', - 'encoding', - 'reason', - 'cookies', - 'elapsed', -] - - -def response_getstate(self): - # consume everything - if not self._content_consumed: - self.content - - state = dict( - (attr, getattr(self, attr, None)) - for attr in __attrs__ - ) - - # deal with our raw content b/c we need it for our cookie jar - state['raw_original_response'] = self.raw._original_response - return state - - -def response_setstate(self, state): - for name, value in state.items(): - if name != 'raw_original_response': - setattr(self, name, value) - - setattr(self, 'raw', HTTPResponse()) - self.raw._original_response = state['raw_original_response'] - - -def make_responses_pickleable(): - try: - version_parts = [int(part) for part in requests.__version__.split('.')] - - # must be >= 2.2.x - if not version_parts[0] >= 2 or not version_parts[1] >= 2: - models.Response.__getstate__ = response_getstate - models.Response.__setstate__ = response_setstate - except: - raise - pass - - -make_responses_pickleable() diff --git a/lib/cachecontrol/wrapper.py b/lib/cachecontrol/wrapper.py deleted file mode 100644 index 38b91536..00000000 --- a/lib/cachecontrol/wrapper.py +++ /dev/null @@ -1,10 +0,0 @@ -from cachecontrol.adapter import CacheControlAdapter -from cachecontrol.cache import DictCache - - -def CacheControl(sess, cache=None, cache_etags=True, cache_force=False): - cache = cache or DictCache() - adapter = CacheControlAdapter(cache, cache_etags=cache_etags, cache_force=cache_force) - sess.mount('http://', adapter) - - return sess diff --git a/lib/httpcache/__init__.py b/lib/httpcache/__init__.py new file mode 100644 index 00000000..0b8a963c --- /dev/null +++ b/lib/httpcache/__init__.py @@ -0,0 +1,14 
@@ +# -*- coding: utf-8 -*- +""" +__init__.py +~~~~~~~~~~~ + +Defines the public API to the httpcache module. +""" + +__version__ = '0.1.3' + +from .cache import HTTPCache +from .adapter import CachingHTTPAdapter + +__all__ = ['HTTPCache', 'CachingHTTPAdapter'] diff --git a/lib/httpcache/adapter.py b/lib/httpcache/adapter.py new file mode 100644 index 00000000..b1d511bf --- /dev/null +++ b/lib/httpcache/adapter.py @@ -0,0 +1,55 @@ +""" +adapter.py +~~~~~~~~~~ + +Contains an implementation of an HTTP adapter for Requests that is aware of the +cache contained in this module. +""" +from requests.adapters import HTTPAdapter +from .cache import HTTPCache + + +class CachingHTTPAdapter(HTTPAdapter): + """ + An HTTP-caching-aware Transport Adapter for Python Requests. The central + portion of the API. + + :param capacity: The maximum capacity of the backing cache. + """ + def __init__(self, capacity=50, **kwargs): + super(CachingHTTPAdapter, self).__init__(**kwargs) + + #: The HTTP Cache backing the adapter. + self.cache = HTTPCache(capacity=capacity) + + def send(self, request, **kwargs): + """ + Sends a PreparedRequest object, respecting RFC 2616's rules about HTTP + caching. Returns a Response object that may have been cached. + + :param request: The Requests :class:`PreparedRequest` object to send. + """ + cached_resp = self.cache.retrieve(request) + + if cached_resp is not None: + return cached_resp + else: + return super(CachingHTTPAdapter, self).send(request, **kwargs) + + def build_response(self, request, response): + """ + Builds a Response object from a urllib3 response. May involve returning + a cached Response. + + :param request: The Requests :class:`PreparedRequest` object sent. + :param response: The urllib3 response. + """ + resp = super(CachingHTTPAdapter, self).build_response(request, + response) + + if resp.status_code == 304: + resp = self.cache.handle_304(resp) + else: + self.cache.store(resp) + + return resp diff --git a/lib/httpcache/cache.py b/lib/httpcache/cache.py new file mode 100644 index 00000000..892e8f90 --- /dev/null +++ b/lib/httpcache/cache.py @@ -0,0 +1,207 @@ +# -*- coding: utf-8 -*- +""" +cache.py +~~~~~~~~ + +Contains the primary cache structure used in http-cache. +""" +from .structures import RecentOrderedDict +from .utils import (parse_date_header, build_date_header, + expires_from_cache_control, url_contains_query) +from datetime import datetime + + +# RFC 2616 specifies that we can cache 200 OK, 203 Non Authoritative, +# 206 Partial Content, 300 Multiple Choices, 301 Moved Permanently and +# 410 Gone responses. We don't cache 206s at the moment because we +# don't handle Range and Content-Range headers. +CACHEABLE_RCS = (200, 203, 300, 301, 410) + +# Cacheable verbs. +CACHEABLE_VERBS = ('GET', 'HEAD', 'OPTIONS') + +# Some verbs MUST invalidate the resource in the cache, according to RFC 2616. +# If we send one of these, or any verb we don't recognise, invalidate the +# cache entry for that URL. As it happens, these are also the cacheable +# verbs. That works out well for us. +NON_INVALIDATING_VERBS = CACHEABLE_VERBS + + +class HTTPCache(object): + """ + The HTTP Cache object. Manages caching of responses according to RFC 2616, + adding necessary headers to HTTP request objects, and returning cached + responses based on server responses. + + This object is not expected to be used by most users. It is exposed as part + of the public API for users who feel the need for more control. This API + may change in a minor version increase. Be warned.
+ + :param capacity: (Optional) The maximum capacity of the HTTP cache. + """ + def __init__(self, capacity=50): + #: The maximum capacity of the HTTP cache. When this many cache entries + #: end up in the cache, the oldest entries are removed. + self.capacity = capacity + + #: The cache backing store. Cache entries are stored here as key-value + #: pairs. The key is the URL used to retrieve the cached response. The + #: value is a python dict, which stores three objects: the response + #: (keyed off of 'response'), the retrieval or creation date (keyed off + #: of 'creation') and the cache expiry date (keyed off of 'expiry'). + #: This last value may be None. + self._cache = RecentOrderedDict() + + def store(self, response): + """ + Takes an HTTP response object and stores it in the cache according to + RFC 2616. Returns a boolean value indicating whether the response was + cached or not. + + :param response: Requests :class:`Response` object to cache. + """ + # Define an internal utility function. + def date_header_or_default(header_name, default, response): + try: + date_header = response.headers[header_name] + except KeyError: + value = default + else: + value = parse_date_header(date_header) + return value + + if response.status_code not in CACHEABLE_RCS: + return False + + if response.request.method not in CACHEABLE_VERBS: + return False + + url = response.url + now = datetime.utcnow() + + # Get the value of the 'Date' header, if it exists. If it doesn't, just + # use now. + creation = date_header_or_default('Date', now, response) + + # Get the value of the 'Cache-Control' header, if it exists. + cc = response.headers.get('Cache-Control', None) + if cc is not None: + expiry = expires_from_cache_control(cc, now) + + # If the above returns None, we are explicitly instructed not to + # cache this. + if expiry is None: + return False + + # Get the value of the 'Expires' header, if it exists, and if we don't + # have anything from the 'Cache-Control' header. + if cc is None: + expiry = date_header_or_default('Expires', None, response) + + # If the expiry date is earlier or the same as the Date header, don't + # cache the response at all. + if expiry is not None and expiry <= creation: + return False + + # If there's a query portion of the url and it's a GET, don't cache + # this unless explicitly instructed to. + if expiry is None and response.request.method == 'GET': + if url_contains_query(url): + return False + + self._cache[url] = {'response': response, + 'creation': creation, + 'expiry': expiry} + + self.__reduce_cache_count() + + return True + + def handle_304(self, response): + """ + Given a 304 response, retrieves the cached entry. This unconditionally + returns the cached entry, so it can be used when the 'intelligent' + behaviour of retrieve() is not desired. + + Returns None if there is no entry in the cache. + + :param response: The 304 response to find the cached entry for. Should be a Requests :class:`Response`. + """ + try: + cached_response = self._cache[response.url]['response'] + except KeyError: + cached_response = None + + return cached_response + + def retrieve(self, request): + """ + Retrieves a cached response if possible. + + If there is a response that can be unconditionally returned (e.g. one + that had a Cache-Control header set), that response is returned. If + there is one that can be conditionally returned (if a 304 is returned), + applies an If-Modified-Since header to the request and returns None.
+ + :param request: The Requests :class:`PreparedRequest` object. + """ + return_response = None + url = request.url + + try: + cached_response = self._cache[url] + except KeyError: + return None + + if request.method not in NON_INVALIDATING_VERBS: + del self._cache[url] + return None + + if cached_response['expiry'] is None: + # We have no explicit expiry time, so we weren't instructed to + # cache. Add an 'If-Modified-Since' header. + creation = cached_response['creation'] + header = build_date_header(creation) + request.headers['If-Modified-Since'] = header + else: + # We have an explicit expiry time. If we're earlier than the expiry + # time, return the response. + now = datetime.utcnow() + + if now <= cached_response['expiry']: + return_response = cached_response['response'] + else: + del self._cache[url] + + return return_response + + def __reduce_cache_count(self): + """ + Drops the number of entries in the cache to the capacity of the cache. + + Walks the backing RecentOrderedDict in order from oldest to youngest. + Deletes cache entries that are either invalid or being speculatively + cached until the number of cache entries drops to the capacity. If this + leaves the cache above capacity, begins deleting the least-used cache + entries that are still valid until the cache has space. + """ + if len(self._cache) <= self.capacity: + return + + to_delete = len(self._cache) - self.capacity + keys = list(self._cache.keys()) + + for key in keys: + if self._cache[key]['expiry'] is None: + del self._cache[key] + to_delete -= 1 + + if to_delete == 0: + return + + keys = list(self._cache.keys()) + + for i in range(to_delete): + del self._cache[keys[i]] + + return diff --git a/lib/httpcache/compat.py b/lib/httpcache/compat.py new file mode 100644 index 00000000..384d38e6 --- /dev/null +++ b/lib/httpcache/compat.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +compat.py +~~~~~~~~~ + +Defines cross-platform functions and classes needed to achieve proper +functionality. +""" + +pass diff --git a/lib/httpcache/structures.py b/lib/httpcache/structures.py new file mode 100644 index 00000000..6ac24a92 --- /dev/null +++ b/lib/httpcache/structures.py @@ -0,0 +1,60 @@ +""" +structures.py +~~~~~~~~~~~~~ +
Defines structures used by the httpcache module. +""" + +class RecentOrderedDict(dict): + """ + A custom variant of the dictionary that ensures that the object most + recently inserted _or_ retrieved from the dictionary is enumerated first.
""" + def __init__(self): + self._data = {} + self._order = [] + + def __setitem__(self, key, value): + if key in self._data: + self._order.remove(key) + + self._order.append(key) + self._data[key] = value + + def __getitem__(self, key): + value = self._data[key] + self._order.remove(key) + self._order.append(key) + return value + + def __delitem__(self, key): + del self._data[key] + self._order.remove(key) + + def __iter__(self): + return iter(self._order) + + def __len__(self): + return len(self._order) + + def __contains__(self, value): + return self._data.__contains__(value) + + def items(self): + return [(key, self._data[key]) for key in self._order] + + def keys(self): + return self._order + + def values(self): + return [self._data[key] for key in self._order] + + def clear(self): + self._data = {} + self._order = [] + + def copy(self): + c = RecentOrderedDict() + c._data = self._data.copy() + c._order = self._order[:] + return c diff --git a/lib/httpcache/utils.py b/lib/httpcache/utils.py new file mode 100644 index 00000000..0efe9f99 --- /dev/null +++ b/lib/httpcache/utils.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +""" +utils.py +~~~~~~~~ + +Utility functions for use with httpcache. +""" +from datetime import datetime, timedelta + +try: # Python 2 + from urlparse import urlparse +except ImportError: # Python 3 + from urllib.parse import urlparse + +RFC_1123_DT_STR = "%a, %d %b %Y %H:%M:%S GMT" +RFC_850_DT_STR = "%A, %d-%b-%y %H:%M:%S GMT" + + +def parse_date_header(header): + """ + Given a date header in the form specified by RFC 2616, return a Python + datetime object. + + RFC 2616 specifies three possible formats for date/time headers, and + makes it clear that all dates/times should be in UTC/GMT. That is assumed + by this library, which simply does everything in UTC. This currently does + not parse the C asctime() string, because that's effort. + + This function does _not_ follow Postel's Law. If a format does not strictly + match the defined strings, this function returns None. This is considered + 'safe' behaviour. + """ + try: + dt = datetime.strptime(header, RFC_1123_DT_STR) + except ValueError: + try: + dt = datetime.strptime(header, RFC_850_DT_STR) + except ValueError: + dt = None + except TypeError: + dt = None + + return dt + + +def build_date_header(dt): + """ + Given a Python datetime object, build a Date header value according to + RFC 2616. + + RFC 2616 specifies that the RFC 1123 form is to be preferred, so that is + what we use. + """ + return dt.strftime(RFC_1123_DT_STR) + + +def expires_from_cache_control(header, current_time): + """ + Given a Cache-Control header, builds a Python datetime object corresponding + to the expiry time (in UTC). This function should respect all relevant + Cache-Control directives. + + Takes current_time as an argument to ensure that 'max-age=0' generates the + correct behaviour without being special-cased. + + Returns None to indicate that a request must not be cached. + """ + # Cache control header values are made of multiple comma separated fields. + # Splitting them like this is probably a bad idea, but I'm going to roll with + # it for now. We'll come back to it. + fields = header.split(', ') + duration = None + + for field in fields: + # Right now we don't handle no-cache applied to specific fields. To be + # as 'nice' as possible, treat any no-cache as applying to the whole + # request. Bail early, because there's no reason to stick around.
+ if field.startswith('no-cache') or field == 'no-store': + return None + + if field.startswith('max-age'): + _, duration = field.split('=') + duration = int(duration) + + if duration: + interval = timedelta(seconds=int(duration)) + return current_time + interval + +def url_contains_query(url): + """ + A very stupid function for determining if a URL contains a query string + or not. + """ + if urlparse(url).query: + return True + else: + return False diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py index 28bea07c..0f297ab2 100644 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -9,6 +9,7 @@ and maintain connections. """ import socket +import copy from .models import Response from .packages.urllib3.poolmanager import PoolManager, proxy_from_url @@ -62,7 +63,7 @@ class HTTPAdapter(BaseAdapter): Usage:: - >>> import requests + >>> import lib.requests >>> s = requests.Session() >>> a = requests.adapters.HTTPAdapter(max_retries=3) >>> s.mount('http://', a) @@ -390,4 +391,4 @@ class HTTPAdapter(BaseAdapter): if not stream: r.content - return r + return r \ No newline at end of file diff --git a/lib/tvdb_api/tvdb_api.py b/lib/tvdb_api/tvdb_api.py index 9942ee51..62147a8f 100644 --- a/lib/tvdb_api/tvdb_api.py +++ b/lib/tvdb_api/tvdb_api.py @@ -39,19 +39,14 @@ except ImportError: gzip = None from lib import requests -from lib.cachecontrol.wrapper import CacheControl -from lib.cachecontrol.caches.file_cache import FileCache - from tvdb_ui import BaseUI, ConsoleUI from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound, tvdb_seasonnotfound, tvdb_episodenotfound, tvdb_attributenotfound) - def log(): return logging.getLogger("tvdb_api") - class ShowContainer(dict): """Simple dict that holds a series of Show instances """ @@ -518,12 +513,15 @@ class Tvdb: # cacheControl if self.config['cache_enabled']: - sess = CacheControl(requests.Session(), cache_force=True, cache=FileCache(self.config['cache_location'])) + from lib.httpcache import CachingHTTPAdapter + sess = requests.Session() + sess.mount('http://', CachingHTTPAdapter()) else: sess = requests.Session() # get response from TVDB resp = sess.get(url, params=params) + sess.close() except requests.HTTPError, e: raise tvdb_error("HTTP error " + str(e.errno) + " while loading URL " + str(url)) @@ -536,7 +534,7 @@ class Tvdb: except Exception, e: raise tvdb_error("Unknown exception occured: " + str(e.message) + " while loading URL " + str(url)) - if resp.ok: + if resp.ok and resp.content: if 'application/zip' in resp.headers.get("Content-Type", ''): try: # TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20] @@ -559,11 +557,11 @@ class Tvdb: try: # TVDB doesn't sanitize \r (CR) from user input in some fields, # remove it to avoid errors. 
Change from SickBeard, from will14m - return ElementTree.fromstring(src.rstrip("\r")) + return ElementTree.fromstring(src.rstrip("\r")) if src else None except SyntaxError: src = self._loadUrl(url, params=params, language=language) try: - return ElementTree.fromstring(src.rstrip("\r")) + return ElementTree.fromstring(src.rstrip("\r")) if src else None except SyntaxError, exceptionmsg: errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % ( exceptionmsg diff --git a/lib/tvrage_api/tvrage_api.py b/lib/tvrage_api/tvrage_api.py index 422ef251..23c4c22e 100644 --- a/lib/tvrage_api/tvrage_api.py +++ b/lib/tvrage_api/tvrage_api.py @@ -30,10 +30,7 @@ except ImportError: import xml.etree.ElementTree as ElementTree from lib.dateutil.parser import parse - from lib import requests -from lib.cachecontrol.wrapper import CacheControl -from lib.cachecontrol.caches.file_cache import FileCache from tvrage_ui import BaseUI from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound, @@ -42,7 +39,6 @@ from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfou def log(): return logging.getLogger("tvrage_api") - class ShowContainer(dict): """Simple dict that holds a series of Show instances """ @@ -351,12 +347,15 @@ class TVRage: # cacheControl if self.config['cache_enabled']: - sess = CacheControl(requests.Session(), cache_force=True, cache=FileCache(self.config['cache_location'])) + from lib.httpcache import CachingHTTPAdapter + sess = requests.Session() + sess.mount('http://', CachingHTTPAdapter()) else: sess = requests.Session() # get response from TVRage resp = sess.get(url, params=params) + sess.close() except requests.HTTPError, e: raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url)) @@ -366,7 +365,7 @@ class TVRage: except requests.Timeout, e: raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url)) - return resp.content if resp.ok else None + return resp.content if resp.ok and resp.content else None def _getetsrc(self, url, params=None): """Loads a URL using caching, returns an ElementTree of the source diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py index 8e43edee..83c88602 100644 --- a/sickbeard/helpers.py +++ b/sickbeard/helpers.py @@ -35,6 +35,7 @@ import base64 from lib import requests from httplib import BadStatusLine from itertools import izip, cycle +from lib.httpcache import CachingHTTPAdapter try: import json @@ -169,6 +170,9 @@ def getURL(url, post_data=None, headers=None, params=None, timeout=None): Returns a byte-string retrieved from the url provider. """ + # Cache Handler + sess = requests.Session() + sess.mount('http://', CachingHTTPAdapter()) req_headers = ['User-Agent', USER_AGENT, 'Accept-Encoding', 'gzip,deflate'] if headers: @@ -182,8 +186,8 @@ Returns a byte-string retrieved from the url provider. url = urlparse.urlunparse(parsed) it = iter(req_headers) - sess = requests.session() resp = sess.get(url, params=params, data=post_data, headers=dict(zip(it, it))) + sess.close() except requests.HTTPError, e: logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING) return None @@ -196,7 +200,7 @@ Returns a byte-string retrieved from the url provider. 
logger.log(u"Connection timed out " + str(e.message) + " while loading URL " + url, logger.WARNING) return None - return resp.content if resp.ok else None + return resp.content if resp.ok and resp.content else None def _remove_file_failed(file): try: @@ -206,8 +210,11 @@ def _remove_file_failed(file): def download_file(url, filename): try: - sess = requests.session() + # cache handler + sess = requests.Session() + sess.mount('http://', CachingHTTPAdapter()) req = sess.get(url, stream=True) + #CHUNK = 16 * 1024 with open(filename, 'wb') as fp: for chunk in req.iter_content(chunk_size=(16 *1024)): @@ -215,7 +222,7 @@ def download_file(url, filename): fp.write(chunk) fp.flush() fp.close() - req.close() + sess.close() except requests.HTTPError, e: _remove_file_failed(filename) diff --git a/sickbeard/indexers/test/test.py b/sickbeard/indexers/test/test.py index 75bc58c2..3973f678 100644 --- a/sickbeard/indexers/test/test.py +++ b/sickbeard/indexers/test/test.py @@ -11,19 +11,16 @@ from sickbeard.indexers.indexer_api import indexerApi from sickbeard.indexers.indexer_exceptions import indexer_exception class APICheck(unittest.TestCase): - indexer_id = 'Continum' - indexer = 'TVRage' + indexer_id = 81189 + indexer = 'Tvdb' + lang = "en" + # Set our common indexer_api options here - INDEXER_API_PARMS = {'apikey': 'Uhewg1Rr0o62fvZvUIZt', - 'language': 'en', - 'useZip': True} - - - INDEXER_API_PARMS['indexer'] = indexer + INDEXER_API_PARMS = {'indexer': indexer} lindexer_api_parms = INDEXER_API_PARMS.copy() try: -# showurl = indexerApi(**lindexer_api_parms).config['base_url'] + str(indexer_id) + '/all/en.zip' + lang_id = indexerApi().config['langabbv_to_id'][lang] t = indexerApi(cache=True, **lindexer_api_parms) myEp = t[indexer_id]