Switched TVDB/TVRage CacheHandlers to CacheControl.

Replaced urllib2 with requests for both TVDB and TVRage.
Updated cache cleanup code to process both TVDB and TVRage cache folders.
echel0n 2014-03-14 10:15:02 -07:00
parent 82db6e9b54
commit 05410e2aa0
42 changed files with 1817 additions and 359 deletions
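In practice the switch replaces the urllib2 opener + CacheHandler pair with a requests Session wrapped by CacheControl. A minimal sketch of the new pattern, using the vendored modules added below (the cache directory path is illustrative, not from this commit):

import requests
from lib.cachecontrol.wrapper import CacheControl
from lib.cachecontrol.caches.file_cache import FileCache

# Wrap a plain Session; GET responses are cached to disk according to
# their HTTP caching headers, as lib/tvdb_api now does.
sess = CacheControl(requests.Session(), cache=FileCache('/tmp/tvdb_cache'))
resp = sess.get('http://thetvdb.com')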


@ -0,0 +1,13 @@
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
# patch our requests.models.Response to make them pickleable in older
# versions of requests.
import cachecontrol.patch_requests
from cachecontrol.wrapper import CacheControl
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController


@ -0,0 +1,70 @@
from requests.adapters import HTTPAdapter
from cachecontrol.controller import CacheController
from cachecontrol.cache import DictCache
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = set(['PUT', 'DELETE'])
def __init__(self, cache=None, cache_etags=True, *args, **kw):
super(CacheControlAdapter, self).__init__(*args, **kw)
self.cache = cache or DictCache()
self.controller = CacheController(self.cache, cache_etags=cache_etags)
def send(self, request, **kw):
"""Send a request. Use the request information to see if it
exists in the cache.
"""
if request.method == 'GET':
cached_response = self.controller.cached_request(
request.url, request.headers
)
if cached_response:
# Cached responses should not have a raw field since
# they *cannot* be created from some stream.
cached_response.raw = None
return cached_response
# check for etags and add headers if appropriate
headers = self.controller.add_headers(request.url)
request.headers.update(headers)
resp = super(CacheControlAdapter, self).send(request, **kw)
return resp
def build_response(self, request, response):
"""Build a response by making a request or using the cache.
This will end up calling send and returning a potentially
cached response
"""
resp = super(CacheControlAdapter, self).build_response(
request, response
)
# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
cache_url = self.controller.cache_url(request.url)
self.cache.delete(cache_url)
# Try to store the response if it is a GET
elif request.method == 'GET':
if response.status == 304:
# We must have sent an ETag request. This could mean
# that we've been expired already or that we simply
# have an etag. In either case, we want to try and
# update the cache if that is the case.
resp = self.controller.update_cached_response(
request, response
)
else:
# try to cache the response
self.controller.cache_response(request, resp)
# Give the request a from_cache attr to let people use it
# rather than testing for hasattr.
if not hasattr(resp, 'from_cache'):
resp.from_cache = False
return resp
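Since build_response tags every response with a from_cache attribute, callers can distinguish cache hits from network fetches. A hedged example (URL illustrative; the second request is only served locally if the response was cacheable):

import requests
from lib.cachecontrol.wrapper import CacheControl

sess = CacheControl(requests.Session())  # defaults to an in-memory DictCache
first = sess.get('http://thetvdb.com')
second = sess.get('http://thetvdb.com')
print(first.from_cache, second.from_cache)  # e.g. False True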

lib/cachecontrol/cache.py (new file, 36 lines)

@ -0,0 +1,36 @@
"""
The cache object API for implementing caches. The default is just a
dictionary, which in turn means it is not thread-safe for writing.
"""
from threading import Lock
class BaseCache(object):
def get(self, key):
raise NotImplementedError()
def set(self, key, value):
raise NotImplementedError()
def delete(self, key):
raise NotImplementedError()
class DictCache(BaseCache):
def __init__(self, init_dict=None):
self.lock = Lock()
self.data = init_dict or {}
def get(self, key):
return self.data.get(key, None)
def set(self, key, value):
with self.lock:
self.data.update({key: value})
def delete(self, key):
with self.lock:
if key in self.data:
self.data.pop(key)
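Because the cache API is just get/set/delete, any key-value store can back CacheControl. A hypothetical sketch (the MemcachedCache name and its client argument are assumptions for illustration, not part of this commit):

from cachecontrol.cache import BaseCache

class MemcachedCache(BaseCache):
    def __init__(self, client):
        # client: any object exposing get/set/delete, e.g. python-memcached
        self.client = client

    def get(self, key):
        return self.client.get(key)

    def set(self, key, value):
        self.client.set(key, value)

    def delete(self, key):
        self.client.delete(key)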


@ -0,0 +1,18 @@
from textwrap import dedent
try:
from cachecontrol.caches.file_cache import FileCache
except ImportError:
notice = dedent('''
NOTE: In order to use the FileCache you must have
lockfile installed. You can install it via pip:
pip install lockfile
''')
print(notice)
try:
import redis
from cachecontrol.caches.redis_cache import RedisCache
except ImportError:
pass


@ -0,0 +1,43 @@
import os
import codecs
from hashlib import md5
try:
from pickle import load, dump
except ImportError:
from cPickle import load, dump
from lib.lockfile import FileLock
class FileCache(object):
def __init__(self, directory, forever=False):
self.directory = directory
self.forever = forever
if not os.path.isdir(self.directory):
os.mkdir(self.directory)
def encode(self, x):
return md5(x.encode()).hexdigest()
def _fn(self, name):
return os.path.join(self.directory, self.encode(name))
def get(self, key):
name = self._fn(key)
if os.path.exists(name):
return load(codecs.open(name, 'rb'))
def set(self, key, value):
name = self._fn(key)
lock = FileLock(name)
with lock:
with codecs.open(lock.path, 'w+b') as fh:
dump(value, fh)
def delete(self, key):
if not self.forever:
os.remove(self._fn(key))


@ -0,0 +1,46 @@
from __future__ import division
from datetime import datetime
try:
from cPickle import loads, dumps
except ImportError: # Python 3.x
from pickle import loads, dumps
def total_seconds(td):
"""Python 2.6 compatability"""
if hasattr(td, 'total_seconds'):
return td.total_seconds()
ms = td.microseconds
secs = (td.seconds + td.days * 24 * 3600)
return (ms + secs * 10**6) / 10**6
class RedisCache(object):
def __init__(self, conn):
self.conn = conn
def get(self, key):
val = self.conn.get(key)
if val:
return loads(val)
return None
def set(self, key, value, expires=None):
if not expires:
self.conn.set(key, dumps(value))
else:
expires = expires - datetime.now()
self.conn.setex(key, total_seconds(expires), value)
def delete(self, key):
self.conn.delete(key)
def clear(self):
"""Helper for clearing all the keys in a database. Use with
caution!"""
for key in self.conn.keys():
self.conn.delete(key)
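Wiring this backend up is a one-liner; a sketch assuming a local Redis server and the redis-py package (redis.Redis provides the get/set/setex/delete/keys methods used above):

import redis
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.redis_cache import RedisCache

sess = CacheControl(requests.Session(), cache=RedisCache(redis.Redis()))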


@ -0,0 +1,12 @@
try:
from urllib.parse import urljoin
except ImportError:
from urlparse import urljoin
try:
import email.utils
parsedate_tz = email.utils.parsedate_tz
except ImportError:
import email.Utils
parsedate_tz = email.Utils.parsedate_tz


@ -0,0 +1,247 @@
"""
The httplib2 algorithms ported for use with requests.
"""
import re
import calendar
import time
from cachecontrol.cache import DictCache
from cachecontrol.compat import parsedate_tz
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
def parse_uri(uri):
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
"""An interface to see if request should cached or not.
"""
def __init__(self, cache=None, cache_etags=True):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
def _urlnorm(self, uri):
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, fragment) = parse_uri(uri)
if not scheme or not authority:
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
authority = authority.lower()
scheme = scheme.lower()
if not path:
path = "/"
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
request_uri = query and "?".join([path, query]) or path
scheme = scheme.lower()
defrag_uri = scheme + "://" + authority + request_uri
return defrag_uri
def cache_url(self, uri):
return self._urlnorm(uri)
def parse_cache_control(self, headers):
"""
Parse the cache control headers returning a dictionary with values
for the different directives.
"""
retval = {}
cc_header = 'cache-control'
if 'Cache-Control' in headers:
cc_header = 'Cache-Control'
if cc_header in headers:
parts = headers[cc_header].split(',')
parts_with_args = [
tuple([x.strip().lower() for x in part.split("=", 1)])
for part in parts if -1 != part.find("=")]
parts_wo_args = [(name.strip().lower(), 1)
for name in parts if -1 == name.find("=")]
retval = dict(parts_with_args + parts_wo_args)
return retval
def cached_request(self, url, headers):
cache_url = self.cache_url(url)
cc = self.parse_cache_control(headers)
# non-caching states
no_cache = True if 'no-cache' in cc else False
if 'max-age' in cc and cc['max-age'] == '0':
no_cache = True
# see if it is in the cache anyways
in_cache = self.cache.get(cache_url)
if no_cache or not in_cache:
return False
# It is in the cache, so lets see if it is going to be
# fresh enough
resp = self.cache.get(cache_url)
# Check our Vary header to make sure our request headers match
# up. We don't delete it from the cache, though; we just don't return
# our cached value.
#
# NOTE: Because httplib2 stores raw content, it denotes
# headers that were sent in the original response by
# adding -varied-$name. We don't have to do that b/c we
# are storing the object which has a reference to the
# original request. If that changes, then I'd propose
# using the varied headers in the cache key to avoid the
# situation all together.
if 'vary' in resp.headers:
varied_headers = resp.headers['vary'].replace(' ', '').split(',')
original_headers = resp.request.headers
for header in varied_headers:
# If our headers don't match for the headers listed in
# the vary header, then don't use the cached response
if headers.get(header, None) != original_headers.get(header):
return False
now = time.time()
date = calendar.timegm(
parsedate_tz(resp.headers['date'])
)
current_age = max(0, now - date)
# TODO: There is an assumption that the result will be a
# requests response object. This may not be best since we
# could probably avoid instantiating or constructing the
# response until we know we need it.
resp_cc = self.parse_cache_control(resp.headers)
# determine freshness
freshness_lifetime = 0
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
freshness_lifetime = int(resp_cc['max-age'])
elif 'expires' in resp.headers:
expires = parsedate_tz(resp.headers['expires'])
if expires is not None:
expire_time = calendar.timegm(expires) - date
freshness_lifetime = max(0, expire_time)
# determine if we are setting freshness limit in the req
if 'max-age' in cc:
try:
freshness_lifetime = int(cc['max-age'])
except ValueError:
freshness_lifetime = 0
if 'min-fresh' in cc:
try:
min_fresh = int(cc['min-fresh'])
except ValueError:
min_fresh = 0
# adjust our current age by our min fresh
current_age += min_fresh
# see how fresh we actually are
fresh = (freshness_lifetime > current_age)
if fresh:
# make sure we set the from_cache to true
resp.from_cache = True
return resp
# we're not fresh. If we don't have an Etag, clear it out
if 'etag' not in resp.headers:
self.cache.delete(cache_url)
if 'etag' in resp.headers:
headers['If-None-Match'] = resp.headers['ETag']
if 'last-modified' in resp.headers:
headers['If-Modified-Since'] = resp.headers['Last-Modified']
# return the original handler
return False
def add_headers(self, url):
resp = self.cache.get(url)
if resp and 'etag' in resp.headers:
return {'If-None-Match': resp.headers['etag']}
return {}
def cache_response(self, request, resp):
"""
Algorithm for caching requests.
This assumes a requests Response object.
"""
# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
if resp.status_code not in [200, 203]:
return
cc_req = self.parse_cache_control(request.headers)
cc = self.parse_cache_control(resp.headers)
cache_url = self.cache_url(request.url)
# Delete it from the cache if we happen to have it stored there
no_store = cc.get('no-store') or cc_req.get('no-store')
if no_store and self.cache.get(cache_url):
self.cache.delete(cache_url)
# If we've been given an etag, then keep the response
if self.cache_etags and 'etag' in resp.headers:
self.cache.set(cache_url, resp)
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif 'date' in resp.headers:
# cache when there is a max-age > 0
if cc and cc.get('max-age'):
if int(cc['max-age']) > 0:
self.cache.set(cache_url, resp)
# If the request can expire, it means we should cache it
# in the meantime.
elif 'expires' in resp.headers:
if resp.headers['expires']:
self.cache.set(cache_url, resp)
def update_cached_response(self, request, response):
"""On a 304 we will get a new set of headers that we want to
update our cached value with, assuming we have one.
This should only ever be called when we've sent an ETag and
gotten a 304 as the response.
"""
cache_url = self.cache_url(request.url)
resp = self.cache.get(cache_url)
if not resp:
# we didn't have a cached response
return response
# we did, so update the cached response's headers with the fresh ones
resp.headers.update(response.headers)
# we want a 200 b/c we have content via the cache
resp.status_code = 200
# update the request as it has the if-none-match header + any
# other headers that the server might have updated (ie Date,
# Cache-Control, Expires, etc.)
resp.request = request
# update our cache
self.cache.set(cache_url, resp)
# Let everyone know this was from the cache.
resp.from_cache = True
return resp
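For reference, parse_cache_control returns value-carrying directives as strings and bare directives as 1, so freshness checks like the max-age handling above must convert to int. A quick illustration:

from cachecontrol.controller import CacheController

cc = CacheController()
print(cc.parse_cache_control({'cache-control': 'max-age=3600, no-store'}))
# {'max-age': '3600', 'no-store': 1}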


@ -0,0 +1,56 @@
import requests
from requests import models
from requests.packages.urllib3.response import HTTPResponse
__attrs__ = [
'_content',
'status_code',
'headers',
'url',
'history',
'encoding',
'reason',
'cookies',
'elapsed',
]
def response_getstate(self):
# consume everything
if not self._content_consumed:
self.content
state = dict(
(attr, getattr(self, attr, None))
for attr in __attrs__
)
# deal with our raw content b/c we need it for our cookie jar
state['raw_original_response'] = self.raw._original_response
return state
def response_setstate(self, state):
for name, value in state.items():
if name != 'raw_original_response':
setattr(self, name, value)
setattr(self, 'raw', HTTPResponse())
self.raw._original_response = state['raw_original_response']
def make_responses_pickleable():
try:
version_parts = [int(part) for part in requests.__version__.split('.')]
# requests >= 2.2 pickles responses natively; only patch older versions
if not version_parts[0] >= 2 or not version_parts[1] >= 2:
models.Response.__getstate__ = response_getstate
models.Response.__setstate__ = response_setstate
except (ValueError, IndexError):
# version string could not be parsed; leave Response unpatched
pass
make_responses_pickleable()
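The patch is applied at import time, and only on requests installs older than 2.2 (the copy vendored by this commit is 2.3.0, whose responses already pickle natively). A hedged round-trip check, URL illustrative:

import pickle
import requests
import cachecontrol.patch_requests  # importing runs make_responses_pickleable()

resp = requests.get('http://thetvdb.com')
restored = pickle.loads(pickle.dumps(resp))
print(restored.status_code, restored.url)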


@ -0,0 +1,10 @@
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
def CacheControl(sess, cache=None, cache_etags=True):
cache = cache or DictCache()
adapter = CacheControlAdapter(cache, cache_etags=cache_etags)
sess.mount('http://', adapter)
return sess

lib/lockfile/__init__.py (new file, 317 lines)

@ -0,0 +1,317 @@
"""
lockfile.py - Platform-independent advisory file locks.
Requires Python 2.5 unless you apply 2.4.diff
Locking is done on a per-thread basis instead of a per-process basis.
Usage:
>>> lock = LockFile('somefile')
>>> try:
... lock.acquire()
... except AlreadyLocked:
... print 'somefile', 'is locked already.'
... except LockFailed:
... print 'somefile', 'can\\'t be locked.'
... else:
... print 'got lock'
got lock
>>> print lock.is_locked()
True
>>> lock.release()
>>> lock = LockFile('somefile')
>>> print lock.is_locked()
False
>>> with lock:
... print lock.is_locked()
True
>>> print lock.is_locked()
False
>>> lock = LockFile('somefile')
>>> # It is okay to lock twice from the same thread...
>>> with lock:
... lock.acquire()
...
>>> # Though no counter is kept, so you can't unlock multiple times...
>>> print lock.is_locked()
False
Exceptions:
Error - base class for other exceptions
LockError - base class for all locking exceptions
AlreadyLocked - Another thread or process already holds the lock
LockFailed - Lock failed for some other reason
UnlockError - base class for all unlocking exceptions
AlreadyUnlocked - File was not locked.
NotMyLock - File was locked but not by the current thread/process
"""
from __future__ import absolute_import
import sys
import socket
import os
import threading
import time
import urllib
import warnings
import functools
# Work with PEP8 and non-PEP8 versions of threading module.
if not hasattr(threading, "current_thread"):
threading.current_thread = threading.currentThread
if not hasattr(threading.Thread, "get_name"):
threading.Thread.get_name = threading.Thread.getName
__all__ = ['Error', 'LockError', 'LockTimeout', 'AlreadyLocked',
'LockFailed', 'UnlockError', 'NotLocked', 'NotMyLock',
'LinkLockFile', 'MkdirLockFile', 'SQLiteLockFile',
'LockBase', 'locked']
class Error(Exception):
"""
Base class for other exceptions.
>>> try:
... raise Error
... except Exception:
... pass
"""
pass
class LockError(Error):
"""
Base class for error arising from attempts to acquire the lock.
>>> try:
... raise LockError
... except Error:
... pass
"""
pass
class LockTimeout(LockError):
"""Raised when lock creation fails within a user-defined period of time.
>>> try:
... raise LockTimeout
... except LockError:
... pass
"""
pass
class AlreadyLocked(LockError):
"""Some other thread/process is locking the file.
>>> try:
... raise AlreadyLocked
... except LockError:
... pass
"""
pass
class LockFailed(LockError):
"""Lock file creation failed for some other reason.
>>> try:
... raise LockFailed
... except LockError:
... pass
"""
pass
class UnlockError(Error):
"""
Base class for errors arising from attempts to release the lock.
>>> try:
... raise UnlockError
... except Error:
... pass
"""
pass
class NotLocked(UnlockError):
"""Raised when an attempt is made to unlock an unlocked file.
>>> try:
... raise NotLocked
... except UnlockError:
... pass
"""
pass
class NotMyLock(UnlockError):
"""Raised when an attempt is made to unlock a file someone else locked.
>>> try:
... raise NotMyLock
... except UnlockError:
... pass
"""
pass
class LockBase:
"""Base class for platform-specific lock classes."""
def __init__(self, path, threaded=True, timeout=None):
"""
>>> lock = LockBase('somefile')
>>> lock = LockBase('somefile', threaded=False)
"""
self.path = path
self.lock_file = os.path.abspath(path) + ".lock"
self.hostname = socket.gethostname()
self.pid = os.getpid()
if threaded:
t = threading.current_thread()
# Thread objects in Python 2.4 and earlier do not have ident
# attrs. Work around that.
ident = getattr(t, "ident", hash(t))
self.tname = "-%x" % (ident & 0xffffffff)
else:
self.tname = ""
dirname = os.path.dirname(self.lock_file)
self.unique_name = os.path.join(dirname,
"%s%s.%s" % (self.hostname,
self.tname,
self.pid))
self.timeout = timeout
def acquire(self, timeout=None):
"""
Acquire the lock.
* If timeout is omitted (or None), wait forever trying to lock the
file.
* If timeout > 0, try to acquire the lock for that many seconds. If
the lock period expires and the file is still locked, raise
LockTimeout.
* If timeout <= 0, raise AlreadyLocked immediately if the file is
already locked.
"""
raise NotImplemented("implement in subclass")
def release(self):
"""
Release the lock.
If the file is not locked, raise NotLocked.
"""
raise NotImplemented("implement in subclass")
def is_locked(self):
"""
Tell whether or not the file is locked.
"""
raise NotImplemented("implement in subclass")
def i_am_locking(self):
"""
Return True if this object is locking the file.
"""
raise NotImplemented("implement in subclass")
def break_lock(self):
"""
Remove a lock. Useful if a locking thread failed to unlock.
"""
raise NotImplemented("implement in subclass")
def __enter__(self):
"""
Context manager support.
"""
self.acquire()
return self
def __exit__(self, *_exc):
"""
Context manager support.
"""
self.release()
def __repr__(self):
return "<%s: %r -- %r>" % (self.__class__.__name__, self.unique_name,
self.path)
def _fl_helper(cls, mod, *args, **kwds):
warnings.warn("Import from %s module instead of lockfile package" % mod,
DeprecationWarning, stacklevel=2)
# This is a bit funky, but it's only for a while. The way the unit tests
# are constructed this function winds up as an unbound method, so it
# actually takes three args, not two. We want to toss out self.
if not isinstance(args[0], str):
# We are testing, avoid the first arg
args = args[1:]
if len(args) == 1 and not kwds:
kwds["threaded"] = True
return cls(*args, **kwds)
def LinkFileLock(*args, **kwds):
"""Factory function provided for backwards compatibility.
Do not use in new code. Instead, import LinkLockFile from the
lockfile.linklockfile module.
"""
from . import linklockfile
return _fl_helper(linklockfile.LinkLockFile, "lockfile.linklockfile",
*args, **kwds)
def MkdirFileLock(*args, **kwds):
"""Factory function provided for backwards compatibility.
Do not use in new code. Instead, import MkdirLockFile from the
lockfile.mkdirlockfile module.
"""
from . import mkdirlockfile
return _fl_helper(mkdirlockfile.MkdirLockFile, "lockfile.mkdirlockfile",
*args, **kwds)
def SQLiteFileLock(*args, **kwds):
"""Factory function provided for backwards compatibility.
Do not use in new code. Instead, import SQLiteLockFile from the
lockfile.sqlitelockfile module.
"""
from . import sqlitelockfile
return _fl_helper(sqlitelockfile.SQLiteLockFile, "lockfile.sqlitelockfile",
*args, **kwds)
def locked(path, timeout=None):
"""Decorator which enables locks for decorated function.
Arguments:
- path: path for lockfile.
- timeout (optional): Timeout for acquiring lock.
Usage:
@locked('/var/run/myname', timeout=0)
def myname(...):
...
"""
def decor(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
lock = FileLock(path, timeout=timeout)
lock.acquire()
try:
return func(*args, **kwargs)
finally:
lock.release()
return wrapper
return decor
if hasattr(os, "link"):
from . import linklockfile as _llf
LockFile = _llf.LinkLockFile
else:
from . import mkdirlockfile as _mlf
LockFile = _mlf.MkdirLockFile
FileLock = LockFile


@ -0,0 +1,73 @@
from __future__ import absolute_import
import time
import os
from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout,
AlreadyLocked)
class LinkLockFile(LockBase):
"""Lock access to a file using atomic property of link(2).
>>> lock = LinkLockFile('somefile')
>>> lock = LinkLockFile('somefile', threaded=False)
"""
def acquire(self, timeout=None):
try:
open(self.unique_name, "wb").close()
except IOError:
raise LockFailed("failed to create %s" % self.unique_name)
timeout = timeout is not None and timeout or self.timeout
end_time = time.time()
if timeout is not None and timeout > 0:
end_time += timeout
while True:
# Try and create a hard link to it.
try:
os.link(self.unique_name, self.lock_file)
except OSError:
# Link creation failed. Maybe we've double-locked?
nlinks = os.stat(self.unique_name).st_nlink
if nlinks == 2:
# The original link plus the one I created == 2. We're
# good to go.
return
else:
# Otherwise the lock creation failed.
if timeout is not None and time.time() > end_time:
os.unlink(self.unique_name)
if timeout > 0:
raise LockTimeout("Timeout waiting to acquire"
" lock for %s" %
self.path)
else:
raise AlreadyLocked("%s is already locked" %
self.path)
time.sleep(timeout is not None and timeout/10 or 0.1)
else:
# Link creation succeeded. We're good to go.
return
def release(self):
if not self.is_locked():
raise NotLocked("%s is not locked" % self.path)
elif not os.path.exists(self.unique_name):
raise NotMyLock("%s is locked, but not by me" % self.path)
os.unlink(self.unique_name)
os.unlink(self.lock_file)
def is_locked(self):
return os.path.exists(self.lock_file)
def i_am_locking(self):
return (self.is_locked() and
os.path.exists(self.unique_name) and
os.stat(self.unique_name).st_nlink == 2)
def break_lock(self):
if os.path.exists(self.lock_file):
os.unlink(self.lock_file)


@ -0,0 +1,83 @@
from __future__ import absolute_import, division
import time
import os
import sys
import errno
from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout,
AlreadyLocked)
class MkdirLockFile(LockBase):
"""Lock file by creating a directory."""
def __init__(self, path, threaded=True, timeout=None):
"""
>>> lock = MkdirLockFile('somefile')
>>> lock = MkdirLockFile('somefile', threaded=False)
"""
LockBase.__init__(self, path, threaded, timeout)
# Lock file itself is a directory. Place the unique file name into
# it.
self.unique_name = os.path.join(self.lock_file,
"%s.%s%s" % (self.hostname,
self.tname,
self.pid))
def acquire(self, timeout=None):
timeout = timeout is not None and timeout or self.timeout
end_time = time.time()
if timeout is not None and timeout > 0:
end_time += timeout
if timeout is None:
wait = 0.1
else:
wait = max(0, timeout / 10)
while True:
try:
os.mkdir(self.lock_file)
except OSError:
err = sys.exc_info()[1]
if err.errno == errno.EEXIST:
# Already locked.
if os.path.exists(self.unique_name):
# Already locked by me.
return
if timeout is not None and time.time() > end_time:
if timeout > 0:
raise LockTimeout("Timeout waiting to acquire"
" lock for %s" %
self.path)
else:
# Someone else has the lock.
raise AlreadyLocked("%s is already locked" %
self.path)
time.sleep(wait)
else:
# Couldn't create the lock for some other reason
raise LockFailed("failed to create %s" % self.lock_file)
else:
open(self.unique_name, "wb").close()
return
def release(self):
if not self.is_locked():
raise NotLocked("%s is not locked" % self.path)
elif not os.path.exists(self.unique_name):
raise NotMyLock("%s is locked, but not by me" % self.path)
os.unlink(self.unique_name)
os.rmdir(self.lock_file)
def is_locked(self):
return os.path.exists(self.lock_file)
def i_am_locking(self):
return (self.is_locked() and
os.path.exists(self.unique_name))
def break_lock(self):
if os.path.exists(self.lock_file):
for name in os.listdir(self.lock_file):
os.unlink(os.path.join(self.lock_file, name))
os.rmdir(self.lock_file)

lib/lockfile/pidlockfile.py (new file, 193 lines)

@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
# pidlockfile.py
#
# Copyright © 2008–2009 Ben Finney <ben+python@benfinney.id.au>
#
# This is free software: you may copy, modify, and/or distribute this work
# under the terms of the Python Software Foundation License, version 2 or
# later as published by the Python Software Foundation.
# No warranty expressed or implied. See the file LICENSE.PSF-2 for details.
""" Lockfile behaviour implemented via Unix PID files.
"""
from __future__ import absolute_import
import os
import sys
import errno
import time
from . import (LockBase, AlreadyLocked, LockFailed, NotLocked, NotMyLock,
LockTimeout)
class PIDLockFile(LockBase):
""" Lockfile implemented as a Unix PID file.
The lock file is a normal file named by the attribute `path`.
A lock's PID file contains a single line of text, containing
the process ID (PID) of the process that acquired the lock.
>>> lock = PIDLockFile('somefile')
>>> lock = PIDLockFile('somefile')
"""
def __init__(self, path, threaded=False, timeout=None):
# pid lockfiles don't support threaded operation, so always force
# False as the threaded arg.
LockBase.__init__(self, path, False, timeout)
dirname = os.path.dirname(self.lock_file)
basename = os.path.split(self.path)[-1]
self.unique_name = self.path
def read_pid(self):
""" Get the PID from the lock file.
"""
return read_pid_from_pidfile(self.path)
def is_locked(self):
""" Test if the lock is currently held.
The lock is held if the PID file for this lock exists.
"""
return os.path.exists(self.path)
def i_am_locking(self):
""" Test if the lock is held by the current process.
Returns ``True`` if the current process ID matches the
number stored in the PID file.
"""
return self.is_locked() and os.getpid() == self.read_pid()
def acquire(self, timeout=None):
""" Acquire the lock.
Creates the PID file for this lock, or raises an error if
the lock could not be acquired.
"""
timeout = timeout is not None and timeout or self.timeout
end_time = time.time()
if timeout is not None and timeout > 0:
end_time += timeout
while True:
try:
write_pid_to_pidfile(self.path)
except OSError as exc:
if exc.errno == errno.EEXIST:
# The lock creation failed. Maybe sleep a bit.
if timeout is not None and time.time() > end_time:
if timeout > 0:
raise LockTimeout("Timeout waiting to acquire"
" lock for %s" %
self.path)
else:
raise AlreadyLocked("%s is already locked" %
self.path)
time.sleep(timeout is not None and timeout/10 or 0.1)
else:
raise LockFailed("failed to create %s" % self.path)
else:
return
def release(self):
""" Release the lock.
Removes the PID file to release the lock, or raises an
error if the current process does not hold the lock.
"""
if not self.is_locked():
raise NotLocked("%s is not locked" % self.path)
if not self.i_am_locking():
raise NotMyLock("%s is locked, but not by me" % self.path)
remove_existing_pidfile(self.path)
def break_lock(self):
""" Break an existing lock.
Removes the PID file if it already exists, otherwise does
nothing.
"""
remove_existing_pidfile(self.path)
def read_pid_from_pidfile(pidfile_path):
""" Read the PID recorded in the named PID file.
Read and return the numeric PID recorded as text in the named
PID file. If the PID file cannot be read, or if the content is
not a valid PID, return ``None``.
"""
pid = None
try:
pidfile = open(pidfile_path, 'r')
except IOError:
pass
else:
# According to the FHS 2.3 section on PID files in /var/run:
#
# The file must consist of the process identifier in
# ASCII-encoded decimal, followed by a newline character.
#
# Programs that read PID files should be somewhat flexible
# in what they accept; i.e., they should ignore extra
# whitespace, leading zeroes, absence of the trailing
# newline, or additional lines in the PID file.
line = pidfile.readline().strip()
try:
pid = int(line)
except ValueError:
pass
pidfile.close()
return pid
def write_pid_to_pidfile(pidfile_path):
""" Write the PID in the named PID file.
Get the numeric process ID (PID) of the current process
and write it to the named file as a line of text.
"""
open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
open_mode = 0o644
pidfile_fd = os.open(pidfile_path, open_flags, open_mode)
pidfile = os.fdopen(pidfile_fd, 'w')
# According to the FHS 2.3 section on PID files in /var/run:
#
# The file must consist of the process identifier in
# ASCII-encoded decimal, followed by a newline character. For
# example, if crond was process number 25, /var/run/crond.pid
# would contain three characters: two, five, and newline.
pid = os.getpid()
line = "%(pid)d\n" % vars()
pidfile.write(line)
pidfile.close()
def remove_existing_pidfile(pidfile_path):
""" Remove the named PID file if it exists.
Removing a PID file that doesn't already exist puts us in the
desired state, so we ignore the condition if the file does not
exist.
"""
try:
os.remove(pidfile_path)
except OSError as exc:
if exc.errno == errno.ENOENT:
pass
else:
raise
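A short usage sketch of the flow above (the path is illustrative): acquire() creates the PID file with O_CREAT | O_EXCL, read_pid() reports the holder, and release() removes it. Context-manager support comes from LockBase:

from lib.lockfile.pidlockfile import PIDLockFile

lock = PIDLockFile('/tmp/myapp.pid')
with lock:
    print('held by pid', lock.read_pid())  # our own PID while locked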


@ -0,0 +1,155 @@
from __future__ import absolute_import, division
import time
import os
try:
unicode
except NameError:
unicode = str
from . import LockBase, NotLocked, NotMyLock, LockTimeout, AlreadyLocked
class SQLiteLockFile(LockBase):
"Demonstrate SQL-based locking."
testdb = None
def __init__(self, path, threaded=True, timeout=None):
"""
>>> lock = SQLiteLockFile('somefile')
>>> lock = SQLiteLockFile('somefile', threaded=False)
"""
LockBase.__init__(self, path, threaded, timeout)
self.lock_file = unicode(self.lock_file)
self.unique_name = unicode(self.unique_name)
if SQLiteLockFile.testdb is None:
import tempfile
_fd, testdb = tempfile.mkstemp()
os.close(_fd)
os.unlink(testdb)
del _fd, tempfile
SQLiteLockFile.testdb = testdb
import sqlite3
self.connection = sqlite3.connect(SQLiteLockFile.testdb)
c = self.connection.cursor()
try:
c.execute("create table locks"
"("
" lock_file varchar(32),"
" unique_name varchar(32)"
")")
except sqlite3.OperationalError:
pass
else:
self.connection.commit()
import atexit
atexit.register(os.unlink, SQLiteLockFile.testdb)
def acquire(self, timeout=None):
timeout = timeout is not None and timeout or self.timeout
end_time = time.time()
if timeout is not None and timeout > 0:
end_time += timeout
if timeout is None:
wait = 0.1
elif timeout <= 0:
wait = 0
else:
wait = timeout / 10
cursor = self.connection.cursor()
while True:
if not self.is_locked():
# Not locked. Try to lock it.
cursor.execute("insert into locks"
" (lock_file, unique_name)"
" values"
" (?, ?)",
(self.lock_file, self.unique_name))
self.connection.commit()
# Check to see if we are the only lock holder.
cursor.execute("select * from locks"
" where unique_name = ?",
(self.unique_name,))
rows = cursor.fetchall()
if len(rows) > 1:
# Nope. Someone else got there. Remove our lock.
cursor.execute("delete from locks"
" where unique_name = ?",
(self.unique_name,))
self.connection.commit()
else:
# Yup. We're done, so go home.
return
else:
# Check to see if we are the only lock holder.
cursor.execute("select * from locks"
" where unique_name = ?",
(self.unique_name,))
rows = cursor.fetchall()
if len(rows) == 1:
# We're the locker, so go home.
return
# Maybe we should wait a bit longer.
if timeout is not None and time.time() > end_time:
if timeout > 0:
# No more waiting.
raise LockTimeout("Timeout waiting to acquire"
" lock for %s" %
self.path)
else:
# Someone else has the lock and we are impatient.
raise AlreadyLocked("%s is already locked" % self.path)
# Well, okay. We'll give it a bit longer.
time.sleep(wait)
def release(self):
if not self.is_locked():
raise NotLocked("%s is not locked" % self.path)
if not self.i_am_locking():
raise NotMyLock("%s is locked, but not by me (by %s)" %
(self.unique_name, self._who_is_locking()))
cursor = self.connection.cursor()
cursor.execute("delete from locks"
" where unique_name = ?",
(self.unique_name,))
self.connection.commit()
def _who_is_locking(self):
cursor = self.connection.cursor()
cursor.execute("select unique_name from locks"
" where lock_file = ?",
(self.lock_file,))
return cursor.fetchone()[0]
def is_locked(self):
cursor = self.connection.cursor()
cursor.execute("select * from locks"
" where lock_file = ?",
(self.lock_file,))
rows = cursor.fetchall()
return not not rows
def i_am_locking(self):
cursor = self.connection.cursor()
cursor.execute("select * from locks"
" where lock_file = ?"
" and unique_name = ?",
(self.lock_file, self.unique_name))
return not not cursor.fetchall()
def break_lock(self):
cursor = self.connection.cursor()
cursor.execute("delete from locks"
" where lock_file = ?",
(self.lock_file,))
self.connection.commit()


@ -0,0 +1,69 @@
from __future__ import absolute_import
import time
import os
from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout,
AlreadyLocked)
class SymlinkLockFile(LockBase):
"""Lock access to a file using symlink(2)."""
def __init__(self, path, threaded=True, timeout=None):
# super(SymlinkLockFile).__init(...)
LockBase.__init__(self, path, threaded, timeout)
# split it back!
self.unique_name = os.path.split(self.unique_name)[1]
def acquire(self, timeout=None):
# Hopefully unnecessary for symlink.
#try:
# open(self.unique_name, "wb").close()
#except IOError:
# raise LockFailed("failed to create %s" % self.unique_name)
timeout = timeout is not None and timeout or self.timeout
end_time = time.time()
if timeout is not None and timeout > 0:
end_time += timeout
while True:
# Try and create a symbolic link to it.
try:
os.symlink(self.unique_name, self.lock_file)
except OSError:
# Link creation failed. Maybe we've double-locked?
if self.i_am_locking():
# Linked to our unique name. Proceed.
return
else:
# Otherwise the lock creation failed.
if timeout is not None and time.time() > end_time:
if timeout > 0:
raise LockTimeout("Timeout waiting to acquire"
" lock for %s" %
self.path)
else:
raise AlreadyLocked("%s is already locked" %
self.path)
time.sleep(timeout/10 if timeout is not None else 0.1)
else:
# Link creation succeeded. We're good to go.
return
def release(self):
if not self.is_locked():
raise NotLocked("%s is not locked" % self.path)
elif not self.i_am_locking():
raise NotMyLock("%s is locked, but not by me" % self.path)
os.unlink(self.lock_file)
def is_locked(self):
return os.path.islink(self.lock_file)
def i_am_locking(self):
return os.path.islink(self.lock_file) and \
os.readlink(self.lock_file) == self.unique_name
def break_lock(self):
if os.path.islink(self.lock_file): # exists && link
os.unlink(self.lock_file)


@ -42,8 +42,8 @@ is at <http://python-requests.org>.
"""
__title__ = 'requests'
__version__ = '2.2.0'
__build__ = 0x020200
__version__ = '2.3.0'
__build__ = 0x020300
__author__ = 'Kenneth Reitz'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2014 Kenneth Reitz'


@ -310,10 +310,7 @@ class HTTPAdapter(BaseAdapter):
chunked = not (request.body is None or 'Content-Length' in request.headers)
if stream:
timeout = TimeoutSauce(connect=timeout)
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
if not chunked:
@ -372,19 +369,19 @@ class HTTPAdapter(BaseAdapter):
conn._put_conn(low_conn)
except socket.error as sockerr:
raise ConnectionError(sockerr)
raise ConnectionError(sockerr, request=request)
except MaxRetryError as e:
raise ConnectionError(e)
raise ConnectionError(e, request=request)
except _ProxyError as e:
raise ProxyError(e)
except (_SSLError, _HTTPError) as e:
if isinstance(e, _SSLError):
raise SSLError(e)
raise SSLError(e, request=request)
elif isinstance(e, TimeoutError):
raise Timeout(e)
raise Timeout(e, request=request)
else:
raise


@ -26,7 +26,7 @@ def request(method, url, **kwargs):
:param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
:param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload.
:param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
:param timeout: (optional) Float describing the timeout of the request.
:param timeout: (optional) Float describing the timeout of the request in seconds.
:param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
:param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
:param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.


@ -11,7 +11,6 @@ import os
import re
import time
import hashlib
import logging
from base64 import b64encode
@ -19,8 +18,6 @@ from .compat import urlparse, str
from .cookies import extract_cookies_to_jar
from .utils import parse_dict_header
log = logging.getLogger(__name__)
CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
CONTENT_TYPE_MULTI_PART = 'multipart/form-data'


@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""


@ -14,15 +14,22 @@ class RequestException(IOError):
"""There was an ambiguous exception that occurred while handling your
request."""
def __init__(self, *args, **kwargs):
"""
Initialize RequestException with `request` and `response` objects.
"""
response = kwargs.pop('response', None)
self.response = response
self.request = kwargs.pop('request', None)
if (response is not None and not self.request and
hasattr(response, 'request')):
self.request = self.response.request
super(RequestException, self).__init__(*args, **kwargs)
class HTTPError(RequestException):
"""An HTTP error occurred."""
def __init__(self, *args, **kwargs):
""" Initializes HTTPError with optional `response` object. """
self.response = kwargs.pop('response', None)
super(HTTPError, self).__init__(*args, **kwargs)
class ConnectionError(RequestException):
"""A Connection error occurred."""


@ -8,7 +8,6 @@ This module contains the primary objects that power Requests.
"""
import collections
import logging
import datetime
from io import BytesIO, UnsupportedOperation
@ -31,12 +30,20 @@ from .utils import (
from .compat import (
cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO,
is_py2, chardet, json, builtin_str, basestring, IncompleteRead)
from .status_codes import codes
#: The set of HTTP status codes that indicate an automatically
#: processable redirect.
REDIRECT_STATI = (
codes.moved, # 301
codes.found, # 302
codes.other, # 303
codes.temporary_moved, # 307
)
DEFAULT_REDIRECT_LIMIT = 30
CONTENT_CHUNK_SIZE = 10 * 1024
ITER_CHUNK_SIZE = 512
log = logging.getLogger(__name__)
class RequestEncodingMixin(object):
@property
@ -517,7 +524,7 @@ class Response(object):
self._content = False
self._content_consumed = False
#: Integer Code of responded HTTP Status.
#: Integer Code of responded HTTP Status, e.g. 404 or 200.
self.status_code = None
#: Case-insensitive Dictionary of Response Headers.
@ -541,6 +548,7 @@ class Response(object):
#: up here. The list is sorted from the oldest to the most recent request.
self.history = []
#: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK".
self.reason = None
#: A CookieJar of Cookies the server sent back.
@ -567,6 +575,7 @@ class Response(object):
# pickled objects do not have .raw
setattr(self, '_content_consumed', True)
setattr(self, 'raw', None)
def __repr__(self):
return '<Response [%s]>' % (self.status_code)
@ -591,10 +600,16 @@ class Response(object):
return False
return True
@property
def is_redirect(self):
"""True if this Response is a well-formed HTTP redirect that could have
been processed automatically (by :meth:`Session.resolve_redirects`).
"""
return ('location' in self.headers and self.status_code in REDIRECT_STATI)
@property
def apparent_encoding(self):
"""The apparent encoding, provided by the lovely Charade library
(Thanks, Ian!)."""
"""The apparent encoding, provided by the chardet library"""
return chardet.detect(self.content)['encoding']
def iter_content(self, chunk_size=1, decode_unicode=False):
@ -612,8 +627,7 @@ class Response(object):
try:
# Special case for urllib3.
try:
for chunk in self.raw.stream(chunk_size,
decode_content=True):
for chunk in self.raw.stream(chunk_size, decode_content=True):
yield chunk
except IncompleteRead as e:
raise ChunkedEncodingError(e)
@ -644,8 +658,7 @@ class Response(object):
pending = None
for chunk in self.iter_content(chunk_size=chunk_size,
decode_unicode=decode_unicode):
for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):
if pending is not None:
chunk = pending + chunk
@ -693,7 +706,7 @@ class Response(object):
If Response.encoding is None, encoding will be guessed using
``chardet``.
The encoding of the response content is determined based soley on HTTP
The encoding of the response content is determined based solely on HTTP
headers, following RFC 2616 to the letter. If you can take advantage of
non-HTTP knowledge to make a better guess at the encoding, you should
set ``r.encoding`` appropriately before accessing this property.
@ -737,7 +750,14 @@ class Response(object):
# a best guess).
encoding = guess_json_utf(self.content)
if encoding is not None:
return json.loads(self.content.decode(encoding), **kwargs)
try:
return json.loads(self.content.decode(encoding), **kwargs)
except UnicodeDecodeError:
# Wrong UTF codec detected; usually because it's not UTF-8
# but some other 8-bit codec. This is an RFC violation,
# and the server didn't bother to tell us what codec *was*
# used.
pass
return json.loads(self.text, **kwargs)
@property
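Combined with allow_redirects=False, the new is_redirect property lets a caller inspect a redirect without following it. A sketch (URL illustrative):

import requests

resp = requests.get('http://thetvdb.com', allow_redirects=False)
if resp.is_redirect:
    print('redirects to', resp.headers['location'])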


@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings


@ -8,9 +8,9 @@ import socket
from socket import timeout as SocketTimeout
try: # Python 3
from http.client import HTTPConnection, HTTPException
from http.client import HTTPConnection as _HTTPConnection, HTTPException
except ImportError:
from httplib import HTTPConnection, HTTPException
from httplib import HTTPConnection as _HTTPConnection, HTTPException
class DummyConnection(object):
"Used to detect a failed ConnectionCls import."
@ -24,9 +24,9 @@ try: # Compiled with SSL?
pass
try: # Python 3
from http.client import HTTPSConnection
from http.client import HTTPSConnection as _HTTPSConnection
except ImportError:
from httplib import HTTPSConnection
from httplib import HTTPSConnection as _HTTPSConnection
import ssl
BaseSSLError = ssl.SSLError
@ -45,6 +45,69 @@ from .util import (
ssl_wrap_socket,
)
port_by_scheme = {
'http': 80,
'https': 443,
}
class HTTPConnection(_HTTPConnection, object):
default_port = port_by_scheme['http']
# By default, disable Nagle's Algorithm.
tcp_nodelay = 1
def _new_conn(self):
""" Establish a socket connection and set nodelay settings on it
:return: a new socket connection
"""
try:
conn = socket.create_connection(
(self.host, self.port),
self.timeout,
self.source_address,
)
except AttributeError: # Python 2.6
conn = socket.create_connection(
(self.host, self.port),
self.timeout,
)
conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
self.tcp_nodelay)
return conn
def _prepare_conn(self, conn):
self.sock = conn
if self._tunnel_host:
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
def connect(self):
conn = self._new_conn()
self._prepare_conn(conn)
class HTTPSConnection(HTTPConnection):
default_port = port_by_scheme['https']
def __init__(self, host, port=None, key_file=None, cert_file=None,
strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
try:
HTTPConnection.__init__(self, host, port, strict, timeout, source_address)
except TypeError: # Python 2.6
HTTPConnection.__init__(self, host, port, strict, timeout)
self.key_file = key_file
self.cert_file = cert_file
def connect(self):
conn = self._new_conn()
self._prepare_conn(conn)
self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file)
class VerifiedHTTPSConnection(HTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with
@ -73,9 +136,12 @@ class VerifiedHTTPSConnection(HTTPSConnection):
timeout=self.timeout,
)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
self.tcp_nodelay)
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
@ -107,4 +173,6 @@ class VerifiedHTTPSConnection(HTTPSConnection):
if ssl:
# Make a copy for testing.
UnverifiedHTTPSConnection = HTTPSConnection
HTTPSConnection = VerifiedHTTPSConnection


@ -31,6 +31,7 @@ from .exceptions import (
from .packages.ssl_match_hostname import CertificateError
from .packages import six
from .connection import (
port_by_scheme,
DummyConnection,
HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
HTTPException, BaseSSLError,
@ -51,12 +52,6 @@ log = logging.getLogger(__name__)
_Default = object()
port_by_scheme = {
'http': 80,
'https': 443,
}
## Pool objects
class ConnectionPool(object):
@ -169,7 +164,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPConnection`.
Return a fresh :class:`HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
@ -179,9 +174,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
return self.ConnectionCls(host=self.host, port=self.port,
conn = self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
**extra_params)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
conn.tcp_nodelay = 0
return conn
def _get_conn(self, timeout=None):
"""
@ -260,7 +260,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
Perform a request on a given httplib connection object taken from our
Perform a request on a given urllib connection object taken from our
pool.
:param conn:
@ -517,17 +517,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise
except (HTTPException, SocketError) as e:
if isinstance(e, SocketError) and self.proxy is not None:
raise ProxyError('Cannot connect to proxy. '
'Socket error: %s.' % e)
# Connection broken, discard. It will be replaced next _get_conn().
conn = None
# This is necessary so we can access e below
err = e
if retries == 0:
raise MaxRetryError(self, url, e)
if isinstance(e, SocketError) and self.proxy is not None:
raise ProxyError('Cannot connect to proxy. '
'Socket error: %s.' % e)
else:
raise MaxRetryError(self, url, e)
finally:
if release_conn:
@ -565,7 +565,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
When Python is compiled with the :mod:`ssl` module, then
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
instead of :class:`httplib.HTTPSConnection`.
instead of :class:`.HTTPSConnection`.
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
@ -652,6 +652,10 @@ class HTTPSConnectionPool(HTTPConnectionPool):
conn = self.ConnectionCls(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
**extra_params)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
conn.tcp_nodelay = 0
return self._prepare_conn(conn)


@ -1,4 +1,4 @@
'''SSL with SNI-support for Python 2.
'''SSL with SNI_-support for Python 2.
This needs the following packages installed:
@ -18,12 +18,31 @@ your application begins using ``urllib3``, like this::
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
Activating this module also has the positive side effect of disabling SSL/TLS
compression in Python 2 (see `CRIME attack`_).
If you want to configure the default list of supported cipher suites, you can
set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
Module Variables
----------------
:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256
EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES
!MD5 !EXP !PSK !SRP !DSS'``
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
'''
from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
from ndg.httpsclient.subj_alt_name import SubjectAltName
from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
from pyasn1.type import univ, constraint
from socket import _fileobject
import ssl
import select
@ -50,6 +69,13 @@ _openssl_verify = {
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
# Default SSL/TLS cipher list.
# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/
# configuring-apache-nginx-and-openssl-for-forward-secrecy
DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \
'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \
'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS'
orig_util_HAS_SNI = util.HAS_SNI
orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
@ -69,6 +95,17 @@ def extract_from_urllib3():
util.HAS_SNI = orig_util_HAS_SNI
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
class SubjectAltName(BaseSubjectAltName):
'''ASN.1 implementation for subjectAltNames support'''
# There is no limit to how many SAN certificates a certificate may have,
# however this needs to have some limit so we'll set an arbitrarily high
# limit.
sizeSpec = univ.SequenceOf.sizeSpec + \
constraint.ValueSizeConstraint(1, 1024)
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
def get_subj_alt_name(peer_cert):
# Search through extensions
@ -330,6 +367,13 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e)
# Disable TLS compression to mitigate CRIME attack (issue #309)
OP_NO_COMPRESSION = 0x20000
ctx.set_options(OP_NO_COMPRESSION)
# Set list of supported ciphersuites.
ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST)
cnx = OpenSSL.SSL.Connection(ctx, sock)
cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state()


@ -46,16 +46,15 @@ def iter_field_objects(fields):
def iter_fields(fields):
"""
.. deprecated:: 1.6
Iterate over fields.
.. deprecated ::
The addition of `~urllib3.fields.RequestField` makes this function
obsolete. Instead, use :func:`iter_field_objects`, which returns
`~urllib3.fields.RequestField` objects, instead.
The addition of :class:`~urllib3.fields.RequestField` makes this function
obsolete. Instead, use :func:`iter_field_objects`, which returns
:class:`~urllib3.fields.RequestField` objects.
Supports list of (k, v) tuples and dicts.
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))


@ -1,5 +1,5 @@
# urllib3/poolmanager.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
@ -176,7 +176,7 @@ class ProxyManager(PoolManager):
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
:param poxy_url:
:param proxy_url:
The URL of the proxy to be used.
:param proxy_headers:


@ -620,6 +620,11 @@ if SSLContext is not None: # Python 3.2+
"""
context = SSLContext(ssl_version)
context.verify_mode = cert_reqs
# Disable TLS compression to mitigate CRIME attack (issue #309)
OP_NO_COMPRESSION = 0x20000
context.options |= OP_NO_COMPRESSION
if ca_certs:
try:
context.load_verify_locations(ca_certs)


@ -15,9 +15,9 @@ from datetime import datetime
from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str
from .cookies import (
cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
from .models import Request, PreparedRequest
from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
from .hooks import default_hooks, dispatch_hook
from .utils import to_key_val_list, default_headers
from .utils import to_key_val_list, default_headers, to_native_string
from .exceptions import TooManyRedirects, InvalidSchema
from .structures import CaseInsensitiveDict
@ -26,13 +26,9 @@ from .adapters import HTTPAdapter
from .utils import requote_uri, get_environ_proxies, get_netrc_auth
from .status_codes import codes
REDIRECT_STATI = (
codes.moved, # 301
codes.found, # 302
codes.other, # 303
codes.temporary_moved, # 307
)
DEFAULT_REDIRECT_LIMIT = 30
# formerly defined here, reexposed here for backward compatibility
from .models import REDIRECT_STATI
def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
@ -63,6 +59,8 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
if v is None:
del merged_setting[k]
merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None)
return merged_setting
@ -89,8 +87,7 @@ class SessionRedirectMixin(object):
i = 0
# ((resp.status_code is codes.see_other))
while ('location' in resp.headers and resp.status_code in REDIRECT_STATI):
while resp.is_redirect:
prepared_request = req.copy()
resp.content # Consume socket so it can be released
@ -121,7 +118,7 @@ class SessionRedirectMixin(object):
else:
url = requote_uri(url)
prepared_request.url = url
prepared_request.url = to_native_string(url)
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4
if (resp.status_code == codes.see_other and
@ -153,11 +150,24 @@ class SessionRedirectMixin(object):
except KeyError:
pass
extract_cookies_to_jar(prepared_request._cookies,
prepared_request, resp.raw)
extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw)
prepared_request._cookies.update(self.cookies)
prepared_request.prepare_cookies(prepared_request._cookies)
if 'Authorization' in headers:
# If we get redirected to a new host, we should strip out any
# authentication headers.
original_parsed = urlparse(resp.request.url)
redirect_parsed = urlparse(url)
if (original_parsed.hostname != redirect_parsed.hostname):
del headers['Authorization']
# .netrc might have more auth for us.
new_auth = get_netrc_auth(url) if self.trust_env else None
if new_auth is not None:
prepared_request.prepare_auth(new_auth)
resp = self.send(
prepared_request,
stream=stream,
@ -291,7 +301,7 @@ class Session(SessionRedirectMixin):
def request(self, method, url,
params=None,
data=None,
headers={'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'},
headers=None,
cookies=None,
files=None,
auth=None,
@ -320,7 +330,7 @@ class Session(SessionRedirectMixin):
:param auth: (optional) Auth tuple or callable to enable
Basic/Digest/Custom HTTP Auth.
:param timeout: (optional) Float describing the timeout of the
request.
request in seconds.
:param allow_redirects: (optional) Boolean. Set to True by default.
:param proxies: (optional) Dictionary mapping protocol to the URL of
the proxy.
@ -467,8 +477,7 @@ class Session(SessionRedirectMixin):
if not isinstance(request, PreparedRequest):
raise ValueError('You can only send PreparedRequests.')
# Set up variables needed for resolve_redirects and dispatching of
# hooks
# Set up variables needed for resolve_redirects and dispatching of hooks
allow_redirects = kwargs.pop('allow_redirects', True)
stream = kwargs.get('stream')
timeout = kwargs.get('timeout')
@ -482,8 +491,10 @@ class Session(SessionRedirectMixin):
# Start time (approximately) of the request
start = datetime.utcnow()
# Send the request
r = adapter.send(request, **kwargs)
# Total elapsed time of the request (approximately)
r.elapsed = datetime.utcnow() - start
@ -492,15 +503,20 @@ class Session(SessionRedirectMixin):
# Persist cookies
if r.history:
# If the hooks create history then we want those cookies too
for resp in r.history:
extract_cookies_to_jar(self.cookies, resp.request, resp.raw)
extract_cookies_to_jar(self.cookies, request, r.raw)
# Redirect resolving generator.
gen = self.resolve_redirects(r, request, stream=stream,
timeout=timeout, verify=verify, cert=cert,
proxies=proxies)
gen = self.resolve_redirects(r, request,
stream=stream,
timeout=timeout,
verify=verify,
cert=cert,
proxies=proxies)
# Resolve redirects if allowed.
history = [resp for resp in gen] if allow_redirects else []
@ -534,8 +550,10 @@ class Session(SessionRedirectMixin):
"""Registers a connection adapter to a prefix.
Adapters are sorted in descending order by key length."""
self.adapters[prefix] = adapter
keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]
for key in keys_to_move:
self.adapters[key] = self.adapters.pop(key)
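The mount() change above re-sorts adapter keys so the most specific prefix always wins the lookup; a small sketch of the effect (the hostname is illustrative, not from this codebase):

import requests
from requests.adapters import HTTPAdapter

s = requests.Session()
a_generic = HTTPAdapter()
a_specific = HTTPAdapter()
# Regardless of mount order, the re-sorted keys let get_adapter()
# resolve the longest matching prefix first.
s.mount('http://', a_generic)
s.mount('http://services.example.com/', a_specific)
assert s.get_adapter('http://services.example.com/myfeeds/') is a_specific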

View file

@ -548,7 +548,7 @@ def default_user_agent(name="python-requests"):
def default_headers():
return CaseInsensitiveDict({
'User-Agent': default_user_agent(),
'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')),
'Accept-Encoding': ', '.join(('gzip', 'deflate')),
'Accept': '*/*'
})
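Since the underlying stack never reliably decoded 'compress', the default header now advertises only encodings requests can transparently inflate; a quick check against the vendored requests (the expected output assumes this version):

from requests.utils import default_headers

print default_headers()['Accept-Encoding']  # gzip, deflate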

View file

@ -20,7 +20,6 @@ __version__ = "1.9"
import os
import time
import urllib
import urllib2
import getpass
import StringIO
import tempfile
@ -39,8 +38,11 @@ try:
except ImportError:
gzip = None
from lib import requests
from urlparse import urlparse, urlsplit
from lib.cachecontrol.wrapper import CacheControl
from lib.cachecontrol.caches.file_cache import FileCache
from tvdb_cache import CacheHandler
from tvdb_ui import BaseUI, ConsoleUI
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound,
@ -435,26 +437,13 @@ class Tvdb:
if cache is True:
self.config['cache_enabled'] = True
self.config['cache_location'] = self._getTempDir()
self.urlopener = urllib2.build_opener(
CacheHandler(self.config['cache_location'])
)
elif cache is False:
self.config['cache_enabled'] = False
self.urlopener = urllib2.build_opener() # default opener with no caching
elif isinstance(cache, basestring):
self.config['cache_enabled'] = True
self.config['cache_location'] = cache
self.urlopener = urllib2.build_opener(
CacheHandler(self.config['cache_location'])
)
elif isinstance(cache, urllib2.OpenerDirector):
# If passed something from urllib2.build_opener, use that
log().debug("Using %r as urlopener" % cache)
self.config['cache_enabled'] = True
self.urlopener = cache
else:
raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
@ -501,9 +490,11 @@ class Tvdb:
self.config['base_url'] = "http://thetvdb.com"
if self.config['search_all_languages']:
self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php?seriesname=%%s&language=all" % self.config
self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php" % self.config
self.config['params_getSeries'] = {"seriesname": "", "language": "all"}
else:
self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php?seriesname=%%s&language=%(language)s" % self.config
self.config['url_getSeries'] = u"%(base_url)s/api/GetSeries.php" % self.config
self.config['params_getSeries'] = {"seriesname": "", "language": ""}
self.config['url_epInfo'] = u"%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.xml" % self.config
self.config['url_epInfo_zip'] = u"%(base_url)s/api/%(apikey)s/series/%%s/all/%%s.zip" % self.config
@ -529,78 +520,29 @@ class Tvdb:
return os.path.join(tempfile.gettempdir(), "tvdb_api-%s" % (uid))
def retry(ExceptionToCheck, default=None, tries=4, delay=3, backoff=2, logger=None):
"""Retry calling the decorated function using an exponential backoff.
http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
:param ExceptionToCheck: the exception to check. May be a tuple of
exceptions to check
:type ExceptionToCheck: Exception or tuple
:param tries: number of times to try (not retry) before giving up
:type tries: int
:param delay: initial delay between retries in seconds
:type delay: int
:param backoff: backoff multiplier e.g. value of 2 will double the delay
each retry
:type backoff: int
:param logger: logger to use. If None, print
:type logger: logging.Logger instance
"""
def deco_retry(f):
def f_retry(*args, **kwargs):
mtries, mdelay = tries, delay
try_one_last_time = True
while mtries > 1:
try:
print args,kwargs
return f(*args, **kwargs)
try_one_last_time = False
break
except ExceptionToCheck, e:
msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
if logger:
logger.warning(msg)
else:
print msg
time.sleep(mdelay)
mtries -= 1
mdelay *= backoff
if try_one_last_time:
try:
return f(*args, **kwargs)
except ExceptionToCheck, e:
return default
return
return f_retry # true decorator
return deco_retry
@retry(urllib2.URLError, tries=4, delay=3, backoff=2)
def _loadUrl(self, url, recache = False, language=None):
def _loadUrl(self, url, params=None, language=None):
global lastTimeout
try:
log().debug("Retrieving URL %s" % url)
resp = self.urlopener.open(url)
if 'x-local-cache' in resp.headers:
log().debug("URL %s was cached in %s" % (
url,
resp.headers['x-local-cache'])
)
if recache:
log().debug("Attempting to recache %s" % url)
resp.recache()
except (IOError, urllib2.URLError), errormsg:
if not str(errormsg).startswith('HTTP Error'):
lastTimeout = datetime.datetime.now()
raise tvdb_error("Could not connect to server: %s" % (errormsg))
# cacheControl
if self.config['cache_enabled']:
sess = CacheControl(requests.Session(), cache=FileCache(self.config['cache_location']))
else:
sess = requests.Session()
# get response from TVDB
resp = sess.get(url, params=params)
except Exception, e:
if not str(e).startswith('HTTP Error'):
lastTimeout = datetime.datetime.now()
raise tvdb_error("Could not connect to server: %s" % (e))
# handle gzipped content,
# http://dbr.lighthouseapp.com/projects/13342/tickets/72-gzipped-data-patch
if 'gzip' in resp.headers.get("Content-Encoding", ''):
if gzip:
stream = StringIO.StringIO(resp.read())
stream = StringIO.StringIO(resp.content)
gz = gzip.GzipFile(fileobj=stream)
return gz.read()
@ -611,26 +553,24 @@ class Tvdb:
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
log().debug("We recived a zip file unpacking now ...")
zipdata = StringIO.StringIO()
zipdata.write(resp.read())
zipdata.write(resp.content)
myzipfile = zipfile.ZipFile(zipdata)
return myzipfile.read('%s.xml' % language)
except zipfile.BadZipfile:
if 'x-local-cache' in resp.headers:
resp.delete_cache()
raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
return resp.read()
return resp.content
def _getetsrc(self, url, language=None):
def _getetsrc(self, url, params=None, language=None):
"""Loads a URL using caching, returns an ElementTree of the source
"""
src = self._loadUrl(url, language=language)
src = self._loadUrl(url, params=params, language=language)
try:
# TVDB doesn't sanitize \r (CR) from user input in some fields,
# remove it to avoid errors. Change from SickBeard, from will14m
return ElementTree.fromstring(src.rstrip("\r"))
except SyntaxError:
src = self._loadUrl(url, recache=True, language=language)
src = self._loadUrl(url, params=params, language=language)
try:
return ElementTree.fromstring(src.rstrip("\r"))
except SyntaxError, exceptionmsg:
@ -694,7 +634,8 @@ class Tvdb:
"""
series = urllib.quote(series.encode("utf-8"))
log().debug("Searching for show %s" % series)
seriesEt = self._getetsrc(self.config['url_getSeries'] % (series))
self.config['params_getSeries']['seriesname'] = series
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
allSeries = []
for series in seriesEt:
result = dict((k.tag.lower(), k.text) for k in series.getchildren())
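Taken together, a hedged sketch of the GET path _loadUrl now follows (the cache directory and search term are illustrative; the imports mirror the vendored layout above):

from lib import requests
from lib.cachecontrol.wrapper import CacheControl
from lib.cachecontrol.caches.file_cache import FileCache

# Wrap a plain session so GET responses are cached on disk instead of
# going through the old urllib2 CacheHandler.
sess = CacheControl(requests.Session(), cache=FileCache('/tmp/tvdb_api'))

params = {'seriesname': 'Firefly', 'language': 'en'}
resp = sess.get('http://thetvdb.com/api/GetSeries.php', params=params)
print resp.status_code, len(resp.content)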

View file

@ -16,7 +16,6 @@ import os
import re
import time
import urllib
import urllib2
import getpass
import tempfile
import warnings
@ -28,10 +27,12 @@ try:
except ImportError:
import xml.etree.ElementTree as ElementTree
from lib import requests
from lib.dateutil.parser import parse
from tvrage_cache import CacheHandler
from lib import requests
from lib.cachecontrol.wrapper import CacheControl
from lib.cachecontrol.caches.file_cache import FileCache
from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
@ -276,26 +277,13 @@ class TVRage:
if cache is True:
self.config['cache_enabled'] = True
self.config['cache_location'] = self._getTempDir()
self.urlopener = urllib2.build_opener(
CacheHandler(self.config['cache_location'])
)
elif cache is False:
self.config['cache_enabled'] = False
self.urlopener = urllib2.build_opener() # default opener with no caching
elif isinstance(cache, basestring):
self.config['cache_enabled'] = True
self.config['cache_location'] = cache
self.urlopener = urllib2.build_opener(
CacheHandler(self.config['cache_location'])
)
elif isinstance(cache, urllib2.OpenerDirector):
# If passed something from urllib2.build_opener, use that
log().debug("Using %r as urlopener" % cache)
self.config['cache_enabled'] = True
self.urlopener = cache
else:
raise ValueError("Invalid value for Cache %r (type was %s)" % (cache, type(cache)))
@ -336,13 +324,17 @@ class TVRage:
# The following url_ configs are based on the
# http://tvrage.com/wiki/index.php/Programmers_API
self.config['base_url'] = "http://services.tvrage.com"
self.config['url_getSeries'] = u"%(base_url)s/myfeeds/search.php?key=%(apikey)s&show=%%s" % self.config
self.config['url_getSeries'] = u"%(base_url)s/myfeeds/search.php" % self.config
self.config['params_getSeries'] = {"key": self.config['apikey'], "show": ""}
self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php?key=%(apikey)s&sid=%%s" % self.config
self.config['url_epInfo'] = u"%(base_url)s/myfeeds/episode_list.php" % self.config
self.config['params_epInfo'] = {"key": self.config['apikey'], "sid": ""}
self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php?key=%(apikey)s&sid=%%s" % self.config
self.config['url_seriesInfo'] = u"%(base_url)s/myfeeds/showinfo.php" % self.config
self.config['params_seriesInfo'] = {"key": self.config['apikey'], "sid": ""}
def _getTempDir(self):
"""Returns the [system temp dir]/tvrage_api-u501 (or
@ -359,76 +351,27 @@ class TVRage:
return os.path.join(tempfile.gettempdir(), "tvrage_api-%s" % (uid))
def retry(ExceptionToCheck, default=None, tries=4, delay=3, backoff=2, logger=None):
"""Retry calling the decorated function using an exponential backoff.
http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry
:param ExceptionToCheck: the exception to check. May be a tuple of
exceptions to check
:type ExceptionToCheck: Exception or tuple
:param tries: number of times to try (not retry) before giving up
:type tries: int
:param delay: initial delay between retries in seconds
:type delay: int
:param backoff: backoff multiplier e.g. value of 2 will double the delay
each retry
:type backoff: int
:param logger: logger to use. If None, print
:type logger: logging.Logger instance
"""
def deco_retry(f):
def f_retry(*args, **kwargs):
mtries, mdelay = tries, delay
try_one_last_time = True
while mtries > 1:
try:
print args,kwargs
return f(*args, **kwargs)
try_one_last_time = False
break
except ExceptionToCheck, e:
msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
if logger:
logger.warning(msg)
else:
print msg
time.sleep(mdelay)
mtries -= 1
mdelay *= backoff
if try_one_last_time:
try:
return f(*args, **kwargs)
except ExceptionToCheck, e:
return default
return
return f_retry # true decorator
return deco_retry
@retry(urllib2.URLError, tries=4, delay=3, backoff=2)
def _loadUrl(self, url, recache = False):
def _loadUrl(self, url, params=None):
global lastTimeout
try:
log().debug("Retrieving URL %s" % url)
#resp = self.urlopener.open(url)
resp = requests.get(url)
if 'x-local-cache' in resp.headers:
log().debug("URL %s was cached in %s" % (
url,
resp.headers['x-local-cache'])
)
if recache:
log().debug("Attempting to recache %s" % url)
resp.recache()
except (IOError, urllib2.URLError), errormsg:
if not str(errormsg).startswith('HTTP Error'):
# cacheControl
if self.config['cache_enabled']:
sess = CacheControl(requests.Session(), cache=FileCache(self.config['cache_location']))
else:
sess = requests.Session()
# get response from TVRage
resp = sess.get(url, params=params)
except Exception, e:
if not str(e).startswith('HTTP Error'):
lastTimeout = dt.datetime.now()
raise tvrage_error("Could not connect to server: %s" % (errormsg))
raise tvrage_error("Could not connect to server: %s" % (e))
return resp.content
def _getetsrc(self, url):
def _getetsrc(self, url, params=None):
"""Loads a URL using caching, returns an ElementTree of the source
"""
reDict = {
@ -449,7 +392,7 @@ class TVRage:
}
robj = re.compile('|'.join(reDict.keys()))
src = self._loadUrl(url)
src = self._loadUrl(url, params)
try:
# TVRAGE doesn't sanitize \r (CR) from user input in some fields,
# remove it to avoid errors. Change from SickBeard, from will14m
@ -459,24 +402,30 @@ class TVRage:
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
if elm.tag in 'firstaired':
fixDate = parse(elm.text)
value = fixDate.strftime("%Y-%m-%d")
elm.text = value
if elm.text == "0000-00-00":
elm.text = str(dt.date.fromordinal(1))
try:
fixDate = parse(elm.text, fuzzy=True)
elm.text = fixDate.strftime("%Y-%m-%d")
except:
pass
return ElementTree.fromstring(ElementTree.tostring(xml))
except SyntaxError:
src = self._loadUrl(url, recache=True)
src = self._loadUrl(url, params)
try:
xml = ElementTree.fromstring(src.rstrip("\r"))
tree = ElementTree.ElementTree(xml)
for elm in tree.iter():
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
if elm.tag in 'firstaired':
fixDate = parse(elm.text)
value = fixDate.strftime("%Y-%m-%d")
elm.text = value
if elm.tag in 'firstaired' and elm.text:
if elm.text == "0000-00-00":
elm.text = str(dt.date.fromordinal(1))
try:
fixDate = parse(elm.text, fuzzy=True)
elm.text = fixDate.strftime("%Y-%m-%d")
except:
pass
return ElementTree.fromstring(ElementTree.tostring(xml))
except SyntaxError, exceptionmsg:
errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
@ -538,7 +487,8 @@ class TVRage:
"""
series = urllib.quote(series.encode("utf-8"))
log().debug("Searching for show %s" % series)
seriesEt = self._getetsrc(self.config['url_getSeries'] % (series))
self.config['params_getSeries']['show'] = series
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
allSeries = []
seriesResult = {}
for series in seriesEt:
@ -580,8 +530,10 @@ class TVRage:
# Parse show information
log().debug('Getting all series data for %s' % (sid))
self.config['params_seriesInfo']['sid'] = sid
seriesInfoEt = self._getetsrc(
self.config['url_seriesInfo'] % (sid)
self.config['url_seriesInfo'],
self.config['params_seriesInfo']
)
for curInfo in seriesInfoEt:
@ -610,8 +562,8 @@ class TVRage:
# Parse episode data
log().debug('Getting all episodes of %s' % (sid))
url = self.config['url_epInfo'] % (sid)
epsEt = self._getetsrc(url)
self.config['params_epInfo']['sid'] = sid
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
for cur_list in epsEt.findall("Episodelist"):
for cur_seas in cur_list:
try:
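The firstaired normalization above reduces to a small helper; a standalone sketch using the vendored dateutil (the function name and sample inputs are illustrative):

import datetime as dt
from lib.dateutil.parser import parse

def normalize_airdate(text):
    # TVRage sometimes emits '0000-00-00'; substitute the minimal date
    # sentinel, otherwise parse fuzzily and reformat as ISO 8601.
    if text == "0000-00-00":
        return str(dt.date.fromordinal(1))
    try:
        return parse(text, fuzzy=True).strftime("%Y-%m-%d")
    except (ValueError, OverflowError):
        return text  # leave unparseable values untouched

print normalize_airdate("0000-00-00")   # 0001-01-01
print normalize_airdate("14 Mar 2014")  # 2014-03-14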

View file

@ -15,6 +15,9 @@
#
# You should have received a copy of the GNU General Public License
# along with Sick Beard. If not, see <http://www.gnu.org/licenses/>.
import os
import sickbeard
class GenericIndexer(object):
def __init__(self, indexer):
@ -23,10 +26,10 @@ class GenericIndexer(object):
INDEXER_TVDB = 'Tvdb'
INDEXER_TVRAGE = 'TVRage'
INDEXER_NAME = {}
INDEXER_NAME[INDEXER_NONE] = ''
INDEXER_NAME[INDEXER_TVDB] = 'theTVDB'
INDEXER_NAME[INDEXER_TVRAGE] = 'TVRage'
INDEXERS = {}
INDEXERS[INDEXER_NONE] = ''
INDEXERS[INDEXER_TVDB] = 'theTVDB'
INDEXERS[INDEXER_TVRAGE] = 'TVRage'
INDEXER_API_KEY = {}
INDEXER_API_KEY[INDEXER_NONE] = ''
@ -57,6 +60,8 @@ class GenericIndexer(object):
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
self.config['base_url'] = INDEXER_BASEURL[indexer]
self.config['api_parms'] = INDEXER_API_PARMS[indexer]
self.config['name'] = INDEXER_NAME[indexer]
self.base_url = INDEXER_BASEURL[indexer]
self.api_parms = INDEXER_API_PARMS[indexer]
self.indexerName = INDEXERS[indexer]
self.cache = os.path.join(sickbeard.CACHE_DIR, indexer)
self.indexers = [indexer for indexer in INDEXERS]

View file

@ -16,6 +16,7 @@
# You should have received a copy of the GNU General Public License
# along with Sick Beard. If not, see <http://www.gnu.org/licenses/>.
import os
import datetime
import sickbeard
import generic
@ -25,19 +26,18 @@ from lib.tvdb_api.tvdb_api import Tvdb
from lib.tvrage_api.tvrage_api import TVRage
class indexerApi(generic.GenericIndexer):
def __init__(self, *args, **kwargs):
indexer = kwargs.pop('indexer',None)
def __init__(self, indexer=None, *args, **kwargs):
super(indexerApi, self).__init__(indexer)
self.name = self.config['name']
self.name = self.indexerName
if indexer:
self.config['api_parms'].update(**kwargs)
self.api_parms.update(**kwargs)
if sickbeard.CACHE_DIR:
self.config['api_parms']['cache'] = os.path.join(sickbeard.CACHE_DIR, indexer)
self.api_parms['cache'] = self.cache
# wrap the indexer API object and return it back
self._wrapped = eval(indexer)(*args, **self.config['api_parms'])
self._wrapped = eval(indexer)(*args, **self.api_parms)
def __getattr__(self, attr):
return getattr(self._wrapped, attr)
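indexerApi is a thin delegating proxy: anything it doesn't define falls through __getattr__ to the wrapped API object. A minimal sketch of the pattern (class names illustrative); note that a dict mapping indexer names to classes would give the same dispatch without the eval() call:

class FakeIndexer(object):
    def search(self, name):
        return "searching for %s" % name

class ApiProxy(object):
    def __init__(self, wrapped):
        self._wrapped = wrapped

    def __getattr__(self, attr):
        # Only invoked when normal attribute lookup fails, so the
        # proxy's own attributes still shadow the wrapped object's.
        return getattr(self._wrapped, attr)

print ApiProxy(FakeIndexer()).search("Continuum")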

View file

@ -1,29 +0,0 @@
INDEXER_TVDB = 'Tvdb'
INDEXER_TVRAGE = 'TVRage'
INDEXER_API_KEY = {}
INDEXER_API_KEY[INDEXER_TVDB] = '9DAF49C96CBF8DAC'
INDEXER_API_KEY[INDEXER_TVRAGE] = 'Uhewg1Rr0o62fvZvUIZt'
INDEXER_BASEURL = {}
INDEXER_BASEURL[INDEXER_TVDB] = 'http://thetvdb.com/api/' + INDEXER_API_KEY[INDEXER_TVDB]
INDEXER_BASEURL[INDEXER_TVRAGE] = 'http://tvrage.com/feeds/' + INDEXER_API_KEY[INDEXER_TVRAGE]
INDEXER_API_PARMS = {}
INDEXER_API_PARMS[INDEXER_TVDB] = {'apikey': INDEXER_API_KEY[INDEXER_TVDB],
'language': 'en',
'useZip': True}
INDEXER_API_PARMS[INDEXER_TVRAGE] = {'apikey': INDEXER_API_KEY[INDEXER_TVRAGE],
'language': 'en'}
INDEXER_CONFIG = {}
INDEXER_CONFIG['valid_languages'] = [
"da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr",
"ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"]
INDEXER_CONFIG['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}

View file

@ -2,30 +2,19 @@ from __future__ import with_statement
import unittest
import sqlite3
import sys
import os.path
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../lib'))
sys.path.append(os.path.abspath('../../../lib'))
import sickbeard
import shutil
from sickbeard import encodingKludge as ek, providers, tvcache
from sickbeard import db
from sickbeard.databases import mainDB
from sickbeard.databases import cache_db
from indexer_api import indexerApi
from indexer_exceptions import indexer_exception
from sickbeard.indexers.indexer_api import indexerApi
from sickbeard.indexers.indexer_exceptions import indexer_exception
class APICheck(unittest.TestCase):
indexer_id = 258171
indexer = 'Tvdb'
indexer_id = 'Continuum'
indexer = 'TVRage'
# Set our common indexer_api options here
INDEXER_API_PARMS = {'apikey': '9DAF49C96CBF8DAC',
INDEXER_API_PARMS = {'apikey': 'Uhewg1Rr0o62fvZvUIZt',
'language': 'en',
'useZip': True}
@ -34,11 +23,9 @@ class APICheck(unittest.TestCase):
lindexer_api_parms = INDEXER_API_PARMS.copy()
try:
imdbid = " "
showurl = indexerApi(**lindexer_api_parms).config['base_url'] + indexer_id + '/all/en.zip'
t = indexerApi().config['valid_languages']
t = indexerApi(**lindexer_api_parms)
myEp = t[258171]
# showurl = indexerApi(**lindexer_api_parms).config['base_url'] + str(indexer_id) + '/all/en.zip'
t = indexerApi(cache=True, **lindexer_api_parms)
myEp = t[indexer_id]
if getattr(myEp, 'seriesname', None) is not None:
print "FOUND"

View file

@ -153,7 +153,7 @@ class XBMC_12PlusMetadata(generic.GenericMetadata):
episodeguideurl = etree.SubElement(episodeguide, "url")
episodeguideurl2 = etree.SubElement(tv_node, "episodeguideurl")
if getattr(myShow, 'id', None) is not None:
showurl = t.config['base_url'] + myShow["id"] + '/all/en.zip'
showurl = t.base_url + myShow["id"] + '/all/en.zip'
episodeguideurl.text = showurl
episodeguideurl2.text = showurl

View file

@ -28,6 +28,7 @@ from sickbeard.exceptions import ex
from sickbeard import encodingKludge as ek
from sickbeard import db
from indexers.indexer_api import indexerApi
class ShowUpdater():
@ -39,7 +40,7 @@ class ShowUpdater():
# update at 3 AM
run_updater_time = datetime.time(hour=3)
update_datetime = datetime.datetime.today()
update_datetime = datetime.datetime.today()
update_date = update_datetime.date()
logger.log(u"Checking update interval", logger.DEBUG)
@ -54,29 +55,30 @@ class ShowUpdater():
# clean out cache directory, remove everything > 12 hours old
if sickbeard.CACHE_DIR:
cache_dir = sickbeard.INDEXER_API_PARMS['cache']
logger.log(u"Trying to clean cache folder " + cache_dir)
for indexer in indexerApi().indexers:
cache_dir = indexerApi(indexer=indexer).cache
logger.log(u"Trying to clean cache folder " + cache_dir)
# Does our cache_dir exist?
if not ek.ek(os.path.isdir, cache_dir):
logger.log(u"Can't clean " + cache_dir + " if it doesn't exist", logger.WARNING)
else:
max_age = datetime.timedelta(hours=12)
# Get all our cache files
cache_files = ek.ek(os.listdir, cache_dir)
# Does our cache_dir exist?
if not ek.ek(os.path.isdir, cache_dir):
logger.log(u"Can't clean " + cache_dir + " if it doesn't exist", logger.WARNING)
else:
max_age = datetime.timedelta(hours=12)
# Get all our cache files
cache_files = ek.ek(os.listdir, cache_dir)
for cache_file in cache_files:
cache_file_path = ek.ek(os.path.join, cache_dir, cache_file)
for cache_file in cache_files:
cache_file_path = ek.ek(os.path.join, cache_dir, cache_file)
if ek.ek(os.path.isfile, cache_file_path):
cache_file_modified = datetime.datetime.fromtimestamp(ek.ek(os.path.getmtime, cache_file_path))
if ek.ek(os.path.isfile, cache_file_path):
cache_file_modified = datetime.datetime.fromtimestamp(ek.ek(os.path.getmtime, cache_file_path))
if update_datetime - cache_file_modified > max_age:
try:
ek.ek(os.remove, cache_file_path)
except OSError, e:
logger.log(u"Unable to clean " + cache_dir + ": " + repr(e) + " / " + str(e), logger.WARNING)
break
if update_datetime - cache_file_modified > max_age:
try:
ek.ek(os.remove, cache_file_path)
except OSError, e:
logger.log(u"Unable to clean " + cache_dir + ": " + repr(e) + " / " + str(e), logger.WARNING)
break
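The per-indexer sweep above boils down to an mtime check; a sketch without the encodingKludge wrappers (the function name is illustrative):

import datetime
import os

def clean_cache(cache_dir, max_age=datetime.timedelta(hours=12)):
    # Remove plain files whose mtime is older than max_age; stop on the
    # first failure, as the updater loop above does.
    now = datetime.datetime.now()
    for name in os.listdir(cache_dir):
        path = os.path.join(cache_dir, name)
        if not os.path.isfile(path):
            continue
        modified = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        if now - modified > max_age:
            try:
                os.remove(path)
            except OSError, e:
                print "Unable to clean %s: %r" % (cache_dir, e)
                break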
# select 10 'Ended' tv_shows updated more than 90 days ago to include in this update
stale_should_update = []

View file

@ -451,7 +451,7 @@ class QueueItemUpdate(ShowQueueItem):
logger.log(u"Retrieving show info from " + self.show.indexer + "", logger.DEBUG)
try:
self.show.loadFromIndexer(cache=not self.force)
self.show.loadFromIndexer(cache=self.force)
except indexer_exceptions.indexer_error, e:
logger.log(u"Unable to contact " + self.show.indexer + ", aborting: " + ex(e), logger.WARNING)
return