mirror of
https://github.com/SickGear/SickGear.git
synced 2024-11-27 15:13:38 +00:00
d2f7bb6104
Fix TVC images. Change center the default card background and hide hover text decoration. Change correct typos
209 lines
8.9 KiB
Python
209 lines
8.9 KiB
Python
from requests.exceptions import RequestException
|
|
from requests.models import Response
|
|
from requests.sessions import Session
|
|
|
|
import logging
|
|
import random
|
|
import re
|
|
import time
|
|
|
|
from _23 import b64encodestring, urlparse
|
|
|
|
|
|
# Pool of desktop browser User-Agent strings, CloudflareScraper picks one at random
# when the session still carries the stock `requests` User-Agent
DEFAULT_USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko)'
    ' Chrome/50.0.2661.102 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0',
]
|
|
|
|
|
|
class CloudflareError(RequestException):
    """
    Raised when a Cloudflare challenge cannot be handled, e.g. a captcha is presented.
    """
|
|
|
|
|
|
class CloudflareScraper(Session):
    """
    A `requests` Session that reacts to anti-bot challenge pages.

    Responses with status 503, 429 or 403 are inspected. Cloudflare challenges are handed to
    a FlareSolverr service (see `get_content`), pages containing a `ddgu` marker are answered
    by posting the challenge form (see `solve_ddg_challenge`).
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: `delay` is the number of seconds to let pass between receiving a challenge
            and answering it (default 8). Any other keyword is accepted and ignored.
        """
        super(CloudflareScraper, self).__init__()

        if 'requests' in self.headers['User-Agent']:
            # Set a random User-Agent if no custom User-Agent has been set
            self.headers['User-Agent'] = random.choice(DEFAULT_USER_AGENTS)
        # remembered for the captcha error message in solve_cf_challenge
        self.cf_ua = self.headers['User-Agent']

        self.default_delay = 8
        self.delay = kwargs.pop('delay', self.default_delay)
        # time.time() of the most recent challenge response, set by request()
        self.start_time = None

        self.trust_env = False

    def request(self, method, url, *args, **kwargs):
        """
        Send a request and try to solve a challenge when one is returned.

        Two extra keywords are consumed here and not passed to `requests`:
        `url_solver` (base url of the FlareSolverr service) and `proxy_browser`
        (truthy to fetch through FlareSolverr instead of requesting directly).

        :return: the response, possibly replaced by the result of solving a challenge.
            May be None when fetching through FlareSolverr did not succeed.
        """
        url_solver = kwargs.pop('url_solver', None)

        if kwargs.pop('proxy_browser', None):
            resp = self.get_content(method, url, url_solver,
                                    user_agent=self.headers.get('User-Agent'), proxy_browser=True, **kwargs)
        else:
            resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)

        if isinstance(resp, Response) and resp.status_code in (503, 429, 403):
            self.start_time = time.time()
            content = resp.content
            is_cf_server = re.search('(?i)cloudflare', resp.headers.get('Server', ''))
            # NOTE(review): the original condition mixed `and`/`or` without parentheses, the grouping
            # below is how Python evaluated it, i.e. the jschl markers alone trigger the solver
            # even when the Server header is not Cloudflare - confirm that this is intended
            if (is_cf_server and b'_cf_chl_' in content) \
                    or (b'jschl_vc' in content and b'jschl_answer' in content):
                resp = self.solve_cf_challenge(resp, url_solver, **kwargs)
            elif b'ddgu' in content:
                resp = self.solve_ddg_challenge(resp, **kwargs)

        return resp

    def wait(self):
        """
        Sleep for what is left of `self.delay` seconds since the challenge was received.

        Returns at once when no challenge has been timestamped yet.
        """
        if None is self.start_time:
            return
        remaining = self.delay - (time.time() - self.start_time)
        time.sleep(max(0, remaining))  # required delay before solving the challenge

    def solve_ddg_challenge(self, resp, **original_kwargs):
        """
        Answer a `ddgu` challenge by posting base64 encoded parts of the url to the challenge form.

        This is best effort, on any failure the original challenge response is returned.

        :param resp: the challenge response
        :param original_kwargs: keywords of the challenged request, reused for the POST without `hooks`
        :return: response to the POST, or `resp` if the challenge could not be answered
        """
        parsed_url = urlparse(resp.url)
        try:
            submit_url = parsed_url.scheme + ':' + re.findall('"frm"[^>]+?action="([^"]+)"', resp.text)[0]
            kwargs = {k: v for k, v in original_kwargs.items() if 'hooks' != k}
            kwargs.setdefault('headers', {})
            kwargs.setdefault('data', dict(
                h=b64encodestring('%s://%s' % (parsed_url.scheme, parsed_url.hostname)),
                u=b64encodestring(parsed_url.path),
                # port is an int when the url has one, the other values given to the encoder are text
                p=b64encodestring(str(parsed_url.port or ''))
            ))
            self.wait()
            resp = self.request('POST', submit_url, **kwargs)
        except Exception as e:
            # deliberately non-fatal, but no longer hides KeyboardInterrupt/SystemExit
            logging.debug('Unable to answer ddg challenge for %s: %r', resp.url, e)
        return resp

    def test_flaresolverr(self, url_solver):
        """
        Test if FlareSolverr software is running.

        :param url_solver: base url of the FlareSolverr service
        :return: the truthy `version` value reported by the service
        :raises ValueError: if no version is reported
        """
        response_test = super(CloudflareScraper, self).request('GET', url_solver)
        fs_ver = None
        if 200 == response_test.status_code:
            try:
                json_data = response_test.json()
            except ValueError:
                # not JSON, so whatever answered is not FlareSolverr
                json_data = None
            if isinstance(json_data, dict):
                fs_ver = json_data.get('version') or None
        if None is fs_ver:
            raise ValueError('FlareSolverr software not found (is it running?)')
        return fs_ver

    def get_content(self, method, url, url_solver, user_agent, proxy_browser=False, **kwargs):
        """
        Fetch `url` through the FlareSolverr service and copy the cookies it returns into this session.

        :param method: HTTP method, sent to the service as the `request.<method>` command
        :param url: url to fetch
        :param url_solver: base url of the service, a trailing `/` or `v1` is stripped,
            `http://localhost:8191` is used when empty
        :param user_agent: sent as `userAgent` only when the service version contains `v1`
        :param proxy_browser: only selects the wording of error messages
        :param kwargs: accepted for call compatibility, not forwarded to the service
        :return: the response of the service itself (its JSON reply, not the fetched page),
            or None when the service answered ok at HTTP level but did not report status `ok`
        :raises ValueError: if the service is not found, cannot be queried, or reports an error
        """
        url_solver = url_solver and re.sub(r'(/|v1)*$', '', url_solver) or 'http://localhost:8191'
        # probe once and reuse the version, the original probed the service twice per call
        fs_ver = self.test_flaresolverr(url_solver)
        if not fs_ver:
            raise ValueError('No FlareSolverr software running %sat %s' % (('to solve Cloudflare challenge ',
                                                                            '')[proxy_browser], url_solver))
        try:
            params = dict(userAgent=user_agent) if 'v1' in fs_ver else {}
            params.update(dict(
                cmd='request.%s' % method.lower(), url=url,
                cookies=[{'name': cur_ckee.name, 'value': cur_ckee.value,
                          'domain': cur_ckee.domain, 'path': cur_ckee.path} for cur_ckee in self.cookies]))
            response = super(CloudflareScraper, self).request('POST', '%s/v1' % url_solver, json=params)
        except Exception as e:
            # Exception rather than BaseException so that Ctrl-C is not turned into a ValueError
            raise ValueError('FlareSolverr software unable to %s: %r' % (('solve Cloudflare anti-bot IUAM challenge',
                                                                          'fetch content')[proxy_browser], e))

        if None is not response:
            data_json = response.json()
            # only a dict can be queried with .get(), a JSON list is treated as an empty result
            result = data_json if isinstance(data_json, dict) else {}
            if response.ok:
                if 'ok' == result.get('status'):
                    self.cookies.clear()
                    for cur_ckee in (result.get('solution') or {}).get('cookies') or []:
                        # skip empty cookies and the analytics ones
                        if cur_ckee.get('value') and cur_ckee.get('name') not in ('', None, '_gid', '_ga', '_gat'):
                            self.cookies.set(
                                cur_ckee['name'], cur_ckee['value'],
                                rest={'httpOnly': cur_ckee.get('httpOnly'), 'session': cur_ckee.get('session')},
                                **{k: cur_ckee.get(k) for k in ('expires', 'domain', 'path', 'secure')})
                else:
                    response = None
            elif 'error' == result.get('status'):
                raise ValueError('Failure with FlareSolverr: %s' % result.get('message', 'See the FlareSolver output'))

        return response

    def solve_cf_challenge(self, resp, url_solver, **original_kwargs):
        """
        Pass a Cloudflare challenge to FlareSolverr.

        The challenged url is fetched with GET, except that for a challenged POST
        the site root is fetched instead.

        :param resp: the challenge response
        :param url_solver: base url of the FlareSolverr service
        :param original_kwargs: keywords of the challenged request, unused here
        :return: the result of `get_content`
        :raises CloudflareError: if the challenge page is a captcha
        :raises ValueError: if `get_content` returned nothing
        """
        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc

        if '/cdn-cgi/l/chk_captcha' in body or 'cf_chl_captcha' in body:
            raise CloudflareError(
                'Cloudflare captcha presented for %s, safe to ignore as this shouldn\'t happen every time, ua: %s' %
                (domain, self.cf_ua), response=resp)

        if 'POST' == resp.request.method:
            fetch_url = '%s://%s/' % (parsed_url.scheme, domain)
        else:
            fetch_url = resp.request.url

        final_response = self.get_content(
            'GET', fetch_url, url_solver, user_agent=resp.request.headers.get('User-Agent'))
        if None is final_response:
            raise ValueError('Failed to validate Cloudflare anti-bot IUAM challenge')

        return final_response

    @classmethod
    def create_scraper(cls, sess=None, **kwargs):
        """
        Convenience function for creating a ready-to-go CloudflareScraper object.

        :param sess: optional session, its truthy attributes (auth, cert, cookies, headers,
            hooks, params, proxies, data) are copied onto the new scraper
        :param kwargs: passed to the constructor
        """
        scraper = cls(**kwargs)

        if sess:
            for attr in ('auth', 'cert', 'cookies', 'headers', 'hooks', 'params', 'proxies', 'data'):
                val = getattr(sess, attr, None)
                if val:
                    setattr(scraper, attr, val)

        return scraper

    # Functions for integrating cloudflare-scrape with other applications and scripts

    @classmethod
    def get_tokens(cls, url, user_agent=None, **kwargs):
        """
        Request `url` with a new scraper and return the Cloudflare cookies that were collected.

        :param url: url to request
        :param user_agent: optional User-Agent to use instead of the default one
        :param kwargs: passed to the GET request
        :return: tuple of (dict with `__cfduid` and `cf_clearance` values, User-Agent used)
        :raises ValueError: if no cookie domain matches the domain of the response
        """
        scraper = cls.create_scraper()
        if user_agent:
            scraper.headers['User-Agent'] = user_agent

        try:
            resp = scraper.get(url, **kwargs)
            resp.raise_for_status()
        except (BaseException, Exception):
            logging.error('[%s] returned an error. Could not collect tokens.' % url)
            raise

        domain = urlparse(resp.url).netloc

        for d in scraper.cookies.list_domains():
            # note, `in` is a substring test on the string '.' + domain, this is not tuple membership
            if d.startswith('.') and d in ('.' + domain):
                cookie_domain = d
                break
        else:
            raise ValueError('Unable to find Cloudflare cookies.'
                             ' Does the site actually have Cloudflare IUAM (\'I\'m Under Attack Mode\') enabled?')

        return (
            {'__cfduid': scraper.cookies.get('__cfduid', '', domain=cookie_domain),
             'cf_clearance': scraper.cookies.get('cf_clearance', '', domain=cookie_domain)},
            scraper.headers['User-Agent'])

    @classmethod
    def get_cookie_string(cls, url, user_agent=None, **kwargs):
        """
        Convenience function for building a Cookie HTTP header value.

        :return: tuple of (`name=value; name=value` string from `get_tokens`, User-Agent used)
        """
        tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
        return '; '.join('='.join(pair) for pair in tokens.items()), user_agent
|
|
|
|
|
|
# Module level shortcuts so callers can use these without naming the class
create_scraper, get_tokens, get_cookie_string = (
    CloudflareScraper.create_scraper, CloudflareScraper.get_tokens, CloudflareScraper.get_cookie_string)
|