Add MagnetDL torrent provider.

DRY refactor download_file with getURL.
This commit is contained in:
JackDandy 2017-06-23 23:13:58 +01:00
parent 92ef777f2b
commit 297c4a2785
5 changed files with 183 additions and 112 deletions

View file

@ -50,6 +50,7 @@
* Add option to limit WebDL propers to original release group under Config/Search/Media Search
* Change add IPv4 config option when enabling IPv6.
* Add autoProcessTV/onTxComplete.bat to improve Windows clients Deluge, qBittorrent, Transmission, and uTorrent
* Add MagnetDL torrent provider
* Add Skytorrents torrent provider
* Change do not have shows checked by default on import page. To re-enable import shows checked by default,
1) On config page 'Save' 2) Stop SG 3) Find 'import_default_checked_shows' in config.ini and set '1' 4) Start SG

Binary file not shown.

After

Width:  |  Height:  |  Size: 969 B

View file

@ -1103,46 +1103,58 @@ def proxy_setting(proxy_setting, request_url, force=False):
def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=None, json=False,
raise_status_code=False, raise_exceptions=False, **kwargs):
"""
Returns a byte-string retrieved from the url provider.
Either
1) Returns a byte-string retrieved from the url provider.
2) Returns True on success, otherwise None, when kwarg 'savename' is set to a file pathname.
"""
# request session
if None is session:
session = CloudflareScraper.create_scraper()
if not kwargs.get('nocache'):
cache_dir = sickbeard.CACHE_DIR or _getTempDir()
session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions')))
else:
del(kwargs['nocache'])
# request session headers
req_headers = {'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'}
if headers:
req_headers.update(headers)
if hasattr(session, 'reserved') and 'headers' in session.reserved:
req_headers.update(session.reserved['headers'] or {})
session.headers.update(req_headers)
# download and save file or simply fetch url
savename = None
if 'savename' in kwargs:
# session streaming
session.stream = True
savename = kwargs.pop('savename')
# selectively mute some errors
mute = []
for muted in filter(
lambda x: kwargs.get(x, False), ['mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout']):
mute += [muted]
del kwargs[muted]
# request session ssl verify
session.verify = False
# reuse or instantiate request session
if None is session:
session = CloudflareScraper.create_scraper()
# request session parameters
if 'nocache' in kwargs:
del kwargs['nocache']
else:
cache_dir = sickbeard.CACHE_DIR or _getTempDir()
session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions')))
# session master headers
req_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip,deflate', 'User-Agent': USER_AGENT}
if headers:
req_headers.update(headers)
if hasattr(session, 'reserved') and 'headers' in session.reserved:
req_headers.update(session.reserved['headers'] or {})
session.headers.update(req_headers)
# session parameters
session.params = params
# session ssl verify
session.verify = False
response = None
try:
# Remove double-slashes from url
# sanitise url
parsed = list(urlparse.urlparse(url))
parsed[2] = re.sub("/{2,}", "/", parsed[2]) # replace two or more / with one
parsed[2] = re.sub('/{2,}', '/', parsed[2]) # replace two or more / with one
url = urlparse.urlunparse(parsed)
# request session proxies
# session proxies
if sickbeard.PROXY_SETTING:
(proxy_address, pac_found) = proxy_setting(sickbeard.PROXY_SETTING, url)
msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
@ -1151,46 +1163,45 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N
return
elif proxy_address:
logger.log('Using %s' % msg, logger.DEBUG)
session.proxies = {
'http': proxy_address,
'https': proxy_address
}
session.proxies = {'http': proxy_address, 'https': proxy_address}
# decide if we get or post data to server
if 'post_json' in kwargs:
kwargs.setdefault('json', kwargs.get('post_json'))
del(kwargs['post_json'])
kwargs.setdefault('json', kwargs.pop('post_json'))
if post_data:
kwargs.setdefault('data', post_data)
if 'data' in kwargs or 'json' in kwargs:
resp = session.post(url, timeout=timeout, **kwargs)
response = session.post(url, timeout=timeout, **kwargs)
else:
resp = session.get(url, timeout=timeout, **kwargs)
if resp.ok and not resp.content and 'url=' in resp.headers.get('Refresh', '').lower():
url = resp.headers.get('Refresh').lower().split('url=')[1].strip('/')
response = session.get(url, timeout=timeout, **kwargs)
if response.ok and not response.content and 'url=' in response.headers.get('Refresh', '').lower():
url = response.headers.get('Refresh').lower().split('url=')[1].strip('/')
if not url.startswith('http'):
parsed[2] = '/%s' % url
url = urlparse.urlunparse(parsed)
resp = session.get(url, timeout=timeout, **kwargs)
response = session.get(url, timeout=timeout, **kwargs)
if raise_status_code:
resp.raise_for_status()
response.raise_for_status()
if not resp.ok:
http_err_text = 'CloudFlare Ray ID' in resp.content and 'CloudFlare reports, "Website is offline"; ' or ''
if resp.status_code in clients.http_error_code:
http_err_text += clients.http_error_code[resp.status_code]
elif resp.status_code in range(520, 527):
if not response.ok:
http_err_text = 'CloudFlare Ray ID' in response.content and \
'CloudFlare reports, "Website is offline"; ' or ''
if response.status_code in clients.http_error_code:
http_err_text += clients.http_error_code[response.status_code]
elif response.status_code in range(520, 527):
http_err_text += 'Origin server connection failure'
else:
http_err_text = 'Custom HTTP error code'
logger.log(u'Response not ok. %s: %s from requested url %s'
% (resp.status_code, http_err_text, url), logger.DEBUG)
% (response.status_code, http_err_text, url), logger.DEBUG)
return
except requests.exceptions.HTTPError as e:
if raise_status_code:
resp.raise_for_status()
response.raise_for_status()
logger.log(u'HTTP error %s while loading URL%s' % (
e.errno, _maybe_request_url(e)), logger.WARNING)
return
@ -1228,7 +1239,7 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N
if json:
try:
data_json = resp.json()
data_json = response.json()
return ({}, data_json)[isinstance(data_json, (dict, list))]
except (TypeError, Exception) as e:
logger.log(u'JSON data issue from URL %s\r\nDetail... %s' % (url, e.message), logger.WARNING)
@ -1236,81 +1247,36 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N
raise e
return None
return resp.content
if savename:
try:
with open(savename, 'wb') as fp:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
fp.write(chunk)
fp.flush()
ek.ek(os.fsync, fp.fileno())
chmodAsParent(savename)
except EnvironmentError as e:
logger.log(u'Unable to save the file: ' + ex(e), logger.ERROR)
if raise_exceptions:
raise e
return
return True
return response.content
def _maybe_request_url(e, def_url=''):
return hasattr(e, 'request') and hasattr(e.request, 'url') and ' ' + e.request.url or def_url
def download_file(url, filename, session=None):
# create session
if None is session:
session = CloudflareScraper.create_scraper()
cache_dir = sickbeard.CACHE_DIR or _getTempDir()
session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions')))
def download_file(url, filename, session=None, **kwargs):
# request session headers
session.headers.update({'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip,deflate'})
if hasattr(session, 'reserved') and 'headers' in session.reserved:
session.headers.update(session.reserved['headers'] or {})
# request session ssl verify
session.verify = False
# request session streaming
session.stream = True
# request session proxies
if sickbeard.PROXY_SETTING:
(proxy_address, pac_found) = proxy_setting(sickbeard.PROXY_SETTING, url)
msg = '%sproxy for url: %s' % (('', 'PAC parsed ')[pac_found], url)
if None is proxy_address:
logger.log('Proxy error, aborted the request using %s' % msg, logger.DEBUG)
return
elif proxy_address:
logger.log('Using %s' % msg, logger.DEBUG)
session.proxies = {
'http': proxy_address,
'https': proxy_address
}
try:
resp = session.get(url)
if not resp.ok:
logger.log(u"Requested url " + url + " returned status code is " + str(
resp.status_code) + ': ' + clients.http_error_code[resp.status_code], logger.DEBUG)
return False
with open(filename, 'wb') as fp:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
fp.write(chunk)
fp.flush()
ek.ek(os.fsync, fp.fileno())
chmodAsParent(filename)
except requests.exceptions.HTTPError as e:
if None is getURL(url, session=session, savename=filename, **kwargs):
remove_file_failed(filename)
logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
return False
except requests.exceptions.ConnectionError as e:
remove_file_failed(filename)
logger.log(u"Connection error " + str(e.message) + " while loading URL " + url, logger.WARNING)
return False
except requests.exceptions.Timeout as e:
remove_file_failed(filename)
logger.log(u"Connection timed out " + str(e.message) + " while loading URL " + url, logger.WARNING)
return False
except EnvironmentError as e:
remove_file_failed(filename)
logger.log(u"Unable to save the file: " + ex(e), logger.ERROR)
return False
except Exception:
remove_file_failed(filename)
logger.log(u"Unknown exception while loading URL " + url + ": " + traceback.format_exc(), logger.WARNING)
return False
return True

View file

@ -28,7 +28,7 @@ from . import newznab, omgwtfnzbs
# torrent
from . import alpharatio, beyondhd, bithdtv, bitmetv, btn, btscene, dh, \
fano, filelist, funfile, gftracker, grabtheinfo, hd4free, hdbits, hdspace, hdtorrents, \
iptorrents, limetorrents, morethan, nebulance, ncore, nyaa, pisexy, pretome, privatehd, ptf, \
iptorrents, limetorrents, magnetdl, morethan, nebulance, ncore, nyaa, pisexy, pretome, privatehd, ptf, \
rarbg, revtt, scenetime, shazbat, skytorrents, speedcd, \
thepiratebay, torlock, torrentday, torrenting, torrentleech, \
torrentz2, tvchaosuk, zooqle
@ -37,7 +37,7 @@ from . import anizb, tokyotoshokan
# custom
try:
from . import custom01
except:
except (StandardError, Exception):
pass
__all__ = ['omgwtfnzbs',
@ -61,6 +61,7 @@ __all__ = ['omgwtfnzbs',
'hdtorrents',
'iptorrents',
'limetorrents',
'magnetdl',
'morethan',
'nebulance',
'ncore',

View file

@ -0,0 +1,103 @@
# coding=utf-8
#
# This file is part of SickGear.
#
# SickGear is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickGear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickGear. If not, see <http://www.gnu.org/licenses/>.
import re
import traceback
from . import generic
from sickbeard import logger
from sickbeard.bs4_parser import BS4Parser
from sickbeard.helpers import tryInt
from lib.unidecode import unidecode
class MagnetDLProvider(generic.TorrentProvider):
    """Torrent provider that scrapes magnet links from MagnetDL result tables."""

    def __init__(self):
        """Register the provider name, cache update frequency and the site urls."""
        generic.TorrentProvider.__init__(self, 'MagnetDL', cache_update_freq=6)

        self.url_base = 'http://www.magnetdl.com/'
        self.urls = {
            'config_provider_home_uri': self.url_base,
            'browse': self.url_base + 'download/tv/',
            'search': self.url_base + 'search/?m=1&q=%s',
        }

        # seed/leech minimums start unset
        self.minseed = None
        self.minleech = None

    def _search_provider(self, search_params, **kwargs):
        """
        Fetch and parse a result page for every search string of every mode.

        :param search_params: mapping of mode name ('Cache', 'Season', 'Episode' or 'Propers')
            to an iterable of search strings
        :param kwargs: accepted but not used in this method
        :return: the outcome of self._sort_seeding() applied mode by mode to the collected
            (title, download_url, seeders, size) tuples
        """
        results = []
        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        # case-insensitive href matchers: detail page link and magnet link
        rc = {}
        for name, pattern in {'info': '^/file/', 'get': '^magnet:'}.items():
            rc[name] = re.compile('(?i)' + pattern)

        for mode in search_params:
            for search_string in search_params[mode]:
                if 'Cache' == mode:
                    # cache mode reads the browse listing instead of running a search
                    search_url = self.urls['browse']
                else:
                    if isinstance(search_string, unicode):
                        search_string = unidecode(search_string) or search_string
                    search_url = self.urls['search'] % re.sub('[.\s]+', ' ', search_string)

                html = self.get_url(search_url)

                # remember the current count so only items added by this page are logged
                found_before = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                        table = soup.find('table', attrs={'class': 'download'})
                        if not table:
                            rows = []
                        else:
                            rows = table.find_all('tr')

                        # need at least one row following the first row
                        if 2 > len(rows):
                            raise generic.HaltParseException

                        head = None
                        for row in rows[1:]:
                            cells = row.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                # the column lookup is resolved once, then reused for later rows
                                if None is head:
                                    head = self._header_row(row)
                                texts = [cells[head[col]].get_text().strip() for col in ('seed', 'leech', 'size')]
                                seeders, leechers, size = [tryInt(text, text) for text in texts]
                                if self._peers_fail(mode, seeders, leechers):
                                    continue

                                info = row.find('a', href=rc['info'])
                                title = (info.attrs.get('title') or info.get_text()).strip()
                                download_url = self._link(row.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, KeyError):
                                # a row missing an expected link or cell is skipped
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (StandardError, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - found_before, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
# module-level instance of this provider, created at import time
# NOTE(review): presumably looked up by name from the providers package - confirm against the loader
provider = MagnetDLProvider()