Change improve parser tolerance for torrent providers.

Change improve TvChaos item parsing and can use qualities instead of 'Unknown'.
This commit is contained in:
JackDandy 2016-11-01 18:13:51 +00:00
parent b114dd1325
commit 916cec554c
37 changed files with 268 additions and 90 deletions

View file

@ -223,6 +223,7 @@
* Fix PiSexy for vip user class
* Change retry sleep timeout for Trakt searches to prevent issues when Trakt is down
* Fix TVDb search issue when only 1 result is returned
* Change improve TvChaos item parsing and can use qualities instead of 'Unknown'
### 0.11.16 (2016-10-16 17:30:00 UTC)

View file

@ -183,7 +183,7 @@ class GenericClient(object):
r_code = self._add_torrent_file(result)
if not r_code:
logger.log('%s: Unable to send Torrent: Return code undefined' % self.name, logger.ERROR)
logger.log('%s: Unable to send Torrent: Return code undefined (already exists in client?)' % self.name, logger.ERROR)
return False
if not self._set_torrent_pause(result):

View file

@ -103,7 +103,8 @@ def remove_non_release_groups(name, is_anime=False):
if name:
rc = [re.compile(r'(?i)' + v) for v in [
'([\s\.\-_\[\{\(]*(no-rar|nzbgeek|ripsalot|rp|siklopentan)[\s\.\-_\]\}\)]*)$',
'([\s\.\-_\[\{\(]*(no-rar|nzbgeek|ripsalot|siklopentan)[\s\.\-_\]\}\)]*)$',
'([\s\.\-_\[\{\(]rp[\s\.\-_\]\}\)]*)$',
'(?<=\w)([\s\.\-_]*[\[\{\(][\s\.\-_]*(www\.\w+.\w+)[\s\.\-_]*[\]\}\)][\s\.\-_]*)$',
'(?<=\w)([\s\.\-_]*[\[\{\(]\s*(rar(bg|tv)|((e[tz]|v)tv))[\s\.\-_]*[\]\}\)][\s\.\-_]*)$'] +
(['(?<=\w)([\s\.\-_]*[\[\{\(][\s\.\-_]*[\w\s\.\-\_]+[\s\.\-_]*[\]\}\)][\s\.\-_]*)$',

View file

@ -79,13 +79,15 @@ class AlphaRatioProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -69,9 +69,10 @@ class BitHDTVProvider(generic.TorrentProvider):
cnt = len(items[mode])
try:
if not html or self._has_no_results(html):
if not html or self._has_no_results(html) or 'width=750' not in html:
raise generic.HaltParseException
html = re.sub(r'</td>([^<]*)<tr>', '</td></tr>\1<tr>', html)
with BS4Parser(html, 'html.parser', attr='width=750') as soup:
torrent_table = soup.find('table', attrs={'width': 750})
torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')
@ -79,13 +80,15 @@ class BitHDTVProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 6 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -5]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self.freeleech and not tr.attrs.get('bgcolor').endswith('FF99') or \
self._peers_fail(mode, seeders, leechers):
continue

View file

@ -78,13 +78,15 @@ class BitmetvProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 6 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -5]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -34,9 +34,9 @@ class BTSceneProvider(generic.TorrentProvider):
self.url_home = ['http://www.btstorrent.cc/', 'http://bittorrentstart.com/',
'http://diriri.xyz/', 'http://mytorrentz.tv/']
self.url_vars = {'search': 'results.php?q=%s&category=series&order=1', 'browse': 'lastdaycat/type/Series/',
self.url_vars = {'search': '?q=%s&category=series&order=1', 'browse': 'lastdaycat/type/Series/',
'get': 'torrentdownload.php?id=%s'}
self.url_tmpl = {'config_provider_home_uri': '%(home)s', 'search': '%(home)s%(vars)s',
self.url_tmpl = {'config_provider_home_uri': '%(home)s', 'search': '%(vars)s',
'browse': '%(home)s%(vars)s', 'get': '%(home)s%(vars)s'}
self.minseed, self.minleech = 2 * [None]
@ -56,13 +56,24 @@ class BTSceneProvider(generic.TorrentProvider):
rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {
'info': '\w+?(\d+)[.]html', 'verified': 'Verified'}.iteritems())
url = self.url
response = self.get_url(url)
form = re.findall('(?is)(<form[^>]+)', response)
response = any(form) and form[0] or response
action = re.findall('<form[^>]+action=[\'"]([^\'"]*)', response)[0]
url = action if action.startswith('http') else \
url if not action else \
(url + action) if action.startswith('?') else \
self.urls['config_provider_home_uri'] + action.lstrip('/')
for mode in search_params.keys():
for search_string in search_params[mode]:
search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
search_url = self.urls['browse'] if 'Cache' == mode \
else self.urls['search'] % (urllib.quote_plus(search_string))
else url + self.urls['search'] % (urllib.quote_plus(search_string))
html = self.get_url(search_url)
@ -76,13 +87,15 @@ class BTSceneProvider(generic.TorrentProvider):
if not len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows:
cells = tr.find_all('td')
if 6 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -4, -3, -5]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) or \
self.confirmed and not (tr.find('img', src=rc['verified'])
or tr.find('img', title=rc['verified'])):

View file

@ -79,13 +79,15 @@ class DHProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 6 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -5]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']):
continue

View file

@ -77,13 +77,15 @@ class ExtraTorrentProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n.replace('---', '0'), n) for n in [
cells[x].get_text().strip() for x in -3, -2, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -96,6 +96,7 @@ class FanoProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if (5 > len(cells)
@ -104,8 +105,9 @@ class FanoProvider(generic.TorrentProvider):
or (not non_marked and not rc['filter'].search(str(tr)))))):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']):
continue

View file

@ -101,6 +101,7 @@ class FreshOnTVProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if (5 > len(cells) or tr.find('img', alt='Nuked')
@ -109,8 +110,9 @@ class FreshOnTVProvider(generic.TorrentProvider):
or (not non_marked and not tr.find('img', src=rc['filter']))))):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -80,14 +80,17 @@ class FunFileProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
info = tr.find('a', href=rc['info'])
if 5 > len(cells) or not info:
continue
try:
head = head if None is not head else self._header_row(
tr, {'seed': r'(?:up\.gif|seed|s/l)', 'leech': r'(?:down\.gif|leech|peers)'})
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers):
continue

View file

@ -325,6 +325,56 @@ class GenericProvider:
url_tmpl = '%s'
return url if re.match('(?i)https?://', url) else (url_tmpl % url.lstrip('/'))
def _header_row(self, table_row, custom_match=None, header_strip=''):
    """
    Locate the seed, leech and size columns of the table that contains a given row.

    Each header cell is reduced to one label: its text, id or title if one of those matches a
    column regex, otherwise a matching attribute of a nested img/i/abbr/a tag, otherwise its text.
    Indices are returned relative to the end of the row (negative values).

    Note: multi-valued class attributes inside the header row are joined into a single string
    in place, so that the label can be cleaned with a regex.

    :param table_row: Soup tag of any row that belongs to the table to inspect
    :param custom_match: Dict key/values to override one or more default regexes, or to add keys
    :param header_strip: String regex of ambiguities to remove from headers
    :return: dict of column indices, or None where no header matched, for leech, seed, size
        and any key given in custom_match
    """
    patterns = {'seed': r'(?:seed|s/l)', 'leech': r'(?:leech|peers)', 'size': r'(?:size)'}
    patterns.update(custom_match or {})
    rc = dict((k, re.compile('(?i)' + r)) for (k, r) in patterns.items())
    results = dict.fromkeys(rc)

    # without a parent table or a row in it, there is nothing to map, report every column as unknown
    table = table_row.find_parent('table')
    header_row = table.find('tr') if None is not table else None
    if None is header_row:
        return results

    for tag in header_row.find_all(True):
        css = tag.attrs.get('class')
        # only join a real list, a class joined by an earlier call must not be split into characters
        if css and isinstance(css, (list, tuple)):
            tag['class'] = '..'.join(css)

    all_cells = header_row.find_all('th') or header_row.find_all('td')

    child_attrs = [('img', 'title'), ('img', 'src'), ('i', 'title'), ('i', 'class'),
                   ('abbr', 'title'), ('a', 'title'), ('a', 'href')]

    def matches(value):
        # True if any column regex is found in value
        return any(regex.search(value) for regex in rc.values())

    def header_label(cell):
        # best text to identify a header cell with, falls back to the plain cell text
        text = cell.get_text()
        for candidate in (text, cell.attrs.get('id'), cell.attrs.get('title')):
            if candidate and matches(candidate):
                return candidate
        for tag_name, attr in child_attrs:
            for regex in rc.values():
                found = cell.find(tag_name, **{attr: regex})
                if None is not found and found.get(attr):
                    return found.get(attr)
        return text

    def span(cell):
        # width of a header cell, 0 if colspan is absent or is not a number
        try:
            return int(cell.attrs.get('colspan', 0))
        except (TypeError, ValueError):
            return 0

    headers = [re.sub(r'[\s]+', '', header_label(cell)).strip() for cell in all_cells]
    headers = [re.sub(header_strip, '', x) for x in headers]

    all_headers = headers
    colspans = [span(cell) for cell in all_cells]
    if any(colspans):
        # pad after each spanning header so that header positions line up with data cells
        all_headers = []
        for i, width in enumerate(colspans):
            all_headers += [headers[i]] + ([''] * (width - 1))

    for key, regex in rc.items():
        # scan from the last header backward, index() then reports the first cell with that label
        for name in reversed(all_headers):
            if name and regex.search(name):
                results[key] = all_headers.index(name) - len(all_headers)
                break

    return results
@staticmethod
def _dhtless_magnet(btih, name=None):
"""
@ -1091,14 +1141,15 @@ class TorrentProvider(object, GenericProvider):
return results
@staticmethod
def _has_no_results(*html):
def _has_no_results(html):
return re.search(r'(?i)<(?:b|div|h\d|p|span|strong|td)[^>]*>\s*(?:' +
'your\ssearch.*?did\snot\smatch|' +
'(?:nothing|0</b>\s+torrents)\sfound|' +
'(sorry,\s)?no\s(?:results|torrents)\s(found|here|match)|' +
'.*?there\sare\sno\sresults|' +
'.*?no\shits\.\sTry\sadding' +
')', html[0])
'(?:sorry,\s)?no\s(?:results|torrents)\s(found|here|match)|' +
'no\s(?:match|results|torrents)!*|'
'[^<]*?there\sare\sno\sresults|' +
'[^<]*?no\shits\.\sTry\sadding' +
')', html)
def _cache_data(self):

View file

@ -81,12 +81,14 @@ class GFTrackerProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 3 > len(cells):
continue
try:
seeders, leechers = 2 * [cells[-1].get_text().strip()]
head = head if None is not head else self._header_row(tr)
seeders, leechers = 2 * [cells[head['seed']].get_text().strip()]
seeders, leechers = [tryInt(n) for n in [
rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0]]]
if self._peers_fail(mode, seeders, leechers):
@ -94,7 +96,7 @@ class GFTrackerProvider(generic.TorrentProvider):
info = tr.find('a', href=rc['info'])
title = (info.attrs.get('title') or info.get_text()).strip()
size = cells[-2].get_text().strip()
size = cells[head['size']].get_text().strip()
download_url = self._link(tr.find('a', href=rc['get'])['href'])
except (AttributeError, TypeError, ValueError):
continue

View file

@ -91,13 +91,15 @@ class GrabTheInfoProvider(generic.TorrentProvider):
if not shows_found or 2 > (len(torrent_rows) - shows_found):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1 + shows_found:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
head = head if None is not head else self._header_row(torrent_rows[shows_found])
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -3]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -113,7 +113,7 @@ class HDBitsProvider(generic.TorrentProvider):
json_resp = self.get_url(search_url, post_data=post_data, json=True)
if not (json_resp and 'data' in json_resp and self.check_auth_from_data(json_resp)):
if not (json_resp and self.check_auth_from_data(json_resp) and 'data' in json_resp):
logger.log(u'Response from %s does not contain any json data, abort' % self.name, logger.ERROR)
return results

View file

@ -98,6 +98,7 @@ class HDSpaceProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if (6 > len(cells) or tr.find('td', class_='header')
@ -109,6 +110,7 @@ class HDSpaceProvider(generic.TorrentProvider):
if None is downlink:
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers = [tryInt(x.get_text().strip())
for x in tr.find_all('a', href=rc['peers'])]
if self._peers_fail(mode, seeders, leechers):
@ -116,7 +118,7 @@ class HDSpaceProvider(generic.TorrentProvider):
info = tr.find('a', href=rc['info'])
title = (info.attrs.get('title') or info.get_text()).strip()
size = cells[-5].get_text().strip()
size = cells[head['size']].get_text().strip()
download_url = self._link(downlink['href'])
except (AttributeError, TypeError, ValueError):
continue

View file

@ -106,6 +106,7 @@ class HDTorrentsProvider(generic.TorrentProvider):
if not len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows:
cells = tr.find_all('td')
if (6 > len(cells) or any(self.filter)
@ -113,8 +114,9 @@ class HDTorrentsProvider(generic.TorrentProvider):
or (not non_marked and not tr.find('img', src=rc['filter'])))):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -5]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']):
continue
title = tr.find('a', href=rc['info']).get_text().strip()

View file

@ -86,11 +86,14 @@ class IPTorrentsProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(
tr, header_strip='(?i)(?:leechers|seeders|size);')
seeders, leechers = [tryInt(tr.find('td', class_='t_' + x).get_text().strip())
for x in 'seeders', 'leechers']
if self._peers_fail(mode, seeders, leechers):
@ -98,7 +101,7 @@ class IPTorrentsProvider(generic.TorrentProvider):
info = tr.find('a', href=rc['info'])
title = (info.attrs.get('title') or info.get_text()).strip()
size = cells[-4].get_text().strip()
size = cells[head['size']].get_text().strip()
download_url = self._link(tr.find('a', href=rc['get'])['href'])
except (AttributeError, TypeError, ValueError):
continue

View file

@ -77,13 +77,15 @@ class LimeTorrentsProvider(generic.TorrentProvider):
if not len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[0]: # 0 = all rows
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n.replace(',', ''), n) for n in [
cells[x].get_text().strip() for x in -3, -2, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -80,13 +80,15 @@ class MoreThanProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells) or tr.find('img', alt=rc['nuked']):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -73,15 +73,14 @@ class PiSexyProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
header = torrent_rows[0].find_all('td')
peers_td = ([x.get_text().strip().lower() for x in header].index('see/lee') - len(header))
size_td = ([x.get_text().strip().lower() for x in header].index('size'))
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
seeders, leechers = 2 * [cells[peers_td].get_text().strip()]
head = head if None is not head else self._header_row(tr, {'seed': r'(?:see/lee|seed)'})
seeders, leechers = 2 * [cells[head['seed']].get_text().strip()]
seeders, leechers = [tryInt(n) for n in [
rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0]]]
if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['valid_cat']) \
@ -90,7 +89,7 @@ class PiSexyProvider(generic.TorrentProvider):
info = tr.find('a', href=rc['info'])
title = (rc['title'].sub('', info.attrs.get('title', '')) or info.get_text()).strip()
size = cells[size_td].get_text().strip()
size = cells[head['size']].get_text().strip()
download_url = self._link(tr.find('a', href=rc['get'])['href'])
except (AttributeError, TypeError, ValueError, KeyError, IndexError):
continue

View file

@ -110,6 +110,7 @@ class PrivateHDProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells) or (self.confirmed and tr.find('i', title=re.compile('(?i)unverified'))):
@ -122,8 +123,9 @@ class PrivateHDProvider(generic.TorrentProvider):
(not non_marked and not rc['filter'].search(munged))):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -104,6 +104,7 @@ class PTFProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 4 > len(cells):
@ -118,7 +119,8 @@ class PTFProvider(generic.TorrentProvider):
(not non_marked and not rc['filter'].search(marker))):
continue
try:
seeders, leechers = 2 * [cells[-2].get_text().strip()]
head = head if None is not head else self._header_row(tr)
seeders, leechers = 2 * [cells[head['seed']].get_text().strip()]
seeders, leechers = [tryInt(n) for n in [
rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0]]]
if self._peers_fail(mode, seeders, leechers) or\
@ -127,7 +129,7 @@ class PTFProvider(generic.TorrentProvider):
title = tr.find('a', href=rc['info']).get_text().strip()
snatches = tr.find('a', href=rc['snatch']).get_text().strip()
size = cells[-3].get_text().strip().replace(snatches, '')
size = cells[head['size']].get_text().strip().replace(snatches, '')
download_url = self._link(tr.find('a', href=rc['get'])['href'])
except (AttributeError, TypeError, ValueError, IndexError):
continue

View file

@ -77,13 +77,16 @@ class RevTTProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(
tr, {'seed': r'(?:up\.png|seed|s/l)', 'leech': r'(?:down\.png|leech|peers)'})
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']):
continue

View file

@ -93,13 +93,15 @@ class SceneTimeProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -3]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if None is tr.find('a', href=rc['cats'])\
or self.freeleech and None is rc['fl'].search(cells[1].get_text())\
or self._peers_fail(mode, seeders, leechers):

View file

@ -98,12 +98,14 @@ class ShazbatProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[0:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
stats = cells[3].get_text().strip()
head = head if None is not head else self._header_row(tr)
stats = cells[head['leech']].get_text().strip()
seeders, leechers = [(tryInt(x[0], 0), tryInt(x[1], 0)) for x in
re.findall('(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if x[0]][0]
if self._peers_fail(mode, seeders, leechers):

View file

@ -78,13 +78,15 @@ class SpeedCDProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -3]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if None is tr.find('a', href=rc['cats']) \
or self.freeleech and None is rc['fl'].search(cells[1].get_text()) \
or self._peers_fail(mode, seeders, leechers):

View file

@ -161,12 +161,14 @@ class ThePirateBayProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_table.find_all('tr')[1:]:
cells = tr.find_all('td')
if 3 > len(cells):
continue
try:
seeders, leechers = [tryInt(cells[x].get_text().strip()) for x in -2, -1]
head = head if None is not head else self._header_row(tr)
seeders, leechers = [tryInt(cells[head[x]].get_text().strip()) for x in 'seed', 'leech']
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -83,13 +83,15 @@ class TorLockProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) \
or self.confirmed and not (tr.find('img', src=rc['versrc'])
or tr.find('img', title=rc['verified'])):

View file

@ -74,14 +74,16 @@ class TorrentBytesProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
info = tr.find('a', href=rc['info'])
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self.freeleech and (len(info.contents) < 2 or not rc['fl'].search(
info.contents[1].string.strip())) or self._peers_fail(mode, seeders, leechers):
continue

View file

@ -86,18 +86,20 @@ class TorrentDayProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
seeders, leechers = [tryInt(tr.find('td', class_=x + 'ersInfo').get_text().strip())
for x in 'seed', 'leech']
head = head if None is not head else self._header_row(
tr, header_strip='(?i)(?:leechers|seeders|size);')
seeders, leechers, size = [tryInt(n, n) for n in [
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue
title = tr.find('a', href=rc['info']).get_text().strip()
size = cells[-3].get_text().strip()
download_url = self._link(tr.find('a', href=rc['get'])['href'])
except (AttributeError, TypeError, ValueError):
continue

View file

@ -82,13 +82,15 @@ class TorrentingProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -3]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers):
continue

View file

@ -76,11 +76,13 @@ class TorrentLeechProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 6 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers = [tryInt(n) for n in [
tr.find('td', class_=x).get_text().strip() for x in 'seeders', 'leechers']]
if self._peers_fail(mode, seeders, leechers):
@ -88,7 +90,7 @@ class TorrentLeechProvider(generic.TorrentProvider):
info = tr.find('td', class_='name').a
title = (info.attrs.get('title') or info.get_text()).strip()
size = cells[-5].get_text().strip()
size = cells[head['size']].get_text().strip()
download_url = self._link(tr.find('a', href=rc['get'])['href'])
except (AttributeError, TypeError, ValueError):
continue

View file

@ -81,13 +81,15 @@ class TorrentShackProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 5 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -2, -1, -4]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers):
continue

View file

@ -61,7 +61,6 @@ class TVChaosUKProvider(generic.TorrentProvider):
for search_string in search_params[mode]:
search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string
if 'Cache' != mode:
kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0',
'category': '0', 'search_type': 't_name', 'sort': 'added',
'order': 'desc', 'daysprune': '-1'})
@ -81,13 +80,15 @@ class TVChaosUKProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 6 > len(cells):
continue
try:
head = head if None is not head else self._header_row(tr)
seeders, leechers, size = [tryInt(n, n) for n in [
cells[x].get_text().strip() for x in -3, -2, -5]]
cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']]
if self._peers_fail(mode, seeders, leechers) \
or self.freeleech and None is cells[1].find('img', title=rc['fl']):
continue
@ -114,37 +115,7 @@ class TVChaosUKProvider(generic.TorrentProvider):
get_detail = False
try:
has_series = re.findall('(?i)(.*?series[^\d]*?\d+)(.*)', title)
if has_series:
rc_xtras = re.compile('(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)')
has_special = rc_xtras.findall(has_series[0][1])
if has_special:
title = has_series[0][0] + rc_xtras.sub(list(set(
list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1])
title = re.sub('(?i)series', r'Season', title)
title_parts = re.findall(
'(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' +
'(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title)
if len(title_parts):
new_parts = [tryInt(part, part.strip()) for part in title_parts[0]]
if not new_parts[1]:
new_parts[1] = 1
new_parts[2] = ('E%02d', ' Pack %d')[mode in 'Season'] % new_parts[2]
title = '%s.S%02d%s.%s' % tuple(new_parts)
dated = re.findall(
'(?i)([(\s]*)((?:\d\d\s)?[adfjmnos]\w{2,}\s+(?:19|20)\d\d)([)\s]*)', title)
if dated:
title = title.replace(''.join(dated[0]), '%s%s%s' % (
('', ' ')[1 < len(dated[0][0])], parse(dated[0][1]).strftime('%Y-%m-%d'),
('', ' ')[1 < len(dated[0][2])]))
add_pad = re.findall('((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title)
if len(add_pad) and add_pad[0][1] not in [' ', '.']:
title = title.replace(''.join(
add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1]))
title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title)
title = self.regulate_title(title, mode)
if title and download_url:
items[mode].append((title, download_url, seeders, self._bytesizer(size)))
except (StandardError, Exception):
@ -165,6 +136,84 @@ class TVChaosUKProvider(generic.TorrentProvider):
return results
@staticmethod
def regulate_title(title, mode='-'):
    """
    Rewrite a provider listed title as a dotted release name that ends with '-nogrp'.

    'Series N' becomes 'Season N', 'N of M' numbering becomes SxxEyy (or ' Pack N'), written dates
    become ISO dates, and resolution, source and codec tags are moved behind the name, where
    'hdtv' and 'x264' are added if no source or codec tag is found in the title.

    :param title: String title as listed by the provider
    :param mode: String search mode, 'Season' (or any substring of it) makes 'N of M' read as a pack
    :return: String regulated title
    """
    has_series = re.findall(r'(?i)(.*?series[^\d]*?\d+)(.*)', title)
    if has_series:
        rc_xtras = re.compile(r'(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)')
        has_special = rc_xtras.findall(has_series[0][1])
        if has_special:
            # replace the special/extra marker with one of its own separators, there is none to
            # use when the marker has no separator on either side, so use an empty string then
            separators = list(set(list(has_special[0][0]) + list(has_special[0][2])))
            title = has_series[0][0] + rc_xtras.sub(
                separators[0] if separators else '', has_series[0][1])
        title = re.sub('(?i)series', r'Season', title)

    # years are swapped for a placeholder while 'N of M' numbering is matched, then restored in order
    years = re.findall(r'((?:19|20)\d\d)', title)
    title = re.sub(r'(19|20)\d\d', r'{{yr}}', title)
    title_parts = re.findall(
        r'(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' +
        r'(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title)
    if len(title_parts):
        new_parts = [tryInt(part, part) for part in title_parts[0]]
        if not new_parts[1]:
            # no season number was given, use season 1
            new_parts[1] = 1
        new_parts[2] = ('E%02d', ' Pack %d')[any([re.search('(?i)season|series', title),
                                                  mode in 'Season'])] % new_parts[2]
        title = '%s`S%02d%s`%s' % tuple(new_parts)
    for yr in years:
        title = re.sub(r'\{\{yr\}\}', yr, title, count=1)

    # groups are: lead padding, optional day, month like word, year, trail padding
    dated = re.findall(r'(?i)([(\s]*)((?:\d+\s)?)([adfjmnos]\w{2,}\s+)((?:19|20)\d\d)([)\s]*)', title)
    for d in dated:
        try:
            dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
            # keep year-month only when no day was written
            title = title.replace(''.join(d), '%s%s%s' % (
                ('', ' ')[1 < len(d[0])], dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)],
                ('', ' ')[1 < len(d[4])]))
        except Exception:
            # best effort, a word and year that is not a date is left as it was
            pass
    if dated:
        add_pad = re.findall(r'((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title)
        if any(add_pad) and add_pad[0][1] not in [' ', '.']:
            title = title.replace(''.join(
                add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1]))
        title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title)

    t = ['']
    bl = r'[*\[({]+\s*'
    br = r'\s*[})\]*]+'
    # move a bracketed proper marker to the end of the title
    # the inline flag is kept at the start of the pattern as it applies to the whole pattern anyway
    title = re.sub(r'(?i)(.*?)(%sproper%s)(.*)' % (bl, br), r'\1\3\2', title)
    # split at the end of the longest lead that ends with ' - ', a year or date, or SxxEyy
    for rx in r'\s+-\s+', r'(?:19|20)\d\d(?:\-\d\d\-\d\d)?', r'S\d\d+(?:E\d\d+)?':
        m = re.findall('(.*%s)(.*)' % rx, title)
        if any(m) and len(m[0][0]) > len(t[0]):
            t = m[0]
    t = ([title], t)[any(t)]

    # tag order is: proper, resolution, source, codec, container/subs
    tags = [re.findall(x, t[-1]) for x in
            (r'(?i)%sProper%s|\bProper\b$' % (bl, br),
             r'(?i)\d{3,4}(?:[pi]|hd)',
             r'(?i)(hr.ws.pdtv|blu.?ray|hddvd|pdtv|hdtv|dsr|tvrip|web.?(?:dl|rip)|dvd.?rip|b[rd]rip|mpeg-?2)',
             r'(?i)([hx].?26[45]|divx|xvid)',
             r'(?i)(avi|mkv|mp4|sub(?:b?ed|pack|s))')]

    # every found tag is cut from the text, all but container/subs are appended after it
    title = ('%s`%s' % (
        re.sub('|'.join(['|'.join([re.escape(y) for y in x]) for x in tags if x]).strip('|'), '', t[-1]),
        re.sub(r'(?i)(\d{3,4})hd', r'\1p', '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) +
        ('', '`hdtv')[not any(tags[2])] + ('', '`x264')[not any(tags[3])]))
    for rx, repl in [(r'(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'),
                     (r'(?i)%s(Proper)%s' % (bl, br), r'`\1`'), (r'%s\s*%s' % (bl, br), '`')]:
        title = re.sub(rx, repl, title)

    title = '%s%s-nogrp' % (('', t[0])[1 < len(t)], title)
    # backticks mark separators, collapse them and any adjacent whitespace into single dots
    for rx, repl in [(r'\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', '.')]:
        title = re.sub(rx, repl, title)

    return title
def _season_strings(self, ep_obj, **kwargs):
return generic.TorrentProvider._season_strings(self, ep_obj, scene=False, prefix='%', sp_detail=(
@ -183,7 +232,7 @@ class TVChaosUKProvider(generic.TorrentProvider):
def ui_string(key):
return ('tvchaosuk_tip' == key
and 'has missing quality data so you must add quality Custom/Unknown to any wanted show' or '')
and 'releases are often "Air by date release names" - edit search settings of show if required' or '')
provider = TVChaosUKProvider()

View file

@ -71,19 +71,23 @@ class ZooqleProvider(generic.TorrentProvider):
if 2 > len(torrent_rows):
raise generic.HaltParseException
head = None
for tr in torrent_rows[1:]:
cells = tr.find_all('td')
if 4 > len(cells):
continue
try:
stats = rc['peers'].findall((cells[-1].find(class_='progress') or {}).get('title', ''))
head = head if None is not head else self._header_row(
tr, {'peers': r'(?:zqf\-cloud)', 'size': r'(?:zqf\-files)'})
stats = rc['peers'].findall(
(cells[head['peers']].find(class_='progress') or {}).get('title', ''))
seeders, leechers = any(stats) and [tryInt(x) for x in stats[0]] or (0, 0)
if self._peers_fail(mode, seeders, leechers):
continue
info = cells[1].find('a', href=rc['info'])
info = cells[1].find('a', href=rc['info']) or cells[0].find('a', href=rc['info'])
title = info and info.get_text().strip()
size = cells[-3].get_text().strip()
size = cells[head['size']].get_text().strip()
download_url = info and (self.urls['get'] % rc['info'].findall(info['href'])[0])
except (AttributeError, TypeError, ValueError, IndexError):
continue