diff --git a/CHANGES.md b/CHANGES.md index 4f781c18..2cd546d3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -223,6 +223,7 @@ * Fix PiSexy for vip user class * Change retry sleep timeout for Trakt searches to prevent issues when Trakt is down * Fix TVDb search issue when only 1 result is returned +* Change improve TvChaos item parsing and can use qualities instead of 'Unknown' ### 0.11.16 (2016-10-16 17:30:00 UTC) diff --git a/sickbeard/clients/generic.py b/sickbeard/clients/generic.py index 543f14ea..254875bd 100644 --- a/sickbeard/clients/generic.py +++ b/sickbeard/clients/generic.py @@ -183,7 +183,7 @@ class GenericClient(object): r_code = self._add_torrent_file(result) if not r_code: - logger.log('%s: Unable to send Torrent: Return code undefined' % self.name, logger.ERROR) + logger.log('%s: Unable to send Torrent: Return code undefined (already exists in client?)' % self.name, logger.ERROR) return False if not self._set_torrent_pause(result): diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py index f781eb7e..4cbce768 100644 --- a/sickbeard/helpers.py +++ b/sickbeard/helpers.py @@ -103,7 +103,8 @@ def remove_non_release_groups(name, is_anime=False): if name: rc = [re.compile(r'(?i)' + v) for v in [ - '([\s\.\-_\[\{\(]*(no-rar|nzbgeek|ripsalot|rp|siklopentan)[\s\.\-_\]\}\)]*)$', + '([\s\.\-_\[\{\(]*(no-rar|nzbgeek|ripsalot|siklopentan)[\s\.\-_\]\}\)]*)$', + '([\s\.\-_\[\{\(]rp[\s\.\-_\]\}\)]*)$', '(?<=\w)([\s\.\-_]*[\[\{\(][\s\.\-_]*(www\.\w+.\w+)[\s\.\-_]*[\]\}\)][\s\.\-_]*)$', '(?<=\w)([\s\.\-_]*[\[\{\(]\s*(rar(bg|tv)|((e[tz]|v)tv))[\s\.\-_]*[\]\}\)][\s\.\-_]*)$'] + (['(?<=\w)([\s\.\-_]*[\[\{\(][\s\.\-_]*[\w\s\.\-\_]+[\s\.\-_]*[\]\}\)][\s\.\-_]*)$', diff --git a/sickbeard/providers/alpharatio.py b/sickbeard/providers/alpharatio.py index db8fb3b7..375a44bb 100644 --- a/sickbeard/providers/alpharatio.py +++ b/sickbeard/providers/alpharatio.py @@ -79,13 +79,15 @@ class AlphaRatioProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/bithdtv.py b/sickbeard/providers/bithdtv.py index 6367aabc..478e3e51 100644 --- a/sickbeard/providers/bithdtv.py +++ b/sickbeard/providers/bithdtv.py @@ -69,9 +69,10 @@ class BitHDTVProvider(generic.TorrentProvider): cnt = len(items[mode]) try: - if not html or self._has_no_results(html): + if not html or self._has_no_results(html) or 'width=750' not in html: raise generic.HaltParseException + html = re.sub(r'([^<]*)', '\1', html) with BS4Parser(html, 'html.parser', attr='width=750') as soup: torrent_table = soup.find('table', attrs={'width': 750}) torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') @@ -79,13 +80,15 @@ class BitHDTVProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -5]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self.freeleech and not tr.attrs.get('bgcolor').endswith('FF99') or \ self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/bitmetv.py b/sickbeard/providers/bitmetv.py index ba003a16..84d44792 100644 --- a/sickbeard/providers/bitmetv.py +++ b/sickbeard/providers/bitmetv.py @@ -78,13 +78,15 @@ class BitmetvProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -5]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/btscene.py b/sickbeard/providers/btscene.py index 5b1ee1be..bf4f46d7 100644 --- a/sickbeard/providers/btscene.py +++ b/sickbeard/providers/btscene.py @@ -34,9 +34,9 @@ class BTSceneProvider(generic.TorrentProvider): self.url_home = ['http://www.btstorrent.cc/', 'http://bittorrentstart.com/', 'http://diriri.xyz/', 'http://mytorrentz.tv/'] - self.url_vars = {'search': 'results.php?q=%s&category=series&order=1', 'browse': 'lastdaycat/type/Series/', + self.url_vars = {'search': '?q=%s&category=series&order=1', 'browse': 'lastdaycat/type/Series/', 'get': 'torrentdownload.php?id=%s'} - self.url_tmpl = {'config_provider_home_uri': '%(home)s', 'search': '%(home)s%(vars)s', + self.url_tmpl = {'config_provider_home_uri': '%(home)s', 'search': '%(vars)s', 'browse': '%(home)s%(vars)s', 'get': '%(home)s%(vars)s'} self.minseed, self.minleech = 2 * [None] @@ -56,13 +56,24 @@ class BTSceneProvider(generic.TorrentProvider): rc = dict((k, re.compile('(?i)' + v)) for (k, v) in { 'info': '\w+?(\d+)[.]html', 'verified': 'Verified'}.iteritems()) + + url = self.url + response = self.get_url(url) + form = re.findall('(?is)(]+)', response) + response = any(form) and form[0] or response + action = re.findall(']+action=[\'"]([^\'"]*)', response)[0] + url = action if action.startswith('http') else \ + url if not action else \ + (url + action) if action.startswith('?') else \ + self.urls['config_provider_home_uri'] + action.lstrip('/') + for mode in search_params.keys(): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string search_url = self.urls['browse'] if 'Cache' == mode \ - else self.urls['search'] % (urllib.quote_plus(search_string)) + else url + self.urls['search'] % (urllib.quote_plus(search_string)) html = self.get_url(search_url) @@ -76,13 +87,15 @@ class BTSceneProvider(generic.TorrentProvider): if not len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows: cells = tr.find_all('td') if 6 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -4, -3, -5]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) or \ self.confirmed and not (tr.find('img', src=rc['verified']) or tr.find('img', title=rc['verified'])): diff --git a/sickbeard/providers/dh.py b/sickbeard/providers/dh.py index 9eb22ec1..2cad9adb 100644 --- a/sickbeard/providers/dh.py +++ b/sickbeard/providers/dh.py @@ -79,13 +79,15 @@ class DHProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -5]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']): continue diff --git a/sickbeard/providers/extratorrent.py b/sickbeard/providers/extratorrent.py index 11806bd8..447e20e4 100644 --- a/sickbeard/providers/extratorrent.py +++ b/sickbeard/providers/extratorrent.py @@ -77,13 +77,15 @@ class ExtraTorrentProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n.replace('---', '0'), n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/fano.py b/sickbeard/providers/fano.py index 140b8075..b8ea33a0 100644 --- a/sickbeard/providers/fano.py +++ b/sickbeard/providers/fano.py @@ -96,6 +96,7 @@ class FanoProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if (5 > len(cells) @@ -104,8 +105,9 @@ class FanoProvider(generic.TorrentProvider): or (not non_marked and not rc['filter'].search(str(tr)))))): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']): continue diff --git a/sickbeard/providers/freshontv.py b/sickbeard/providers/freshontv.py index 91d91e61..f9d8aac3 100644 --- a/sickbeard/providers/freshontv.py +++ b/sickbeard/providers/freshontv.py @@ -101,6 +101,7 @@ class FreshOnTVProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if (5 > len(cells) or tr.find('img', alt='Nuked') @@ -109,8 +110,9 @@ class FreshOnTVProvider(generic.TorrentProvider): or (not non_marked and not tr.find('img', src=rc['filter']))))): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/funfile.py b/sickbeard/providers/funfile.py index 0bd400af..b82d5e64 100644 --- a/sickbeard/providers/funfile.py +++ b/sickbeard/providers/funfile.py @@ -80,14 +80,17 @@ class FunFileProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') info = tr.find('a', href=rc['info']) if 5 > len(cells) or not info: continue try: + head = head if None is not head else self._header_row( + tr, {'seed': r'(?:up\.gif|seed|s/l)', 'leech': r'(?:down\.gif|leech|peers)'}) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index 4a3edb4b..dcbb132f 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -325,6 +325,56 @@ class GenericProvider: url_tmpl = '%s' return url if re.match('(?i)https?://', url) else (url_tmpl % url.lstrip('/')) + def _header_row(self, table_row, custom_match=None, header_strip=''): + """ + :param header_row: Soup resultset of table header row + :param custom_match: Dict key/values to override one or more default regexes + :param header_strip: String regex of ambiguities to remove from headers + :return: dict column indices or None for leech, seeds, and size + """ + results = {} + rc = dict((k, re.compile('(?i)' + r)) for (k, r) in dict( + {'seed': r'(?:seed|s/l)', 'leech': r'(?:leech|peers)', 'size': r'(?:size)'}.items() + + ({}, custom_match)[any([custom_match])].items()).items()) + table = table_row.find_parent('table') + header_row = table.tr or table.thead.tr or table.tbody.tr + for y in [x for x in header_row(True) if x.attrs.get('class')]: + y['class'] = '..'.join(y['class']) + all_cells = header_row.find_all('th') + all_cells = all_cells if any(all_cells) else header_row.find_all('td') + + headers = [re.sub( + r'[\s]+', '', + ((any([cell.get_text()]) and any([rc[x].search(cell.get_text()) for x in rc.keys()]) and cell.get_text()) + or (cell.attrs.get('id') and any([rc[x].search(cell['id']) for x in rc.keys()]) and cell['id']) + or (cell.attrs.get('title') and any([rc[x].search(cell['title']) for x in rc.keys()]) and cell['title']) + or next(iter(set(filter(lambda z: any([z]), [ + next(iter(set(filter(lambda y: any([y]), [ + cell.find(tag, **p) for p in [{attr: rc[x]} for x in rc.keys()]]))), {}).get(attr) + for (tag, attr) in [ + ('img', 'title'), ('img', 'src'), ('i', 'title'), ('i', 'class'), + ('abbr', 'title'), ('a', 'title'), ('a', 'href')]]))), '') + or cell.get_text() + )).strip() for cell in all_cells] + headers = [re.sub(header_strip, '', x) for x in headers] + all_headers = headers + colspans = [int(cell.attrs.get('colspan', 0)) for cell in all_cells] + if any(colspans): + all_headers = [] + for i, width in enumerate(colspans): + all_headers += [headers[i]] + ([''] * (width - 1)) + + for k, r in rc.iteritems(): + if k not in results: + for name in filter(lambda v: any([v]) and r.search(v), all_headers[::-1]): + results[k] = all_headers.index(name) - len(all_headers) + break + + for missing in set(rc.keys()) - set(results.keys()): + results[missing] = None + + return results + @staticmethod def _dhtless_magnet(btih, name=None): """ @@ -1091,14 +1141,15 @@ class TorrentProvider(object, GenericProvider): return results @staticmethod - def _has_no_results(*html): + def _has_no_results(html): return re.search(r'(?i)<(?:b|div|h\d|p|span|strong|td)[^>]*>\s*(?:' + 'your\ssearch.*?did\snot\smatch|' + '(?:nothing|0\s+torrents)\sfound|' + - '(sorry,\s)?no\s(?:results|torrents)\s(found|here|match)|' + - '.*?there\sare\sno\sresults|' + - '.*?no\shits\.\sTry\sadding' + - ')', html[0]) + '(?:sorry,\s)?no\s(?:results|torrents)\s(found|here|match)|' + + 'no\s(?:match|results|torrents)!*|' + '[^<]*?there\sare\sno\sresults|' + + '[^<]*?no\shits\.\sTry\sadding' + + ')', html) def _cache_data(self): diff --git a/sickbeard/providers/gftracker.py b/sickbeard/providers/gftracker.py index 2510c77b..948119c3 100644 --- a/sickbeard/providers/gftracker.py +++ b/sickbeard/providers/gftracker.py @@ -81,12 +81,14 @@ class GFTrackerProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 3 > len(cells): continue try: - seeders, leechers = 2 * [cells[-1].get_text().strip()] + head = head if None is not head else self._header_row(tr) + seeders, leechers = 2 * [cells[head['seed']].get_text().strip()] seeders, leechers = [tryInt(n) for n in [ rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0]]] if self._peers_fail(mode, seeders, leechers): @@ -94,7 +96,7 @@ class GFTrackerProvider(generic.TorrentProvider): info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() - size = cells[-2].get_text().strip() + size = cells[head['size']].get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue diff --git a/sickbeard/providers/grabtheinfo.py b/sickbeard/providers/grabtheinfo.py index bde8a422..e852ae75 100644 --- a/sickbeard/providers/grabtheinfo.py +++ b/sickbeard/providers/grabtheinfo.py @@ -91,13 +91,15 @@ class GrabTheInfoProvider(generic.TorrentProvider): if not shows_found or 2 > (len(torrent_rows) - shows_found): raise generic.HaltParseException + head = None for tr in torrent_rows[1 + shows_found:]: cells = tr.find_all('td') if 4 > len(cells): continue try: + head = head if None is not head else self._header_row(torrent_rows[shows_found]) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -3]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/hdbits.py b/sickbeard/providers/hdbits.py index 3a8e78f5..439e6a56 100644 --- a/sickbeard/providers/hdbits.py +++ b/sickbeard/providers/hdbits.py @@ -113,7 +113,7 @@ class HDBitsProvider(generic.TorrentProvider): json_resp = self.get_url(search_url, post_data=post_data, json=True) - if not (json_resp and 'data' in json_resp and self.check_auth_from_data(json_resp)): + if not (json_resp and self.check_auth_from_data(json_resp) and 'data' in json_resp): logger.log(u'Response from %s does not contain any json data, abort' % self.name, logger.ERROR) return results diff --git a/sickbeard/providers/hdspace.py b/sickbeard/providers/hdspace.py index 6083ee4c..9f3c3e1b 100644 --- a/sickbeard/providers/hdspace.py +++ b/sickbeard/providers/hdspace.py @@ -98,6 +98,7 @@ class HDSpaceProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if (6 > len(cells) or tr.find('td', class_='header') @@ -109,6 +110,7 @@ class HDSpaceProvider(generic.TorrentProvider): if None is downlink: continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers = [tryInt(x.get_text().strip()) for x in tr.find_all('a', href=rc['peers'])] if self._peers_fail(mode, seeders, leechers): @@ -116,7 +118,7 @@ class HDSpaceProvider(generic.TorrentProvider): info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() - size = cells[-5].get_text().strip() + size = cells[head['size']].get_text().strip() download_url = self._link(downlink['href']) except (AttributeError, TypeError, ValueError): continue diff --git a/sickbeard/providers/hdtorrents.py b/sickbeard/providers/hdtorrents.py index ce2ea8b3..2af244e7 100644 --- a/sickbeard/providers/hdtorrents.py +++ b/sickbeard/providers/hdtorrents.py @@ -106,6 +106,7 @@ class HDTorrentsProvider(generic.TorrentProvider): if not len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows: cells = tr.find_all('td') if (6 > len(cells) or any(self.filter) @@ -113,8 +114,9 @@ class HDTorrentsProvider(generic.TorrentProvider): or (not non_marked and not tr.find('img', src=rc['filter'])))): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -5]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']): continue title = tr.find('a', href=rc['info']).get_text().strip() diff --git a/sickbeard/providers/iptorrents.py b/sickbeard/providers/iptorrents.py index f1068b8d..50cf484d 100644 --- a/sickbeard/providers/iptorrents.py +++ b/sickbeard/providers/iptorrents.py @@ -86,11 +86,14 @@ class IPTorrentsProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row( + tr, header_strip='(?i)(?:leechers|seeders|size);') seeders, leechers = [tryInt(tr.find('td', class_='t_' + x).get_text().strip()) for x in 'seeders', 'leechers'] if self._peers_fail(mode, seeders, leechers): @@ -98,7 +101,7 @@ class IPTorrentsProvider(generic.TorrentProvider): info = tr.find('a', href=rc['info']) title = (info.attrs.get('title') or info.get_text()).strip() - size = cells[-4].get_text().strip() + size = cells[head['size']].get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue diff --git a/sickbeard/providers/limetorrents.py b/sickbeard/providers/limetorrents.py index 18828dd5..49e0db2f 100644 --- a/sickbeard/providers/limetorrents.py +++ b/sickbeard/providers/limetorrents.py @@ -77,13 +77,15 @@ class LimeTorrentsProvider(generic.TorrentProvider): if not len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[0]: # 0 = all rows cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n.replace(',', ''), n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/morethan.py b/sickbeard/providers/morethan.py index c98ee8e2..eeabc768 100644 --- a/sickbeard/providers/morethan.py +++ b/sickbeard/providers/morethan.py @@ -80,13 +80,15 @@ class MoreThanProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or tr.find('img', alt=rc['nuked']): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/pisexy.py b/sickbeard/providers/pisexy.py index 6beaed5a..7b945300 100644 --- a/sickbeard/providers/pisexy.py +++ b/sickbeard/providers/pisexy.py @@ -73,15 +73,14 @@ class PiSexyProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException - header = torrent_rows[0].find_all('td') - peers_td = ([x.get_text().strip().lower() for x in header].index('see/lee') - len(header)) - size_td = ([x.get_text().strip().lower() for x in header].index('size')) + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: - seeders, leechers = 2 * [cells[peers_td].get_text().strip()] + head = head if None is not head else self._header_row(tr, {'seed': r'(?:see/lee|seed)'}) + seeders, leechers = 2 * [cells[head['seed']].get_text().strip()] seeders, leechers = [tryInt(n) for n in [ rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0]]] if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['valid_cat']) \ @@ -90,7 +89,7 @@ class PiSexyProvider(generic.TorrentProvider): info = tr.find('a', href=rc['info']) title = (rc['title'].sub('', info.attrs.get('title', '')) or info.get_text()).strip() - size = cells[size_td].get_text().strip() + size = cells[head['size']].get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, KeyError, IndexError): continue diff --git a/sickbeard/providers/privatehd.py b/sickbeard/providers/privatehd.py index af701e45..57961c51 100644 --- a/sickbeard/providers/privatehd.py +++ b/sickbeard/providers/privatehd.py @@ -110,6 +110,7 @@ class PrivateHDProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells) or (self.confirmed and tr.find('i', title=re.compile('(?i)unverified'))): @@ -122,8 +123,9 @@ class PrivateHDProvider(generic.TorrentProvider): (not non_marked and not rc['filter'].search(munged))): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/ptf.py b/sickbeard/providers/ptf.py index 35929dd3..7d4145fb 100644 --- a/sickbeard/providers/ptf.py +++ b/sickbeard/providers/ptf.py @@ -104,6 +104,7 @@ class PTFProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): @@ -118,7 +119,8 @@ class PTFProvider(generic.TorrentProvider): (not non_marked and not rc['filter'].search(marker))): continue try: - seeders, leechers = 2 * [cells[-2].get_text().strip()] + head = head if None is not head else self._header_row(tr) + seeders, leechers = 2 * [cells[head['seed']].get_text().strip()] seeders, leechers = [tryInt(n) for n in [ rc['seeders'].findall(seeders)[0], rc['leechers'].findall(leechers)[0]]] if self._peers_fail(mode, seeders, leechers) or\ @@ -127,7 +129,7 @@ class PTFProvider(generic.TorrentProvider): title = tr.find('a', href=rc['info']).get_text().strip() snatches = tr.find('a', href=rc['snatch']).get_text().strip() - size = cells[-3].get_text().strip().replace(snatches, '') + size = cells[head['size']].get_text().strip().replace(snatches, '') download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError, IndexError): continue diff --git a/sickbeard/providers/revtt.py b/sickbeard/providers/revtt.py index 4d4a491e..0cfabc6a 100644 --- a/sickbeard/providers/revtt.py +++ b/sickbeard/providers/revtt.py @@ -77,13 +77,16 @@ class RevTTProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row( + tr, {'seed': r'(?:up\.png|seed|s/l)', 'leech': r'(?:down\.png|leech|peers)'}) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) or not tr.find('a', href=rc['cats']): continue diff --git a/sickbeard/providers/scenetime.py b/sickbeard/providers/scenetime.py index 35a5ce32..1be278a8 100644 --- a/sickbeard/providers/scenetime.py +++ b/sickbeard/providers/scenetime.py @@ -93,13 +93,15 @@ class SceneTimeProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -3]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if None is tr.find('a', href=rc['cats'])\ or self.freeleech and None is rc['fl'].search(cells[1].get_text())\ or self._peers_fail(mode, seeders, leechers): diff --git a/sickbeard/providers/shazbat.py b/sickbeard/providers/shazbat.py index 4bbeeec0..a7dff22e 100644 --- a/sickbeard/providers/shazbat.py +++ b/sickbeard/providers/shazbat.py @@ -98,12 +98,14 @@ class ShazbatProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[0:]: cells = tr.find_all('td') if 4 > len(cells): continue try: - stats = cells[3].get_text().strip() + head = head if None is not head else self._header_row(tr) + stats = cells[head['leech']].get_text().strip() seeders, leechers = [(tryInt(x[0], 0), tryInt(x[1], 0)) for x in re.findall('(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if x[0]][0] if self._peers_fail(mode, seeders, leechers): diff --git a/sickbeard/providers/speedcd.py b/sickbeard/providers/speedcd.py index 2aac1c14..103899cb 100644 --- a/sickbeard/providers/speedcd.py +++ b/sickbeard/providers/speedcd.py @@ -78,13 +78,15 @@ class SpeedCDProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -3]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if None is tr.find('a', href=rc['cats']) \ or self.freeleech and None is rc['fl'].search(cells[1].get_text()) \ or self._peers_fail(mode, seeders, leechers): diff --git a/sickbeard/providers/thepiratebay.py b/sickbeard/providers/thepiratebay.py index e1ab2c51..51705a7b 100644 --- a/sickbeard/providers/thepiratebay.py +++ b/sickbeard/providers/thepiratebay.py @@ -161,12 +161,14 @@ class ThePirateBayProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_table.find_all('tr')[1:]: cells = tr.find_all('td') if 3 > len(cells): continue try: - seeders, leechers = [tryInt(cells[x].get_text().strip()) for x in -2, -1] + head = head if None is not head else self._header_row(tr) + seeders, leechers = [tryInt(cells[head[x]].get_text().strip()) for x in 'seed', 'leech'] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/torlock.py b/sickbeard/providers/torlock.py index ceab716a..b5b561b8 100644 --- a/sickbeard/providers/torlock.py +++ b/sickbeard/providers/torlock.py @@ -83,13 +83,15 @@ class TorLockProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) \ or self.confirmed and not (tr.find('img', src=rc['versrc']) or tr.find('img', title=rc['verified'])): diff --git a/sickbeard/providers/torrentbytes.py b/sickbeard/providers/torrentbytes.py index 0a8eeb1f..bb072acb 100644 --- a/sickbeard/providers/torrentbytes.py +++ b/sickbeard/providers/torrentbytes.py @@ -74,14 +74,16 @@ class TorrentBytesProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: info = tr.find('a', href=rc['info']) + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self.freeleech and (len(info.contents) < 2 or not rc['fl'].search( info.contents[1].string.strip())) or self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/torrentday.py b/sickbeard/providers/torrentday.py index bb6c7f21..65c975d8 100644 --- a/sickbeard/providers/torrentday.py +++ b/sickbeard/providers/torrentday.py @@ -86,18 +86,20 @@ class TorrentDayProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: - seeders, leechers = [tryInt(tr.find('td', class_=x + 'ersInfo').get_text().strip()) - for x in 'seed', 'leech'] + head = head if None is not head else self._header_row( + tr, header_strip='(?i)(?:leechers|seeders|size);') + seeders, leechers, size = [tryInt(n, n) for n in [ + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue title = tr.find('a', href=rc['info']).get_text().strip() - size = cells[-3].get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue diff --git a/sickbeard/providers/torrenting.py b/sickbeard/providers/torrenting.py index b2f3a5c6..eb926648 100644 --- a/sickbeard/providers/torrenting.py +++ b/sickbeard/providers/torrenting.py @@ -82,13 +82,15 @@ class TorrentingProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -3]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if None is tr.find('a', href=rc['cats']) or self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/torrentleech.py b/sickbeard/providers/torrentleech.py index bdc1f336..7da261a1 100644 --- a/sickbeard/providers/torrentleech.py +++ b/sickbeard/providers/torrentleech.py @@ -76,11 +76,13 @@ class TorrentLeechProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers = [tryInt(n) for n in [ tr.find('td', class_=x).get_text().strip() for x in 'seeders', 'leechers']] if self._peers_fail(mode, seeders, leechers): @@ -88,7 +90,7 @@ class TorrentLeechProvider(generic.TorrentProvider): info = tr.find('td', class_='name').a title = (info.attrs.get('title') or info.get_text()).strip() - size = cells[-5].get_text().strip() + size = cells[head['size']].get_text().strip() download_url = self._link(tr.find('a', href=rc['get'])['href']) except (AttributeError, TypeError, ValueError): continue diff --git a/sickbeard/providers/torrentshack.py b/sickbeard/providers/torrentshack.py index c3ab0e0e..14988199 100644 --- a/sickbeard/providers/torrentshack.py +++ b/sickbeard/providers/torrentshack.py @@ -81,13 +81,15 @@ class TorrentShackProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 5 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -2, -1, -4]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers): continue diff --git a/sickbeard/providers/tvchaosuk.py b/sickbeard/providers/tvchaosuk.py index 041be9eb..de51e85c 100644 --- a/sickbeard/providers/tvchaosuk.py +++ b/sickbeard/providers/tvchaosuk.py @@ -61,10 +61,9 @@ class TVChaosUKProvider(generic.TorrentProvider): for search_string in search_params[mode]: search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string - if 'Cache' != mode: - kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0', - 'category': '0', 'search_type': 't_name', 'sort': 'added', - 'order': 'desc', 'daysprune': '-1'}) + kwargs = dict(post_data={'keywords': search_string, 'do': 'quick_sort', 'page': '0', + 'category': '0', 'search_type': 't_name', 'sort': 'added', + 'order': 'desc', 'daysprune': '-1'}) html = self.get_url(self.urls['search'], **kwargs) @@ -81,13 +80,15 @@ class TVChaosUKProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 6 > len(cells): continue try: + head = head if None is not head else self._header_row(tr) seeders, leechers, size = [tryInt(n, n) for n in [ - cells[x].get_text().strip() for x in -3, -2, -5]] + cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] if self._peers_fail(mode, seeders, leechers) \ or self.freeleech and None is cells[1].find('img', title=rc['fl']): continue @@ -114,37 +115,7 @@ class TVChaosUKProvider(generic.TorrentProvider): get_detail = False try: - has_series = re.findall('(?i)(.*?series[^\d]*?\d+)(.*)', title) - if has_series: - rc_xtras = re.compile('(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)') - has_special = rc_xtras.findall(has_series[0][1]) - if has_special: - title = has_series[0][0] + rc_xtras.sub(list(set( - list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1]) - title = re.sub('(?i)series', r'Season', title) - - title_parts = re.findall( - '(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' + - '(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title) - if len(title_parts): - new_parts = [tryInt(part, part.strip()) for part in title_parts[0]] - if not new_parts[1]: - new_parts[1] = 1 - new_parts[2] = ('E%02d', ' Pack %d')[mode in 'Season'] % new_parts[2] - title = '%s.S%02d%s.%s' % tuple(new_parts) - - dated = re.findall( - '(?i)([(\s]*)((?:\d\d\s)?[adfjmnos]\w{2,}\s+(?:19|20)\d\d)([)\s]*)', title) - if dated: - title = title.replace(''.join(dated[0]), '%s%s%s' % ( - ('', ' ')[1 < len(dated[0][0])], parse(dated[0][1]).strftime('%Y-%m-%d'), - ('', ' ')[1 < len(dated[0][2])])) - add_pad = re.findall('((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title) - if len(add_pad) and add_pad[0][1] not in [' ', '.']: - title = title.replace(''.join( - add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1])) - title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title) - + title = self.regulate_title(title, mode) if title and download_url: items[mode].append((title, download_url, seeders, self._bytesizer(size))) except (StandardError, Exception): @@ -165,6 +136,84 @@ class TVChaosUKProvider(generic.TorrentProvider): return results + @staticmethod + def regulate_title(title, mode='-'): + + has_series = re.findall('(?i)(.*?series[^\d]*?\d+)(.*)', title) + if has_series: + rc_xtras = re.compile('(?i)([. _-]|^)(special|extra)s?\w*([. _-]|$)') + has_special = rc_xtras.findall(has_series[0][1]) + if has_special: + title = has_series[0][0] + rc_xtras.sub(list(set( + list(has_special[0][0]) + list(has_special[0][2])))[0], has_series[0][1]) + title = re.sub('(?i)series', r'Season', title) + + years = re.findall('((?:19|20)\d\d)', title) + title = re.sub('(19|20)\d\d', r'{{yr}}', title) + title_parts = re.findall( + '(?im)^(.*?)(?:Season[^\d]*?(\d+).*?)?' + + '(?:(?:pack|part|pt)\W*?)?(\d+)[^\d]*?of[^\d]*?(?:\d+)(.*?)$', title) + if len(title_parts): + new_parts = [tryInt(part, part) for part in title_parts[0]] + if not new_parts[1]: + new_parts[1] = 1 + new_parts[2] = ('E%02d', ' Pack %d')[any([re.search('(?i)season|series', title), + mode in 'Season'])] % new_parts[2] + title = '%s`S%02d%s`%s' % tuple(new_parts) + for yr in years: + title = re.sub('\{\{yr\}\}', yr, title, count=1) + + dated = re.findall('(?i)([(\s]*)((?:\d+\s)?)([adfjmnos]\w{2,}\s+)((?:19|20)\d\d)([)\s]*)', title) + for d in dated: + try: + dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d') + title = title.replace(''.join(d), '%s%s%s' % ( + ('', ' ')[1 < len(d[0])], dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)], + ('', ' ')[1 < len(d[4])])) + except (StandardError, Exception): + pass + if dated: + add_pad = re.findall('((?:19|20)\d\d[-]\d\d[-]\d\d)([\w\W])', title) + if any(add_pad) and add_pad[0][1] not in [' ', '.']: + title = title.replace(''.join( + add_pad[0]), '%s %s' % (add_pad[0][0], add_pad[0][1])) + title = re.sub(r'(?sim)(.*?)(?:Episode|Season).\d+.(.*)', r'\1\2', title) + + t = [''] + bl = '[*\[({]+\s*' + br = '\s*[})\]*]+' + title = re.sub('(.*?)((?i)%sproper%s)(.*)' % (bl, br), r'\1\3\2', title) + for r in '\s+-\s+', '(?:19|20)\d\d(?:\-\d\d\-\d\d)?', 'S\d\d+(?:E\d\d+)?': + m = re.findall('(.*%s)(.*)' % r, title) + if any(m) and len(m[0][0]) > len(t[0]): + t = m[0] + t = ([title], t)[any(t)] + + tags = [re.findall(x, t[-1], flags=re.X) for x in + ('(?i)%sProper%s|\bProper\b$' % (bl, br), + '(?i)\d{3,4}(?:[pi]|hd)', + ''' + (?i)(hr.ws.pdtv|blu.?ray|hddvd| + pdtv|hdtv|dsr|tvrip|web.?(?:dl|rip)|dvd.?rip|b[r|d]rip|mpeg-?2) + ''', ''' + (?i)([hx].?26[45]|divx|xvid) + ''', ''' + (?i)(avi|mkv|mp4|sub(?:b?ed|pack|s)) + ''')] + title = ('%s`%s' % ( + re.sub('|'.join(['|'.join([re.escape(y) for y in x]) for x in tags if x]).strip('|'), '', t[-1]), + re.sub('(?i)(\d{3,4})hd', r'\1p', '`'.join(['`'.join(x) for x in tags[:-1]]).rstrip('`')) + + ('', '`hdtv')[not any(tags[2])] + ('', '`x264')[not any(tags[3])])) + for r in [('(?i)(?:\W(?:Series|Season))?\W(Repack)\W', r'`\1`'), + ('(?i)%s(Proper)%s' % (bl, br), r'`\1`'), ('%s\s*%s' % (bl, br), '`')]: + title = re.sub(r[0], r[1], title) + + title = '%s%s-nogrp' % (('', t[0])[1 < len(t)], title) + for r in [('\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', '.')]: + title = re.sub(r[0], r[1], title) + + return title + def _season_strings(self, ep_obj, **kwargs): return generic.TorrentProvider._season_strings(self, ep_obj, scene=False, prefix='%', sp_detail=( @@ -183,7 +232,7 @@ class TVChaosUKProvider(generic.TorrentProvider): def ui_string(key): return ('tvchaosuk_tip' == key - and 'has missing quality data so you must add quality Custom/Unknown to any wanted show' or '') + and 'releases are often "Air by date release names" - edit search settings of show if required' or '') provider = TVChaosUKProvider() diff --git a/sickbeard/providers/zooqle.py b/sickbeard/providers/zooqle.py index 8dd7450d..b35a7b11 100644 --- a/sickbeard/providers/zooqle.py +++ b/sickbeard/providers/zooqle.py @@ -71,19 +71,23 @@ class ZooqleProvider(generic.TorrentProvider): if 2 > len(torrent_rows): raise generic.HaltParseException + head = None for tr in torrent_rows[1:]: cells = tr.find_all('td') if 4 > len(cells): continue try: - stats = rc['peers'].findall((cells[-1].find(class_='progress') or {}).get('title', '')) + head = head if None is not head else self._header_row( + tr, {'peers': r'(?:zqf\-cloud)', 'size': r'(?:zqf\-files)'}) + stats = rc['peers'].findall( + (cells[head['peers']].find(class_='progress') or {}).get('title', '')) seeders, leechers = any(stats) and [tryInt(x) for x in stats[0]] or (0, 0) if self._peers_fail(mode, seeders, leechers): continue - info = cells[1].find('a', href=rc['info']) + info = cells[1].find('a', href=rc['info']) or cells[0].find('a', href=rc['info']) title = info and info.get_text().strip() - size = cells[-3].get_text().strip() + size = cells[head['size']].get_text().strip() download_url = info and (self.urls['get'] % rc['info'].findall(info['href'])[0]) except (AttributeError, TypeError, ValueError, IndexError): continue