From 49cd0b3a99a545afc3ee86f3a4f789504f607247 Mon Sep 17 00:00:00 2001
From: JackDandy
Date: Tue, 23 Jun 2015 16:23:30 +0100
Subject: [PATCH] Change refactor KAT to use torrent provider simplification
 and PEP8.

---
 CHANGES.md                 |   1 +
 sickbeard/providers/kat.py | 280 ++++++++++++-------------------------
 2 files changed, 91 insertions(+), 190 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 6ecbfcab..baa6d1a6 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -43,6 +43,7 @@
 * Change refactor BTS to use torrent provider simplification and PEP8
 * Change refactor FSH to use torrent provider simplification and PEP8
 * Change refactor IPT to use torrent provider simplification and PEP8
+* Change refactor KAT to use torrent provider simplification and PEP8
 * Remove HDTorrents torrent provider
 * Remove NextGen torrent provider
 * Add Rarbg torrent provider
diff --git a/sickbeard/providers/kat.py b/sickbeard/providers/kat.py
index d1820d00..52c91bf7 100644
--- a/sickbeard/providers/kat.py
+++ b/sickbeard/providers/kat.py
@@ -17,69 +17,37 @@
 from __future__ import with_statement
 
+import re
 import os
+import datetime
 import traceback
 import urllib
-import re
-import datetime
-import urlparse
 
-import sickbeard
-import generic
+from . import generic
+from sickbeard import config, logger, tvcache, show_name_helpers, helpers
+from sickbeard.bs4_parser import BS4Parser
 from sickbeard.common import Quality, mediaExtensions
 from sickbeard.name_parser.parser import NameParser, InvalidNameException, InvalidShowException
-from sickbeard import logger, tvcache, helpers, db, classes
-from sickbeard.show_name_helpers import allPossibleShowNames, sanitizeSceneName
-from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 
 
 class KATProvider(generic.TorrentProvider):
+
     def __init__(self):
-        generic.TorrentProvider.__init__(self, 'KickAssTorrents', True, False)
+        generic.TorrentProvider.__init__(self, 'KickAssTorrents')
 
+        self.url_base = 'https://kat.ph/'
+        self.urls = {'config_provider_home_uri': self.url_base,
+                     'search': [self.url_base, 'http://katproxy.com/'],
+                     'cache_params': 'tv/?field=time_add&sorder=desc',
+                     'search_params': 'usearch/%s/?field=seeders&sorder=desc'}
+
+        self.url = self.urls['config_provider_home_uri']
+
+        self.minseed, self.minleech = 2 * [None]
         self.confirmed = False
-        self.ratio = None
-        self.minseed = None
-        self.minleech = None
-
-        self.urls = ['https://kat.ph/', 'http://katproxy.com/']
-
-        self.url = self.urls[0]
-
         self.cache = KATCache(self)
 
-    def getQuality(self, item, anime=False):
-
-        quality = Quality.sceneQuality(item[0], anime)
-        return quality
-
-    @staticmethod
-    def _reverse_quality(quality):
-
-        quality_string = ''
-
-        if quality == Quality.SDTV:
-            quality_string = 'HDTV x264'
-        if quality == Quality.SDDVD:
-            quality_string = 'DVDRIP'
-        elif quality == Quality.HDTV:
-            quality_string = '720p HDTV x264'
-        elif quality == Quality.FULLHDTV:
-            quality_string = '1080p HDTV x264'
-        elif quality == Quality.RAWHDTV:
-            quality_string = '1080i HDTV mpeg2'
-        elif quality == Quality.HDWEBDL:
-            quality_string = '720p WEB-DL h264'
-        elif quality == Quality.FULLHDWEBDL:
-            quality_string = '1080p WEB-DL h264'
-        elif quality == Quality.HDBLURAY:
-            quality_string = '720p Bluray x264'
-        elif quality == Quality.FULLHDBLURAY:
-            quality_string = '1080p Bluray x264'
-
-        return quality_string
-
     def _find_season_quality(self, title, torrent_link, ep_number):
         """ Return the modified title of a Season Torrent with the quality found inspecting torrent file list """
 
@@ -135,202 +103,134 @@ class KATProvider(generic.TorrentProvider):
             if parse_result.series_name and parse_result.season_number:
                 title = parse_result.series_name + ' S%02d %s' % (int(parse_result.season_number),
                                                                   self._reverse_quality(quality))
-
             return title
 
         except Exception:
             logger.log(u'Failed to quality parse ' + self.name + ' Traceback: ' + traceback.format_exc(), logger.ERROR)
 
-    def _get_season_search_strings(self, ep_obj):
-        search_string = {'Season': []}
+    def _get_season_search_strings(self, ep_obj, **kwargs):
 
-        for show_name in set(allPossibleShowNames(self.show)):
-            if ep_obj.show.air_by_date or ep_obj.show.sports:
-                ep_string = show_name + ' ' + str(ep_obj.airdate).split('-')[0]
-                search_string['Season'].append(ep_string)
-                ep_string = show_name + ' Season ' + str(ep_obj.airdate).split('-')[0]
-                search_string['Season'].append(ep_string)
-            elif ep_obj.show.anime:
-                ep_string = show_name + ' ' + '%02d' % ep_obj.scene_absolute_number
-                search_string['Season'].append(ep_string)
-            else:
-                ep_string = show_name + ' S%02d' % int(ep_obj.scene_season) + ' -S%02d' % int(
-                    ep_obj.scene_season) + 'E' + ' category:tv'  # 1) showName SXX -SXXE
-                search_string['Season'].append(ep_string)
-                ep_string = show_name + ' Season ' + str(
-                    ep_obj.scene_season) + ' -Ep*' + ' category:tv'  # 2) showName Season X
-                search_string['Season'].append(ep_string)
-
-        return [search_string]
-
-    def _get_episode_search_strings(self, ep_obj, add_string=''):
-        search_string = {'Episode': []}
-
-        if self.show.air_by_date:
-            for show_name in set(allPossibleShowNames(self.show)):
-                ep_string = sanitizeSceneName(show_name) + ' ' + \
-                            str(ep_obj.airdate).replace('-', ' ')
-                search_string['Episode'].append(ep_string)
-        elif self.show.sports:
-            for show_name in set(allPossibleShowNames(self.show)):
-                ep_string = sanitizeSceneName(show_name) + ' ' + \
-                            str(ep_obj.airdate).replace('-', '|') + '|' + \
-                            ep_obj.airdate.strftime('%b')
-                search_string['Episode'].append(ep_string)
-        elif self.show.anime:
-            for show_name in set(allPossibleShowNames(self.show)):
-                ep_string = sanitizeSceneName(show_name) + ' ' + \
-                            '%02i' % int(ep_obj.scene_absolute_number)
-                search_string['Episode'].append(ep_string)
+        if ep_obj.show.air_by_date or ep_obj.show.sports:
+            airdate = str(ep_obj.airdate).split('-')[0]
+            ep_detail = [airdate, 'Season ' + airdate]
+        elif ep_obj.show.anime:
+            ep_detail = '%02i' % ep_obj.scene_absolute_number
         else:
-            for show_name in set(allPossibleShowNames(self.show)):
-                ep_string = sanitizeSceneName(show_name) + ' ' + \
-                            sickbeard.config.naming_ep_type[2] % {'seasonnumber': ep_obj.scene_season,
-                                                                  'episodenumber': ep_obj.scene_episode} + '|' + \
-                            sickbeard.config.naming_ep_type[0] % {'seasonnumber': ep_obj.scene_season,
-                                                                  'episodenumber': ep_obj.scene_episode} + ' %s category:tv' % add_string
-                search_string['Episode'].append(re.sub('\s+', ' ', ep_string))
+            ep_detail = ['S%(s)02i -S%(s)02iE' % {'s': ep_obj.scene_season},
+                         'Season %s -Ep*' % ep_obj.scene_season]
 
-        return [search_string]
+        return [{'Season': self._build_search_strings(ep_detail, append=(' category:tv', '')[self.show.anime])}]
+
+    def _get_episode_search_strings(self, ep_obj, add_string='', **kwargs):
+
+        if not ep_obj:
+            return []
+
+        if self.show.air_by_date or self.show.sports:
+            ep_detail = str(ep_obj.airdate).replace('-', ' ')
+            if self.show.sports:
+                ep_detail += '|' + ep_obj.airdate.strftime('%b')
+        elif self.show.anime:
+            ep_detail = '%02i' % ep_obj.scene_absolute_number
+        else:
+            ep_detail = '%s|%s' % (config.naming_ep_type[2] % {'seasonnumber': ep_obj.scene_season,
+                                                               'episodenumber': ep_obj.scene_episode},
+                                   config.naming_ep_type[0] % {'seasonnumber': ep_obj.scene_season,
+                                                               'episodenumber': ep_obj.scene_episode})
+
+        # include provider specific appends
+        if not isinstance(add_string, list):
+            add_string = [add_string]
+        add_string = [x + ' category:tv' for x in add_string]
+
+        return [{'Episode': self._build_search_strings(ep_detail, append=(add_string, '')[self.show.anime])}]
 
     def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0):
 
         results = []
-        items = {'Season': [], 'Episode': [], 'RSS': []}
+        items = {'Season': [], 'Episode': [], 'Cache': []}
 
+        rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'link': 'normal'}.items())
+        url = 0
        for mode in search_params.keys():
             for search_string in search_params[mode]:
-                for url in self.urls:
-                    search_url = url
-                    if 'RSS' == mode:
-                        search_url += 'tv/?field=time_add&sorder=desc'
-                        logger.log(u'KAT cache update URL: ' + search_url, logger.DEBUG)
-                    else:
-                        search_url += 'usearch/%s/?field=seeders&sorder=desc' % (urllib.quote(unidecode(search_string)))
-                        logger.log(u'Search string: ' + search_url, logger.DEBUG)
+                self.url = self.urls['search'][url]
+                search_args = ('search_params', 'cache_params')['Cache' == mode]
+                search_url = self.url + self.urls[search_args]
+                if 'Cache' != mode:
+                    search_url %= urllib.quote(unidecode(search_string))
 
-                    html = self.getURL(search_url)
-                    if html:
-                        self.url = url
-                        break
-
-                if not html:
-                    continue
+                html = helpers.getURL(search_url)
+                cnt = len(items[mode])
                 try:
+                    if not html or self._has_no_results(html) or re.search(r'did not match any documents', html):
+                        if html and 'kastatic' not in html:
+                            url += (1, 0)[url == len(self.urls['search']) - 1]  # advance to next mirror, stop at the last
+                        raise generic.HaltParseException
+
                     with BS4Parser(html, features=['html5lib', 'permissive']) as soup:
                         torrent_table = soup.find('table', attrs={'class': 'data'})
-                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+                        torrent_rows = [] if not torrent_table else torrent_table.find_all('tr')
 
-                        # Continue only if one Release is found
-                        if len(torrent_rows) < 2:
-                            logger.log(u'The data returned from ' + self.name + ' does not contain any torrents',
-                                       logger.WARNING)
-                            continue
+                        if 2 > len(torrent_rows):
+                            raise generic.HaltParseException
 
                         for tr in torrent_rows[1:]:
                             try:
-                                link = urlparse.urljoin(self.url,
-                                                        (tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
-                                tid = tr.get('id')[-7:]
-                                title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
-                                    or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
-                                url = tr.find('a', 'imagnet')['href']
-                                verified = True if tr.find('a', 'iverify') else False
-                                # trusted = True if tr.find('img', {'alt': 'verified'}) else False
-                                seeders = int(tr.find_all('td')[-2].text)
-                                leechers = int(tr.find_all('td')[-1].text)
+                                seeders, leechers = [int(tr.find_all('td')[x].get_text().strip()) for x in (-2, -1)]
+                                if 'Cache' != mode and (seeders < self.minseed or leechers < self.minleech):
+                                    continue
+
+                                info = tr.find('div', {'class': 'torrentname'})
+                                title = (info.find_all('a')[1].get_text() or info.find('a', 'cellMainLink').get_text())\
+                                    .strip()
+                                link = self.url + info.find('a', {'class': rc['link']})['href'].lstrip('/')
+
+                                download_magnet = tr.find('a', 'imagnet')['href']
                             except (AttributeError, TypeError):
                                 continue
 
-                            if 'RSS' != mode and (seeders < self.minseed or leechers < self.minleech):
-                                continue
-
-                            if self.confirmed and not verified:
-                                logger.log(
-                                    u'KAT Provider found result ' + title + ' but that doesn\'t seem like a verified result so I\'m ignoring it',
-                                    logger.DEBUG)
+                            if self.confirmed and not tr.find('a', 'iverify'):
+                                logger.log(u'Skipping untrusted non-verified result: %s' % title, logger.DEBUG)
                                 continue
 
                             # Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
                             if 'Season' == mode and 'sponly' == search_mode:
-                                ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
+                                ep_number = int(epcount / len(set(show_name_helpers.allPossibleShowNames(self.show))))
                                 title = self._find_season_quality(title, link, ep_number)
 
-                            if not title or not url:
-                                continue
-
-                            item = title, url, tid, seeders, leechers
-
-                            items[mode].append(item)
+                            if title and download_magnet:
+                                items[mode].append((title, download_magnet, seeders))
 
+                except generic.HaltParseException:
+                    pass
                 except Exception:
-                    logger.log(u'Failed to parse ' + self.name + ' Traceback: ' + traceback.format_exc(),
-                               logger.ERROR)
+                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
+
+                self._log_result(mode, len(items[mode]) - cnt, search_url)
 
             # For each search mode sort all the items by seeders
-            items[mode].sort(key=lambda tup: tup[3], reverse=True)
+            items[mode].sort(key=lambda tup: tup[2], reverse=True)
 
             results += items[mode]
 
         return results
 
-    def _get_title_and_url(self, item):
-
-        title, url, tid, seeders, leechers = item
-
-        if title:
-            title = u'' + title.replace(' ', '.')
-
-        if url:
-            url = url.replace('&amp;', '&')
-
-        return title, url
-
     def findPropers(self, search_date=datetime.datetime.today()):
-        results = []
-
-        my_db = db.DBConnection()
-        sql_results = my_db.select(
-            'SELECT s.show_name, e.showid, e.season, e.episode, e.status, e.airdate, s.indexer FROM tv_episodes AS e' +
-            ' INNER JOIN tv_shows AS s ON (e.showid = s.indexer_id)' +
-            ' WHERE e.airdate >= ' + str(search_date.toordinal()) +
-            ' AND (e.status IN (' + ','.join([str(x) for x in Quality.DOWNLOADED]) + ')' +
-            ' OR (e.status IN (' + ','.join([str(x) for x in Quality.SNATCHED]) + ')))'
-        )
-
-        if not sql_results:
-            return []
-
-        for sqlshow in sql_results:
-            self.show = helpers.findCertainShow(sickbeard.showList, int(sqlshow['showid']))
-            if self.show:
-                cur_ep = self.show.getEpisode(int(sqlshow['season']), int(sqlshow['episode']))
-
-                search_string = self._get_episode_search_strings(cur_ep, add_string='PROPER|REPACK')
-
-                for item in self._doSearch(search_string[0]):
-                    title, url = self._get_title_and_url(item)
-                    results.append(classes.Proper(title, url, datetime.datetime.today(), self.show))
-
-        return results
-
-    def seedRatio(self):
-        return self.ratio
+        return self._find_propers(search_date, '')
 
 
 class KATCache(tvcache.TVCache):
-    def __init__(self, this_provider):
 
+    def __init__(self, this_provider):
         tvcache.TVCache.__init__(self, this_provider)
 
         self.minTime = 20  # cache update frequency
 
     def _getRSSData(self):
-        search_params = {'RSS': ['rss']}
-        return self.provider._doSearch(search_params)
+
+        return self.provider.get_cache_data()
 
 
 provider = KATProvider()
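A note on the new request flow, since the hunks above are dense: _doSearch now derives every request URL from the urls dict set up in __init__ instead of appending hard-coded query strings per mirror. The standalone Python 2 sketch below traces that lookup; the dict keys, parameter strings, and the ('Cache' == mode) test mirror the patch, while the function name build_search_url, the example search term, and the omission of unidecode handling are illustrative only.

import urllib

# Mirrors the 'urls' dict this patch introduces.
urls = {'search': ['https://kat.ph/', 'http://katproxy.com/'],
        'cache_params': 'tv/?field=time_add&sorder=desc',
        'search_params': 'usearch/%s/?field=seeders&sorder=desc'}

def build_search_url(mode, search_string='', url_index=0):
    # 'Cache' mode hits the fixed recent-TV listing; all other modes
    # substitute the URL-quoted search term into the usearch template.
    params = urls[('search_params', 'cache_params')['Cache' == mode]]
    if 'Cache' != mode:
        params %= urllib.quote(search_string)
    return urls['search'][url_index] + params

print build_search_url('Episode', 'Show Name S01E02 category:tv')
# https://kat.ph/usearch/Show%20Name%20S01E02%20category%3Atv/?field=seeders&sorder=desc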
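The patch also leans repeatedly on the Python 2 idiom (false_value, true_value)[condition], which indexes a two-tuple by a condition; bool is an int subclass, so a truthy condition selects index 1. A small worked example with illustrative names:

mode = 'Cache'
key = ('search_params', 'cache_params')['Cache' == mode]
assert 'cache_params' == key  # True picks index 1

anime = 0  # show.anime is an int flag in this codebase
append = (' category:tv', '')[anime]
assert ' category:tv' == append  # falsy 0 picks index 0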