From 208ddfb42abc99417962033e32fb5ba2df7e5dcc Mon Sep 17 00:00:00 2001 From: JackDandy Date: Fri, 20 Apr 2018 10:22:53 +0100 Subject: [PATCH 1/2] Change prefer modern html5lib over old to prevent display show issue on systems that fail to clean libs. In rare cases, systems *fail* to remove the deprecated "_base.pyc" file (and probably others) in \lib\html5lib\treebuilders\. Therefore, the startup cleanup process will now list files that cannot be auto-deleted - the user must then manually delete the files listed in "__README-DANGER.txt". Change add un/pw for cookie support to improve SpeedCD torrent provider. Change improve handling of faults when downloading .torrent files. Remove TorrentBytes provider. Change remove redundant log messages for releases never to be cached, removing <30% log spam. Change remove redundant log messages for items not found in cache, removing <10% log spam. Pep8. --- CHANGES.md | 12 ++- _cleaner.py | 34 ++++-- gui/slick/images/providers/torrentbytes.png | Bin 433 -> 0 bytes gui/slick/interfaces/default/cache.tmpl | 42 +++++--- lib/bs4/builder/_html5lib.py | 13 +-- sickbeard/helpers.py | 32 +++--- sickbeard/providers/__init__.py | 3 +- sickbeard/providers/generic.py | 39 +++++-- sickbeard/providers/newznab.py | 10 +- sickbeard/providers/speedcd.py | 65 ++++++++--- sickbeard/providers/torrentbytes.py | 113 -------------------- sickbeard/tv.py | 4 +- sickbeard/tvcache.py | 46 ++++---- 13 files changed, 196 insertions(+), 217 deletions(-) delete mode 100644 gui/slick/images/providers/torrentbytes.png delete mode 100644 sickbeard/providers/torrentbytes.py diff --git a/CHANGES.md b/CHANGES.md index d9d70559..63a8deb6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,14 @@ -### 0.15.13 (2018-04-18 13:50:00 UTC) +### 0.15.14 (2018-04-20 12:00:00 UTC) + +* Change prefer modern html5lib over old to prevent display show issue on systems that fail to clean libs +* Change add un/pw for cookie support to improve SpeedCD torrent provider +* Change improve handling of faults when downloading .torrent files +* Remove TorrentBytes provider +* Change remove redundant log messages for releases never to be cached removing <30% log spam +* Change remove redundant log messages for items not found in cache removing <10% log spam + + +### 0.15.13 (2018-04-18 13:50:00 UTC) * Fix API endpoints for sg.exceptions and exceptions * Change improve searching torrent provider BTScene diff --git a/_cleaner.py b/_cleaner.py index 7be17fc6..a8cabc2e 100644 --- a/_cleaner.py +++ b/_cleaner.py @@ -4,7 +4,7 @@ import os import shutil parent_dir = os.path.abspath(os.path.dirname(__file__)) -cleaned_file = os.path.abspath(os.path.join(parent_dir, r'.cleaned.tmp')) +cleaned_file = os.path.abspath(os.path.join(parent_dir, '.cleaned.tmp')) if not os.path.isfile(cleaned_file): dead_dirs = [os.path.abspath(os.path.join(parent_dir, *d)) for d in [ ('tornado',), @@ -32,8 +32,11 @@ if not os.path.isfile(cleaned_file): fp.flush() os.fsync(fp.fileno()) -cleaned_file = os.path.abspath(os.path.join(parent_dir, r'.cleaned_html5lib.tmp')) +cleaned_file = os.path.abspath(os.path.join(parent_dir, '.cleaned_html5lib.tmp')) +test = os.path.abspath(os.path.join(parent_dir, 'lib', 'html5lib', 'treebuilders', '_base.pyc')) +danger_output = os.path.abspath(os.path.join(parent_dir, '__README-DANGER.txt')) +bad_files = [] +if not os.path.isfile(cleaned_file) or os.path.exists(test): for dead_path in [os.path.abspath(os.path.join(parent_dir, *d)) for d in [ ('lib', 'html5lib', 'trie'), ('lib', 'html5lib',
'serializer') @@ -56,12 +59,27 @@ if not os.path.isfile(cleaned_file): ('lib', 'html5lib', 'treewalkers', 'genshistream.py'), ]]: for ext in ['', 'c', 'o']: - try: - os.remove('%s.py%s' % (os.path.splitext(dead_file)[:-1][0], ext)) - except (StandardError, Exception): - pass + name = '%s.py%s' % (os.path.splitext(dead_file)[:-1][0], ext) + if os.path.exists(name): + try: + os.remove(name) + except (StandardError, Exception): + bad_files += [name] + if any(bad_files): + swap_name = cleaned_file + cleaned_file = danger_output + danger_output = swap_name + msg = 'Failed (permissions?) to delete file(s). You must manually delete:\r\n%s' % '\r\n'.join(bad_files) + print(msg) + else: + msg = 'This file exists to prevent a rerun delete of dead lib/html5lib files' with open(cleaned_file, 'wb') as fp: - fp.write('This file exists to prevent a rerun delete of dead lib/html5lib files') + fp.write(msg) fp.flush() os.fsync(fp.fileno()) + +try: + os.remove(danger_output) +except (StandardError, Exception): + pass diff --git a/gui/slick/images/providers/torrentbytes.png b/gui/slick/images/providers/torrentbytes.png deleted file mode 100644 index 98ff9d7ab96689f18a41a6b66415a0b4ffb02345..0000000000000000000000000000000000000000 GIT binary patch [433 bytes of binary image data omitted] diff --git a/gui/slick/interfaces/default/cache.tmpl b/gui/slick/interfaces/default/cache.tmpl --- a/gui/slick/interfaces/default/cache.tmpl +++ b/gui/slick/interfaces/default/cache.tmpl [an earlier hunk adding three lines near the top of the template was lost in extraction] @@ -14,7 +17,7 @@ { \$('#cacheTable:has(tbody tr)').tablesorter({ widgets: ['zebra', 'filter'], - sortList: [[0,1]], + sortList: [[1,0]], }); #raw @@ -40,41 +43,50 @@ [table markup stripped in extraction; recoverable changes: the column headers are condensed (Provider -> Prov, Name -> Rls Name, Season -> Sn, Episodes -> En, Indexer Id -> Show Id, Time -> Stamp, Release Group -> Rls Group, Version -> Ver) and the Url column is dropped; a '$len($cacheResults) releases' count row is added; the row loop becomes: + #for $hItem in $cacheResults: + #set $provider = $providers.getProviderClass($hItem['provider']) + #set $tip = '%s @ %s' % ($hItem['provider'], $sbdatetime.sbdatetime.sbfdatetime($sbdatetime.sbdatetime.fromtimestamp($hItem['time']))) + #set $ver = $hItem['version'] + #set $ver = ($ver, '')[-1 == $ver] + #set $quality = tryInt($hItem['quality']) with each row now showing a provider icon titled $tip when #if $provider matches (#else the plain $hItem['provider']), $hItem['name'], $hItem['season'], episodes as $hItem['episodes'].strip('|').replace('|', ','), $hItem['indexerid'], $hItem['time'], quality as $Quality.get_quality_ui($quality), $hItem['release_group'] and $ver, then #end for]
-#include $os.path.join($sickbeard.PROG_DIR,'gui/slick/interfaces/default/inc_bottom.tmpl') \ No newline at end of file +#include $os.path.join($sickbeard.PROG_DIR,'gui/slick/interfaces/default/inc_bottom.tmpl') diff --git a/lib/bs4/builder/_html5lib.py b/lib/bs4/builder/_html5lib.py index 5f548935..641c2ebe 100644 --- a/lib/bs4/builder/_html5lib.py +++ b/lib/bs4/builder/_html5lib.py @@ -30,13 +30,14 @@ from bs4.element import ( ) try: - # Pre-0.99999999 - from html5lib.treebuilders import _base as treebuilder_base - new_html5lib = False -except ImportError, e: # 0.99999999 and up from html5lib.treebuilders import base as treebuilder_base - new_html5lib = True + old_html5lib = False +except ImportError: + # Pre-0.99999999 + from html5lib.treebuilders import _base as treebuilder_base + old_html5lib = True + class HTML5TreeBuilder(HTMLTreeBuilder): """Use html5lib to build a tree.""" @@ -65,7 +66,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): extra_kwargs = dict() if not isinstance(markup, unicode): - if new_html5lib: + if not old_html5lib: extra_kwargs['override_encoding'] = self.user_specified_encoding else: extra_kwargs['encoding'] = self.user_specified_encoding diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py index b6ee1c72..e0d55784 100644 --- a/sickbeard/helpers.py +++ b/sickbeard/helpers.py @@ -1107,30 +1107,25 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N Either 1) Returns a byte-string retrieved from the url provider. 2) Return True/False if success after using kwargs 'savefile' set to file pathname. + 3) Returns a tuple of (response, session) if success after setting kwargs 'resp_sess' True. """ # selectively mute some errors - mute = [] - for muted in filter( - lambda x: kwargs.get(x, False), ['mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout']): - mute += [muted] - del kwargs[muted] + mute = filter(lambda x: kwargs.pop(x, False), ['mute_connect_err', 'mute_read_timeout', 'mute_connect_timeout']) # reuse or instantiate request session + resp_sess = kwargs.pop('resp_sess', None) if None is session: session = CloudflareScraper.create_scraper() session.headers.update({'User-Agent': USER_AGENT}) # download and save file or simply fetch url - savename = None - if 'savename' in kwargs: + savename = kwargs.pop('savename', None) + if savename: # session streaming session.stream = True - savename = kwargs.pop('savename') - if 'nocache' in kwargs: - del kwargs['nocache'] - else: + if not kwargs.pop('nocache', False): cache_dir = sickbeard.CACHE_DIR or _getTempDir() session = CacheControl(sess=session, cache=caches.FileCache(ek.ek(os.path.join, cache_dir, 'sessions'))) @@ -1168,13 +1163,13 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N session.proxies = {'http': proxy_address, 'https': proxy_address} # decide if we get or post data to server - if 'post_json' in kwargs: - kwargs.setdefault('json', kwargs.pop('post_json')) + if post_data or 'post_json' in kwargs: + if post_data: + kwargs.setdefault('data', post_data) - if post_data: - kwargs.setdefault('data', post_data) + if 'post_json' in kwargs: + kwargs.setdefault('json', kwargs.pop('post_json')) - if 'data' in kwargs or 'json' in kwargs: response = session.post(url, timeout=timeout, **kwargs) else: response = session.get(url, timeout=timeout, **kwargs) @@ -1242,6 +1237,8 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N if json: try: data_json = response.json() + if resp_sess: + return ({},
data_json)[isinstance(data_json, (dict, list))], session return ({}, data_json)[isinstance(data_json, (dict, list))] except (TypeError, Exception) as e: logger.log(u'JSON data issue from URL %s\r\nDetail... %s' % (url, e.message), logger.WARNING) @@ -1267,6 +1264,9 @@ def getURL(url, post_data=None, params=None, headers=None, timeout=30, session=N return return True + if resp_sess: + return response.content, session + return response.content diff --git a/sickbeard/providers/__init__.py b/sickbeard/providers/__init__.py index a6dd9ebe..66433f74 100755 --- a/sickbeard/providers/__init__.py +++ b/sickbeard/providers/__init__.py @@ -30,7 +30,7 @@ from . import alpharatio, alphareign, beyondhd, bithdtv, bitmetv, blutopia, btn, fano, filelist, funfile, grabtheinfo, hdbits, hdspace, hdtorrents, \ iptorrents, limetorrents, magnetdl, morethan, nebulance, ncore, nyaa, pisexy, potuk, pretome, privatehd, ptf, \ rarbg, revtt, scenehd, scenetime, shazbat, showrss, skytorrents, speedcd, \ - thepiratebay, torlock, torrentbytes, torrentday, torrenting, torrentleech, \ + thepiratebay, torlock, torrentday, torrenting, torrentleech, \ torrentz2, tvchaosuk, wop, zooqle # anime from . import anizb, tokyotoshokan @@ -83,7 +83,6 @@ __all__ = ['omgwtfnzbs', 'speedcd', 'thepiratebay', 'torlock', - 'torrentbytes', 'torrentday', 'torrenting', 'torrentleech', diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index 4d733d17..5cbedbdf 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -515,7 +515,8 @@ class GenericProvider(object): log_failure_url = False try: data = helpers.getURL(url, *args, **kwargs) - if data: + if data and not isinstance(data, tuple) \ + or isinstance(data, tuple) and data[0]: if 0 != self.failure_count: logger.log('Unblocking provider: %s' % self.get_id(), logger.DEBUG) self.failure_count = 0 @@ -638,6 +639,7 @@ class GenericProvider(object): pass if not btih or not re.search('(?i)[0-9a-f]{32,40}', btih): + assert not result.url.startswith('http') logger.log('Unable to extract torrent hash from link: ' + ex(result.url), logger.ERROR) return False @@ -1494,7 +1496,7 @@ class TorrentProvider(GenericProvider): is_valid = s + zlib.crc32(file_hd.read()) in (1661931498, 472149389) return is_valid - def _authorised(self, logged_in=None, post_params=None, failed_msg=None, url=None, timeout=30): + def _authorised(self, logged_in=None, post_params=None, failed_msg=None, url=None, timeout=30, **kwargs): maxed_out = (lambda y: re.search(r'(?i)[1-3]((<[^>]+>)|\W)*' + '(attempts|tries|remain)[\W\w]{,40}?(remain|left|attempt)', y)) @@ -1514,19 +1516,24 @@ class TorrentProvider(GenericProvider): if not self._valid_home(): return False - if hasattr(self, 'digest'): + if getattr(self, 'digest', None): self.cookies = re.sub(r'(?i)([\s\']+|cookie\s*:)', '', self.digest) success, msg = self._check_cookie() if not success: self.cookies = None logger.log(u'%s: [%s]' % (msg, self.cookies), logger.WARNING) return False - elif not self._check_auth(): - return False + else: + try: + if not self._check_auth(): + return False + except AuthException as e: + logger.log('%s' % ex(e), logger.ERROR) + return False if isinstance(url, type([])): for i in range(0, len(url)): - self.get_url(url.pop(), skip_auth=True) + self.get_url(url.pop(), skip_auth=True, **kwargs) if self.should_skip(): return False @@ -1535,7 +1542,9 @@ class TorrentProvider(GenericProvider): if hasattr(self, 'urls'): url = self.urls.get('login_action') if url: - response = self.get_url(url, 
skip_auth=True) + response = self.get_url(url, skip_auth=True, **kwargs) + if isinstance(response, tuple): + response = response[0] if self.should_skip() or None is response: return False try: @@ -1560,6 +1569,8 @@ class TorrentProvider(GenericProvider): passfield = name if name not in ('username', 'password') and 'password' != itype: post_params.setdefault(name, value) + except IndexError: + return False except KeyError: return super(TorrentProvider, self)._authorised() else: @@ -1567,7 +1578,7 @@ class TorrentProvider(GenericProvider): if not url: return super(TorrentProvider, self)._authorised() - if hasattr(self, 'username') and hasattr(self, 'password'): + if getattr(self, 'username', None) and getattr(self, 'password', None): if not post_params: post_params = dict(username=self.username, password=self.password) elif isinstance(post_params, type({})): @@ -1576,15 +1587,21 @@ class TorrentProvider(GenericProvider): if self.password not in post_params.values(): post_params[(passfield, 'password')[not passfield]] = self.password - response = self.get_url(url, skip_auth=True, post_data=post_params, timeout=timeout) + response = self.get_url(url, skip_auth=True, post_data=post_params, timeout=timeout, **kwargs) + session = True + if isinstance(response, tuple): + session = response[1] + response = response[0] if not self.should_skip() and response: if logged_in(response): - return True + return session if maxed_out(response) and hasattr(self, 'password'): self.password = None sickbeard.save_config() - logger.log(failed_msg(response) % self.name, logger.ERROR) + msg = failed_msg(response) + if msg: + logger.log(msg % self.name, logger.ERROR) return False diff --git a/sickbeard/providers/newznab.py b/sickbeard/providers/newznab.py index 664ea210..3f435a9a 100755 --- a/sickbeard/providers/newznab.py +++ b/sickbeard/providers/newznab.py @@ -983,10 +983,8 @@ class NewznabCache(tvcache.TVCache): ids = self.parse_ids(item, ns) - if not title or not url: - logger.log('The data returned from the %s feed is incomplete, this result is unusable' - % self.provider.name, logger.DEBUG) - return None + if title and url: + return self.add_cache_entry(title, url, id_dict=ids) - logger.log('Attempting to add item from RSS to cache: %s' % title, logger.DEBUG) - return self.add_cache_entry(title, url, id_dict=ids) + logger.log('Data returned from the %s feed is incomplete, this result is unusable' % self.provider.name, + logger.DEBUG) diff --git a/sickbeard/providers/speedcd.py b/sickbeard/providers/speedcd.py index fa60c3d9..96b60b31 100644 --- a/sickbeard/providers/speedcd.py +++ b/sickbeard/providers/speedcd.py @@ -20,18 +20,22 @@ import time from urllib import quote, unquote from . 
import generic +from sickbeard import logger from sickbeard.bs4_parser import BS4Parser from sickbeard.helpers import tryInt +import sickbeard class SpeedCDProvider(generic.TorrentProvider): def __init__(self): - generic.TorrentProvider.__init__(self, 'SpeedCD', cache_update_freq=20, update_freq=4*60) + generic.TorrentProvider.__init__(self, 'SpeedCD', update_freq=7*60) self.url_base = 'https://speed.cd/' self.urls = {'config_provider_home_uri': self.url_base, 'login': self.url_base + 'rss.php', + 'login_action': None, + 'do_login': self.url_base, 'search': self.url_base + 'V3/API/API.php'} self.categories = {'Season': [41, 53], 'Episode': [2, 49, 50, 55], 'anime': [30]} @@ -39,18 +43,53 @@ class SpeedCDProvider(generic.TorrentProvider): self.url = self.urls['config_provider_home_uri'] - self.digest, self.freeleech, self.minseed, self.minleech = 4 * [None] + self.username, self.password, self.digest, self.freeleech, self.minseed, self.minleech = 6 * [None] def _authorised(self, **kwargs): - digest = [x[::-1] for x in self.digest[::-1].rpartition('=')] - self.digest = digest[2] + digest[1] + quote(unquote(digest[0])) - return super(SpeedCDProvider, self)._authorised( - logged_in=(lambda y='': all( - [self.session.cookies.get_dict(domain='.speed.cd') and - self.session.cookies.clear('.speed.cd') is None or True] + - ['RSS' in y, 'type="password"' not in y, self.has_all_cookies(['speedian'], 'inSpeed_')] + - [(self.session.cookies.get('inSpeed_' + c) or 'sg!no!pw') in self.digest for c in ['speedian']])), - failed_msg=(lambda y=None: u'Invalid cookie details for %s. Perhaps the cookie expired? Check settings')) + result = False + if self.digest: + digest = [x[::-1] for x in self.digest[::-1].rpartition('=')] + self.digest = digest[2] + digest[1] + quote(unquote(digest[0])) + params = dict( + logged_in=(lambda y='': all( + [self.session.cookies.get_dict(domain='.speed.cd') and + self.session.cookies.clear('.speed.cd') is None or True] + + ['RSS' in y, 'type="password"' not in y, self.has_all_cookies(['speedian'], 'inSpeed_')] + + [(self.session.cookies.get('inSpeed_' + c) or 'sg!no!pw') in self.digest for c in ['speedian']])), + failed_msg=(lambda y=None: None)) + username = self.username + del self.username + result = super(SpeedCDProvider, self)._authorised(**params) + setattr(self, 'username', username) + + if not result and not self.failure_count: + if self.digest: + self.get_url('%slogout.php' % self.url_base, skip_auth=True, post_data={'submit.x': 24, 'submit.y': 11}) + self.digest = '' + params = dict( + logged_in=(lambda y='': all( + [self.session.cookies.get_dict(domain='.speed.cd') and + self.session.cookies.clear('.speed.cd') is None or True] + + [bool(y), not re.search('(?i)type="password"', y)] + + [re.search('(?i)Logout', y) or not self.digest + or (self.session.cookies.get('inSpeed_speedian') or 'sg!no!pw') in self.digest])), + failed_msg=(lambda y='': ( + re.search(r'(?i)(username|password)((<[^>]+>)|\W)*' + + '(or|and|/|\s)((<[^>]+>)|\W)*(password|incorrect)', y) and + u'Invalid username or password for %s. 
Check settings' or + u'Failed to authenticate or parse a response from %s, abort provider')), + post_params={'form_tmpl': True}) + self.urls['login_action'] = self.urls.get('do_login') + session = super(SpeedCDProvider, self)._authorised(session=None, resp_sess=True, **params) + self.urls['login_action'] = None + if session: + self.digest = 'inSpeed_speedian=%s' % session.cookies.get('inSpeed_speedian') + sickbeard.save_config() + result = True + logger.log('Cookie details for %s updated.' % self.name, logger.DEBUG) + elif not self.failure_count: + logger.log('Invalid cookie details for %s and login failed. Check settings' % self.name, logger.ERROR) + return result def _search_provider(self, search_params, **kwargs): @@ -127,8 +166,8 @@ class SpeedCDProvider(generic.TorrentProvider): def ui_string(key): return 'speedcd_digest' == key and \ - 'use... \'inSpeed_speedian=yy\' - warning: SpeedCD cookies expire minutes after inactivity, ' \ - 'so keep SG running. If you get auth failures, grab another browser cookie' or '' + 'use... \'inSpeed_speedian=yy\' - warning: SpeedCD cookies often expire, ' \ + 'username/pw may update them automatically, else update manually from browser' or '' provider = SpeedCDProvider() diff --git a/sickbeard/providers/torrentbytes.py b/sickbeard/providers/torrentbytes.py deleted file mode 100644 index ffa9a237..00000000 --- a/sickbeard/providers/torrentbytes.py +++ /dev/null @@ -1,113 +0,0 @@ -# coding=utf-8 -# -# This file is part of SickGear. -# -# SickGear is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# SickGear is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with SickGear. If not, see . - -import re -import traceback - -from . 
import generic -from sickbeard import logger -from sickbeard.bs4_parser import BS4Parser -from sickbeard.helpers import tryInt -from lib.unidecode import unidecode - - -class TorrentBytesProvider(generic.TorrentProvider): - - def __init__(self): - generic.TorrentProvider.__init__(self, 'TorrentBytes', cache_update_freq=20) - - self.url_home = ['https://www.torrentbytes.%s/' % u for u in 'net', 'me'] - - self.url_vars = {'login_action': 'login.php', 'search': 'browse.php?search=%s&%s'} - self.url_tmpl = {'config_provider_home_uri': '%(home)s', 'login_action': '%(home)s%(vars)s', - 'search': '%(home)s%(vars)s'} - - self.categories = {'Season': [41], 'Episode': [32, 33, 37, 38]} - self.categories['Cache'] = self.categories['Season'] + self.categories['Episode'] - - self.username, self.password, self.freeleech, self.minseed, self.minleech = 5 * [None] - - def _authorised(self, **kwargs): - - return super(TorrentBytesProvider, self)._authorised(post_params={'form_tmpl': True}) - - def _search_provider(self, search_params, **kwargs): - - results = [] - if not self._authorised(): - return results - - items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []} - - rc = dict((k, re.compile('(?i)' + v)) for (k, v) in {'info': 'detail', 'get': 'download', - 'fl': '\[\W*F\W?L\W*\]'}.items()) - for mode in search_params.keys(): - for search_string in search_params[mode]: - search_string = isinstance(search_string, unicode) and unidecode(search_string) or search_string - search_url = self.urls['search'] % (search_string, self._categories_string(mode)) - - html = self.get_url(search_url, timeout=90) - if self.should_skip(): - return results - - cnt = len(items[mode]) - try: - if not html or self._has_no_results(html): - raise generic.HaltParseException - - with BS4Parser(html, features=['html5lib', 'permissive'], attr='border="1"') as soup: - torrent_table = soup.find('table', attrs={'border': '1'}) - torrent_rows = [] if not torrent_table else torrent_table.find_all('tr') - - if 2 > len(torrent_rows): - raise generic.HaltParseException - - head = None - for tr in torrent_rows[1:]: - cells = tr.find_all('td') - if 5 > len(cells): - continue - try: - info = tr.find('a', href=rc['info']) - head = head if None is not head else self._header_row(tr) - seeders, leechers, size = [tryInt(n, n) for n in [ - cells[head[x]].get_text().strip() for x in 'seed', 'leech', 'size']] - if self.freeleech and (len(info.contents) < 2 or not rc['fl'].search( - info.contents[1].string.strip())) or self._peers_fail(mode, seeders, leechers): - continue - - title = (info.attrs.get('title') or info.get_text()).strip() - download_url = self._link(tr.find('a', href=rc['get'])['href']) - except (AttributeError, TypeError, ValueError, KeyError): - continue - - if title and download_url: - items[mode].append((title, download_url, seeders, self._bytesizer(size))) - - except generic.HaltParseException: - pass - except (StandardError, Exception): - logger.log(u'Failed to parse. 
Traceback: %s' % traceback.format_exc(), logger.ERROR) - - self._log_search(mode, len(items[mode]) - cnt, search_url) - - results = self._sort_seeding(mode, results + items[mode]) - - return results - - -provider = TorrentBytesProvider() diff --git a/sickbeard/tv.py b/sickbeard/tv.py index 79f5264c..05aba0b5 100644 --- a/sickbeard/tv.py +++ b/sickbeard/tv.py @@ -389,8 +389,8 @@ class TVShow(object): if noCreate: return None - logger.log('%s: An object for episode %sx%s didn\'t exist in the cache, trying to create it' % - (self.indexerid, season, episode), logger.DEBUG) + # logger.log('%s: An object for episode %sx%s didn\'t exist in the cache, trying to create it' % + # (self.indexerid, season, episode), logger.DEBUG) if file: ep = TVEpisode(self, season, episode, file, show_sql=ep_sql) diff --git a/sickbeard/tvcache.py b/sickbeard/tvcache.py index 2706997d..b4075b4e 100644 --- a/sickbeard/tvcache.py +++ b/sickbeard/tvcache.py @@ -127,14 +127,10 @@ class TVCache: title = self._translateTitle(title) url = self._translateLinkURL(url) - logger.log(u'Attempting to add item to cache: ' + title, logger.DEBUG) return self.add_cache_entry(title, url) - else: - logger.log( - u'The data returned from the ' + self.provider.name + ' feed is incomplete, this result is unusable', - logger.DEBUG) - return None + logger.log('Data returned from the %s feed is incomplete, this result is unusable' % self.provider.name, + logger.DEBUG) def _getLastUpdate(self): myDB = self.get_db() @@ -197,42 +193,41 @@ class TVCache: if not parse_result: # create showObj from indexer_id if available - showObj=None + show_obj = None if indexer_id: try: - showObj = helpers.findCertainShow(sickbeard.showList, indexer_id) + show_obj = helpers.findCertainShow(sickbeard.showList, indexer_id) except MultipleShowObjectsException: - return None + return if id_dict: try: - showObj = helpers.find_show_by_id(sickbeard.showList, id_dict=id_dict, no_mapped_ids=False) + show_obj = helpers.find_show_by_id(sickbeard.showList, id_dict=id_dict, no_mapped_ids=False) except MultipleShowObjectsException: - return None + return try: - np = NameParser(showObj=showObj, convert=True, indexer_lookup=False) + np = NameParser(showObj=show_obj, convert=True, indexer_lookup=False) parse_result = np.parse(name) except InvalidNameException: - logger.log(u'Unable to parse the filename ' + name + ' into a valid episode', logger.DEBUG) - return None + logger.log('Unable to parse the filename %s into a valid episode' % name, logger.DEBUG) + return except InvalidShowException: - logger.log(u'No show in the db matches filename ' + name + ' not cached', logger.DEBUG) - return None + return if not parse_result or not parse_result.series_name: - return None + return - # if we made it this far then lets add the parsed result to cache for usager later on + # if we made it this far then lets add the parsed result to cache for usage later on season = parse_result.season_number if parse_result.season_number else 1 episodes = parse_result.episode_numbers if season and episodes: - # store episodes as a seperated string - episodeText = '|' + '|'.join(map(str, episodes)) + '|' + # store episodes as a separated string + episode_text = '|%s|' % '|'.join(map(str, episodes)) # get the current timestamp - curTimestamp = int(time.mktime(datetime.datetime.today().timetuple())) + cur_timestamp = int(time.mktime(datetime.datetime.today().timetuple())) # get quality of release quality = parse_result.quality @@ -246,11 +241,14 @@ class TVCache: # get version version = 
parse_result.version - logger.log(u'Added RSS item: [' + name + '] to cache: [' + self.providerID + ']', logger.DEBUG) + logger.log('Add to cache: [%s]' % name, logger.DEBUG) return [ - 'INSERT OR IGNORE INTO provider_cache (provider, name, season, episodes, indexerid, url, time, quality, release_group, version) VALUES (?,?,?,?,?,?,?,?,?,?)', - [self.providerID, name, season, episodeText, parse_result.show.indexerid, url, curTimestamp, quality, release_group, version]] + 'INSERT OR IGNORE INTO provider_cache' + ' (provider, name, season, episodes, indexerid, url, time, quality, release_group, version)' + ' VALUES (?,?,?,?,?,?,?,?,?,?)', + [self.providerID, name, season, episode_text, parse_result.show.indexerid, + url, cur_timestamp, quality, release_group, version]] def searchCache(self, episode, manualSearch=False): neededEps = self.findNeededEpisodes(episode, manualSearch) From f8fd89baebf4f85e546b4c8313bc6adf2bdc41ef Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Tue, 24 Apr 2018 15:30:33 +0100 Subject: [PATCH 2/2] Fix marking episodes wanted due to parsing malformed non-anime release name as an anime season pack. Change disallow anime regex matches for non-anime shows. Change add unit test for invalid (numbered) show releases (non-anime show with anime numbering). Change speed optimization, compile static name parser regexes once instead of for every NameParser instance. Change remove redundant create regexes log messages, removing <10% log spam. --- CHANGES.md | 3 +++ sickbeard/name_parser/parser.py | 36 +++++++++++++++++++++++---------- tests/name_parser_tests.py | 27 ++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 63a8deb6..147b1f86 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,9 @@ * Remove TorrentBytes provider * Change remove redundant log messages for releases never to be cached removing <30% log spam * Change remove redundant log messages for items not found in cache removing <10% log spam +* Fix marking episodes wanted due to parsing malformed non-anime release name as an anime season pack +* Change speed optimization, compile static name parser regexes once instead of for every NameParser instance +* Change remove redundant create regexes log messages removing <10% log spam ### 0.15.13 (2018-04-18 13:50:00 UTC) diff --git a/sickbeard/name_parser/parser.py b/sickbeard/name_parser/parser.py index 97209c50..ebe9f876 100644 --- a/sickbeard/name_parser/parser.py +++ b/sickbeard/name_parser/parser.py @@ -54,24 +54,22 @@ class NameParser(object): self.indexer_lookup = indexer_lookup if self.showObj and not self.showObj.is_anime: - self._compile_regexes(self.NORMAL_REGEX) + self.compiled_regexes = compiled_regexes[self.NORMAL_REGEX] elif self.showObj and self.showObj.is_anime: - self._compile_regexes(self.ANIME_REGEX) + self.compiled_regexes = compiled_regexes[self.ANIME_REGEX] else: - self._compile_regexes(self.ALL_REGEX) + self.compiled_regexes = compiled_regexes[self.ALL_REGEX] - def _compile_regexes(self, regex_mode): - if self.ANIME_REGEX == regex_mode: - logger.log(u'Using ANIME regexs', logger.DEBUG) + @classmethod + def compile_regexes(cls, regex_mode): + if cls.ANIME_REGEX == regex_mode: uncompiled_regex = [regexes.anime_regexes] - elif self.NORMAL_REGEX == regex_mode: - logger.log(u'Using NORMAL regexs', logger.DEBUG) + elif cls.NORMAL_REGEX == regex_mode: uncompiled_regex = [regexes.normal_regexes] else: - logger.log(u'Using ALL regexes', logger.DEBUG) uncompiled_regex = [regexes.normal_regexes,
regexes.anime_regexes] - self.compiled_regexes = {0: [], 1: []} + cls.compiled_regexes = {0: [], 1: []} index = 0 for regexItem in uncompiled_regex: for cur_pattern_num, (cur_pattern_name, cur_pattern) in enumerate(regexItem): @@ -80,9 +78,11 @@ class NameParser(object): except re.error as errormsg: logger.log(u'WARNING: Invalid episode_pattern, %s. %s' % (errormsg, cur_pattern)) else: - self.compiled_regexes[index].append([cur_pattern_num, cur_pattern_name, cur_regex]) + cls.compiled_regexes[index].append([cur_pattern_num, cur_pattern_name, cur_regex]) index += 1 + return cls.compiled_regexes + @staticmethod def clean_series_name(series_name): """Cleans up series name by removing any . and _ @@ -144,6 +144,15 @@ class NameParser(object): result.score += 1 + if 'anime' in cur_regex_name and not (self.showObj and self.showObj.is_anime): + p_show = helpers.get_show(result.series_name, True) + if p_show and self.showObj and p_show.indexerid != self.showObj.indexerid: + p_show = None + if not p_show and self.showObj: + p_show = self.showObj + if p_show and not p_show.is_anime: + continue + if 'series_num' in named_groups and match.group('series_num'): result.score += 1 @@ -558,6 +567,11 @@ class NameParser(object): return final_result +compiled_regexes = {NameParser.NORMAL_REGEX: NameParser.compile_regexes(NameParser.NORMAL_REGEX), + NameParser.ANIME_REGEX: NameParser.compile_regexes(NameParser.ANIME_REGEX), + NameParser.ALL_REGEX: NameParser.compile_regexes(NameParser.ALL_REGEX)} + + class ParseResult(object): def __init__(self, original_name, diff --git a/tests/name_parser_tests.py b/tests/name_parser_tests.py index f1742da5..983a6f7c 100644 --- a/tests/name_parser_tests.py +++ b/tests/name_parser_tests.py @@ -9,6 +9,7 @@ sys.path.insert(1, os.path.abspath('..')) sys.path.insert(1, os.path.abspath('../lib')) from sickbeard.name_parser import parser +from sickbeard import name_cache import sickbeard @@ -352,6 +353,25 @@ unicode_test_cases = [ failure_cases = ['7sins-jfcs01e09-720p-bluray-x264'] +invalid_cases = [('The.Show.Name.111E14.1080p.WEB.x264-GROUP', 'the show name', 11, False)] + + +class InvalidCases(test.SickbeardTestDBCase): + + def _test_invalid(self, name, show, indexerid, is_anime): + sickbeard.showList.append(TVShow(name=name, indexerid=indexerid, is_anime=is_anime)) + name_cache.addNameToCache(show, indexerid) + invalidexception = False + try: + parse_result = parser.NameParser(True).parse(name) + except (parser.InvalidNameException, parser.InvalidShowException): + invalidexception = True + self.assertEqual(invalidexception, True) + + def test_invalid(self): + for (name, show, indexerid, is_anime) in invalid_cases: + self._test_invalid(name, show, indexerid, is_anime) + class UnicodeTests(test.SickbeardTestDBCase): @@ -593,8 +613,10 @@ class BasicTests(test.SickbeardTestDBCase): class TVShow(object): - def __init__(self, is_anime=False): + def __init__(self, is_anime=False, name='', indexerid=0): self.is_anime = is_anime + self.name = name + self.indexerid = indexerid if __name__ == '__main__': @@ -612,3 +634,6 @@ if __name__ == '__main__': suite = unittest.TestLoader().loadTestsFromTestCase(FailureCaseTests) unittest.TextTestRunner(verbosity=2).run(suite) + + suite = unittest.TestLoader().loadTestsFromTestCase(InvalidCases) + unittest.TextTestRunner(verbosity=2).run(suite)
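
Note on the SpeedCD change in PATCH 1/2: the reworked _authorised() first replays the saved cookie digest and, only when that fails for the first time, logs out, performs a form login with the stored username/password, and persists the fresh inSpeed_speedian cookie. A minimal sketch of that two-stage flow, assuming plain requests and a hypothetical save_digest() hook (the real provider routes through TorrentProvider._authorised and sickbeard.save_config):

    # Hedged sketch, not the provider code: cookie-first login with a
    # username/password fallback that refreshes the stored cookie digest.
    import requests

    BASE = 'https://speed.cd/'

    def authorise(digest, username, password, save_digest):
        sess = requests.Session()
        if digest:
            # stage 1: replay the saved 'inSpeed_speedian=<value>' cookie
            name, _, value = digest.partition('=')
            sess.cookies.set(name, value, domain='.speed.cd')
            if 'RSS' in sess.get(BASE + 'rss.php').text:
                return sess  # cookie still valid
            # stale cookie: log out, mirroring the patch's logout.php call
            sess.post(BASE + 'logout.php', data={'submit.x': 24, 'submit.y': 11})
        # stage 2: form login with the stored credentials
        resp = sess.post(BASE, data={'username': username, 'password': password})
        fresh = sess.cookies.get('inSpeed_speedian')
        if resp.ok and fresh:
            save_digest('inSpeed_speedian=%s' % fresh)  # persist for next run
            return sess
        return None  # both stages failed; caller logs 'Check settings'

The motivation is spelled out in the ui_string change: SpeedCD cookies often expire, so stored credentials let the provider refresh them without user action.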
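
Note on the parser change in PATCH 2/2: regex compilation moves out of NameParser instances into a module-level compiled_regexes dict built once at import, and each instance simply indexes into it by mode. A minimal sketch of the compile-once pattern, with invented stand-in pattern tables (the real ones live in sickbeard.name_parser.regexes):

    # Hedged sketch of the compile-once pattern; _SOURCES is an assumed
    # stand-in for the normal/anime pattern lists in the regexes module.
    import re

    NORMAL_REGEX, ANIME_REGEX, ALL_REGEX = 0, 1, 2

    _SOURCES = {
        NORMAL_REGEX: [('standard', r'(?P<series_name>.+?)\Ws(?P<season_num>\d+)e(?P<ep_num>\d+)')],
        ANIME_REGEX: [('anime_bare', r'(?P<series_name>.+?)\W(?P<ep_ab_num>\d{1,3})(v\d+)?$')],
    }
    _SOURCES[ALL_REGEX] = _SOURCES[NORMAL_REGEX] + _SOURCES[ANIME_REGEX]

    def _compile(mode):
        # skip any pattern that fails to compile, as compile_regexes does
        compiled = []
        for pattern_name, pattern in _SOURCES[mode]:
            try:
                compiled.append((pattern_name, re.compile(pattern, re.I)))
            except re.error:
                pass
        return compiled

    # built a single time at import; shared by every parser instance
    compiled_regexes = dict((m, _compile(m)) for m in (NORMAL_REGEX, ANIME_REGEX, ALL_REGEX))

    class MiniNameParser(object):
        def __init__(self, mode=ALL_REGEX):
            self.compiled_regexes = compiled_regexes[mode]  # lookup, no re.compile cost

        def parse(self, name):
            for pattern_name, cur_regex in self.compiled_regexes:
                match = cur_regex.match(name)
                if match:
                    return pattern_name, match.groupdict()
            return None, None

Paying the compilation cost once at import is what removes the per-instance compile step, and with it the 'Using ... regexes' debug lines, from every parse.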