Testing a new, faster method of obtaining the show object: the series name is extracted via regex matching during name parsing of results.

This commit is contained in:
echel0n 2014-06-22 11:33:58 -07:00
parent 7defffb4f1
commit b1c9b71846

View file

@ -37,7 +37,8 @@ class NameParser(object):
SPORTS_REGEX = 2
ANIME_REGEX = 3
def __init__(self, file_name=True, showObj=None, epObj=None, useIndexers=False, convert=False, naming_pattern=False):
def __init__(self, file_name=True, showObj=None, epObj=None, useIndexers=False, convert=False,
naming_pattern=False):
self.file_name = file_name
self.showList = sickbeard.showList or []
@ -47,17 +48,6 @@ class NameParser(object):
self.convert = convert
self.naming_pattern = naming_pattern
self.regexMode = self.ALL_REGEX
if self.showObj and self.showObj.is_anime:
self.regexMode = self.ANIME_REGEX
elif self.showObj and self.showObj.is_sports:
self.regexMode = self.SPORTS_REGEX
elif self.showObj and not self.showObj.is_anime and not self.showObj.is_sports:
self.regexMode = self.NORMAL_REGEX
self.compiled_regexes = {}
self._compile_regexes(self.regexMode)
def clean_series_name(self, series_name):
"""Cleans up series name by removing any . and _
characters, along with any trailing hyphens.
@ -117,6 +107,36 @@ class NameParser(object):
if not name:
return
if not self.naming_pattern:
# Regex pattern to return the Show / Series Name regardless of the file pattern tossed at it; it matched 53 show name examples from regexes.py
show_pattern = '''^(?:(UEFA|MLB|ESPN|WWE|MMA|UFC|TNA|EPL|NASCAR|NBA|NFL|NHL|NRL|PGA|SUPER LEAGUE|FORMULA|FIFA|NETBALL|MOTOG(P)))?(?:[0-9]+)?(?:\[(?:.+?)\][ ._-])?(?P<series_name>.*?)(?:[ ._-])+?(?:Season|Part)?(?:.[eE][0-9][0-9]?)?(?:.?[sS]?[0-9][0-9]?)'''
try:
show_regex = re.compile(show_pattern, re.VERBOSE | re.IGNORECASE)
except re.error, errormsg:
logger.log(u"WARNING: Invalid show series name pattern, %s: [%s]" % (errormsg, show_pattern))
else:
seriesname_match = show_regex.match(name)
seriesname_groups = seriesname_match.groupdict().keys()
if not self.showObj and 'series_name' in seriesname_groups:
# Do we recognize this show?
series_name = self.clean_series_name(seriesname_match.group('series_name'))
self.showObj = helpers.get_show_by_name(series_name, useIndexer=self.useIndexers)
if not self.showObj:
return
regexMode = self.ALL_REGEX
if self.showObj and self.showObj.is_anime:
regexMode = self.ANIME_REGEX
elif self.showObj and self.showObj.is_sports:
regexMode = self.SPORTS_REGEX
elif self.showObj and not self.showObj.is_anime and not self.showObj.is_sports:
regexMode = self.NORMAL_REGEX
self.compiled_regexes = {}
self._compile_regexes(regexMode)
matches = []
result = None
for (cur_regex_type, cur_regex_name), cur_regex in self.compiled_regexes.items():
@ -137,22 +157,6 @@ class NameParser(object):
result.series_name = self.clean_series_name(result.series_name)
result.score += 1
if not self.showObj and not self.naming_pattern:
self.showObj = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers)
if self.showObj:
result.show = self.showObj
if getattr(self.showObj, 'air_by_date', None) and not cur_regex_type == 'normal':
continue
elif getattr(self.showObj, 'sports', None) and not cur_regex_type == 'sports':
continue
elif getattr(self.showObj, 'anime', None) and not cur_regex_type == 'anime':
continue
# don't continue parsing if we don't have a show object by now, try next regex pattern
if not self.showObj and not self.naming_pattern:
continue
if 'season_num' in named_groups:
tmp_season = int(match.group('season_num'))
if not (cur_regex_name == 'bare' and tmp_season in (19, 20)):
@ -555,7 +559,9 @@ class ParseResult(object):
self.episode_numbers = new_episode_numbers
self.season_number = new_season_numbers[0]
logger.log(u"Converted parsed result " + self.original_name + " into " + str(self).decode('utf-8', 'xmlcharrefreplace'), logger.DEBUG)
logger.log(u"Converted parsed result " + self.original_name + " into " + str(self).decode('utf-8',
'xmlcharrefreplace'),
logger.DEBUG)
return self