Add UK date format handling to name parser.

This commit is contained in:
JackDandy 2016-12-27 15:06:14 +00:00
parent dd0d12f72c
commit 5bdee041ed
4 changed files with 131 additions and 51 deletions

View file

@ -1,4 +1,9 @@
### 0.12.2 (2016-12-20 16:00:00 UTC)
### 0.12.3 (2016-12-27 15:20:00 UTC)
* Add UK date format handling to name parser
### 0.12.2 (2016-12-20 16:00:00 UTC)
* Change Rarbg and IPT urls

View file

@ -32,7 +32,7 @@ normal_regexes = [
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('fov_repeat',
# Show.Name.1x02.1x03.Source.Quality.Etc-Group
@ -47,7 +47,7 @@ normal_regexes = [
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('standard',
# Show.Name.S01E02.Source.Quality.Etc-Group
@ -61,12 +61,12 @@ normal_regexes = [
s(?P<season_num>\d+)[. _-]* # S01 and optional separator
e(?P<ep_num>\d+) # E02 and separator
(([. _-]*e|-) # linking e/- char
(?P<extra_ep_num>(?!(1080|720|480)[pi])\d+))* # additional E03/etc
(?P<extra_ep_num>(?!(2160|1080|720|480)[pi])\d+))* # additional E03/etc
[. _-]*((?P<extra_info>.+?) # Source_Quality_Etc-
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('fov',
# Show_Name.1x02.Source_Quality_Etc-Group
@ -79,13 +79,13 @@ normal_regexes = [
(?P<ep_num>\d+) # 02 and separator
(([. _-]*x|-) # linking x/- char
(?P<extra_ep_num>
(?!(1080|720|480)[pi])(?!(?<=x)264) # ignore obviously wrong multi-eps
(?!(2160|1080|720|480)[pi])(?!(?<=x)264) # ignore obviously wrong multi-eps
\d+))* # additional x03/etc
[\]. _-]*((?P<extra_info>.+?) # Source_Quality_Etc-
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('scene_date_format',
# Show.Name.2010.11.23.Source.Quality.Etc-Group
@ -99,7 +99,21 @@ normal_regexes = [
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('uk_date_format',
# Show.Name.23.11.2010.Source.Quality.Etc-Group
# Show Name - 23-11-2010 - Ep Name
'''
^((?P<series_name>.+?)[. _-]+)? # Show_Name and separator
(?P<air_day>\d{2})[. _-]+ # 23 and separator
(?P<air_month>\d{2})[. _-]+ # 11 and separator
(?P<air_year>(?:19|20)\d{2}) # 2010 and separator
[. _-]*((?P<extra_info>.+?) # Source_Quality_Etc-
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
('stupid',
# tpz-abc102
@ -109,7 +123,7 @@ normal_regexes = [
(?P<season_num>\d{1,2}) # 1
(?P<ep_num>\d{2})$ # 02
'''
),
),
('verbose',
# Show Name Season 1 Episode 2 Ep Name
@ -121,7 +135,7 @@ normal_regexes = [
(?P<ep_num>\d+)[. _-]+ # 02 and separator
(?P<extra_info>.+)$ # Source_Quality_Etc-
'''
),
),
('season_only',
# Show.Name.S01.Source.Quality.Etc-Group
@ -133,7 +147,7 @@ normal_regexes = [
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('no_season_multi_ep',
# Show.Name.E02-03
@ -143,12 +157,12 @@ normal_regexes = [
(e(p(isode)?)?|part|pt)[. _-]? # e, ep, episode, or part
(?P<ep_num>(\d+|[ivx]+)) # first ep num
((([. _-]+(and|&|to)[. _-]+)|-) # and/&/to joiner
(?P<extra_ep_num>(?!(1080|720|480)[pi])(\d+|[ivx]+))[. _-]) # second ep num
(?P<extra_ep_num>(?!(2160|1080|720|480)[pi])(\d+|[ivx]+))[. _-]) # second ep num
([. _-]*(?P<extra_info>.+?) # Source_Quality_Etc-
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('no_season_general',
# Show.Name.E23.Test
@ -160,13 +174,13 @@ normal_regexes = [
(?P<ep_num>(\d+|([ivx]+(?=[. _-])))) # first ep num
([. _-]+((and|&|to)[. _-]+)? # and/&/to joiner
((e(p(isode)?)?|part|pt)[. _-]?) # e, ep, episode, or part
(?P<extra_ep_num>(?!(1080|720|480)[pi])
(?P<extra_ep_num>(?!(2160|1080|720|480)[pi])
(\d+|([ivx]+(?=[. _-]))))[. _-])* # second ep num
([. _-]*(?P<extra_info>.+?) # Source_Quality_Etc-
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
('bare',
# Show.Name.102.Source.Quality.Etc-Group
@ -177,7 +191,7 @@ normal_regexes = [
([. _-]+(?P<extra_info>(?!\d{3}[. _-]+)[^-]+) # Source_Quality_Etc-
(-(?P<release_group>.+))?)?$ # Group
'''
),
),
('no_season',
# Show Name - 01 - Ep Name
@ -190,7 +204,7 @@ normal_regexes = [
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
]
anime_regexes = [
@ -206,7 +220,8 @@ anime_regexes = [
(?:[ ._]?\[(?P<crc>\w+)\])?
.*?
'''
),
),
('anime_standard',
# [Group Name] Show Name.13-14
# [Group Name] Show Name - 13-14
@ -223,7 +238,9 @@ anime_regexes = [
[ ._-]+\[(?P<extra_info>\d{3,4}[xp]?\d{0,4}.+?)\] # Source_Quality_Etc-
(\[(?P<crc>\w{8})\])? # CRC
.*? # Separator and EOL
'''),
'''
),
('anime_standard_round',
# [Stratos-Subs]_Infinite_Stratos_-_12_(1280x720_H.264_AAC)_[379759DB]
# [ShinBunBu-Subs] Bleach - 02-03 (CX 1280x720 x264 AAC)
@ -236,7 +253,9 @@ anime_regexes = [
[ ._-]+\((?P<extra_info>(CX[ ._-]?)?\d{3,4}[xp]?\d{0,4}[\.\w\s-]*)\) # Source_Quality_Etc-
(\[(?P<crc>\w{8})\])? # CRC
.*? # Separator and EOL
'''),
'''
),
('anime_slash',
# [SGKK] Bleach 312v1 [720p/MKV]
'''
@ -248,7 +267,9 @@ anime_regexes = [
[ ._-]+\[(?P<extra_info>\d{3,4}p) # Source_Quality_Etc-
(\[(?P<crc>\w{8})\])? # CRC
.*? # Separator and EOL
'''),
'''
),
('anime_standard_codec',
# [Ayako]_Infinite_Stratos_-_IS_-_07_[H264][720p][EB7838FC]
# [Ayako] Infinite Stratos - IS - 07v2 [H264][720p][44419534]
@ -264,7 +285,9 @@ anime_regexes = [
[ ._-]*\[(?P<extra_info>(\d{3,4}[xp]?\d{0,4})?[\.\w\s-]*)\] # Source_Quality_Etc-
(\[(?P<crc>\w{8})\])? # CRC
.*? # Separator and EOL
'''),
'''
),
('anime_and_normal',
# Bleach - s16e03-04 - 313-314
# Bleach.s16e03-04.313-314
@ -283,7 +306,8 @@ anime_regexes = [
(v(?P<version>[0-9]))? # the version e.g. "v2"
.*?
'''
),
),
('anime_and_normal_x',
# Bleach - s16e03-04 - 313-314
# Bleach.s16e03-04.313-314
@ -301,7 +325,8 @@ anime_regexes = [
(v(?P<version>[0-9]))? # the version e.g. "v2"
.*?
'''
),
),
('anime_and_normal_reverse',
# Bleach - 313-314 - s16e03-04
'''
@ -317,7 +342,8 @@ anime_regexes = [
(?P<extra_ep_num>\d+))* # additional E03/etc
.*?
'''
),
),
('anime_and_normal_front',
# 165.Naruto Shippuuden.s08e014
'''
@ -331,7 +357,8 @@ anime_regexes = [
(?P<extra_ep_num>\d+))* # additional E03/etc
.*?
'''
),
),
('anime_ep_name',
'''
^(?:\[(?P<release_group>.+?)\][ ._-]*)
@ -344,7 +371,8 @@ anime_regexes = [
(?:\[(?P<crc>\w{8})\])?
.*?
'''
),
),
('anime_bare',
# One Piece - 102
# [ACX]_Wolf's_Spirit_001.mkv
@ -355,9 +383,10 @@ anime_regexes = [
(-(?P<extra_ab_ep_num>\d{3}))* # E02
(v(?P<version>[0-9]))? # v2
.*? # Separator and EOL
'''),
'''
),
('standard',
('standard',
# Show.Name.S01E02.Source.Quality.Etc-Group
# Show Name - S01E02 - My Ep Name
# Show.Name.S01.E03.My.Ep.Name
@ -369,10 +398,10 @@ anime_regexes = [
s(?P<season_num>\d+)[. _-]* # S01 and optional separator
e(?P<ep_num>\d+) # E02 and separator
(([. _-]*e|-) # linking e/- char
(?P<extra_ep_num>(?!(1080|720|480)[pi])\d+))* # additional E03/etc
(?P<extra_ep_num>(?!(2160|1080|720|480)[pi])\d+))* # additional E03/etc
[. _-]*((?P<extra_info>.+?) # Source_Quality_Etc-
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''
),
),
]

View file

@ -21,8 +21,8 @@ import traceback
from . import generic
from sickbeard import logger
from sickbeard.bs4_parser import BS4Parser
from sickbeard.helpers import tryInt
from sickbeard.config import naming_ep_type
from sickbeard.helpers import tryInt
from dateutil.parser import parse
from lib.unidecode import unidecode
@ -115,9 +115,10 @@ class TVChaosUKProvider(generic.TorrentProvider):
get_detail = False
try:
title = self.regulate_title(title, mode)
if title and download_url:
items[mode].append((title, download_url, seeders, self._bytesizer(size)))
titles = self.regulate_title(title, mode, search_string)
if download_url and titles:
for title in titles:
items[mode].append((title, download_url, seeders, self._bytesizer(size)))
except (StandardError, Exception):
pass
@ -137,7 +138,7 @@ class TVChaosUKProvider(generic.TorrentProvider):
return results
@staticmethod
def regulate_title(title, mode='-'):
def regulate_title(title, mode='-', search_string=''):
has_series = re.findall('(?i)(.*?series[^\d]*?\d+)(.*)', title)
if has_series:
@ -163,13 +164,14 @@ class TVChaosUKProvider(generic.TorrentProvider):
for yr in years:
title = re.sub('\{\{yr\}\}', yr, title, count=1)
dated = re.findall('(?i)([(\s]*)((?:\d+\s)?)([adfjmnos]\w{2,}\s+)((?:19|20)\d\d)([)\s]*)', title)
date_re = '(?i)([(\s]*)((?:\d+\s)?)([adfjmnos]\w{2,}\s+)((?:19|20)\d\d)([)\s]*)'
dated = re.findall(date_re, title)
dnew = None
for d in dated:
try:
dout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
title = title.replace(''.join(d), '%s%s%s' % (
('', ' ')[1 < len(d[0])], dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)],
('', ' ')[1 < len(d[4])]))
dnew = dout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(dout)]
title = title.replace(''.join(d), '%s%s%s' % (('', ' ')[1 < len(d[0])], dnew, ('', ' ')[1 < len(d[4])]))
except (StandardError, Exception):
pass
if dated:
@ -212,7 +214,30 @@ class TVChaosUKProvider(generic.TorrentProvider):
for r in [('\s+[-]?\s+|\s+`|`\s+', '`'), ('`+', '.')]:
title = re.sub(r[0], r[1], title)
return title
titles = []
if dnew:
snew = None
dated_s = re.findall(date_re, search_string)
for d in dated_s:
try:
sout = parse(''.join(d[1:4])).strftime('%Y-%m-%d')
snew = sout[0: not any(d[2]) and 4 or not any(d[1]) and 7 or len(sout)]
except (StandardError, Exception):
pass
if snew and dnew and snew != dnew:
return titles
try:
sxxexx_r = '(?i)S\d\d+E\d\d+'
if dnew and re.search(sxxexx_r, title):
titles += [re.sub(sxxexx_r, dnew, re.sub('[_.\-\s]?%s' % dnew, '', title))]
except (StandardError, Exception):
pass
titles += [title]
return titles
def _season_strings(self, ep_obj, **kwargs):
@ -224,7 +249,8 @@ class TVChaosUKProvider(generic.TorrentProvider):
def _episode_strings(self, ep_obj, **kwargs):
return generic.TorrentProvider._episode_strings(self, ep_obj, scene=False, prefix='%', date_detail=(
lambda d: [d.strftime('%d %b %Y')] + ([d.strftime('%d %B %Y')], [])[d.strftime('%b') == d.strftime('%B')]),
lambda d: [x.strip('0') for x in (
[d.strftime('%d %b %Y')] + ([d.strftime('%d %B %Y')], [])[d.strftime('%b') == d.strftime('%B')])]),
ep_detail=(lambda e: [naming_ep_type[2] % e] + (
[], ['%(episodenumber)dof' % e])[1 == tryInt(e.get('seasonnumber'))]), **kwargs)

View file

@ -143,6 +143,19 @@ simple_test_cases = {
parser.ParseResult(None, 'Show Name', None, [], 'WEB-DL', None, datetime.date(2010, 11, 23)),
},
'uk_date_format': {
'Show.Name.23.11.2010.Source.Quality.Etc-Group':
parser.ParseResult(None, 'Show Name', None, [], 'Source.Quality.Etc', 'Group', datetime.date(2010, 11, 23)),
'Show Name - 23.11.2010': parser.ParseResult(None, 'Show Name', air_date=datetime.date(2010, 11, 23)),
'Show.Name.11.23.2010.Source.Quality.Etc-Group':
parser.ParseResult(None, 'Show Name', None, [], 'Source.Quality.Etc', 'Group', datetime.date(2010, 11, 23)),
'Show Name - 23-11-2010 - Ep Name':
parser.ParseResult(None, 'Show Name', extra_info='Ep Name', air_date=datetime.date(2010, 11, 23)),
'23-11-2010 - Ep Name': parser.ParseResult(None, extra_info='Ep Name', air_date=datetime.date(2010, 11, 23)),
'Show.Name.23.11.2010.WEB-DL':
parser.ParseResult(None, 'Show Name', None, [], 'WEB-DL', None, datetime.date(2010, 11, 23)),
},
'anime_ultimate': {
'[Tsuki] Bleach - 301 [1280x720][61D1D4EE]':
parser.ParseResult(None, 'Bleach', None, [], '1280x720', 'Tsuki', None, [301]),
@ -308,10 +321,16 @@ combination_test_cases = [
unicode_test_cases = [
(u'The.Big.Bang.Theory.2x07.The.Panty.Pi\xf1ata.Polarization.720p.HDTV.x264.AC3-SHELDON.mkv',
parser.ParseResult(None, 'The.Big.Bang.Theory', 2, [7], '720p.HDTV.x264.AC3', 'SHELDON')
parser.ParseResult(
u'The.Big.Bang.Theory.2x07.The.Panty.Pi\xf1ata.Polarization.720p.HDTV.x264.AC3-SHELDON.mkv',
u'The Big Bang Theory', 2, [7], u'The.Panty.Pi\xf1ata.Polarization.720p.HDTV.x264.AC3', u'SHELDON',
version=-1)
),
('The.Big.Bang.Theory.2x07.The.Panty.Pi\xc3\xb1ata.Polarization.720p.HDTV.x264.AC3-SHELDON.mkv',
parser.ParseResult(None, 'The.Big.Bang.Theory', 2, [7], '720p.HDTV.x264.AC3', 'SHELDON')
parser.ParseResult(
u'The.Big.Bang.Theory.2x07.The.Panty.Pi\xf1ata.Polarization.720p.HDTV.x264.AC3-SHELDON.mkv',
u'The Big Bang Theory', 2, [7], u'The.Panty.Pi\xf1ata.Polarization.720p.HDTV.x264.AC3', u'SHELDON',
version=-1)
),
]
@ -319,14 +338,11 @@ failure_cases = ['7sins-jfcs01e09-720p-bluray-x264']
class UnicodeTests(test.SickbeardTestDBCase):
@staticmethod
def _test_unicode(name, result):
np = parser.NameParser(True)
try:
parse_result = np.parse(name)
except parser.InvalidShowException:
return False
def _test_unicode(self, name, result):
result.which_regex = ['fov']
parse_result = parser.NameParser(True, testing=True).parse(name)
self.assertEqual(parse_result, result)
# this shouldn't raise an exception
void = repr(str(parse_result))
@ -460,6 +476,10 @@ class BasicTests(test.SickbeardTestDBCase):
np = parser.NameParser(False, testing=True)
self._test_names(np, 'scene_date_format')
def test_uk_date_format_names(self):
np = parser.NameParser(False, testing=True)
self._test_names(np, 'uk_date_format')
def test_standard_file_names(self):
np = parser.NameParser(testing=True)
self._test_names(np, 'standard', lambda x: x + '.avi')