SickGear/lib/guessit/transfo/guess_episode_info_from_position.py
echel0n 0d9fbc1ad7 Welcome to our SickBeard-TVRage Edition ...
This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to access and download shows that you couldn't before because you were locked into only what TheTVDB had to offer.

Also, this edition is based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer.

Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk!

Enjoy!
2014-03-09 22:39:12 -07:00

146 lines
6 KiB
Python

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# GuessIt - A library for guessing information from filenames
# Copyright (c) 2012 Nicolas Wack <wackou@gmail.com>
#
# GuessIt is free software; you can redistribute it and/or modify it under
# the terms of the Lesser GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GuessIt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Lesser GNU General Public License for more details.
#
# You should have received a copy of the Lesser GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
from guessit.transfo import found_property
from guessit.patterns import non_episode_title, unlikely_series
import logging
log = logging.getLogger(__name__)
def match_from_epnum_position(mtree, node):
    """Guess the series name and episode title from leaves around ``node``.

    ``node`` is the leaf carrying the episode number. The still-unidentified
    leaves of ``mtree`` are classified by comparing their ``node_idx`` with
    the index of ``node``, and matches are recorded through
    ``found_property`` with a confidence that depends on which heuristic
    fired.

    :param mtree: match tree; ``unidentified_leaves()`` and ``info`` are used.
    :param node: leaf holding the episode number; only ``node_idx`` is read.
    :return: ``None``; results are recorded via ``found_property``.
    """
    epnum_idx = node.node_idx

    # a few helper functions to be able to filter using high-level semantics
    def before_epnum_in_same_pathgroup():
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[0] == epnum_idx[0] and
                    leaf.node_idx[1:] < epnum_idx[1:])]

    def after_epnum_in_same_pathgroup():
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[0] == epnum_idx[0] and
                    leaf.node_idx[1:] > epnum_idx[1:])]

    def after_epnum_in_same_explicitgroup():
        return [leaf for leaf in mtree.unidentified_leaves()
                if (leaf.node_idx[:2] == epnum_idx[:2] and
                    leaf.node_idx[2:] > epnum_idx[2:])]

    def episode_title_candidates(leaves):
        # drop the leaves whose text is listed in non_episode_title
        return [leaf for leaf in leaves
                if leaf.clean_value.lower() not in non_episode_title]

    # epnumber is the first group and there are only 2 after it in same
    # path group
    #  -> series title - episode title
    title_candidates = episode_title_candidates(
        after_epnum_in_same_pathgroup())
    if ('title' not in mtree.info and                  # no title
            not before_epnum_in_same_pathgroup() and   # no groups before
            len(title_candidates) == 2):               # only 2 groups after
        found_property(title_candidates[0], 'series', confidence=0.4)
        found_property(title_candidates[1], 'title', confidence=0.4)
        return

    # if we have at least 1 valid group before the episodeNumber, then it's
    # probably the series name
    series_candidates = before_epnum_in_same_pathgroup()
    if series_candidates:
        found_property(series_candidates[0], 'series', confidence=0.7)

    # only 1 group after (in the same path group) and it's probably the
    # episode title; the list is rebuilt because a series guess may just
    # have been recorded
    title_candidates = episode_title_candidates(
        after_epnum_in_same_pathgroup())
    if len(title_candidates) == 1:
        found_property(title_candidates[0], 'title', confidence=0.5)
        return

    # try in the same explicit group, with lower confidence
    explicit_candidates = episode_title_candidates(
        after_epnum_in_same_explicitgroup())
    if len(explicit_candidates) == 1:
        found_property(explicit_candidates[0], 'title', confidence=0.4)
        return
    if len(explicit_candidates) > 1:
        found_property(explicit_candidates[0], 'title', confidence=0.3)
        return

    # get the one with the longest value; nothing has been recorded since
    # title_candidates was built, so it is reused as-is. max() keeps the
    # first of several equally long candidates.
    if title_candidates:
        longest = max(title_candidates,
                      key=lambda leaf: len(leaf.clean_value))
        found_property(longest, 'title', confidence=0.3)
def process(mtree):
    """Fill in series and episode-title guesses based on leaf positions.

    When a leaf already carries an episode number, the guessing is handed to
    ``match_from_epnum_position``; otherwise the unidentified leaves of the
    node at index ``(-2,)`` are used. Afterwards the node at index ``(-3,)``
    and any season-only node are inspected for a series name, and the
    confidence of series values listed in ``unlikely_series`` is halved.

    :param mtree: match tree to annotate; guesses are recorded in place.
    :return: ``None``.
    """
    episode_leaves = [leaf for leaf in mtree.leaves()
                      if 'episodeNumber' in leaf.guess]
    if not episode_leaves:
        # if we don't have the episode number, but at least 2 groups in the
        # basename, then it's probably series - eptitle
        basename = mtree.node_at((-2,))
        candidates = [leaf for leaf in basename.unidentified_leaves()
                      if leaf.clean_value.lower() not in non_episode_title]
        if candidates:
            # a lone candidate is probably the series name as well
            found_property(candidates[0], 'series', 0.4)
            if len(candidates) >= 2:
                found_property(candidates[1], 'title', 0.4)
    else:
        match_from_epnum_position(mtree, episode_leaves[0])

    # if we only have 1 remaining valid group in the folder containing the
    # file, then it's likely that it is the series name
    try:
        folder_leaves = mtree.node_at((-3,)).unidentified_leaves()
    except ValueError:
        folder_leaves = []
    if len(folder_leaves) == 1:
        found_property(folder_leaves[0], 'series', 0.3)

    # if there's a path group that only contains the season info, then the
    # previous one is most likely the series title (ie: ../series/season X/..)
    season_only = [candidate for candidate in mtree.nodes()
                   if 'season' in candidate.guess
                   and 'episodeNumber' not in candidate.guess]
    if season_only:
        season_node = season_only[0]
        previous = [leaf for leaf in mtree.unidentified_leaves()
                    if leaf.node_idx[0] == season_node.node_idx[0] - 1]
        if len(previous) == 1:
            found_property(previous[0], 'series', 0.5)

    # reduce the confidence of unlikely series
    for candidate in mtree.nodes():
        guess = candidate.guess
        if 'series' not in guess:
            continue
        if guess['series'].lower() in unlikely_series:
            guess.set_confidence('series', guess.confidence('series') * 0.5)