mirror of
https://github.com/SickGear/SickGear.git
synced 2025-01-07 10:33:38 +00:00
0d9fbc1ad7
This version of SickBeard uses both TVDB and TVRage to search for and gather its series data, giving you access to, and the ability to download, shows that you couldn't get before because of being locked into only what TheTVDB had to offer. This edition is also based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy!
287 lines
8.9 KiB
Python
287 lines
8.9 KiB
Python
#!/usr/bin/env python2
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# GuessIt - A library for guessing information from filenames
|
|
# Copyright (c) 2011 Nicolas Wack <wackou@gmail.com>
|
|
#
|
|
# GuessIt is free software; you can redistribute it and/or modify it under
|
|
# the terms of the Lesser GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# GuessIt is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# Lesser GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the Lesser GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
|
|
from __future__ import unicode_literals
|
|
from guessit import UnicodeMixin, base_text_type, Guess
|
|
from guessit.textutils import clean_string, str_fill
|
|
from guessit.patterns import group_delimiters
|
|
from guessit.guess import (merge_similar_guesses, merge_all,
|
|
choose_int, choose_string)
|
|
import copy
|
|
import logging
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class BaseMatchTree(UnicodeMixin):
    """A MatchTree represents the hierarchical split of a string into its
    constituent semantic groups.

    Every node keeps a reference to the same full ``string`` and covers the
    half-open ``span`` ``(start, end)`` of it.  A node may carry a ``guess``
    and an ordered list of ``children`` covering sub-spans.
    """

    def __init__(self, string='', span=None, parent=None):
        """Create a node over ``string``.

        ``span`` defaults to the whole string when it is falsy (e.g. None).
        ``parent`` is the enclosing node, or None for the root.
        """
        self.string = string
        self.span = span or (0, len(string))
        self.parent = parent
        self.children = []
        self.guess = Guess()

    @property
    def value(self):
        """The slice of the full string covered by this node's span."""
        return self.string[self.span[0]:self.span[1]]

    @property
    def clean_value(self):
        """This node's value after being passed through ``clean_string``."""
        return clean_string(self.value)

    @property
    def offset(self):
        """Start position of this node's span inside the full string."""
        return self.span[0]

    @property
    def info(self):
        """A plain dict merging this node's guess with its descendants'.

        Children are merged in order with ``dict.update``, so on a key clash
        a child's entry overrides its parent's, and a later child's entry
        overrides an earlier one's.
        """
        result = dict(self.guess)

        for c in self.children:
            result.update(c.info)

        return result

    @property
    def root(self):
        """The topmost ancestor of this node (the node itself if no parent)."""
        if not self.parent:
            return self

        return self.parent.root

    @property
    def depth(self):
        """Height of the subtree below this node; 0 for a leaf."""
        if self.is_leaf():
            return 0

        return 1 + max(c.depth for c in self.children)

    def is_leaf(self):
        """Return True when this node has no children."""
        return not self.children

    def add_child(self, span):
        """Append a new child node covering ``span`` of the full string.

        ``span`` is expressed in coordinates of the full string, not relative
        to this node.
        """
        child = MatchTree(self.string, span=span, parent=self)
        self.children.append(child)

    def partition(self, indices):
        """Split this node into contiguous children at the given positions.

        ``indices`` are cut positions relative to this node's value.  The
        boundaries 0 and ``len(self.value)`` are added when missing, so the
        children always cover the whole value.  An empty ``indices`` yields a
        single child spanning the whole value (or no child at all when the
        value itself is empty) instead of raising IndexError.
        """
        indices = sorted(indices)
        if not indices or indices[0] != 0:
            indices.insert(0, 0)
        if indices[-1] != len(self.value):
            indices.append(len(self.value))

        for start, end in zip(indices[:-1], indices[1:]):
            self.add_child(span=(self.offset + start,
                                 self.offset + end))

    def split_on_components(self, components):
        """Add one child per component, located left to right in the value.

        Each component is searched for with ``str.find`` starting right after
        the previous match.

        NOTE: a component that is not found makes ``find`` return -1, which
        produces a bogus span; callers are expected to pass substrings of
        this node's value, in order of appearance.
        """
        offset = 0
        for c in components:
            start = self.value.find(c, offset)
            end = start + len(c)
            self.add_child(span=(self.offset + start,
                                 self.offset + end))
            offset = end

    def nodes_at_depth(self, depth):
        """Yield the descendants located exactly ``depth`` levels below this
        node (``depth == 0`` yields the node itself)."""
        if depth == 0:
            yield self
            return
        if depth < 0:
            # depth only decreases while descending, so it can never get
            # back to 0: nothing below this point can match.
            return

        for child in self.children:
            for node in child.nodes_at_depth(depth - 1):
                yield node

    @property
    def node_idx(self):
        """Path from the root to this node, as a tuple of child positions.

        The root's index is the empty tuple.
        """
        if self.parent is None:
            return ()
        return self.parent.node_idx + (self.parent.children.index(self),)

    def node_at(self, idx):
        """Return the descendant reached by following ``idx`` from this node.

        ``idx`` is a sequence of child positions, as produced by
        ``node_idx``; a falsy ``idx`` returns the node itself.

        Raises ValueError (mentioning the full ``idx``) when the path does
        not exist.
        """
        if not idx:
            return self

        node = self
        try:
            for position in idx:
                node = node.children[position]
        except (IndexError, TypeError):
            # Only lookup failures are translated; anything else (e.g.
            # KeyboardInterrupt) is left to propagate untouched.
            raise ValueError('Non-existent node index: %s' % (idx,))

        return node

    def nodes(self):
        """Yield this node and all its descendants, depth-first, parents
        before their children."""
        yield self
        for child in self.children:
            for node in child.nodes():
                yield node

    def _leaves(self):
        """Yield the leaves of this subtree, from left to right."""
        if self.is_leaf():
            yield self
        else:
            for child in self.children:
                # pylint: disable=W0212
                for leaf in child._leaves():
                    yield leaf

    def leaves(self):
        """Return the leaves of this subtree as a list."""
        return list(self._leaves())

    def to_string(self):
        """Return a multi-line text rendering of the tree.

        The output is made of, in order:
          - one line per tree level, where each node's span is filled with
            the position of that node among its siblings (via ``str_fill``;
            presumably it overwrites the span with the given character --
            confirm against guessit.textutils),
          - the original string with every span that carries a guess
            replaced by '_' (i.e. what remains unidentified),
          - a line with a one-letter marker for the kind of guess found on
            each span,
          - the original string, unmodified.
        """
        empty_line = ' ' * len(self.string)

        # Property name -> one-letter marker; built once for all nodes.
        mmap = { 'episodeNumber': 'E',
                 'season': 'S',
                 'extension': 'e',
                 'format': 'f',
                 'language': 'l',
                 'country': 'C',
                 'videoCodec': 'v',
                 'audioCodec': 'a',
                 'website': 'w',
                 'container': 'c',
                 'series': 'T',
                 'title': 't',
                 'date': 'd',
                 'year': 'y',
                 'releaseGroup': 'r',
                 'screenSize': 's'
                 }

        def to_hex(x):
            # Single-character label for a child position: 0-9, then 'A'
            # for 10, 'B' for 11, and so on (chr(55 + 10) == 'A').
            if isinstance(x, int):
                return str(x) if x < 10 else chr(55 + x)
            return x

        def meaning(result):
            # Marker of the first known property present in the guess, or
            # 'x' when the guess only holds properties without a marker.
            if result is None:
                return ' '

            for prop, l in mmap.items():
                if prop in result:
                    return l

            return 'x'

        lines = [ empty_line ] * (self.depth + 2) # +2: remaining, meaning
        lines[-2] = self.string

        for node in self.nodes():
            if node == self:
                continue

            idx = node.node_idx
            depth = len(idx) - 1
            if idx:
                lines[depth] = str_fill(lines[depth], node.span,
                                        to_hex(idx[-1]))
            if node.guess:
                lines[-2] = str_fill(lines[-2], node.span, '_')
                lines[-1] = str_fill(lines[-1], node.span, meaning(node.guess))

        lines.append(self.string)

        return '\n'.join(lines)

    def __unicode__(self):
        """Text form of the tree; same as ``to_string()``."""
        return self.to_string()
|
|
|
|
|
|
class MatchTree(BaseMatchTree):
    """The MatchTree contains a few "utility" methods which are not necessary
    for the BaseMatchTree, but add a lot of convenience for writing
    higher-level rules.

    Methods prefixed with an underscore are generators; their public
    counterparts return the same items as a list.
    """

    def _unidentified_leaves(self,
                             valid=lambda leaf: len(leaf.clean_value) >= 2):
        """Yield the leaves that have no guess and satisfy ``valid``.

        By default a leaf is valid when its cleaned value is at least two
        characters long.
        """
        for leaf in self._leaves():
            if not leaf.guess and valid(leaf):
                yield leaf

    def unidentified_leaves(self,
                            valid=lambda leaf: len(leaf.clean_value) >= 2):
        """List version of ``_unidentified_leaves``."""
        return list(self._unidentified_leaves(valid))

    def _leaves_containing(self, property_name):
        """Yield the leaves whose guess contains at least one of the given
        properties.

        ``property_name`` is either a single name (a text string) or an
        iterable of names.  Each leaf is yielded at most once.
        """
        if isinstance(property_name, base_text_type):
            property_name = [ property_name ]

        for leaf in self._leaves():
            if any(prop in leaf.guess for prop in property_name):
                yield leaf

    def leaves_containing(self, property_name):
        """List version of ``_leaves_containing``."""
        return list(self._leaves_containing(property_name))

    def first_leaf_containing(self, property_name):
        """Return the first leaf containing one of the given properties, or
        None when there is no such leaf."""
        return next(self._leaves_containing(property_name), None)

    def _previous_unidentified_leaves(self, node):
        """Yield the unidentified leaves located before ``node``.

        "Before" means that the leaf's ``node_idx`` tuple compares lower
        than the one of ``node``.
        """
        node_idx = node.node_idx
        for leaf in self._unidentified_leaves():
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_unidentified_leaves(self, node):
        """List version of ``_previous_unidentified_leaves``."""
        return list(self._previous_unidentified_leaves(node))

    def _previous_leaves_containing(self, node, property_name):
        """Yield the leaves containing one of the given properties whose
        ``node_idx`` tuple compares lower than the one of ``node``."""
        node_idx = node.node_idx
        for leaf in self._leaves_containing(property_name):
            if leaf.node_idx < node_idx:
                yield leaf

    def previous_leaves_containing(self, node, property_name):
        """List version of ``_previous_leaves_containing``."""
        return list(self._previous_leaves_containing(node, property_name))

    def is_explicit(self):
        """Return whether the group was explicitly enclosed by
        parentheses/square brackets/etc.

        The test is whether the first and last characters of the value,
        joined together, are found in ``group_delimiters``.  An empty value
        is never explicit.
        """
        value = self.value
        if not value:
            # Nothing to look at; indexing would raise IndexError.
            return False
        return (value[0] + value[-1]) in group_delimiters

    def matched(self):
        """Merge the guesses of all the nodes of this tree into one result.

        Returns whatever ``merge_all`` produces for the collected guesses;
        the guesses stored in the tree are left untouched.
        """
        # we need to make a copy here, as the merge functions work in place and
        # calling them on the match tree would modify it
        parts = [node.guess for node in self.nodes() if node.guess]
        parts = copy.deepcopy(parts)

        # 1- try to merge similar information together and give it a higher
        #    confidence
        for int_part in ('year', 'season', 'episodeNumber'):
            merge_similar_guesses(parts, int_part, choose_int)

        for string_part in ('title', 'series', 'container', 'format',
                            'releaseGroup', 'website', 'audioCodec',
                            'videoCodec', 'screenSize', 'episodeFormat',
                            'audioChannels', 'idNumber'):
            merge_similar_guesses(parts, string_part, choose_string)

        # 2- merge the rest, potentially discarding information not properly
        #    merged before
        result = merge_all(parts,
                           append=['language', 'subtitleLanguage', 'other'])

        # Only build the (possibly long) description when it will be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Final result: %s', result.nice_string())
        return result
|