SickGear/sickbeard/show_name_helpers.py

# Author: Nic Wolfe <nic@wolfeden.ca>
# URL: http://code.google.com/p/sickbeard/
#
# This file is part of Sick Beard.
#
# Sick Beard is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Sick Beard is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Sick Beard.  If not, see <http://www.gnu.org/licenses/>.
import fnmatch
import os

import re
import datetime

import sickbeard
from sickbeard.common import countryList
from sickbeard.helpers import sanitizeSceneName
from sickbeard.scene_exceptions import get_scene_exceptions
from sickbeard import logger
from sickbeard import db
from sickbeard import encodingKludge as ek
from name_parser.parser import NameParser, InvalidNameException
from lib.unidecode import unidecode


# Regex fragments for unwanted release names; filterBadReleases rejects any
# release whose name contains one of them as a delimited word.
resultFilters = [
    "sub(pack|s|bed)",
    "swesub(bed)?",
    "(dir|sample|sub|nfo)fix",
    "sample",
    "(dvd)?extras",
    "dub(bed)?",
]

def filterBadReleases(name):
    """
    Filters out non-english and just all-around stupid releases by comparing them
    to the resultFilters contents and the comma separated sickbeard.IGNORE_WORDS.

    name: the release name to check

    Returns: True if the release name is OK, False if it's bad (a name that cannot
    be parsed into an episode is also reported as bad).
    """

    # only the success of the parse matters here, the parsed fields are not used
    try:
        NameParser().parse(name)
    except InvalidNameException:
        logger.log(u"Unable to parse the filename " + name + " into a valid episode", logger.WARNING)
        return False

    # if any of the bad strings are in the name then say no
    for ignore_word in resultFilters + sickbeard.IGNORE_WORDS.split(','):
        ignore_word = ignore_word.strip()

        # an empty IGNORE_WORDS setting (or a stray comma) produces blank entries;
        # a blank pattern would match any two adjacent separators and reject good names
        if not ignore_word:
            continue

        # the word must be delimited by the start/end of the name or a non-word char
        if re.search(r'(^|[\W_])' + ignore_word + r'($|[\W_])', name, re.I):
            logger.log(u"Invalid scene release: " + name + " contains " + ignore_word + ", ignoring it", logger.DEBUG)
            return False

    return True

def sceneToNormalShowNames(name):
    """
    Takes a show name from a scene dirname and converts it to a more "human-readable" format.

    name: The show name to convert

    Returns: a list of all the possible "normal" names (unordered, without duplicates);
    an empty list when name is empty or None
    """

    if not name:
        return []

    name_list = [name]

    # use both and and &
    # NOTE: no extra positional argument here -- the 4th positional parameter of re.sub
    # is count, so passing re.I there silently limited the substitution to 2 matches;
    # the inline (?i) already makes the pattern case-insensitive
    new_name = re.sub(r'(?i)([\. ])and([\. ])', r'\1&\2', name)
    if new_name not in name_list:
        name_list.append(new_name)

    # the country alternation does not depend on the name, so build it once
    country_match_str = '|'.join(countryList.values())
    country_regex = r'(?i)([. _-])(' + country_match_str + ')$'

    results = []

    for cur_name in name_list:
        # add brackets around the year
        results.append(re.sub(r'(\D)(\d{4})$', r'\1(\2)', cur_name))

        # add brackets around the country
        results.append(re.sub(country_regex, r'\1(\2)', cur_name))

    results += name_list

    return list(set(results))

def makeSceneShowSearchStrings(show):
    """
    Build the scene-style variants of every known name of a show.

    show: the show object, handed unchanged to allPossibleShowNames

    Returns: a list holding sanitizeSceneName(name) for each name that
    allPossibleShowNames returns, in the same order
    """

    return [sanitizeSceneName(candidate) for candidate in allPossibleShowNames(show)]


def makeSceneSeasonSearchString(show, segment, extraSearchType=None):
    """
    Build the search strings used to look for a whole season of a show.

    show: the show object (air_by_date and indexerid are read here)
    segment: the season number, or for air-by-date shows the value that is
             appended to the show name as-is
    extraSearchType: when truthy no search strings are produced at all

    Returns: a list of search strings, one or more per scene show name
    """

    connection = db.DBConnection()

    if show.air_by_date:
        # air by date shows never take the "single season" shortcut below
        season_count = 0
        season_tokens = [segment]
    else:
        count_rows = connection.select("SELECT COUNT(DISTINCT season) as numseasons FROM tv_episodes WHERE showid = ? and season != 0", [show.indexerid])
        season_count = int(count_rows[0][0])
        season_tokens = ["S%02d" % segment]

    scene_names = set(makeSceneShowSearchStrings(show))

    # only the default search type is handled
    if extraSearchType:
        return []

    # with a single season the bare show name is enough
    if season_count == 1:
        return list(scene_names)

    # otherwise search for Sxx style strings
    return [scene_name + "." + token for scene_name in scene_names for token in season_tokens]


def makeSceneSearchString(episode):
    """
    Build the search strings used to look for a single episode.

    episode: the episode object (show, season, episode and airdate are read)

    Returns: a list of "<scene show name>.<episode token>" strings; the token is the
    air date, the SxxEyy / NxYY numbering, or empty for short single-season shows
    """

    season_sql = "SELECT COUNT(DISTINCT season) as numseasons FROM tv_episodes WHERE showid = ? and season != 0"
    episode_sql = "SELECT COUNT(episode) as numepisodes FROM tv_episodes WHERE showid = ? and season != 0"

    connection = db.DBConnection()
    season_total = int(connection.select(season_sql, [episode.show.indexerid])[0][0])
    episode_total = int(connection.select(episode_sql, [episode.show.indexerid])[0][0])

    # dates replace episode numbers for air-by-date shows that have a real air date
    use_airdate = episode.show.air_by_date and episode.airdate != datetime.date.fromordinal(1)
    if use_airdate:
        tokens = [str(episode.airdate)]
    else:
        numbering = (int(episode.season), int(episode.episode))
        tokens = ["S%02iE%02i" % numbering, "%ix%02i" % numbering]

    # single-season shows with fewer than 11 episodes (specials excluded) are searched by
    # show name only; with more episodes the results could exceed the 50 result rss limit
    if season_total == 1 and episode_total < 11:
        tokens = ['']

    scene_names = set(makeSceneShowSearchStrings(episode.show))

    return [scene_name + '.' + token for scene_name in scene_names for token in tokens]

def isGoodResult(name, show, log=True):
    """
    Use an automatically-created regex to make sure the result actually is the show it claims to be

    name: the release name to verify
    show: the show object; it is passed to allPossibleShowNames, and its startyear
          and name are read here
    log: set to False to suppress every log message of this check

    Returns: True as soon as name matches the pattern built for one of the show's
    names, False if none of them match
    """

    all_show_names = allPossibleShowNames(show)
    showNames = [sanitizeSceneName(x) for x in all_show_names] + all_show_names
    showNames += [unidecode(x) for x in all_show_names]

    # an optional start year may follow the show name; identical for every name variant
    year_suffix = ''
    if show.startyear:
        year_suffix = r"(?:\W+" + str(show.startyear) + ")?"

    # what has to follow the show name (season/episode numbering, dates, parts, ...)
    regex_tail = r'\W+(?:(?:S\d[\dE._ -])|(?:\d\d?x)|(?:\d{4}\W\d\d\W\d\d)|(?:(?:part|pt)[\._ -]?(\d|[ivx]))|Season\W+\d+\W+|E\d+\W+)|((.+?)[. _-]+)?(\d+)(?:[a-zA-Z]{2})\W([a-zA-Z]{3})\W(?:\d{4})[. _-]*(.+?)([. _-])()((([^- ]+))?)?$'

    for curName in set(showNames):
        # let every escaped separator (whitespace, dot, hyphen) match any run of non-word chars;
        # a callable is used so that \W+ is inserted verbatim instead of being parsed as a
        # replacement-template escape, which newer versions of re reject
        escaped_name = re.sub(r'\\[\s.-]', lambda separator: r'\W+', re.escape(curName))
        curRegex = '^' + escaped_name + year_suffix + regex_tail
        if log:
            logger.log(u"Checking if show " + name + " matches " + curRegex, logger.DEBUG)

        match = re.search(curRegex, name, re.I)

        if match:
            # honour the log switch here as well, like the other two messages
            if log:
                logger.log(u"Matched " + curRegex + " to " + name, logger.DEBUG)
            return True

    if log:
        logger.log(u"Provider gave result " + name + " but that doesn't seem like a valid result for "+show.name+" so I'm ignoring it")
    return False

def allPossibleShowNames(show):
    """
    Figures out every possible variation of the name for a particular show. Includes TVDB name, TVRage name,
    country codes on the end, eg. "Show Name (AU)", and any scene exception names.

    show: a TVShow object that we should get the names of

    Returns: a list of all the possible show names
    """

    showNames = [show.name]
    showNames += list(get_scene_exceptions(show.indexerid))

    newShowNames = []

    # work on a copy: updating countryList itself would permanently add the reversed
    # entries to the shared dict that other code imports as well
    country_list = dict(countryList)
    country_list.update(dict(zip(countryList.values(), countryList.keys())))

    # if we have "Show Name Australia" or "Show Name (Australia)" this will add "Show Name (AU)" for
    # any countries defined in common.countryList
    # (and vice versa)
    for curName in set(showNames):
        if not curName:
            continue
        for curCountry in country_list:
            plain_suffix = ' ' + curCountry
            bracket_suffix = ' (' + curCountry + ')'
            new_suffix = ' (' + country_list[curCountry] + ')'

            # swap only the trailing suffix; str.replace would also rewrite any earlier
            # occurrence of the same text inside the name
            if curName.endswith(plain_suffix):
                newShowNames.append(curName[:-len(plain_suffix)] + new_suffix)
            elif curName.endswith(bracket_suffix):
                newShowNames.append(curName[:-len(bracket_suffix)] + new_suffix)

    showNames += newShowNames

    return showNames


def determineReleaseName(dir_name=None, nzb_name=None):
    """Determine a release name from an nzb and/or folder name

    dir_name: folder to inspect for a single *.nzb / *.nfo file; its own name is the fallback
    nzb_name: nzb file name; when given it wins and is returned without its extension

    Returns: the release name, or None when neither argument yields an acceptable one
    """

    if nzb_name is not None:
        logger.log(u"Using nzb_name for release name.")
        return nzb_name.rpartition('.')[0]

    if dir_name is None:
        return None

    # the listing does not depend on the pattern, so read the folder only once
    files = [file_name for file_name in ek.ek(os.listdir, dir_name) if ek.ek(os.path.isfile, ek.ek(os.path.join, dir_name, file_name))]

    # try to get the release name from nzb/nfo
    for search in ("*.nzb", "*.nfo"):

        reg_expr = re.compile(fnmatch.translate(search), re.IGNORECASE)
        results = [file_name for file_name in files if reg_expr.search(file_name)]

        # only trust the file when it is the sole one of its type
        if len(results) == 1:
            found_file = ek.ek(os.path.basename, results[0])
            found_file = found_file.rpartition('.')[0]
            if filterBadReleases(found_file):
                logger.log(u"Release name (" + found_file + ") found from file (" + results[0] + ")")
                # the extension is already gone; stripping once more would cut off the
                # last dotted part of the release name (or leave an empty string)
                return found_file

    # If that fails, we try the folder
    folder = ek.ek(os.path.basename, dir_name)
    if filterBadReleases(folder):
        # NOTE: Multiple failed downloads will change the folder name.
        # (e.g., appending #s)
        # Should we handle that?
        logger.log(u"Folder name (" + folder + ") appears to be a valid release name. Using it.")
        return folder

    return None