SickGear/sickbeard/providers/generic.py

745 lines
28 KiB
Python
Raw Normal View History

2014-04-22 08:02:43 +00:00
# coding=utf-8
# Author: Nic Wolfe <nic@wolfeden.ca>
# URL: http://code.google.com/p/sickbeard/
#
# This file is part of SickGear.
#
# SickGear is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickGear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickGear. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
import time
import datetime
import os
import re
import itertools
from base64 import b16encode, b32decode
import sickbeard
import requests
from sickbeard import helpers, classes, logger, db, tvcache, encodingKludge as ek
from sickbeard.common import Quality, MULTI_EP_RESULT, SEASON_RESULT, USER_AGENT
from sickbeard.exceptions import SickBeardException, AuthException, ex
from sickbeard.helpers import maybe_plural
from sickbeard.name_parser.parser import NameParser, InvalidNameException, InvalidShowException
from sickbeard.show_name_helpers import allPossibleShowNames
from hachoir_parser import createParser
class HaltParseException(SickBeardException):
"""Something requires the current processing to abort"""
class GenericProvider:
NZB = "nzb"
TORRENT = "torrent"
def __init__(self, name, supports_backlog=False, anime_only=False):
# these need to be set in the subclass
self.providerType = None
self.name = name
self.supportsBacklog = supports_backlog
self.anime_only = anime_only
self.url = ''
self.show = None
self.search_mode = None
self.search_fallback = False
self.enabled = False
self.enable_recentsearch = False
self.enable_backlog = False
self.cache = tvcache.TVCache(self)
self.session = requests.session()
self.headers = {
# Using USER_AGENT instead of Mozilla to keep same user agent along authentication and download phases,
#otherwise session might be broken and download fail, asking again for authentication
#'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36'}
'User-Agent': USER_AGENT}
def getID(self):
return GenericProvider.makeID(self.name)
@staticmethod
def makeID(name):
return re.sub("[^\w\d_]", "_", name.strip().lower())
def imageName(self, *default_name):
for name in ['%s.%s' % (self.getID(), image_ext) for image_ext in ['png', 'gif', 'jpg']]:
if ek.ek(os.path.isfile,
ek.ek(os.path.join, sickbeard.PROG_DIR, 'gui', sickbeard.GUI_NAME, 'images', 'providers', name)):
return name
return '%s.png' % ('newznab', default_name[0])[any(default_name)]
def _checkAuth(self):
return True
def _doLogin(self):
return True
def isActive(self):
if self.providerType == GenericProvider.NZB and sickbeard.USE_NZBS:
return self.isEnabled()
elif self.providerType == GenericProvider.TORRENT and sickbeard.USE_TORRENTS:
return self.isEnabled()
else:
return False
def isEnabled(self):
"""
This should be overridden and should return the config setting eg. sickbeard.MYPROVIDER
"""
return self.enabled
def getResult(self, episodes):
"""
Returns a result of the correct type for this provider
"""
if self.providerType == GenericProvider.NZB:
result = classes.NZBSearchResult(episodes)
elif self.providerType == GenericProvider.TORRENT:
result = classes.TorrentSearchResult(episodes)
else:
result = classes.SearchResult(episodes)
result.provider = self
return result
def getURL(self, url, post_data=None, params=None, timeout=30, json=False):
"""
By default this is just a simple urlopen call but this method should be overridden
for providers with special URL requirements (like cookies)
"""
# check for auth
if not self._doLogin():
return
return helpers.getURL(url, post_data=post_data, params=params, headers=self.headers, timeout=timeout,
session=self.session, json=json)
def downloadResult(self, result):
"""
Save the result to disk.
"""
# check for auth
if not self._doLogin():
return False
if self.providerType == GenericProvider.TORRENT:
try:
torrent_hash = re.findall('urn:btih:([0-9a-f]{32,40})', result.url)[0].upper()
2014-09-11 17:26:17 +00:00
if 32 == len(torrent_hash):
2014-09-11 17:26:17 +00:00
torrent_hash = b16encode(b32decode(torrent_hash)).lower()
if not torrent_hash:
logger.log("Unable to extract torrent hash from link: " + ex(result.url), logger.ERROR)
return False
urls = ['https://%s/%s.torrent' % (u, torrent_hash)
for u in ('torcache.net/torrent', 'torrage.com/torrent', 'getstrike.net/torrents/api/download')]
except:
urls = [result.url]
filename = ek.ek(os.path.join, sickbeard.TORRENT_DIR,
helpers.sanitizeFileName(result.name) + '.' + self.providerType)
elif self.providerType == GenericProvider.NZB:
urls = [result.url]
filename = ek.ek(os.path.join, sickbeard.NZB_DIR,
helpers.sanitizeFileName(result.name) + '.' + self.providerType)
else:
return
for url in urls:
if helpers.download_file(url, filename, session=self.session):
logger.log(u"Downloading a result from " + self.name + " at " + url)
if self.providerType == GenericProvider.TORRENT:
logger.log(u"Saved magnet link to " + filename, logger.MESSAGE)
else:
logger.log(u"Saved result to " + filename, logger.MESSAGE)
if self._verify_download(filename):
return True
elif ek.ek(os.path.isfile, filename):
ek.ek(os.remove, filename)
logger.log(u"Failed to download result", logger.ERROR)
return False
def _verify_download(self, file_name=None):
"""
Checks the saved file to see if it was actually valid, if not then consider the download a failure.
"""
# primitive verification of torrents, just make sure we didn't get a text file or something
if self.providerType == GenericProvider.TORRENT:
parser = createParser(file_name)
if parser:
mime_type = parser._getMimeType()
try:
parser.stream._input.close()
except:
pass
if mime_type == 'application/x-bittorrent':
return True
logger.log(u"Result is not a valid torrent file", logger.WARNING)
return False
return True
def searchRSS(self, episodes):
return self.cache.findNeededEpisodes(episodes)
def getQuality(self, item, anime=False):
"""
Figures out the quality of the given RSS item node
item: An elementtree.ElementTree element representing the <item> tag of the RSS feed
Returns a Quality value obtained from the node's data
"""
(title, url) = self._get_title_and_url(item) # @UnusedVariable
quality = Quality.sceneQuality(title, anime)
return quality
def _doSearch(self, search_params, search_mode='eponly', epcount=0, age=0):
return []
def _get_season_search_strings(self, episode):
return []
def _get_episode_search_strings(self, eb_obj, add_string=''):
return []
def _get_title_and_url(self, item):
"""
Retrieves the title and URL data from the item
item: An elementtree.ElementTree element representing the <item> tag of the RSS feed, or a two part tup
Returns: A tuple containing two strings representing title and URL respectively
"""
title, url = None, None
try:
if isinstance(item, tuple):
title = item[0]
url = item[1]
else:
if 'title' in item:
title = item.title
if 'link' in item:
url = item.link
except Exception:
pass
if title:
title = re.sub(r'\s+', '.', u'' + title)
if url:
url = url.replace('&amp;', '&')
return title, url
def findSearchResults(self, show, episodes, search_mode, manualSearch=False):
self._checkAuth()
self.show = show
results = {}
itemList = []
searched_scene_season = None
for epObj in episodes:
# search cache for episode result
cacheResult = self.cache.searchCache(epObj, manualSearch)
if cacheResult:
if epObj.episode not in results:
results[epObj.episode] = cacheResult
else:
results[epObj.episode].extend(cacheResult)
# found result, search next episode
continue
# skip if season already searched
if len(episodes) > 1 and searched_scene_season == epObj.scene_season:
continue
# mark season searched for season pack searches so we can skip later on
searched_scene_season = epObj.scene_season
if 'sponly' == search_mode:
# get season search results
for curString in self._get_season_search_strings(epObj):
itemList += self._doSearch(curString, search_mode, len(episodes))
else:
# get single episode search results
for curString in self._get_episode_search_strings(epObj):
itemList += self._doSearch(curString, 'eponly', len(episodes))
# if we found what we needed already from cache then return results and exit
if len(results) == len(episodes):
return results
# sort list by quality
if len(itemList):
items = {}
itemsUnknown = []
for item in itemList:
quality = self.getQuality(item, anime=show.is_anime)
if quality == Quality.UNKNOWN:
itemsUnknown += [item]
else:
if quality not in items:
items[quality] = [item]
else:
items[quality].append(item)
itemList = list(itertools.chain(*[v for (k, v) in sorted(items.items(), reverse=True)]))
itemList += itemsUnknown if itemsUnknown else []
# filter results
cl = []
for item in itemList:
(title, url) = self._get_title_and_url(item)
# parse the file name
try:
2014-07-25 04:29:03 +00:00
myParser = NameParser(False, convert=True)
parse_result = myParser.parse(title)
except InvalidNameException:
logger.log(u"Unable to parse the filename " + title + " into a valid episode", logger.DEBUG)
continue
except InvalidShowException:
logger.log(u'No show name or scene exception matched the parsed filename ' + title, logger.DEBUG)
continue
showObj = parse_result.show
quality = parse_result.quality
release_group = parse_result.release_group
2014-07-22 04:53:32 +00:00
version = parse_result.version
addCacheEntry = False
if not (showObj.air_by_date or showObj.sports):
if search_mode == 'sponly':
if len(parse_result.episode_numbers):
logger.log(
u"This is supposed to be a season pack search but the result " + title + " is not a valid season pack, skipping it",
logger.DEBUG)
addCacheEntry = True
if len(parse_result.episode_numbers) and (
parse_result.season_number not in set([ep.season for ep in episodes]) or not [ep for ep in episodes if
ep.scene_episode in parse_result.episode_numbers]):
logger.log(
u"The result " + title + " doesn't seem to be a valid episode that we are trying to snatch, ignoring",
logger.DEBUG)
addCacheEntry = True
else:
if not len(parse_result.episode_numbers) and parse_result.season_number and not [ep for ep in
episodes if
ep.season == parse_result.season_number and ep.episode in parse_result.episode_numbers]:
logger.log(
u"The result " + title + " doesn't seem to be a valid season that we are trying to snatch, ignoring",
logger.DEBUG)
addCacheEntry = True
elif len(parse_result.episode_numbers) and not [ep for ep in episodes if
ep.season == parse_result.season_number and ep.episode in parse_result.episode_numbers]:
logger.log(
u"The result " + title + " doesn't seem to be a valid episode that we are trying to snatch, ignoring",
logger.DEBUG)
addCacheEntry = True
if not addCacheEntry:
# we just use the existing info for normal searches
actual_season = parse_result.season_number
actual_episodes = parse_result.episode_numbers
else:
if not (parse_result.is_air_by_date):
logger.log(
u"This is supposed to be a date search but the result " + title + " didn't parse as one, skipping it",
logger.DEBUG)
addCacheEntry = True
else:
airdate = parse_result.air_date.toordinal()
myDB = db.DBConnection()
sql_results = myDB.select(
"SELECT season, episode FROM tv_episodes WHERE showid = ? AND airdate = ?",
[showObj.indexerid, airdate])
if len(sql_results) != 1:
logger.log(
u"Tried to look up the date for the episode " + title + " but the database didn't give proper results, skipping it",
logger.WARNING)
addCacheEntry = True
if not addCacheEntry:
actual_season = int(sql_results[0]["season"])
actual_episodes = [int(sql_results[0]["episode"])]
# add parsed result to cache for usage later on
if addCacheEntry:
logger.log(u"Adding item from search to cache: " + title, logger.DEBUG)
ci = self.cache._addCacheEntry(title, url, parse_result=parse_result)
if ci is not None:
cl.append(ci)
continue
# make sure we want the episode
wantEp = True
for epNo in actual_episodes:
if not showObj.wantEpisode(actual_season, epNo, quality, manualSearch):
wantEp = False
break
if not wantEp:
logger.log(
u"Ignoring result " + title + " because we don't want an episode that is " +
Quality.qualityStrings[
quality], logger.DEBUG)
continue
logger.log(u"Found result " + title + " at " + url, logger.DEBUG)
# make a result object
epObj = []
for curEp in actual_episodes:
epObj.append(showObj.getEpisode(actual_season, curEp))
result = self.getResult(epObj)
result.show = showObj
result.url = url
result.name = title
result.quality = quality
result.release_group = release_group
result.content = None
2014-07-22 04:53:32 +00:00
result.version = version
if len(epObj) == 1:
epNum = epObj[0].episode
logger.log(u"Single episode result.", logger.DEBUG)
elif len(epObj) > 1:
epNum = MULTI_EP_RESULT
logger.log(u"Separating multi-episode result to check for later - result contains episodes: " + str(
parse_result.episode_numbers), logger.DEBUG)
elif len(epObj) == 0:
epNum = SEASON_RESULT
logger.log(u"Separating full season result to check for later", logger.DEBUG)
if epNum not in results:
results[epNum] = [result]
else:
results[epNum].append(result)
# check if we have items to add to cache
if len(cl) > 0:
myDB = self.cache._getDB()
myDB.mass_action(cl)
return results
def findPropers(self, search_date=None):
results = self.cache.listPropers(search_date)
Fixed issues with editing/saving custom scene exceptions. Fixed charmap issues for anime show names. Fixed issues with display show page and epCat key errors. Fixed duplicate log messages for clearing provider caches. Fixed issues with email notifier ep names not properly being encoded to UTF-8. TVDB<->TVRAGE Indexer ID mapping is now performed on demand to be used when needed such as newznab providers can be searched with tvrage_id's and some will return tvrage_id's that later can be used to create show objects from for faster and more accurate name parsing, mapping is done via Trakt API calls. Added stop event signals to schedualed tasks, SR now waits indefinate till task has been fully stopped before completing a restart or shutdown event. NameParserCache is now persistent and stores 200 parsed results at any given time for quicker lookups and better performance, this helps maintain results between updates or shutdown/startup events. Black and White lists for anime now only get used for anime shows as intended, performance gain for non-anime shows that dont need to load these lists. Internal name cache now builds it self on demand when needed per show request plus checks if show is already in cache and if true exits routine to save time. Schedualer and QueueItems classes are now a sub-class of threading.Thread and a stop threading event signal has been added to each. If I forgot to list something it doesn't mean its not fixed so please test and report back if anything is wrong or has been corrected by this new release.
2014-07-15 02:00:53 +00:00
return [classes.Proper(x['name'], x['url'], datetime.datetime.fromtimestamp(x['time']), self.show) for x in
results]
2014-05-08 22:28:28 +00:00
def seedRatio(self):
'''
Provider should override this value if custom seed ratio enabled
It should return the value of the provider seed ratio
'''
return ''
@staticmethod
def _log_result(mode='cache', count=0, url='url missing'):
"""
Simple function to log the result of a search
:param count: count of successfully processed items
:param url: source url of item(s)
"""
mode = mode.lower()
logger.log(u'%s in response from %s' % (('No %s items' % mode,
'%s %s item%s' % (count, mode, maybe_plural(count)))[0 < count], url))
class NZBProvider(GenericProvider):
def __init__(self, name, supports_backlog=True, anime_only=False):
GenericProvider.__init__(self, name, supports_backlog, anime_only)
self.providerType = GenericProvider.NZB
def imageName(self):
return GenericProvider.imageName(self, 'newznab')
def _find_propers(self, search_date=None):
cache_results = self.cache.listPropers(search_date)
results = [classes.Proper(x['name'], x['url'], datetime.datetime.fromtimestamp(x['time']), self.show) for x in
cache_results]
index = 0
alt_search = ('nzbs_org' == self.getID())
term_items_found = False
do_search_alt = False
search_terms = ['.proper.', '.repack.']
proper_check = re.compile(r'(?i)\b(proper)|(repack)\b')
while index < len(search_terms):
search_params = {'q': search_terms[index]}
if alt_search:
if do_search_alt:
index += 1
if term_items_found:
do_search_alt = True
term_items_found = False
else:
if do_search_alt:
search_params['t'] = 'search'
do_search_alt = (True, False)[do_search_alt]
else:
index += 1
for item in self._doSearch(search_params, age=4):
(title, url) = self._get_title_and_url(item)
if not proper_check.search(title):
continue
if 'published_parsed' in item and item['published_parsed']:
result_date = item.published_parsed
if result_date:
result_date = datetime.datetime(*result_date[0:6])
else:
logger.log(u'Unable to figure out the date for entry %s, skipping it', title)
continue
if not search_date or result_date > search_date:
search_result = classes.Proper(title, url, result_date, self.show)
results.append(search_result)
term_items_found = True
do_search_alt = False
time.sleep(0.2)
return results
Fixed issues with editing/saving custom scene exceptions. Fixed charmap issues for anime show names. Fixed issues with display show page and epCat key errors. Fixed duplicate log messages for clearing provider caches. Fixed issues with email notifier ep names not properly being encoded to UTF-8. TVDB<->TVRAGE Indexer ID mapping is now performed on demand to be used when needed such as newznab providers can be searched with tvrage_id's and some will return tvrage_id's that later can be used to create show objects from for faster and more accurate name parsing, mapping is done via Trakt API calls. Added stop event signals to schedualed tasks, SR now waits indefinate till task has been fully stopped before completing a restart or shutdown event. NameParserCache is now persistent and stores 200 parsed results at any given time for quicker lookups and better performance, this helps maintain results between updates or shutdown/startup events. Black and White lists for anime now only get used for anime shows as intended, performance gain for non-anime shows that dont need to load these lists. Internal name cache now builds it self on demand when needed per show request plus checks if show is already in cache and if true exits routine to save time. Schedualer and QueueItems classes are now a sub-class of threading.Thread and a stop threading event signal has been added to each. If I forgot to list something it doesn't mean its not fixed so please test and report back if anything is wrong or has been corrected by this new release.
2014-07-15 02:00:53 +00:00
class TorrentProvider(GenericProvider):
def __init__(self, name, supports_backlog=True, anime_only=False):
GenericProvider.__init__(self, name, supports_backlog, anime_only)
2014-07-25 04:29:03 +00:00
self.providerType = GenericProvider.TORRENT
self._seed_ratio = None
def imageName(self):
return GenericProvider.imageName(self, 'torrent')
def seedRatio(self):
return self._seed_ratio
def getQuality(self, item, anime=False):
if isinstance(item, tuple):
name = item[0]
elif isinstance(item, dict):
name, url = self._get_title_and_url(item)
else:
name = item.title
return Quality.sceneQuality(name, anime)
@staticmethod
def _reverse_quality(quality):
return {
Quality.SDTV: 'HDTV x264',
Quality.SDDVD: 'DVDRIP',
Quality.HDTV: '720p HDTV x264',
Quality.FULLHDTV: '1080p HDTV x264',
Quality.RAWHDTV: '1080i HDTV mpeg2',
Quality.HDWEBDL: '720p WEB-DL h264',
Quality.FULLHDWEBDL: '1080p WEB-DL h264',
Quality.HDBLURAY: '720p Bluray x264',
Quality.FULLHDBLURAY: '1080p Bluray x264'
}.get(quality, '')
def _get_season_search_strings(self, ep_obj, detail_only=False, scene=True):
if ep_obj.show.air_by_date or ep_obj.show.sports:
ep_detail = str(ep_obj.airdate).split('-')[0]
elif ep_obj.show.anime:
ep_detail = ep_obj.scene_absolute_number
else:
ep_detail = 'S%02d' % int(ep_obj.scene_season)
detail = ({}, {'Season_only': [ep_detail]})[detail_only and not self.show.sports and not self.show.anime]
return [dict({'Season': self._build_search_strings(ep_detail, scene)}.items() + detail.items())]
def _get_episode_search_strings(self, ep_obj, add_string='', detail_only=False, scene=True, sep_date=' ', use_or=True):
if not ep_obj:
return []
if self.show.air_by_date or self.show.sports:
ep_detail = str(ep_obj.airdate).replace('-', sep_date)
if self.show.sports:
month = ep_obj.airdate.strftime('%b')
ep_detail = ([ep_detail] + [month], '%s|%s' % (ep_detail, month))[use_or]
elif self.show.anime:
ep_detail = ep_obj.scene_absolute_number
else:
ep_detail = sickbeard.config.naming_ep_type[2] % {'seasonnumber': ep_obj.scene_season,
'episodenumber': ep_obj.scene_episode}
append = (add_string, '')[self.show.anime]
detail = ({}, {'Episode_only': [ep_detail]})[detail_only and not self.show.sports and not self.show.anime]
return [dict({'Episode': self._build_search_strings(ep_detail, scene, append)}.items() + detail.items())]
def _build_search_strings(self, ep_detail, process_name=True, append=''):
"""
Build a list of search strings for querying a provider
:param ep_detail: String of episode detail or List of episode details
:param process_name: Bool Whether to call sanitizeSceneName() on show name
:param append: String to append to search strings
:return: List of search string parameters
"""
if not isinstance(ep_detail, list):
ep_detail = [ep_detail]
if not isinstance(append, list):
append = [append]
search_params = []
crop = re.compile(r'([\.\s])(?:\1)+')
for name in set(allPossibleShowNames(self.show)):
if process_name:
name = helpers.sanitizeSceneName(name)
for detail in ep_detail:
search_params += [crop.sub(r'\1', '%s %s' % (name, detail) + ('', ' ' + x)[any(x)]) for x in append]
return search_params
def _checkAuth(self):
if hasattr(self, 'username') and hasattr(self, 'password'):
if self.username and self.password:
return True
setting = 'Password or Username'
elif hasattr(self, 'username') and hasattr(self, 'passkey'):
if self.username and self.passkey:
return True
setting = 'Passkey or Username'
elif hasattr(self, 'api_key'):
if self.api_key:
return True
setting = 'Apikey'
2015-07-06 11:14:37 +00:00
elif hasattr(self, 'passkey'):
if self.passkey:
return True
setting = 'Passkey'
else:
return GenericProvider._checkAuth(self)
raise AuthException('%s for %s is empty in config provider options' % (setting, self.name))
def _find_propers(self, search_date=datetime.datetime.today(), search_terms=None):
"""
Search for releases of type PROPER
:param search_date: Filter search on episodes since this date
:param search_terms: String or list of strings that qualify PROPER release types
:return: list of Proper objects
"""
results = []
my_db = db.DBConnection()
sql_results = my_db.select(
'SELECT s.show_name, e.showid, e.season, e.episode, e.status, e.airdate FROM tv_episodes AS e' +
' INNER JOIN tv_shows AS s ON (e.showid = s.indexer_id)' +
' WHERE e.airdate >= ' + str(search_date.toordinal()) +
' AND (e.status IN (%s)' % ','.join([str(x) for x in Quality.DOWNLOADED]) +
' OR (e.status IN (%s)))' % ','.join([str(x) for x in Quality.SNATCHED])
)
if not sql_results:
return results
clean_term = re.compile(r'(?i)[^a-z\|\.]+')
for sqlshow in sql_results:
showid, season, episode = [int(sqlshow[item]) for item in ('showid', 'season', 'episode')]
self.show = helpers.findCertainShow(sickbeard.showList, showid)
if not self.show:
continue
cur_ep = self.show.getEpisode(season, episode)
if None is search_terms:
search_terms = ['proper', 'repack']
elif not isinstance(search_terms, list):
if '' == search_terms:
search_terms = 'proper|repack'
search_terms = [search_terms]
for proper_term in search_terms:
proper_check = re.compile(r'(?i)(?:%s)' % clean_term.sub('', proper_term))
search_string = self._get_episode_search_strings(cur_ep, add_string=proper_term)
for item in self._doSearch(search_string[0]):
title, url = self._get_title_and_url(item)
if not proper_check.search(title):
continue
results.append(classes.Proper(title, url, datetime.datetime.today(), self.show))
return results
@staticmethod
def _has_no_results(*html):
return re.search(r'(?i)<(?:h\d|strong)[^>]*>(?:'
+ 'your\ssearch\sdid\snot\smatch|'
+ 'nothing\sfound|'
+ 'no\storrents\sfound|'
+ '.*?there\sare\sno\sresults|'
+ '.*?no\shits\.\sTry\sadding'
+ ')', html[0])
def get_cache_data(self, *args, **kwargs):
search_params = {'Cache': ['']}
return self._doSearch(search_params)