Fixed regex pattern for sports events that was preventing searches from working.

Switched out urllib2 HTTP Handler for requests HTTP Handler in main getURL function in helpers module.

Fixed date parsing for sports in NameParser module.

Misc fixes and improvements throughout the code.
This commit is contained in:
echel0n 2014-03-15 18:01:12 -07:00
parent 4dce609667
commit 1f178686cc
7 changed files with 138 additions and 183 deletions

View file

@ -435,6 +435,10 @@ class TVRage:
if elm.text is "0000-00-00": if elm.text is "0000-00-00":
elm.text = str(dt.date.fromordinal(1)) elm.text = str(dt.date.fromordinal(1))
try: try:
#month = strptime(match.group('air_month')[:3],'%b').tm_mon
#day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
#dtStr = '%s/%s/%s' % (year, month, day)
fixDate = parse(elm.text, fuzzy=True) fixDate = parse(elm.text, fuzzy=True)
elm.text = fixDate.strftime("%Y-%m-%d") elm.text = fixDate.strftime("%Y-%m-%d")
except: except:

View file

@ -18,26 +18,21 @@
from __future__ import with_statement from __future__ import with_statement
import gzip
import os import os
import re import re
import shutil import shutil
import socket import socket
import stat import stat
import StringIO
import shutil
import sys
import time import time
import traceback import traceback
import urllib import urllib
import urllib2
import zlib
import hashlib import hashlib
import httplib import httplib
import urlparse import urlparse
import uuid import uuid
import base64 import base64
from lib import requests
from httplib import BadStatusLine from httplib import BadStatusLine
from itertools import izip, cycle from itertools import izip, cycle
@ -169,15 +164,16 @@ def sanitizeFileName(name):
return name return name
def getURL(url, post_data=None, headers=[], timeout=None): def getURL(url, post_data=None, headers=None, timeout=None):
""" """
Returns a byte-string retrieved from the url provider. Returns a byte-string retrieved from the url provider.
""" """
opener = urllib2.build_opener()
opener.addheaders = [('User-Agent', USER_AGENT), ('Accept-Encoding', 'gzip,deflate')] req_headers = ['User-Agent', USER_AGENT, 'Accept-Encoding', 'gzip,deflate']
for cur_header in headers: if headers:
opener.addheaders.append(cur_header) for cur_header in headers:
req_headers.append(cur_header)
try: try:
# Remove double-slashes from url # Remove double-slashes from url
@ -185,53 +181,21 @@ Returns a byte-string retrieved from the url provider.
parsed[2] = re.sub("/{2,}", "/", parsed[2]) # replace two or more / with one parsed[2] = re.sub("/{2,}", "/", parsed[2]) # replace two or more / with one
url = urlparse.urlunparse(parsed) url = urlparse.urlunparse(parsed)
if sys.version_info < (2, 6) or timeout is None: it = iter(req_headers)
usock = opener.open(url) resp = requests.get(url, data=post_data, headers=dict(zip(it, it)))
else: except requests.HTTPError, e:
usock = opener.open(url, timeout=timeout) logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
url = usock.geturl()
encoding = usock.info().get("Content-Encoding")
if encoding in ('gzip', 'x-gzip', 'deflate'):
content = usock.read()
if encoding == 'deflate':
data = StringIO.StringIO(zlib.decompress(content))
else:
data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
result = data.read()
else:
result = usock.read()
usock.close()
except urllib2.HTTPError, e:
logger.log(u"HTTP error " + str(e.code) + " while loading URL " + url, logger.WARNING)
return None return None
except urllib2.URLError, e: except requests.ConnectionError, e:
logger.log(u"URL error " + str(e.reason) + " while loading URL " + url, logger.WARNING) logger.log(u"Connection error " + str(e.message) + " while loading URL " + url, logger.WARNING)
return None
except BadStatusLine:
logger.log(u"BadStatusLine error while loading URL " + url, logger.WARNING)
return None
except socket.timeout:
logger.log(u"Timed out while loading URL " + url, logger.WARNING)
return None
except ValueError:
logger.log(u"Unknown error while loading URL " + url, logger.WARNING)
return None return None
except Exception: except Exception:
logger.log(u"Unknown exception while loading URL " + url + ": " + traceback.format_exc(), logger.WARNING) logger.log(u"Unknown exception while loading URL " + url + ": " + traceback.format_exc(), logger.WARNING)
return None return None
return result return resp.content
def _remove_file_failed(file): def _remove_file_failed(file):
try: try:
@ -241,39 +205,23 @@ def _remove_file_failed(file):
def download_file(url, filename): def download_file(url, filename):
try: try:
req = urllib2.urlopen(url) req = requests.get(url, stream=True)
CHUNK = 16 * 1024 #CHUNK = 16 * 1024
with open(filename, 'wb') as fp: with open(filename, 'wb') as fp:
while True: for chunk in req.iter_content(chunk_size=1024):
chunk = req.read(CHUNK) if chunk:
if not chunk: break fp.write(chunk)
fp.write(chunk) fp.flush()
fp.close() fp.close()
req.close() req.close()
except urllib2.HTTPError, e: except requests.HTTPError, e:
_remove_file_failed(filename) _remove_file_failed(filename)
logger.log(u"HTTP error " + str(e.code) + " while loading URL " + url, logger.WARNING) logger.log(u"HTTP error " + str(e.errno) + " while loading URL " + url, logger.WARNING)
return False return False
except urllib2.URLError, e: except requests.ConnectionError, e:
_remove_file_failed(filename) logger.log(u"Connection error " + str(e.message) + " while loading URL " + url, logger.WARNING)
logger.log(u"URL error " + str(e.reason) + " while loading URL " + url, logger.WARNING)
return False
except BadStatusLine:
_remove_file_failed(filename)
logger.log(u"BadStatusLine error while loading URL " + url, logger.WARNING)
return False
except socket.timeout:
_remove_file_failed(filename)
logger.log(u"Timed out while loading URL " + url, logger.WARNING)
return False
except ValueError:
_remove_file_failed(filename)
logger.log(u"Unknown error while loading URL " + url, logger.WARNING)
return False return False
except Exception: except Exception:
@ -699,7 +647,7 @@ def create_https_certificates(ssl_cert, ssl_key):
Create self-signed HTTPS certificates and store in paths 'ssl_cert' and 'ssl_key' Create self-signed HTTPS certificates and store in paths 'ssl_cert' and 'ssl_key'
""" """
try: try:
from OpenSSL import crypto # @UnresolvedImport from lib.OpenSSL import crypto # @UnresolvedImport
from lib.certgen import createKeyPair, createCertRequest, createCertificate, TYPE_RSA, serial # @UnresolvedImport from lib.certgen import createKeyPair, createCertRequest, createCertificate, TYPE_RSA, serial # @UnresolvedImport
except: except:
logger.log(u"pyopenssl module missing, please install for https access", logger.WARNING) logger.log(u"pyopenssl module missing, please install for https access", logger.WARNING)

View file

@ -21,14 +21,16 @@ import os.path
import re import re
import copy import copy
import regexes import regexes
import sickbeard import sickbeard
import calendar
from sickbeard import logger, classes from sickbeard import logger, classes
from sickbeard import scene_numbering, scene_exceptions from sickbeard import scene_numbering, scene_exceptions
from sickbeard.indexers import indexer_api, indexer_exceptions from sickbeard.indexers import indexer_api, indexer_exceptions
from sickbeard.common import indexerStrings from sickbeard.common import indexerStrings
from lib.dateutil.parser import parse
from time import strptime from time import strptime
class NameParser(object): class NameParser(object):
@ -107,22 +109,17 @@ class NameParser(object):
if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups: if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups:
if 'scene_sports_date_format' in cur_regex_name: if 'scene_sports_date_format' in cur_regex_name:
year = int(match.group('air_year')) year = match.group('air_year')
month = int(strptime(match.group('air_month'),'%b').tm_mon) month = strptime(match.group('air_month')[:3],'%b').tm_mon
day = int(strptime(match.group('air_day'),'%d').tm_mday) day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
else: else:
year = int(match.group('air_year')) year = int(match.group('air_year'))
month = int(match.group('air_month')) month = int(match.group('air_month'))
day = int(match.group('air_day')) day = int(match.group('air_day'))
# make an attempt to detect YYYY-DD-MM formats
if month > 12:
tmp_month = month
month = day
day = tmp_month
try: try:
result.air_date = datetime.date(year, month, day) dtStr = '%s-%s-%s' % (year, month, day)
result.air_date = datetime.datetime.strptime(dtStr, "%Y-%m-%d").date()
except ValueError, e: except ValueError, e:
raise InvalidNameException(e.message) raise InvalidNameException(e.message)

View file

@ -100,14 +100,12 @@ ep_regexes = [
# Show.Name.2010.Nov.23rd.Source.Quality.Etc-Group # Show.Name.2010.Nov.23rd.Source.Quality.Etc-Group
# Show Name - 2010-Nov-23rd - Ep Name # Show Name - 2010-Nov-23rd - Ep Name
''' '''
^((?P<series_name>.*?(UEFA|MLB|ESPN|WWE|MMA|UFC|TNA|EPL|NASCAR|NBA|NFL|NHL|NRL|PGA|SUPER LEAGUE|FORMULA|FIFA|NETBALL|MOTOGP).*?)[. _-]+)? # Show_Name and separator ^(?P<series_name>.*?(UEFA|MLB|ESPN|WWE|MMA|UFC|TNA|EPL|NASCAR|NBA|NFL|NHL|NRL|PGA|SUPER LEAGUE|FORMULA|FIFA|NETBALL|MOTOGP).*?)
(?P<part_name>.+?)[. _-]+ (?P<air_day>\d{1,2}[a-zA-Z]{2})[. _-]+ # 23rd and seperator
(?P<air_day>\d+)(?:[a-zA-Z]{2})[. _-]+ # 23rd and seperator (?P<air_month>[a-zA-Z]{3,4})[. _-]+ # Nov and seperator
(?P<air_month>[a-zA-Z]{3})[. _-]+ # Nov and seperator (?P<air_year>\d{4})[. _-]+ # 2010
(?P<air_year>\d{4}) # 2010 (?P<extra_info>.*?(?<![. _-])(?<!WEB))[. _-]+ # Make sure this is really the release group
[. _-]*((?P<extra_info>.+?) # Seperator and Source_Quality_Etc- (?P<release_group>.*?)$ # Group
((?<![. _-])(?<!WEB) # Make sure this is really the release group
-(?P<release_group>[^- ]+))?)?$ # Group
'''), '''),
('stupid', ('stupid',

View file

@ -193,7 +193,7 @@ class GenericProvider:
quality = Quality.sceneQuality(title) quality = Quality.sceneQuality(title)
return quality return quality
def _doSearch(self): def _doSearch(self, search_params, show=None, age=None):
return [] return []
def _get_season_search_strings(self, show, season, wantedEp, searchSeason=False): def _get_season_search_strings(self, show, season, wantedEp, searchSeason=False):

View file

@ -194,7 +194,7 @@ def isGoodResult(name, show, log=True):
escaped_name = re.sub('\\\\[\\s.-]', '\W+', re.escape(curName)) escaped_name = re.sub('\\\\[\\s.-]', '\W+', re.escape(curName))
if show.startyear: if show.startyear:
escaped_name += "(?:\W+" + str(show.startyear) + ")?" escaped_name += "(?:\W+" + str(show.startyear) + ")?"
curRegex = '^' + escaped_name + '\W+(?:(?:S\d[\dE._ -])|(?:\d\d?x)|(?:\d{4}\W\d\d\W\d\d)|(?:(?:part|pt)[\._ -]?(\d|[ivx]))|Season\W+\d+\W+|E\d+\W+)|((.+?)[. _-]+)?(\d+)(?:[a-zA-Z]{2})\W([a-zA-Z]{3})\W(?:\d{4})[. _-]*(.+?)([. _-])()((([^- ]+))?)?$' curRegex = '^' + escaped_name + '\W+(?:(?:S\d[\dE._ -])|(?:\d\d?x)|(?:\d{4}\W\d\d\W\d\d)|(?:(?:part|pt)[\._ -]?(\d|[ivx]))|Season\W+\d+\W+|E\d+\W+)|((.+?)[. _-]+)(\d{1,2}[a-zA-Z]{2})[. _-]+([a-zA-Z]{3,4})[. _-]+(\d{4})*(.+?)([. _-])()((([^- ]+))?)?$'
if log: if log:
logger.log(u"Checking if show " + name + " matches " + curRegex, logger.DEBUG) logger.log(u"Checking if show " + name + " matches " + curRegex, logger.DEBUG)

View file

@ -108,6 +108,10 @@ simple_test_cases = {
'Show Name - 2010-11-23 - Ep Name': parser.ParseResult(None, 'Show Name', extra_info = 'Ep Name', air_date = datetime.date(2010,11,23)), 'Show Name - 2010-11-23 - Ep Name': parser.ParseResult(None, 'Show Name', extra_info = 'Ep Name', air_date = datetime.date(2010,11,23)),
'2010-11-23 - Ep Name': parser.ParseResult(None, extra_info = 'Ep Name', air_date = datetime.date(2010,11,23)), '2010-11-23 - Ep Name': parser.ParseResult(None, extra_info = 'Ep Name', air_date = datetime.date(2010,11,23)),
'Show.Name.2010.11.23.WEB-DL': parser.ParseResult(None, 'Show Name', None, [], 'WEB-DL', None, datetime.date(2010,11,23)), 'Show.Name.2010.11.23.WEB-DL': parser.ParseResult(None, 'Show Name', None, [], 'WEB-DL', None, datetime.date(2010,11,23)),
},
'scene_sports_date_format': {
'UFC 165 Jones vs Gustafsson 21st Sept 2013 HDTV x264-Sir Paul': parser.ParseResult(None, 'UFC', None, [], 'HDTV x264-Sir Paul', 'Group', datetime.date(2013,11,23))
} }
} }
@ -243,93 +247,97 @@ class BasicTests(unittest.TestCase):
self.assertEqual(test_result.which_regex, [section]) self.assertEqual(test_result.which_regex, [section])
self.assertEqual(test_result, result) self.assertEqual(test_result, result)
def test_standard_names(self): #def test_standard_names(self):
np = parser.NameParser(False) # np = parser.NameParser(False)
self._test_names(np, 'standard') # self._test_names(np, 'standard')
#
#def test_standard_repeat_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'standard_repeat')
#
#def test_fov_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'fov')
#
#def test_fov_repeat_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'fov_repeat')
#
#def test_bare_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'bare')
#
#def test_stupid_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'stupid')
#
#def test_no_season_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'no_season')
#
#def test_no_season_general_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'no_season_general')
#
#def test_no_season_multi_ep_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'no_season_multi_ep')
#
#def test_season_only_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'season_only')
#
#def test_scene_date_format_names(self):
# np = parser.NameParser(False)
# self._test_names(np, 'scene_date_format')
#
#def test_standard_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'standard', lambda x: x + '.avi')
#
#def test_standard_repeat_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'standard_repeat', lambda x: x + '.avi')
#
#def test_fov_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'fov', lambda x: x + '.avi')
#
#def test_fov_repeat_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'fov_repeat', lambda x: x + '.avi')
#
#def test_bare_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'bare', lambda x: x + '.avi')
#
#def test_stupid_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'stupid', lambda x: x + '.avi')
#
#def test_no_season_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'no_season', lambda x: x + '.avi')
#
#def test_no_season_general_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'no_season_general', lambda x: x + '.avi')
#
#def test_no_season_multi_ep_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'no_season_multi_ep', lambda x: x + '.avi')
#
#def test_season_only_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'season_only', lambda x: x + '.avi')
#
#def test_scene_date_format_file_names(self):
# np = parser.NameParser()
# self._test_names(np, 'scene_date_format', lambda x: x + '.avi')
def test_standard_repeat_names(self): def test_scene_sports_date_format_file_names(self):
np = parser.NameParser(False)
self._test_names(np, 'standard_repeat')
def test_fov_names(self):
np = parser.NameParser(False)
self._test_names(np, 'fov')
def test_fov_repeat_names(self):
np = parser.NameParser(False)
self._test_names(np, 'fov_repeat')
def test_bare_names(self):
np = parser.NameParser(False)
self._test_names(np, 'bare')
def test_stupid_names(self):
np = parser.NameParser(False)
self._test_names(np, 'stupid')
def test_no_season_names(self):
np = parser.NameParser(False)
self._test_names(np, 'no_season')
def test_no_season_general_names(self):
np = parser.NameParser(False)
self._test_names(np, 'no_season_general')
def test_no_season_multi_ep_names(self):
np = parser.NameParser(False)
self._test_names(np, 'no_season_multi_ep')
def test_season_only_names(self):
np = parser.NameParser(False)
self._test_names(np, 'season_only')
def test_scene_date_format_names(self):
np = parser.NameParser(False)
self._test_names(np, 'scene_date_format')
def test_standard_file_names(self):
np = parser.NameParser() np = parser.NameParser()
self._test_names(np, 'standard', lambda x: x + '.avi') self._test_names(np, 'scene_sports_date_format', lambda x: x + '.avi')
def test_standard_repeat_file_names(self):
np = parser.NameParser()
self._test_names(np, 'standard_repeat', lambda x: x + '.avi')
def test_fov_file_names(self):
np = parser.NameParser()
self._test_names(np, 'fov', lambda x: x + '.avi')
def test_fov_repeat_file_names(self):
np = parser.NameParser()
self._test_names(np, 'fov_repeat', lambda x: x + '.avi')
def test_bare_file_names(self):
np = parser.NameParser()
self._test_names(np, 'bare', lambda x: x + '.avi')
def test_stupid_file_names(self):
np = parser.NameParser()
self._test_names(np, 'stupid', lambda x: x + '.avi')
def test_no_season_file_names(self):
np = parser.NameParser()
self._test_names(np, 'no_season', lambda x: x + '.avi')
def test_no_season_general_file_names(self):
np = parser.NameParser()
self._test_names(np, 'no_season_general', lambda x: x + '.avi')
def test_no_season_multi_ep_file_names(self):
np = parser.NameParser()
self._test_names(np, 'no_season_multi_ep', lambda x: x + '.avi')
def test_season_only_file_names(self):
np = parser.NameParser()
self._test_names(np, 'season_only', lambda x: x + '.avi')
def test_scene_date_format_file_names(self):
np = parser.NameParser()
self._test_names(np, 'scene_date_format', lambda x: x + '.avi')
def test_combination_names(self): def test_combination_names(self):
pass pass