mirror of
https://github.com/SickGear/SickGear.git
synced 2025-01-22 01:23:43 +00:00
Update imdbpy libs to v5.0
Fixed invalid indexer id issues for TVRage shows. Fixed issues with getting posters and backdrops for TVRage shows. We now convert XML straight to a dict object for the Indexer APIs, improving the overall performance of the APIs. Fixed issues with TVRage shows and with displaying genres properly.
This commit is contained in:
parent
764cf6e62e
commit
2dcd26e69c
30 changed files with 7446 additions and 453 deletions
|
@ -6,7 +6,7 @@ a person from the IMDb database.
|
||||||
It can fetch data through different media (e.g.: the IMDb web pages,
|
It can fetch data through different media (e.g.: the IMDb web pages,
|
||||||
a SQL database, etc.)
|
a SQL database, etc.)
|
||||||
|
|
||||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2014 Davide Alberani <da@erlug.linux.it>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
|
||||||
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
|
__all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
|
||||||
'available_access_systems']
|
'available_access_systems']
|
||||||
__version__ = VERSION = '4.9'
|
__version__ = VERSION = '5.0'
|
||||||
|
|
||||||
# Import compatibility module (importing it is enough).
|
# Import compatibility module (importing it is enough).
|
||||||
import _compat
|
import _compat
|
||||||
|
@ -160,6 +160,7 @@ def IMDb(accessSystem=None, *arguments, **keywords):
|
||||||
kwds.update(keywords)
|
kwds.update(keywords)
|
||||||
keywords = kwds
|
keywords = kwds
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
|
import logging
|
||||||
logging.getLogger('imdbpy').warn('Unable to read configuration' \
|
logging.getLogger('imdbpy').warn('Unable to read configuration' \
|
||||||
' file; complete error: %s' % e)
|
' file; complete error: %s' % e)
|
||||||
# It just LOOKS LIKE a bad habit: we tried to read config
|
# It just LOOKS LIKE a bad habit: we tried to read config
|
||||||
|
@ -303,7 +304,7 @@ class IMDbBase:
|
||||||
# http://akas.imdb.com/keyword/%s/
|
# http://akas.imdb.com/keyword/%s/
|
||||||
imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
|
imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
|
||||||
# http://akas.imdb.com/chart/top
|
# http://akas.imdb.com/chart/top
|
||||||
imdbURL_top250=imdbURL_base + 'chart/top',
|
imdbURL_top250=imdbURL_base + 'chart/top'
|
||||||
# http://akas.imdb.com/chart/bottom
|
# http://akas.imdb.com/chart/bottom
|
||||||
imdbURL_bottom100=imdbURL_base + 'chart/bottom'
|
imdbURL_bottom100=imdbURL_base + 'chart/bottom'
|
||||||
# http://akas.imdb.com/find?%s
|
# http://akas.imdb.com/find?%s
|
||||||
|
@ -824,22 +825,23 @@ class IMDbBase:
|
||||||
# subclass, somewhere under the imdb.parser package.
|
# subclass, somewhere under the imdb.parser package.
|
||||||
raise NotImplementedError('override this method')
|
raise NotImplementedError('override this method')
|
||||||
|
|
||||||
def _searchIMDb(self, kind, ton):
|
def _searchIMDb(self, kind, ton, title_kind=None):
|
||||||
"""Search the IMDb akas server for the given title or name."""
|
"""Search the IMDb akas server for the given title or name."""
|
||||||
# The Exact Primary search system has gone AWOL, so we resort
|
# The Exact Primary search system has gone AWOL, so we resort
|
||||||
# to the mobile search. :-/
|
# to the mobile search. :-/
|
||||||
if not ton:
|
if not ton:
|
||||||
return None
|
return None
|
||||||
|
ton = ton.strip('"')
|
||||||
aSystem = IMDb('mobile')
|
aSystem = IMDb('mobile')
|
||||||
if kind == 'tt':
|
if kind == 'tt':
|
||||||
searchFunct = aSystem.search_movie
|
searchFunct = aSystem.search_movie
|
||||||
check = 'long imdb canonical title'
|
check = 'long imdb title'
|
||||||
elif kind == 'nm':
|
elif kind == 'nm':
|
||||||
searchFunct = aSystem.search_person
|
searchFunct = aSystem.search_person
|
||||||
check = 'long imdb canonical name'
|
check = 'long imdb name'
|
||||||
elif kind == 'char':
|
elif kind == 'char':
|
||||||
searchFunct = aSystem.search_character
|
searchFunct = aSystem.search_character
|
||||||
check = 'long imdb canonical name'
|
check = 'long imdb name'
|
||||||
elif kind == 'co':
|
elif kind == 'co':
|
||||||
# XXX: are [COUNTRY] codes included in the results?
|
# XXX: are [COUNTRY] codes included in the results?
|
||||||
searchFunct = aSystem.search_company
|
searchFunct = aSystem.search_company
|
||||||
|
@ -852,24 +854,42 @@ class IMDbBase:
|
||||||
# exact match.
|
# exact match.
|
||||||
if len(searchRes) == 1:
|
if len(searchRes) == 1:
|
||||||
return searchRes[0].getID()
|
return searchRes[0].getID()
|
||||||
|
title_only_matches = []
|
||||||
for item in searchRes:
|
for item in searchRes:
|
||||||
# Return the first perfect match.
|
# Return the first perfect match.
|
||||||
if item[check] == ton:
|
if item[check].strip('"') == ton:
|
||||||
return item.getID()
|
# For titles do additional check for kind
|
||||||
|
if kind != 'tt' or title_kind == item['kind']:
|
||||||
|
return item.getID()
|
||||||
|
elif kind == 'tt':
|
||||||
|
title_only_matches.append(item.getID())
|
||||||
|
# imdbpy2sql.py could have detected the wrong type, so if no title and kind
|
||||||
|
# matches found - collect all results with title only match
|
||||||
|
# Return list of IDs if multiple matches (can happen when searching
|
||||||
|
# titles with no title_kind specified)
|
||||||
|
# Example: DB: Band of Brothers "tv series" vs "tv mini-series"
|
||||||
|
if title_only_matches:
|
||||||
|
if len(title_only_matches) == 1:
|
||||||
|
return title_only_matches[0]
|
||||||
|
else:
|
||||||
|
return title_only_matches
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def title2imdbID(self, title):
|
def title2imdbID(self, title, kind=None):
|
||||||
"""Translate a movie title (in the plain text data files format)
|
"""Translate a movie title (in the plain text data files format)
|
||||||
to an imdbID.
|
to an imdbID.
|
||||||
Try an Exact Primary Title search on IMDb;
|
Try an Exact Primary Title search on IMDb;
|
||||||
return None if it's unable to get the imdbID."""
|
return None if it's unable to get the imdbID;
|
||||||
return self._searchIMDb('tt', title)
|
Always specify kind: movie, tv series, video game etc. or search can
|
||||||
|
return list of IDs if multiple matches found
|
||||||
|
"""
|
||||||
|
return self._searchIMDb('tt', title, kind)
|
||||||
|
|
||||||
def name2imdbID(self, name):
|
def name2imdbID(self, name):
|
||||||
"""Translate a person name in an imdbID.
|
"""Translate a person name in an imdbID.
|
||||||
Try an Exact Primary Name search on IMDb;
|
Try an Exact Primary Name search on IMDb;
|
||||||
return None if it's unable to get the imdbID."""
|
return None if it's unable to get the imdbID."""
|
||||||
return self._searchIMDb('tt', name)
|
return self._searchIMDb('nm', name)
|
||||||
|
|
||||||
def character2imdbID(self, name):
|
def character2imdbID(self, name):
|
||||||
"""Translate a character name in an imdbID.
|
"""Translate a character name in an imdbID.
|
||||||
|
@ -896,7 +916,8 @@ class IMDbBase:
|
||||||
imdbID = aSystem.get_imdbMovieID(mop.movieID)
|
imdbID = aSystem.get_imdbMovieID(mop.movieID)
|
||||||
else:
|
else:
|
||||||
imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
|
imdbID = aSystem.title2imdbID(build_title(mop, canonical=0,
|
||||||
ptdf=1))
|
ptdf=0, appendKind=False),
|
||||||
|
mop['kind'])
|
||||||
elif isinstance(mop, Person.Person):
|
elif isinstance(mop, Person.Person):
|
||||||
if mop.personID is not None:
|
if mop.personID is not None:
|
||||||
imdbID = aSystem.get_imdbPersonID(mop.personID)
|
imdbID = aSystem.get_imdbPersonID(mop.personID)
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
|
|
||||||
[imdbpy]
|
[imdbpy]
|
||||||
## Default.
|
## Default.
|
||||||
accessSystem = mobile
|
accessSystem = http
|
||||||
|
|
||||||
## Optional (options common to every data access system):
|
## Optional (options common to every data access system):
|
||||||
# Activate adult searches (on, by default).
|
# Activate adult searches (on, by default).
|
||||||
|
@ -37,7 +37,7 @@ accessSystem = mobile
|
||||||
# Number of results for searches (20 by default).
|
# Number of results for searches (20 by default).
|
||||||
#results = 20
|
#results = 20
|
||||||
# Re-raise all caught exceptions (off, by default).
|
# Re-raise all caught exceptions (off, by default).
|
||||||
reraiseExceptions = on
|
#reraiseExceptions = off
|
||||||
|
|
||||||
## Optional (options common to http and mobile data access systems):
|
## Optional (options common to http and mobile data access systems):
|
||||||
# Proxy used to access the network. If it requires authentication,
|
# Proxy used to access the network. If it requires authentication,
|
||||||
|
@ -69,7 +69,7 @@ reraiseExceptions = on
|
||||||
## Set the threshold for logging messages.
|
## Set the threshold for logging messages.
|
||||||
# Can be one of "debug", "info", "warning", "error", "critical" (default:
|
# Can be one of "debug", "info", "warning", "error", "critical" (default:
|
||||||
# "warning").
|
# "warning").
|
||||||
loggingLevel = info
|
#loggingLevel = debug
|
||||||
|
|
||||||
## Path to a configuration file for the logging facility;
|
## Path to a configuration file for the logging facility;
|
||||||
# see: http://docs.python.org/library/logging.html#configuring-logging
|
# see: http://docs.python.org/library/logging.html#configuring-logging
|
||||||
|
|
|
@ -64,8 +64,10 @@ LANG_ARTICLES = {
|
||||||
'English': ('the', 'a', 'an'),
|
'English': ('the', 'a', 'an'),
|
||||||
'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
|
'Italian': ('la', 'le', "l'", 'il', 'i', 'un', 'una', 'gli', 'lo', "un'",
|
||||||
'uno'),
|
'uno'),
|
||||||
'Spanish': ('la', 'le', 'el', 'les', 'un', 'los', 'una', 'uno', 'unos',
|
'Spanish': ('la', 'lo', 'el', 'las', 'un', 'los', 'una', 'al', 'del',
|
||||||
'unas'),
|
'unos', 'unas', 'uno'),
|
||||||
|
'French': ('le', "l'", 'la', 'les', 'un', 'une', 'des', 'au', 'du', '\xc3\xa0 la',
|
||||||
|
'de la', 'aux'),
|
||||||
'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
|
'Portuguese': ('a', 'as', 'o', 'os', 'um', 'uns', 'uma', 'umas'),
|
||||||
'Turkish': (), # Some languages doesn't have articles.
|
'Turkish': (), # Some languages doesn't have articles.
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
generatepot.py script.
|
generatepot.py script.
|
||||||
|
|
||||||
|
|
1303
lib/imdb/locale/imdbpy-ar.po
Normal file
1303
lib/imdb/locale/imdbpy-ar.po
Normal file
File diff suppressed because it is too large
Load diff
1303
lib/imdb/locale/imdbpy-bg.po
Normal file
1303
lib/imdb/locale/imdbpy-bg.po
Normal file
File diff suppressed because it is too large
Load diff
1303
lib/imdb/locale/imdbpy-de.po
Normal file
1303
lib/imdb/locale/imdbpy-de.po
Normal file
File diff suppressed because it is too large
Load diff
1304
lib/imdb/locale/imdbpy-es.po
Normal file
1304
lib/imdb/locale/imdbpy-es.po
Normal file
File diff suppressed because it is too large
Load diff
1304
lib/imdb/locale/imdbpy-fr.po
Normal file
1304
lib/imdb/locale/imdbpy-fr.po
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python
|
||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
"""Generate binary message catalog from textual translation description.
|
"""Generate binary message catalog from textual translation description.
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
rebuildmo.py script.
|
rebuildmo.py script.
|
||||||
|
|
||||||
|
|
|
@ -104,15 +104,24 @@ PY_VERSION = sys.version_info[:2]
|
||||||
# The cookies for the "adult" search.
|
# The cookies for the "adult" search.
|
||||||
# Please don't mess with these account.
|
# Please don't mess with these account.
|
||||||
# Old 'IMDbPY' account.
|
# Old 'IMDbPY' account.
|
||||||
_old_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
|
_IMDbPY_cookie_id = 'boM2bYxz9MCsOnH9gZ0S9QHs12NWrNdApxsls1Vb5/NGrNdjcHx3dUas10UASoAjVEvhAbGagERgOpNkAPvxdbfKwaV2ikEj9SzXY1WPxABmDKQwdqzwRbM+12NSeJFGUEx3F8as10WwidLzVshDtxaPIbP13NdjVS9UZTYqgTVGrNcT9vyXU1'
|
||||||
_old_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
|
_IMDbPY_cookie_uu = '3M3AXsquTU5Gur/Svik+ewflPm5Rk2ieY3BIPlLjyK3C0Dp9F8UoPgbTyKiGtZp4x1X+uAUGKD7BM2g+dVd8eqEzDErCoYvdcvGLvVLAen1y08hNQtALjVKAe+1hM8g9QbNonlG1/t4S82ieUsBbrSIQbq1yhV6tZ6ArvSbA7rgHc8n5AdReyAmDaJ5Wm/ee3VDoCnGj/LlBs2ieUZNorhHDKK5Q=='
|
||||||
# New 'IMDbPYweb' account.
|
# 'imdbpy2010' account.
|
||||||
_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
|
_imdbpy2010_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
|
||||||
_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
|
_imdbpy2010_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
|
||||||
|
# old 'IMDbPYweb' account.
|
||||||
|
_old_IMDbPYweb_cookie_id = 'rH1jNAkjTlNXvHolvBVBsgaPICNZbNdjVjzFwzas9JRmusdjVoqBs/Hs12NR+1WFxEoR9bGKEDUg6sNlADqXwkas12N131Rwdb+UQNGKN8PWrNdjcdqBQVLq8mbGDHP3hqzxhbD692NQi9D0JjpBtRaPIbP1zNdjUOqENQYv1ADWrNcT9vyXU1'
|
||||||
|
_old_IMDbPYweb_cookie_uu = 'su4/m8cho4c6HP+W1qgq6wchOmhnF0w+lIWvHjRUPJ6nRA9sccEafjGADJ6hQGrMd4GKqLcz2X4z5+w+M4OIKnRn7FpENH7dxDQu3bQEHyx0ZEyeRFTPHfQEX03XF+yeN1dsPpcXaqjUZAw+lGRfXRQEfz3RIX9IgVEffdBAHw2wQXyf9xdMPrQELw0QNB8dsffsqcdQemjPB0w+moLcPh0JrKrHJ9hjBzdMPpcXTH7XRwwOk='
|
||||||
|
# old 'IMDbPYweb' account values (as of 2012-12-30)
|
||||||
|
_IMDbPYweb_cookie_id = 'BCYjtpb46Go0cMHAMewWZEauhwqPL7ASCPpPVNutu6BuayHZd0U6Dk3UAqVlEM8DHLDsSr02RGQn5ff3245-R4A130NAWJ_5yqXx7X-zJey8vQM8JKdv3rTUSEJznJQlojUW1Bije-Q0FXAixs4I0sePWhd_tA41i-9AF2q3lPmaksram6ilMhN9i3IPESW1PMbk'
|
||||||
|
_IMDbPYweb_cookie_uu = 'BCYttQjEMc-NyUdFUGxThidAnBo7wwalEzj4un9uzf2XoEjtqDhNfrH7bOSuwlRkMEQ11SNyTajl-b9Q-21m4HwYu0e3jXZrjYLXLYzFkrEroCDyUREqaTwPJPSjGtFmvlaVBZEZmsWpaxe18DT5KiygKyGPZKH78Xu4im6ba-Sd31WvbXHzP8KGXPpGjhhVuv7Dcv314HCWkE832Srf9ya-Uv0FdGAmYyLbIAXuxnvpYQd6oZ8-CYkSGLIqcKWdrf5S'
|
||||||
|
# 'IMDbPY2013' account
|
||||||
|
_IMDbPY2013_cookie_id = 'BCYmoyqSm2WglmOzG-SrFWSvVpxsTZOB0qEOOqmAwCBxCbaNgKOxd0DTKzUvt7t04Pya5gV2tUrpDmYxrc1Dr54DQj2UXI7QI35__M5-HI2KrbOI3PjDz6M-_U3HG8topMfN64R24tmBixoZhMYXVaEc556lf0Z4gQNJVYRANXvwytP5v1lpfeToRlu9aVJwN4kT'
|
||||||
|
_IMDbPY2013_cookie_uu = 'BCYquDS8Y2i8R1pJxS4nB77YrhjHHXeOea2Xl9KtZvE6RZKVfMvzTGU4Vl5-yxfPbgRSiFJasyf-hhPuVvXyaHlfeBjNlbFT8hz2HzFFkQ_SxKxq05J51gi7Fv4SaAws1M-i7zmQ1TRunfJqCVIYqPwIs2NO7s4_YDH2ZoISVGLgca8OY2K58HychOZB1oRWHVeAJNhLJMrCWJBuGRLCNnQK5X9tA0dPPntr2Ussy0ouul-N1GQz-8y5vda3JJ_C6xkwmHcA6JrOdOFO_HqMWjVSXuxGEdrXC919JM9H0vooVvKeVgAEJnTh2GiVlUJUoH3c'
|
||||||
|
|
||||||
# imdbpy2010 account.
|
# Currently used account.
|
||||||
#_cookie_id = 'QrCdxVi+L+WgqOLrQJJgBgRRXGInphxiBPU/YXSFDyExMFzCp6YcYgSVXyEUhS/xMID8wqemHGID4DlntwZ49vemP5UXsAxiJ4D6goSmHGIgNT9hMXBaRSF2vMS3phxB0bVfQiQlP1RxdrzhB6YcRHFASyIhQVowwXCKtDSlD2YhgRvxBsCKtGemHBKH9mxSI='
|
_cookie_id = _IMDbPY2013_cookie_id
|
||||||
#_cookie_uu = 'oiEo2yoJFCA2Zbn/o7Z1LAPIwotAu6QdALv3foDb1x5F/tdrFY63XkSfty4kntS8Y8jkHSDLt3406+d+JThEilPI0mtTaOQdA/t2/iErp22jaLdeVU5ya4PIREpj7HFdpzhEHadcIAngSER50IoHDpD6Bz4Qy3b+UIhE/hBbhz5Q63ceA2hEvhPo5B0FnrL9Q8jkWjDIbA0Au3d+AOtnXoCIRL4Q28c+UOtnXpP4RL4T6OQdA+6ijUCI5B0AW2d+UOtnXpPYRL4T6OQdA8jkTUOYlC0A=='
|
_cookie_uu = _IMDbPY2013_cookie_uu
|
||||||
|
|
||||||
|
|
||||||
class _FakeURLOpener(object):
|
class _FakeURLOpener(object):
|
||||||
|
@ -141,9 +150,10 @@ class IMDbURLopener(FancyURLopener):
|
||||||
for header in ('User-Agent', 'User-agent', 'user-agent'):
|
for header in ('User-Agent', 'User-agent', 'user-agent'):
|
||||||
self.del_header(header)
|
self.del_header(header)
|
||||||
self.set_header('User-Agent', 'Mozilla/5.0')
|
self.set_header('User-Agent', 'Mozilla/5.0')
|
||||||
|
self.set_header('Accept-Language', 'en-us,en;q=0.5')
|
||||||
# XXX: This class is used also to perform "Exact Primary
|
# XXX: This class is used also to perform "Exact Primary
|
||||||
# [Title|Name]" searches, and so by default the cookie is set.
|
# [Title|Name]" searches, and so by default the cookie is set.
|
||||||
c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
|
c_header = 'uu=%s; id=%s' % (_cookie_uu, _cookie_id)
|
||||||
self.set_header('Cookie', c_header)
|
self.set_header('Cookie', c_header)
|
||||||
|
|
||||||
def get_proxy(self):
|
def get_proxy(self):
|
||||||
|
@ -199,12 +209,11 @@ class IMDbURLopener(FancyURLopener):
|
||||||
server_encode = uopener.info().getparam('charset')
|
server_encode = uopener.info().getparam('charset')
|
||||||
# Otherwise, look at the content-type HTML meta tag.
|
# Otherwise, look at the content-type HTML meta tag.
|
||||||
if server_encode is None and content:
|
if server_encode is None and content:
|
||||||
first_bytes = content[:512]
|
begin_h = content.find('text/html; charset=')
|
||||||
begin_h = first_bytes.find('text/html; charset=')
|
|
||||||
if begin_h != -1:
|
if begin_h != -1:
|
||||||
end_h = first_bytes[19+begin_h:].find('"')
|
end_h = content[19+begin_h:].find('"')
|
||||||
if end_h != -1:
|
if end_h != -1:
|
||||||
server_encode = first_bytes[19+begin_h:19+begin_h+end_h]
|
server_encode = content[19+begin_h:19+begin_h+end_h]
|
||||||
if server_encode:
|
if server_encode:
|
||||||
try:
|
try:
|
||||||
if lookup(server_encode):
|
if lookup(server_encode):
|
||||||
|
@ -455,16 +464,16 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
results is the maximum number of results to be retrieved."""
|
results is the maximum number of results to be retrieved."""
|
||||||
if isinstance(ton, unicode):
|
if isinstance(ton, unicode):
|
||||||
try:
|
try:
|
||||||
ton = ton.encode('iso8859-1')
|
ton = ton.encode('utf-8')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
try:
|
try:
|
||||||
ton = ton.encode('utf-8')
|
ton = ton.encode('iso8859-1')
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
pass
|
pass
|
||||||
##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
|
##params = 'q=%s&%s=on&mx=%s' % (quote_plus(ton), kind, str(results))
|
||||||
params = 'q=%s;s=%s;mx=%s' % (quote_plus(ton), kind, str(results))
|
params = 'q=%s&s=%s&mx=%s' % (quote_plus(ton), kind, str(results))
|
||||||
if kind == 'ep':
|
if kind == 'ep':
|
||||||
params = params.replace('s=ep;', 's=tt;ttype=ep;', 1)
|
params = params.replace('s=ep&', 's=tt&ttype=ep&', 1)
|
||||||
cont = self._retrieve(self.urls['find'] % params)
|
cont = self._retrieve(self.urls['find'] % params)
|
||||||
#print 'URL:', imdbURL_find % params
|
#print 'URL:', imdbURL_find % params
|
||||||
if cont.find('Your search returned more than') == -1 or \
|
if cont.find('Your search returned more than') == -1 or \
|
||||||
|
@ -472,7 +481,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
return cont
|
return cont
|
||||||
# The retrieved page contains no results, because too many
|
# The retrieved page contains no results, because too many
|
||||||
# titles or names contain the string we're looking for.
|
# titles or names contain the string we're looking for.
|
||||||
params = 'q=%s;ls=%s;lm=0' % (quote_plus(ton), kind)
|
params = 'q=%s&ls=%s&lm=0' % (quote_plus(ton), kind)
|
||||||
size = 131072 + results * 512
|
size = 131072 + results * 512
|
||||||
return self._retrieve(self.urls['find'] % params, size=size)
|
return self._retrieve(self.urls['find'] % params, size=size)
|
||||||
|
|
||||||
|
@ -587,6 +596,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
|
cont = self._retrieve(self.urls['movie_main'] % movieID + 'recommendations')
|
||||||
return self.mProxy.rec_parser.parse(cont)
|
return self.mProxy.rec_parser.parse(cont)
|
||||||
|
|
||||||
|
def get_movie_critic_reviews(self, movieID):
|
||||||
|
cont = self._retrieve(self.urls['movie_main'] % movieID + 'criticreviews')
|
||||||
|
return self.mProxy.criticrev_parser.parse(cont)
|
||||||
|
|
||||||
def get_movie_external_reviews(self, movieID):
|
def get_movie_external_reviews(self, movieID):
|
||||||
cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
|
cont = self._retrieve(self.urls['movie_main'] % movieID + 'externalreviews')
|
||||||
return self.mProxy.externalrev_parser.parse(cont)
|
return self.mProxy.externalrev_parser.parse(cont)
|
||||||
|
@ -754,7 +767,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
|
||||||
return self.pProxy.person_keywords_parser.parse(cont)
|
return self.pProxy.person_keywords_parser.parse(cont)
|
||||||
|
|
||||||
def _search_character(self, name, results):
|
def _search_character(self, name, results):
|
||||||
cont = self._get_search_content('char', name, results)
|
cont = self._get_search_content('ch', name, results)
|
||||||
return self.scProxy.search_character_parser.parse(cont, results=results)['data']
|
return self.scProxy.search_character_parser.parse(cont, results=results)['data']
|
||||||
|
|
||||||
def get_character_main(self, characterID):
|
def get_character_main(self, characterID):
|
||||||
|
|
|
@ -9,7 +9,7 @@ pages would be:
|
||||||
plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
|
plot summary: http://akas.imdb.com/title/tt0094226/plotsummary
|
||||||
...and so on...
|
...and so on...
|
||||||
|
|
||||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -531,9 +531,6 @@ class DOMHTMLMovieParser(DOMParserBase):
|
||||||
def _process_plotsummary(x):
|
def _process_plotsummary(x):
|
||||||
"""Process a plot (contributed by Rdian06)."""
|
"""Process a plot (contributed by Rdian06)."""
|
||||||
xauthor = x.get('author')
|
xauthor = x.get('author')
|
||||||
if xauthor:
|
|
||||||
xauthor = xauthor.replace('{', '<').replace('}', '>').replace('(',
|
|
||||||
'<').replace(')', '>').strip()
|
|
||||||
xplot = x.get('plot', u'').strip()
|
xplot = x.get('plot', u'').strip()
|
||||||
if xauthor:
|
if xauthor:
|
||||||
xplot += u'::%s' % xauthor
|
xplot += u'::%s' % xauthor
|
||||||
|
@ -555,17 +552,20 @@ class DOMHTMLPlotParser(DOMParserBase):
|
||||||
# Notice that recently IMDb started to put the email of the
|
# Notice that recently IMDb started to put the email of the
|
||||||
# author only in the link, that we're not collecting, here.
|
# author only in the link, that we're not collecting, here.
|
||||||
extractors = [Extractor(label='plot',
|
extractors = [Extractor(label='plot',
|
||||||
path="//p[@class='plotpar']",
|
path="//ul[@class='zebraList']//p",
|
||||||
attrs=Attribute(key='plot',
|
attrs=Attribute(key='plot',
|
||||||
multi=True,
|
multi=True,
|
||||||
path={'plot': './text()',
|
path={'plot': './text()[1]',
|
||||||
'author': './i/a/text()'},
|
'author': './span/em/a/text()'},
|
||||||
postprocess=_process_plotsummary))]
|
postprocess=_process_plotsummary))]
|
||||||
|
|
||||||
|
|
||||||
def _process_award(x):
|
def _process_award(x):
|
||||||
award = {}
|
award = {}
|
||||||
award['award'] = x.get('award').strip()
|
_award = x.get('award')
|
||||||
|
if _award is not None:
|
||||||
|
_award = _award.strip()
|
||||||
|
award['award'] = _award
|
||||||
if not award['award']:
|
if not award['award']:
|
||||||
return {}
|
return {}
|
||||||
award['year'] = x.get('year').strip()
|
award['year'] = x.get('year').strip()
|
||||||
|
@ -709,10 +709,16 @@ class DOMHTMLTaglinesParser(DOMParserBase):
|
||||||
result = tparser.parse(taglines_html_string)
|
result = tparser.parse(taglines_html_string)
|
||||||
"""
|
"""
|
||||||
extractors = [Extractor(label='taglines',
|
extractors = [Extractor(label='taglines',
|
||||||
path="//div[@id='tn15content']/p",
|
path='//*[contains(concat(" ", normalize-space(@class), " "), " soda ")]',
|
||||||
attrs=Attribute(key='taglines', multi=True,
|
attrs=Attribute(key='taglines',
|
||||||
|
multi=True,
|
||||||
path="./text()"))]
|
path="./text()"))]
|
||||||
|
|
||||||
|
def postprocess_data(self, data):
|
||||||
|
if 'taglines' in data:
|
||||||
|
data['taglines'] = [tagline.strip() for tagline in data['taglines']]
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLKeywordsParser(DOMParserBase):
|
class DOMHTMLKeywordsParser(DOMParserBase):
|
||||||
"""Parser for the "keywords" page of a given movie.
|
"""Parser for the "keywords" page of a given movie.
|
||||||
|
@ -785,9 +791,9 @@ class DOMHTMLSoundtrackParser(DOMHTMLAlternateVersionsParser):
|
||||||
]
|
]
|
||||||
|
|
||||||
def postprocess_data(self, data):
|
def postprocess_data(self, data):
|
||||||
if 'soundtrack' in data:
|
if 'alternate versions' in data:
|
||||||
nd = []
|
nd = []
|
||||||
for x in data['soundtrack']:
|
for x in data['alternate versions']:
|
||||||
ds = x.split('\n')
|
ds = x.split('\n')
|
||||||
title = ds[0]
|
title = ds[0]
|
||||||
if title[0] == '"' and title[-1] == '"':
|
if title[0] == '"' and title[-1] == '"':
|
||||||
|
@ -846,6 +852,13 @@ class DOMHTMLCrazyCreditsParser(DOMParserBase):
|
||||||
x.replace('\n', ' ').replace(' ', ' ')))]
|
x.replace('\n', ' ').replace(' ', ' ')))]
|
||||||
|
|
||||||
|
|
||||||
|
def _process_goof(x):
|
||||||
|
if x['spoiler_category']:
|
||||||
|
return x['spoiler_category'].strip() + ': SPOILER: ' + x['text'].strip()
|
||||||
|
else:
|
||||||
|
return x['category'].strip() + ': ' + x['text'].strip()
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLGoofsParser(DOMParserBase):
|
class DOMHTMLGoofsParser(DOMParserBase):
|
||||||
"""Parser for the "goofs" page of a given movie.
|
"""Parser for the "goofs" page of a given movie.
|
||||||
The page should be provided as a string, as taken from
|
The page should be provided as a string, as taken from
|
||||||
|
@ -858,9 +871,14 @@ class DOMHTMLGoofsParser(DOMParserBase):
|
||||||
"""
|
"""
|
||||||
_defGetRefs = True
|
_defGetRefs = True
|
||||||
|
|
||||||
extractors = [Extractor(label='goofs', path="//ul[@class='trivia']/li",
|
extractors = [Extractor(label='goofs', path="//div[@class='soda odd']",
|
||||||
attrs=Attribute(key='goofs', multi=True, path=".//text()",
|
attrs=Attribute(key='goofs', multi=True,
|
||||||
postprocess=lambda x: (x or u'').strip()))]
|
path={
|
||||||
|
'text':"./text()",
|
||||||
|
'category':'./preceding-sibling::h4[1]/text()',
|
||||||
|
'spoiler_category': './h4/text()'
|
||||||
|
},
|
||||||
|
postprocess=_process_goof))]
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLQuotesParser(DOMParserBase):
|
class DOMHTMLQuotesParser(DOMParserBase):
|
||||||
|
@ -876,9 +894,16 @@ class DOMHTMLQuotesParser(DOMParserBase):
|
||||||
_defGetRefs = True
|
_defGetRefs = True
|
||||||
|
|
||||||
extractors = [
|
extractors = [
|
||||||
Extractor(label='quotes',
|
Extractor(label='quotes_odd',
|
||||||
path="//div[@class='_imdbpy']",
|
path="//div[@class='quote soda odd']",
|
||||||
attrs=Attribute(key='quotes',
|
attrs=Attribute(key='quotes_odd',
|
||||||
|
multi=True,
|
||||||
|
path=".//text()",
|
||||||
|
postprocess=lambda x: x.strip().replace(' \n',
|
||||||
|
'::').replace('::\n', '::').replace('\n', ' '))),
|
||||||
|
Extractor(label='quotes_even',
|
||||||
|
path="//div[@class='quote soda even']",
|
||||||
|
attrs=Attribute(key='quotes_even',
|
||||||
multi=True,
|
multi=True,
|
||||||
path=".//text()",
|
path=".//text()",
|
||||||
postprocess=lambda x: x.strip().replace(' \n',
|
postprocess=lambda x: x.strip().replace(' \n',
|
||||||
|
@ -886,27 +911,23 @@ class DOMHTMLQuotesParser(DOMParserBase):
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocessors = [
|
preprocessors = [
|
||||||
(re.compile('(<a name="?qt[0-9]{7}"?></a>)', re.I),
|
(re.compile('<a href="#" class="hidesoda hidden">Hide options</a><br>', re.I), '')
|
||||||
r'\1<div class="_imdbpy">'),
|
]
|
||||||
(re.compile('<hr width="30%">', re.I), '</div>'),
|
|
||||||
(re.compile('<hr/>', re.I), '</div>'),
|
|
||||||
(re.compile('<script.*?</script>', re.I|re.S), ''),
|
|
||||||
# For BeautifulSoup.
|
|
||||||
(re.compile('<!-- sid: t-channel : MIDDLE_CENTER -->', re.I), '</div>')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_dom(self, dom):
|
def preprocess_dom(self, dom):
|
||||||
# Remove "link this quote" links.
|
# Remove "link this quote" links.
|
||||||
for qLink in self.xpath(dom, "//p[@class='linksoda']"):
|
for qLink in self.xpath(dom, "//span[@class='linksoda']"):
|
||||||
|
qLink.drop_tree()
|
||||||
|
for qLink in self.xpath(dom, "//div[@class='sharesoda_pre']"):
|
||||||
qLink.drop_tree()
|
qLink.drop_tree()
|
||||||
return dom
|
return dom
|
||||||
|
|
||||||
def postprocess_data(self, data):
|
def postprocess_data(self, data):
|
||||||
if 'quotes' not in data:
|
quotes = data.get('quotes_odd', []) + data.get('quotes_even', [])
|
||||||
|
if not quotes:
|
||||||
return {}
|
return {}
|
||||||
for idx, quote in enumerate(data['quotes']):
|
quotes = [q.split('::') for q in quotes]
|
||||||
data['quotes'][idx] = quote.split('::')
|
return {'quotes': quotes}
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
class DOMHTMLReleaseinfoParser(DOMParserBase):
|
class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||||
|
@ -920,13 +941,13 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||||
result = rdparser.parse(releaseinfo_html_string)
|
result = rdparser.parse(releaseinfo_html_string)
|
||||||
"""
|
"""
|
||||||
extractors = [Extractor(label='release dates',
|
extractors = [Extractor(label='release dates',
|
||||||
path="//th[@class='xxxx']/../../tr",
|
path="//table[@id='release_dates']//tr",
|
||||||
attrs=Attribute(key='release dates', multi=True,
|
attrs=Attribute(key='release dates', multi=True,
|
||||||
path={'country': ".//td[1]//text()",
|
path={'country': ".//td[1]//text()",
|
||||||
'date': ".//td[2]//text()",
|
'date': ".//td[2]//text()",
|
||||||
'notes': ".//td[3]//text()"})),
|
'notes': ".//td[3]//text()"})),
|
||||||
Extractor(label='akas',
|
Extractor(label='akas',
|
||||||
path="//div[@class='_imdbpy_akas']/table/tr",
|
path="//table[@id='akas']//tr",
|
||||||
attrs=Attribute(key='akas', multi=True,
|
attrs=Attribute(key='akas', multi=True,
|
||||||
path={'title': "./td[1]/text()",
|
path={'title': "./td[1]/text()",
|
||||||
'countries': "./td[2]/text()"}))]
|
'countries': "./td[2]/text()"}))]
|
||||||
|
@ -961,7 +982,7 @@ class DOMHTMLReleaseinfoParser(DOMParserBase):
|
||||||
title = (aka.get('title') or '').strip()
|
title = (aka.get('title') or '').strip()
|
||||||
if not title:
|
if not title:
|
||||||
continue
|
continue
|
||||||
countries = (aka.get('countries') or '').split('/')
|
countries = (aka.get('countries') or '').split(',')
|
||||||
if not countries:
|
if not countries:
|
||||||
nakas.append(title)
|
nakas.append(title)
|
||||||
else:
|
else:
|
||||||
|
@ -1135,7 +1156,28 @@ def _normalize_href(href):
|
||||||
href = '%s%s' % (imdbURL_base, href)
|
href = '%s%s' % (imdbURL_base, href)
|
||||||
return href
|
return href
|
||||||
|
|
||||||
|
class DOMHTMLCriticReviewsParser(DOMParserBase):
|
||||||
|
"""Parser for the "critic reviews" pages of a given movie.
|
||||||
|
The page should be provided as a string, as taken from
|
||||||
|
the akas.imdb.com server. The final result will be a
|
||||||
|
dictionary, with a key for every relevant section.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
crparser = DOMHTMLCriticReviewsParser()
|
||||||
|
result = crparser.parse(criticreviews_html_string)
|
||||||
|
"""
|
||||||
|
kind = 'critic reviews'
|
||||||
|
|
||||||
|
extractors = [
|
||||||
|
Extractor(label='metascore',
|
||||||
|
path="//div[@class='metascore_wrap']/div/span",
|
||||||
|
attrs=Attribute(key='metascore',
|
||||||
|
path=".//text()")),
|
||||||
|
Extractor(label='metacritic url',
|
||||||
|
path="//div[@class='article']/div[@class='see-more']/a",
|
||||||
|
attrs=Attribute(key='metacritic url',
|
||||||
|
path="./@href")) ]
|
||||||
|
|
||||||
class DOMHTMLOfficialsitesParser(DOMParserBase):
|
class DOMHTMLOfficialsitesParser(DOMParserBase):
|
||||||
"""Parser for the "official sites", "external reviews", "newsgroup
|
"""Parser for the "official sites", "external reviews", "newsgroup
|
||||||
reviews", "miscellaneous links", "sound clips", "video clips" and
|
reviews", "miscellaneous links", "sound clips", "video clips" and
|
||||||
|
@ -1471,6 +1513,14 @@ class DOMHTMLSeasonEpisodesParser(DOMParserBase):
|
||||||
try: selected_season = int(selected_season)
|
try: selected_season = int(selected_season)
|
||||||
except: pass
|
except: pass
|
||||||
nd = {selected_season: {}}
|
nd = {selected_season: {}}
|
||||||
|
if 'episode -1' in data:
|
||||||
|
counter = 1
|
||||||
|
for episode in data['episode -1']:
|
||||||
|
while 'episode %d' % counter in data:
|
||||||
|
counter += 1
|
||||||
|
k = 'episode %d' % counter
|
||||||
|
data[k] = [episode]
|
||||||
|
del data['episode -1']
|
||||||
for episode_nr, episode in data.iteritems():
|
for episode_nr, episode in data.iteritems():
|
||||||
if not (episode and episode[0] and
|
if not (episode and episode[0] and
|
||||||
episode_nr.startswith('episode ')):
|
episode_nr.startswith('episode ')):
|
||||||
|
@ -1860,6 +1910,8 @@ _OBJECTS = {
|
||||||
'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
|
'releasedates_parser': ((DOMHTMLReleaseinfoParser,), None),
|
||||||
'ratings_parser': ((DOMHTMLRatingsParser,), None),
|
'ratings_parser': ((DOMHTMLRatingsParser,), None),
|
||||||
'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
|
'officialsites_parser': ((DOMHTMLOfficialsitesParser,), None),
|
||||||
|
'criticrev_parser': ((DOMHTMLCriticReviewsParser,),
|
||||||
|
{'kind': 'critic reviews'}),
|
||||||
'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
|
'externalrev_parser': ((DOMHTMLOfficialsitesParser,),
|
||||||
{'kind': 'external reviews'}),
|
{'kind': 'external reviews'}),
|
||||||
'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
|
'newsgrouprev_parser': ((DOMHTMLOfficialsitesParser,),
|
||||||
|
|
|
@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
|
||||||
biography: http://akas.imdb.com/name/nm0000154/bio
|
biography: http://akas.imdb.com/name/nm0000154/bio
|
||||||
...and so on...
|
...and so on...
|
||||||
|
|
||||||
Copyright 2004-20101 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -60,6 +60,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
result = cparser.parse(categorized_html_string)
|
result = cparser.parse(categorized_html_string)
|
||||||
"""
|
"""
|
||||||
_containsObjects = True
|
_containsObjects = True
|
||||||
|
_name_imdb_index = re.compile(r'\([IVXLCDM]+\)')
|
||||||
|
|
||||||
_birth_attrs = [Attribute(key='birth date',
|
_birth_attrs = [Attribute(key='birth date',
|
||||||
path='.//time[@itemprop="birthDate"]/@datetime'),
|
path='.//time[@itemprop="birthDate"]/@datetime'),
|
||||||
|
@ -100,6 +101,10 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
path=".//text()",
|
path=".//text()",
|
||||||
postprocess=lambda x: analyze_name(x,
|
postprocess=lambda x: analyze_name(x,
|
||||||
canonical=1))),
|
canonical=1))),
|
||||||
|
Extractor(label='name_index',
|
||||||
|
path="//h1[@class='header']/span[1]",
|
||||||
|
attrs=Attribute(key='name_index',
|
||||||
|
path="./text()")),
|
||||||
|
|
||||||
Extractor(label='birth info',
|
Extractor(label='birth info',
|
||||||
path="//div[h4='Born:']",
|
path="//div[h4='Born:']",
|
||||||
|
@ -110,7 +115,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
attrs=_death_attrs),
|
attrs=_death_attrs),
|
||||||
|
|
||||||
Extractor(label='headshot',
|
Extractor(label='headshot',
|
||||||
path="//td[@id='img_primary']/a",
|
path="//td[@id='img_primary']/div[@class='image']/a",
|
||||||
attrs=Attribute(key='headshot',
|
attrs=Attribute(key='headshot',
|
||||||
path="./img/@src")),
|
path="./img/@src")),
|
||||||
|
|
||||||
|
@ -152,6 +157,11 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
|
||||||
for what in 'birth date', 'death date':
|
for what in 'birth date', 'death date':
|
||||||
if what in data and not data[what]:
|
if what in data and not data[what]:
|
||||||
del data[what]
|
del data[what]
|
||||||
|
name_index = (data.get('name_index') or '').strip()
|
||||||
|
if name_index:
|
||||||
|
if self._name_imdb_index.match(name_index):
|
||||||
|
data['imdbIndex'] = name_index[1:-1]
|
||||||
|
del data['name_index']
|
||||||
# XXX: the code below is for backwards compatibility
|
# XXX: the code below is for backwards compatibility
|
||||||
# probably could be removed
|
# probably could be removed
|
||||||
for key in data.keys():
|
for key in data.keys():
|
||||||
|
@ -220,13 +230,13 @@ class DOMHTMLBioParser(DOMParserBase):
|
||||||
attrs=Attribute(key='headshot',
|
attrs=Attribute(key='headshot',
|
||||||
path="./img/@src")),
|
path="./img/@src")),
|
||||||
Extractor(label='birth info',
|
Extractor(label='birth info',
|
||||||
path="//div[h5='Date of Birth']",
|
path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
|
||||||
attrs=_birth_attrs),
|
attrs=_birth_attrs),
|
||||||
Extractor(label='death info',
|
Extractor(label='death info',
|
||||||
path="//div[h5='Date of Death']",
|
path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
|
||||||
attrs=_death_attrs),
|
attrs=_death_attrs),
|
||||||
Extractor(label='nick names',
|
Extractor(label='nick names',
|
||||||
path="//div[h5='Nickname']",
|
path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
|
||||||
attrs=Attribute(key='nick names',
|
attrs=Attribute(key='nick names',
|
||||||
path="./text()",
|
path="./text()",
|
||||||
joiner='|',
|
joiner='|',
|
||||||
|
@ -234,25 +244,25 @@ class DOMHTMLBioParser(DOMParserBase):
|
||||||
'::(', 1) for n in x.split('|')
|
'::(', 1) for n in x.split('|')
|
||||||
if n.strip()])),
|
if n.strip()])),
|
||||||
Extractor(label='birth name',
|
Extractor(label='birth name',
|
||||||
path="//div[h5='Birth Name']",
|
path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
|
||||||
attrs=Attribute(key='birth name',
|
attrs=Attribute(key='birth name',
|
||||||
path="./text()",
|
path="./text()",
|
||||||
postprocess=lambda x: canonicalName(x.strip()))),
|
postprocess=lambda x: canonicalName(x.strip()))),
|
||||||
Extractor(label='height',
|
Extractor(label='height',
|
||||||
path="//div[h5='Height']",
|
path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
|
||||||
attrs=Attribute(key='height',
|
attrs=Attribute(key='height',
|
||||||
path="./text()",
|
path="./text()",
|
||||||
postprocess=lambda x: x.strip())),
|
postprocess=lambda x: x.strip())),
|
||||||
Extractor(label='mini biography',
|
Extractor(label='mini biography',
|
||||||
path="//div[h5='Mini Biography']",
|
path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
|
||||||
attrs=Attribute(key='mini biography',
|
attrs=Attribute(key='mini biography',
|
||||||
multi=True,
|
multi=True,
|
||||||
path={
|
path={
|
||||||
'bio': "./p//text()",
|
'bio': ".//text()",
|
||||||
'by': "./b/following-sibling::a/text()"
|
'by': ".//a[@name='ba']//text()"
|
||||||
},
|
},
|
||||||
postprocess=lambda x: "%s::%s" % \
|
postprocess=lambda x: "%s::%s" % \
|
||||||
(x.get('bio').strip(),
|
((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
|
||||||
(x.get('by') or u'').strip() or u'Anonymous'))),
|
(x.get('by') or u'').strip() or u'Anonymous'))),
|
||||||
Extractor(label='spouse',
|
Extractor(label='spouse',
|
||||||
path="//div[h5='Spouse']/table/tr",
|
path="//div[h5='Spouse']/table/tr",
|
||||||
|
|
|
@ -5,9 +5,9 @@ This module provides the HTMLSearchCharacterParser class (and the
|
||||||
search_character_parser instance), used to parse the results of a search
|
search_character_parser instance), used to parse the results of a search
|
||||||
for a given character.
|
for a given character.
|
||||||
E.g., when searching for the name "Jesse James", the parsed page would be:
|
E.g., when searching for the name "Jesse James", the parsed page would be:
|
||||||
http://akas.imdb.com/find?s=Characters;mx=20;q=Jesse+James
|
http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
|
||||||
|
|
||||||
Copyright 2007-2009 Davide Alberani <da@erlug.linux.it>
|
Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -42,7 +42,7 @@ class DOMBasicCharacterParser(DOMBasicMovieParser):
|
||||||
|
|
||||||
class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
||||||
_BaseParser = DOMBasicCharacterParser
|
_BaseParser = DOMBasicCharacterParser
|
||||||
_notDirectHitTitle = '<title>imdb search'
|
_notDirectHitTitle = '<title>find - imdb'
|
||||||
_titleBuilder = lambda self, x: build_name(x, canonical=False)
|
_titleBuilder = lambda self, x: build_name(x, canonical=False)
|
||||||
_linkPrefix = '/character/ch'
|
_linkPrefix = '/character/ch'
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ class DOMHTMLSearchCharacterParser(DOMHTMLSearchMovieParser):
|
||||||
{'name': x.get('name')}
|
{'name': x.get('name')}
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, " \
|
path="//td[@class='result_text']/a[starts-with(@href, " \
|
||||||
"'/character/ch')]/..",
|
"'/character/ch')]/..",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ for a given company.
|
||||||
E.g., when searching for the name "Columbia Pictures", the parsed page would be:
|
E.g., when searching for the name "Columbia Pictures", the parsed page would be:
|
||||||
http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
|
http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
|
||||||
|
|
||||||
Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
|
Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -42,7 +42,7 @@ class DOMBasicCompanyParser(DOMBasicMovieParser):
|
||||||
|
|
||||||
class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
||||||
_BaseParser = DOMBasicCompanyParser
|
_BaseParser = DOMBasicCompanyParser
|
||||||
_notDirectHitTitle = '<title>imdb company'
|
_notDirectHitTitle = '<title>find - imdb'
|
||||||
_titleBuilder = lambda self, x: build_company_name(x)
|
_titleBuilder = lambda self, x: build_company_name(x)
|
||||||
_linkPrefix = '/company/co'
|
_linkPrefix = '/company/co'
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
|
||||||
or u''), stripNotes=True)
|
or u''), stripNotes=True)
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, " \
|
path="//td[@class='result_text']/a[starts-with(@href, " \
|
||||||
"'/company/co')]/..",
|
"'/company/co')]/..",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ E.g., for when searching for the title "the passion", the parsed
|
||||||
page would be:
|
page would be:
|
||||||
http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
|
http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
|
||||||
|
|
||||||
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -77,7 +77,7 @@ class DOMBasicMovieParser(DOMParserBase):
|
||||||
def custom_analyze_title(title):
|
def custom_analyze_title(title):
|
||||||
"""Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
|
"""Remove garbage notes after the (year), (year/imdbIndex) or (year) (TV)"""
|
||||||
# XXX: very crappy. :-(
|
# XXX: very crappy. :-(
|
||||||
nt = title.split(' ')[0]
|
nt = title.split(' aka ')[0]
|
||||||
if nt:
|
if nt:
|
||||||
title = nt
|
title = nt
|
||||||
if not title:
|
if not title:
|
||||||
|
@ -92,7 +92,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
"new search system" is used, for movies."""
|
"new search system" is used, for movies."""
|
||||||
|
|
||||||
_BaseParser = DOMBasicMovieParser
|
_BaseParser = DOMBasicMovieParser
|
||||||
_notDirectHitTitle = '<title>imdb title'
|
_notDirectHitTitle = '<title>find - imdb</title>'
|
||||||
_titleBuilder = lambda self, x: build_title(x)
|
_titleBuilder = lambda self, x: build_title(x)
|
||||||
_linkPrefix = '/title/tt'
|
_linkPrefix = '/title/tt'
|
||||||
|
|
||||||
|
@ -101,8 +101,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
path={
|
path={
|
||||||
'link': "./a[1]/@href",
|
'link': "./a[1]/@href",
|
||||||
'info': ".//text()",
|
'info': ".//text()",
|
||||||
#'akas': ".//div[@class='_imdbpyAKA']//text()"
|
'akas': "./i//text()"
|
||||||
'akas': ".//p[@class='find-aka']//text()"
|
|
||||||
},
|
},
|
||||||
postprocess=lambda x: (
|
postprocess=lambda x: (
|
||||||
analyze_imdbid(x.get('link') or u''),
|
analyze_imdbid(x.get('link') or u''),
|
||||||
|
@ -110,7 +109,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
x.get('akas')
|
x.get('akas')
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, '/title/tt')]/..",
|
path="//td[@class='result_text']",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.url = u''
|
self.url = u''
|
||||||
|
@ -119,14 +118,11 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
self.url = u''
|
self.url = u''
|
||||||
|
|
||||||
def preprocess_string(self, html_string):
|
def preprocess_string(self, html_string):
|
||||||
if self._notDirectHitTitle in html_string[:1024].lower():
|
if self._notDirectHitTitle in html_string[:10240].lower():
|
||||||
if self._linkPrefix == '/title/tt':
|
if self._linkPrefix == '/title/tt':
|
||||||
# Only for movies.
|
# Only for movies.
|
||||||
|
# XXX (HTU): does this still apply?
|
||||||
html_string = html_string.replace('(TV mini-series)', '(mini)')
|
html_string = html_string.replace('(TV mini-series)', '(mini)')
|
||||||
html_string = html_string.replace('<p class="find-aka">',
|
|
||||||
'<p class="find-aka">::')
|
|
||||||
#html_string = _reAKAStitles.sub(
|
|
||||||
# r'<div class="_imdbpyAKA">\1::</div>\2', html_string)
|
|
||||||
return html_string
|
return html_string
|
||||||
# Direct hit!
|
# Direct hit!
|
||||||
dbme = self._BaseParser(useModule=self._useModule)
|
dbme = self._BaseParser(useModule=self._useModule)
|
||||||
|
@ -141,7 +137,7 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
title = self._titleBuilder(res[0][1])
|
title = self._titleBuilder(res[0][1])
|
||||||
if not (link and title): return u''
|
if not (link and title): return u''
|
||||||
link = link.replace('http://pro.imdb.com', '')
|
link = link.replace('http://pro.imdb.com', '')
|
||||||
new_html = '<td></td><td></td><td><a href="%s">%s</a></td>' % (link,
|
new_html = '<td class="result_text"><a href="%s">%s</a></td>' % (link,
|
||||||
title)
|
title)
|
||||||
return new_html
|
return new_html
|
||||||
|
|
||||||
|
@ -161,11 +157,14 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
|
||||||
if not datum[0] and datum[1]:
|
if not datum[0] and datum[1]:
|
||||||
continue
|
continue
|
||||||
if datum[2] is not None:
|
if datum[2] is not None:
|
||||||
akas = filter(None, datum[2].split('::'))
|
#akas = filter(None, datum[2].split('::'))
|
||||||
if self._linkPrefix == '/title/tt':
|
if self._linkPrefix == '/title/tt':
|
||||||
akas = [a.replace('" - ', '::').rstrip() for a in akas]
|
# XXX (HTU): couldn't find a result with multiple akas
|
||||||
akas = [a.replace('aka "', '', 1).replace('aka "',
|
aka = datum[2]
|
||||||
'', 1).lstrip() for a in akas]
|
akas = [aka[1:-1]] # remove the quotes
|
||||||
|
#akas = [a.replace('" - ', '::').rstrip() for a in akas]
|
||||||
|
#akas = [a.replace('aka "', '', 1).replace('aka "',
|
||||||
|
#'', 1).lstrip() for a in akas]
|
||||||
datum[1]['akas'] = akas
|
datum[1]['akas'] = akas
|
||||||
data['data'][idx] = (datum[0], datum[1])
|
data['data'][idx] = (datum[0], datum[1])
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -7,7 +7,7 @@ for a given person.
|
||||||
E.g., when searching for the name "Mel Gibson", the parsed page would be:
|
E.g., when searching for the name "Mel Gibson", the parsed page would be:
|
||||||
http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
|
http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
|
||||||
|
|
||||||
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2008 H. Turgut Uyar <uyar@tekir.org>
|
2008 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -55,7 +55,7 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
|
||||||
"""Parse the html page that the IMDb web server shows when the
|
"""Parse the html page that the IMDb web server shows when the
|
||||||
"new search system" is used, for persons."""
|
"new search system" is used, for persons."""
|
||||||
_BaseParser = DOMBasicPersonParser
|
_BaseParser = DOMBasicPersonParser
|
||||||
_notDirectHitTitle = '<title>imdb name'
|
_notDirectHitTitle = '<title>find - imdb'
|
||||||
_titleBuilder = lambda self, x: build_name(x, canonical=True)
|
_titleBuilder = lambda self, x: build_name(x, canonical=True)
|
||||||
_linkPrefix = '/name/nm'
|
_linkPrefix = '/name/nm'
|
||||||
|
|
||||||
|
@ -74,11 +74,11 @@ class DOMHTMLSearchPersonParser(DOMHTMLSearchMovieParser):
|
||||||
canonical=1), x.get('akas')
|
canonical=1), x.get('akas')
|
||||||
))]
|
))]
|
||||||
extractors = [Extractor(label='search',
|
extractors = [Extractor(label='search',
|
||||||
path="//td[3]/a[starts-with(@href, '/name/nm')]/..",
|
path="//td[@class='result_text']/a[starts-with(@href, '/name/nm')]/..",
|
||||||
attrs=_attrs)]
|
attrs=_attrs)]
|
||||||
|
|
||||||
def preprocess_string(self, html_string):
|
def preprocess_string(self, html_string):
|
||||||
if self._notDirectHitTitle in html_string[:1024].lower():
|
if self._notDirectHitTitle in html_string[:10240].lower():
|
||||||
html_string = _reAKASp.sub(
|
html_string = _reAKASp.sub(
|
||||||
r'\1<div class="_imdbpyAKA">\2::</div>\3',
|
r'\1<div class="_imdbpyAKA">\2::</div>\3',
|
||||||
html_string)
|
html_string)
|
||||||
|
|
|
@ -340,7 +340,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
|
||||||
title = title[:nidx].rstrip()
|
title = title[:nidx].rstrip()
|
||||||
if year:
|
if year:
|
||||||
year = year.strip()
|
year = year.strip()
|
||||||
if title[-1] == ')':
|
if title[-1:] == ')':
|
||||||
fpIdx = title.rfind('(')
|
fpIdx = title.rfind('(')
|
||||||
if fpIdx != -1:
|
if fpIdx != -1:
|
||||||
if notes: notes = '%s %s' % (title[fpIdx:], notes)
|
if notes: notes = '%s %s' % (title[fpIdx:], notes)
|
||||||
|
|
|
@ -6,7 +6,7 @@ IMDb's data for mobile systems.
|
||||||
the imdb.IMDb function will return an instance of this class when
|
the imdb.IMDb function will return an instance of this class when
|
||||||
called with the 'accessSystem' argument set to "mobile".
|
called with the 'accessSystem' argument set to "mobile".
|
||||||
|
|
||||||
Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
|
Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -193,7 +193,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
title)
|
title)
|
||||||
return res
|
return res
|
||||||
tl = title[0].lower()
|
tl = title[0].lower()
|
||||||
if not tl.startswith('imdb title'):
|
if not tl.startswith('find - imdb'):
|
||||||
# a direct hit!
|
# a direct hit!
|
||||||
title = _unHtml(title[0])
|
title = _unHtml(title[0])
|
||||||
mid = None
|
mid = None
|
||||||
|
@ -211,7 +211,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
# XXX: this results*3 prevents some recursion errors, but...
|
# XXX: this results*3 prevents some recursion errors, but...
|
||||||
# it's not exactly understandable (i.e.: why 'results' is
|
# it's not exactly understandable (i.e.: why 'results' is
|
||||||
# not enough to get all the results?)
|
# not enough to get all the results?)
|
||||||
lis = _findBetween(cont, 'td valign="top">', '</td>',
|
lis = _findBetween(cont, 'td class="result_text">', '</td>',
|
||||||
maxRes=results*3)
|
maxRes=results*3)
|
||||||
for li in lis:
|
for li in lis:
|
||||||
akas = re_makas.findall(li)
|
akas = re_makas.findall(li)
|
||||||
|
@ -492,7 +492,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
self._mobile_logger.warn('no title tag searching for name %s', name)
|
self._mobile_logger.warn('no title tag searching for name %s', name)
|
||||||
return res
|
return res
|
||||||
nl = name[0].lower()
|
nl = name[0].lower()
|
||||||
if not nl.startswith('imdb name'):
|
if not nl.startswith('find - imdb'):
|
||||||
# a direct hit!
|
# a direct hit!
|
||||||
name = _unHtml(name[0])
|
name = _unHtml(name[0])
|
||||||
name = name.replace('- Filmography by type' , '').strip()
|
name = name.replace('- Filmography by type' , '').strip()
|
||||||
|
@ -506,7 +506,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
return res
|
return res
|
||||||
res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
|
res[:] = [(str(pid[0]), analyze_name(name, canonical=1))]
|
||||||
else:
|
else:
|
||||||
lis = _findBetween(cont, 'td valign="top">', '</td>',
|
lis = _findBetween(cont, 'td class="result_text">', '</td>',
|
||||||
maxRes=results*3)
|
maxRes=results*3)
|
||||||
for li in lis:
|
for li in lis:
|
||||||
akas = _findBetween(li, '<em>"', '"</em>')
|
akas = _findBetween(li, '<em>"', '"</em>')
|
||||||
|
@ -771,7 +771,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
return {'data': d}
|
return {'data': d}
|
||||||
|
|
||||||
def _search_character(self, name, results):
|
def _search_character(self, name, results):
|
||||||
cont = subXMLRefs(self._get_search_content('char', name, results))
|
cont = subXMLRefs(self._get_search_content('ch', name, results))
|
||||||
name = _findBetween(cont, '<title>', '</title>', maxRes=1)
|
name = _findBetween(cont, '<title>', '</title>', maxRes=1)
|
||||||
res = []
|
res = []
|
||||||
if not name:
|
if not name:
|
||||||
|
@ -779,8 +779,7 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
name)
|
name)
|
||||||
return res
|
return res
|
||||||
nl = name[0].lower()
|
nl = name[0].lower()
|
||||||
if not (nl.startswith('imdb search') or nl.startswith('imdb search') \
|
if not nl.startswith('find - imdb'):
|
||||||
or nl.startswith('imdb character')):
|
|
||||||
# a direct hit!
|
# a direct hit!
|
||||||
name = _unHtml(name[0]).replace('(Character)', '').strip()
|
name = _unHtml(name[0]).replace('(Character)', '').strip()
|
||||||
pid = None
|
pid = None
|
||||||
|
@ -793,23 +792,18 @@ class IMDbMobileAccessSystem(IMDbHTTPAccessSystem):
|
||||||
return res
|
return res
|
||||||
res[:] = [(str(pid[0]), analyze_name(name))]
|
res[:] = [(str(pid[0]), analyze_name(name))]
|
||||||
else:
|
else:
|
||||||
sects = _findBetween(cont, '<b>Popular Characters</b>', '</table>',
|
lis = _findBetween(cont, '<td class="result_text"',
|
||||||
maxRes=results*3)
|
['<small', '</td>', '<br'])
|
||||||
sects += _findBetween(cont, '<b>Characters', '</table>',
|
for li in lis:
|
||||||
maxRes=results*3)
|
li = '<%s' % li
|
||||||
for sect in sects:
|
pid = re_imdbID.findall(li)
|
||||||
lis = _findBetween(sect, '<a href="/character/',
|
pname = _unHtml(li)
|
||||||
['<small', '</td>', '<br'])
|
if not (pid and pname):
|
||||||
for li in lis:
|
self._mobile_logger.debug('no name/characterID' \
|
||||||
li = '<%s' % li
|
' parsing %s searching for' \
|
||||||
pid = re_imdbID.findall(li)
|
' character %s', li, name)
|
||||||
pname = _unHtml(li)
|
continue
|
||||||
if not (pid and pname):
|
res.append((str(pid[0]), analyze_name(pname)))
|
||||||
self._mobile_logger.debug('no name/characterID' \
|
|
||||||
' parsing %s searching for' \
|
|
||||||
' character %s', li, name)
|
|
||||||
continue
|
|
||||||
res.append((str(pid[0]), analyze_name(pname)))
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def get_character_main(self, characterID):
|
def get_character_main(self, characterID):
|
||||||
|
|
|
@ -7,7 +7,7 @@ the SQLObject _AND_ SQLAlchemy Object Relational Managers is available.
|
||||||
the imdb.IMDb function will return an instance of this class when
|
the imdb.IMDb function will return an instance of this class when
|
||||||
called with the 'accessSystem' argument set to "sql", "database" or "db".
|
called with the 'accessSystem' argument set to "sql", "database" or "db".
|
||||||
|
|
||||||
Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
|
Copyright 2005-2012 Davide Alberani <da@erlug.linux.it>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -452,7 +452,12 @@ def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
|
||||||
else:
|
else:
|
||||||
if not fromAka: Table = Title
|
if not fromAka: Table = Title
|
||||||
else: Table = AkaTitle
|
else: Table = AkaTitle
|
||||||
m = Table.get(movieID)
|
try:
|
||||||
|
m = Table.get(movieID)
|
||||||
|
except Exception, e:
|
||||||
|
_aux_logger.warn('Unable to fetch information for movieID %s: %s', movieID, e)
|
||||||
|
mdict = {}
|
||||||
|
return mdict
|
||||||
mdict = {'title': m.title, 'kind': kindDict[m.kindID],
|
mdict = {'title': m.title, 'kind': kindDict[m.kindID],
|
||||||
'year': m.productionYear, 'imdbIndex': m.imdbIndex,
|
'year': m.productionYear, 'imdbIndex': m.imdbIndex,
|
||||||
'season': m.seasonNr, 'episode': m.episodeNr}
|
'season': m.seasonNr, 'episode': m.episodeNr}
|
||||||
|
@ -825,14 +830,14 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
imdbID = movie.imdbID
|
imdbID = movie.imdbID
|
||||||
if imdbID is not None: return '%07d' % imdbID
|
if imdbID is not None: return '%07d' % imdbID
|
||||||
m_dict = get_movie_data(movie.id, self._kind)
|
m_dict = get_movie_data(movie.id, self._kind)
|
||||||
titline = build_title(m_dict, ptdf=1)
|
titline = build_title(m_dict, ptdf=0)
|
||||||
imdbID = self.title2imdbID(titline)
|
imdbID = self.title2imdbID(titline, m_dict['kind'])
|
||||||
# If the imdbID was retrieved from the web and was not in the
|
# If the imdbID was retrieved from the web and was not in the
|
||||||
# database, update the database (ignoring errors, because it's
|
# database, update the database (ignoring errors, because it's
|
||||||
# possible that the current user does not have update privileges).
|
# possible that the current user does not have update privileges).
|
||||||
# There're times when I think I'm a genius; this is one of
|
# There're times when I think I'm a genius; this is one of
|
||||||
# those times... <g>
|
# those times... <g>
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: movie.imdbID = int(imdbID)
|
try: movie.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -847,9 +852,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
imdbID = person.imdbID
|
imdbID = person.imdbID
|
||||||
if imdbID is not None: return '%07d' % imdbID
|
if imdbID is not None: return '%07d' % imdbID
|
||||||
n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
|
n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
|
||||||
namline = build_name(n_dict, canonical=1)
|
namline = build_name(n_dict, canonical=False)
|
||||||
imdbID = self.name2imdbID(namline)
|
imdbID = self.name2imdbID(namline)
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: person.imdbID = int(imdbID)
|
try: person.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -864,9 +869,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
imdbID = character.imdbID
|
imdbID = character.imdbID
|
||||||
if imdbID is not None: return '%07d' % imdbID
|
if imdbID is not None: return '%07d' % imdbID
|
||||||
n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
|
n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
|
||||||
namline = build_name(n_dict, canonical=1)
|
namline = build_name(n_dict, canonical=False)
|
||||||
imdbID = self.character2imdbID(namline)
|
imdbID = self.character2imdbID(namline)
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: character.imdbID = int(imdbID)
|
try: character.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -883,7 +888,7 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
n_dict = {'name': company.name, 'country': company.countryCode}
|
n_dict = {'name': company.name, 'country': company.countryCode}
|
||||||
namline = build_company_name(n_dict)
|
namline = build_company_name(n_dict)
|
||||||
imdbID = self.company2imdbID(namline)
|
imdbID = self.company2imdbID(namline)
|
||||||
if imdbID is not None:
|
if imdbID is not None and not isinstance(imdbID, list):
|
||||||
try: company.imdbID = int(imdbID)
|
try: company.imdbID = int(imdbID)
|
||||||
except: pass
|
except: pass
|
||||||
return imdbID
|
return imdbID
|
||||||
|
@ -1116,8 +1121,9 @@ class IMDbSqlAccessSystem(IMDbBase):
|
||||||
if mlinks:
|
if mlinks:
|
||||||
for ml in mlinks:
|
for ml in mlinks:
|
||||||
lmovieData = get_movie_data(ml[0], self._kind)
|
lmovieData = get_movie_data(ml[0], self._kind)
|
||||||
m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
|
if lmovieData:
|
||||||
ml[0] = m
|
m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
|
||||||
|
ml[0] = m
|
||||||
res['connections'] = {}
|
res['connections'] = {}
|
||||||
mlinks[:] = _groupListBy(mlinks, 1)
|
mlinks[:] = _groupListBy(mlinks, 1)
|
||||||
for group in mlinks:
|
for group in mlinks:
|
||||||
|
|
|
@ -466,6 +466,7 @@ class _AlchemyConnection(object):
|
||||||
|
|
||||||
def setConnection(uri, tables, encoding='utf8', debug=False):
|
def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||||
"""Set connection for every table."""
|
"""Set connection for every table."""
|
||||||
|
params = {'encoding': encoding}
|
||||||
# FIXME: why on earth MySQL requires an additional parameter,
|
# FIXME: why on earth MySQL requires an additional parameter,
|
||||||
# is well beyond my understanding...
|
# is well beyond my understanding...
|
||||||
if uri.startswith('mysql'):
|
if uri.startswith('mysql'):
|
||||||
|
@ -474,7 +475,11 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||||
else:
|
else:
|
||||||
uri += '?'
|
uri += '?'
|
||||||
uri += 'charset=%s' % encoding
|
uri += 'charset=%s' % encoding
|
||||||
params = {'encoding': encoding}
|
|
||||||
|
# On some server configurations, we will need to explictly enable
|
||||||
|
# loading data from local files
|
||||||
|
params['local_infile'] = 1
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
params['echo'] = True
|
params['echo'] = True
|
||||||
if uri.startswith('ibm_db'):
|
if uri.startswith('ibm_db'):
|
||||||
|
|
Binary file not shown.
|
@ -182,6 +182,10 @@ def setConnection(uri, tables, encoding='utf8', debug=False):
|
||||||
kw['use_unicode'] = 1
|
kw['use_unicode'] = 1
|
||||||
#kw['sqlobject_encoding'] = encoding
|
#kw['sqlobject_encoding'] = encoding
|
||||||
kw['charset'] = encoding
|
kw['charset'] = encoding
|
||||||
|
|
||||||
|
# On some server configurations, we will need to explictly enable
|
||||||
|
# loading data from local files
|
||||||
|
kw['local_infile'] = 1
|
||||||
conn = connectionForURI(uri, **kw)
|
conn = connectionForURI(uri, **kw)
|
||||||
conn.debug = debug
|
conn.debug = debug
|
||||||
# XXX: doesn't work and a work-around was put in imdbpy2sql.py;
|
# XXX: doesn't work and a work-around was put in imdbpy2sql.py;
|
||||||
|
|
|
@ -3,7 +3,7 @@ utils module (imdb package).
|
||||||
|
|
||||||
This module provides basic utilities for the imdb package.
|
This module provides basic utilities for the imdb package.
|
||||||
|
|
||||||
Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
|
Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
|
||||||
2009 H. Turgut Uyar <uyar@tekir.org>
|
2009 H. Turgut Uyar <uyar@tekir.org>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
@ -189,10 +189,9 @@ _unicodeArticles = linguistics.toUnicode(_articles)
|
||||||
articlesDicts = linguistics.articlesDictsForLang(None)
|
articlesDicts = linguistics.articlesDictsForLang(None)
|
||||||
spArticles = linguistics.spArticlesForLang(None)
|
spArticles = linguistics.spArticlesForLang(None)
|
||||||
|
|
||||||
def canonicalTitle(title, lang=None):
|
def canonicalTitle(title, lang=None, imdbIndex=None):
|
||||||
"""Return the title in the canonic format 'Movie Title, The';
|
"""Return the title in the canonic format 'Movie Title, The';
|
||||||
beware that it doesn't handle long imdb titles, but only the
|
beware that it doesn't handle long imdb titles.
|
||||||
title portion, without year[/imdbIndex] or special markup.
|
|
||||||
The 'lang' argument can be used to specify the language of the title.
|
The 'lang' argument can be used to specify the language of the title.
|
||||||
"""
|
"""
|
||||||
isUnicode = isinstance(title, unicode)
|
isUnicode = isinstance(title, unicode)
|
||||||
|
@ -203,15 +202,19 @@ def canonicalTitle(title, lang=None):
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
if isUnicode:
|
if isUnicode:
|
||||||
_format = u'%s, %s'
|
_format = u'%s%s, %s'
|
||||||
else:
|
else:
|
||||||
_format = '%s, %s'
|
_format = '%s%s, %s'
|
||||||
ltitle = title.lower()
|
ltitle = title.lower()
|
||||||
|
if imdbIndex:
|
||||||
|
imdbIndex = ' (%s)' % imdbIndex
|
||||||
|
else:
|
||||||
|
imdbIndex = ''
|
||||||
spArticles = linguistics.spArticlesForLang(lang)
|
spArticles = linguistics.spArticlesForLang(lang)
|
||||||
for article in spArticles[isUnicode]:
|
for article in spArticles[isUnicode]:
|
||||||
if ltitle.startswith(article):
|
if ltitle.startswith(article):
|
||||||
lart = len(article)
|
lart = len(article)
|
||||||
title = _format % (title[lart:], title[:lart])
|
title = _format % (title[lart:], imdbIndex, title[:lart])
|
||||||
if article[-1] == ' ':
|
if article[-1] == ' ':
|
||||||
title = title[:-1]
|
title = title[:-1]
|
||||||
break
|
break
|
||||||
|
@ -383,18 +386,42 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
|
||||||
if title.endswith('(TV)'):
|
if title.endswith('(TV)'):
|
||||||
kind = u'tv movie'
|
kind = u'tv movie'
|
||||||
title = title[:-4].rstrip()
|
title = title[:-4].rstrip()
|
||||||
|
elif title.endswith('(TV Movie)'):
|
||||||
|
kind = u'tv movie'
|
||||||
|
title = title[:-10].rstrip()
|
||||||
elif title.endswith('(V)'):
|
elif title.endswith('(V)'):
|
||||||
kind = u'video movie'
|
kind = u'video movie'
|
||||||
title = title[:-3].rstrip()
|
title = title[:-3].rstrip()
|
||||||
elif title.endswith('(video)'):
|
elif title.lower().endswith('(video)'):
|
||||||
kind = u'video movie'
|
kind = u'video movie'
|
||||||
title = title[:-7].rstrip()
|
title = title[:-7].rstrip()
|
||||||
|
elif title.endswith('(TV Short)'):
|
||||||
|
kind = u'tv short'
|
||||||
|
title = title[:-10].rstrip()
|
||||||
|
elif title.endswith('(TV Mini-Series)'):
|
||||||
|
kind = u'tv mini series'
|
||||||
|
title = title[:-16].rstrip()
|
||||||
elif title.endswith('(mini)'):
|
elif title.endswith('(mini)'):
|
||||||
kind = u'tv mini series'
|
kind = u'tv mini series'
|
||||||
title = title[:-6].rstrip()
|
title = title[:-6].rstrip()
|
||||||
elif title.endswith('(VG)'):
|
elif title.endswith('(VG)'):
|
||||||
kind = u'video game'
|
kind = u'video game'
|
||||||
title = title[:-4].rstrip()
|
title = title[:-4].rstrip()
|
||||||
|
elif title.endswith('(Video Game)'):
|
||||||
|
kind = u'video game'
|
||||||
|
title = title[:-12].rstrip()
|
||||||
|
elif title.endswith('(TV Series)'):
|
||||||
|
epindex = title.find('(TV Episode) - ')
|
||||||
|
if epindex >= 0:
|
||||||
|
# It's an episode of a series.
|
||||||
|
kind = u'episode'
|
||||||
|
series_info = analyze_title(title[epindex + 15:])
|
||||||
|
result['episode of'] = series_info.get('title')
|
||||||
|
result['series year'] = series_info.get('year')
|
||||||
|
title = title[:epindex]
|
||||||
|
else:
|
||||||
|
kind = u'tv series'
|
||||||
|
title = title[:-11].rstrip()
|
||||||
# Search for the year and the optional imdbIndex (a roman number).
|
# Search for the year and the optional imdbIndex (a roman number).
|
||||||
yi = re_year_index.findall(title)
|
yi = re_year_index.findall(title)
|
||||||
if not yi:
|
if not yi:
|
||||||
|
@ -430,9 +457,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
|
||||||
if not kind:
|
if not kind:
|
||||||
kind = u'tv series'
|
kind = u'tv series'
|
||||||
title = title[1:-1].strip()
|
title = title[1:-1].strip()
|
||||||
elif title.endswith('(TV series)'):
|
|
||||||
kind = u'tv series'
|
|
||||||
title = title[:-11].rstrip()
|
|
||||||
if not title:
|
if not title:
|
||||||
raise IMDbParserError('invalid title: "%s"' % original_t)
|
raise IMDbParserError('invalid title: "%s"' % original_t)
|
||||||
if canonical is not None:
|
if canonical is not None:
|
||||||
|
@ -489,7 +513,7 @@ def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
|
||||||
|
|
||||||
def build_title(title_dict, canonical=None, canonicalSeries=None,
|
def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
|
canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
|
||||||
_emptyString=u''):
|
_emptyString=u'', appendKind=True):
|
||||||
"""Given a dictionary that represents a "long" IMDb title,
|
"""Given a dictionary that represents a "long" IMDb title,
|
||||||
return a string.
|
return a string.
|
||||||
|
|
||||||
|
@ -511,6 +535,11 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
doYear = 0
|
doYear = 0
|
||||||
if ptdf:
|
if ptdf:
|
||||||
doYear = 1
|
doYear = 1
|
||||||
|
# XXX: for results coming from the new search page.
|
||||||
|
if not isinstance(episode_of, (dict, _Container)):
|
||||||
|
episode_of = {'title': episode_of, 'kind': 'tv series'}
|
||||||
|
if 'series year' in title_dict:
|
||||||
|
episode_of['year'] = title_dict['series year']
|
||||||
pre_title = build_title(episode_of, canonical=canonicalSeries,
|
pre_title = build_title(episode_of, canonical=canonicalSeries,
|
||||||
ptdf=0, _doYear=doYear,
|
ptdf=0, _doYear=doYear,
|
||||||
_emptyString=_emptyString)
|
_emptyString=_emptyString)
|
||||||
|
@ -545,12 +574,14 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
episode_title += '.%s' % episode
|
episode_title += '.%s' % episode
|
||||||
episode_title += ')'
|
episode_title += ')'
|
||||||
episode_title = '{%s}' % episode_title
|
episode_title = '{%s}' % episode_title
|
||||||
return '%s %s' % (pre_title, episode_title)
|
return _emptyString + '%s %s' % (_emptyString + pre_title,
|
||||||
|
_emptyString + episode_title)
|
||||||
title = title_dict.get('title', '')
|
title = title_dict.get('title', '')
|
||||||
|
imdbIndex = title_dict.get('imdbIndex', '')
|
||||||
if not title: return _emptyString
|
if not title: return _emptyString
|
||||||
if canonical is not None:
|
if canonical is not None:
|
||||||
if canonical:
|
if canonical:
|
||||||
title = canonicalTitle(title, lang=lang)
|
title = canonicalTitle(title, lang=lang, imdbIndex=imdbIndex)
|
||||||
else:
|
else:
|
||||||
title = normalizeTitle(title, lang=lang)
|
title = normalizeTitle(title, lang=lang)
|
||||||
if pre_title:
|
if pre_title:
|
||||||
|
@ -558,15 +589,20 @@ def build_title(title_dict, canonical=None, canonicalSeries=None,
|
||||||
if kind in (u'tv series', u'tv mini series'):
|
if kind in (u'tv series', u'tv mini series'):
|
||||||
title = '"%s"' % title
|
title = '"%s"' % title
|
||||||
if _doYear:
|
if _doYear:
|
||||||
imdbIndex = title_dict.get('imdbIndex')
|
year = title_dict.get('year') or '????'
|
||||||
year = title_dict.get('year') or u'????'
|
|
||||||
if isinstance(_emptyString, str):
|
if isinstance(_emptyString, str):
|
||||||
year = str(year)
|
year = str(year)
|
||||||
title += ' (%s' % year
|
imdbIndex = title_dict.get('imdbIndex')
|
||||||
if imdbIndex:
|
if not ptdf:
|
||||||
title += '/%s' % imdbIndex
|
if imdbIndex and (canonical is None or canonical):
|
||||||
title += ')'
|
title += ' (%s)' % imdbIndex
|
||||||
if kind:
|
title += ' (%s)' % year
|
||||||
|
else:
|
||||||
|
title += ' (%s' % year
|
||||||
|
if imdbIndex and (canonical is None or canonical):
|
||||||
|
title += '/%s' % imdbIndex
|
||||||
|
title += ')'
|
||||||
|
if appendKind and kind:
|
||||||
if kind == 'tv movie':
|
if kind == 'tv movie':
|
||||||
title += ' (TV)'
|
title += ' (TV)'
|
||||||
elif kind == 'video movie':
|
elif kind == 'video movie':
|
||||||
|
|
|
@ -11,6 +11,7 @@ __author__ = "dbr/Ben"
|
||||||
__version__ = "1.9"
|
__version__ = "1.9"
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
import getpass
|
import getpass
|
||||||
import StringIO
|
import StringIO
|
||||||
|
@ -18,8 +19,10 @@ import tempfile
|
||||||
import warnings
|
import warnings
|
||||||
import logging
|
import logging
|
||||||
import zipfile
|
import zipfile
|
||||||
|
import datetime as dt
|
||||||
import requests
|
import requests
|
||||||
import cachecontrol
|
import cachecontrol
|
||||||
|
import xmltodict
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import xml.etree.cElementTree as ElementTree
|
import xml.etree.cElementTree as ElementTree
|
||||||
|
@ -31,6 +34,7 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
gzip = None
|
gzip = None
|
||||||
|
|
||||||
|
from lib.dateutil.parser import parse
|
||||||
from cachecontrol import caches
|
from cachecontrol import caches
|
||||||
|
|
||||||
from tvdb_ui import BaseUI, ConsoleUI
|
from tvdb_ui import BaseUI, ConsoleUI
|
||||||
|
@ -560,44 +564,71 @@ class Tvdb:
|
||||||
except requests.Timeout, e:
|
except requests.Timeout, e:
|
||||||
raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
raise tvdb_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
||||||
|
|
||||||
if 'application/zip' in resp.headers.get("Content-Type", '') and resp.ok:
|
def process(path, key, value):
|
||||||
try:
|
key = key.lower()
|
||||||
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
|
|
||||||
log().debug("We recived a zip file unpacking now ...")
|
|
||||||
zipdata = StringIO.StringIO()
|
|
||||||
zipdata.write(resp.content)
|
|
||||||
myzipfile = zipfile.ZipFile(zipdata)
|
|
||||||
return myzipfile.read('%s.xml' % language)
|
|
||||||
except zipfile.BadZipfile:
|
|
||||||
raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
|
|
||||||
|
|
||||||
return resp.content if resp.ok else None
|
# clean up value and do type changes
|
||||||
|
if value:
|
||||||
|
try:
|
||||||
|
# convert to integer if needed
|
||||||
|
if value.isdigit():
|
||||||
|
value = int(value)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if key in ['banner', 'fanart', 'poster']:
|
||||||
|
value = self.config['url_artworkPrefix'] % (value)
|
||||||
|
else:
|
||||||
|
value = self._cleanData(value)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if key == 'firstaired' and value in "0000-00-00":
|
||||||
|
new_value = str(dt.date.fromordinal(1))
|
||||||
|
new_value = re.sub("([-]0{2}){1,}", "", new_value)
|
||||||
|
fixDate = parse(new_value, fuzzy=True).date()
|
||||||
|
value = fixDate.strftime("%Y-%m-%d")
|
||||||
|
elif key == 'firstaired':
|
||||||
|
value = parse(value, fuzzy=True).date()
|
||||||
|
value = value.strftime("%Y-%m-%d")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
value = self._cleanData(value)
|
||||||
|
return (key, value)
|
||||||
|
|
||||||
|
if resp.ok:
|
||||||
|
if 'application/zip' in resp.headers.get("Content-Type", ''):
|
||||||
|
try:
|
||||||
|
# TODO: The zip contains actors.xml and banners.xml, which are currently ignored [GH-20]
|
||||||
|
log().debug("We recived a zip file unpacking now ...")
|
||||||
|
zipdata = StringIO.StringIO()
|
||||||
|
zipdata.write(resp.content)
|
||||||
|
myzipfile = zipfile.ZipFile(zipdata)
|
||||||
|
return xmltodict.parse(myzipfile.read('%s.xml' % language), postprocessor=process)
|
||||||
|
except zipfile.BadZipfile:
|
||||||
|
raise tvdb_error("Bad zip file received from thetvdb.com, could not read it")
|
||||||
|
else:
|
||||||
|
return xmltodict.parse(resp.text.strip(), postprocessor=process)
|
||||||
|
|
||||||
def _getetsrc(self, url, params=None, language=None):
|
def _getetsrc(self, url, params=None, language=None):
|
||||||
"""Loads a URL using caching, returns an ElementTree of the source
|
"""Loads a URL using caching, returns an ElementTree of the source
|
||||||
"""
|
"""
|
||||||
src = self._loadUrl(url, params=params, language=language)
|
|
||||||
try:
|
try:
|
||||||
# TVDB doesn't sanitize \r (CR) from user input in some fields,
|
|
||||||
# remove it to avoid errors. Change from SickBeard, from will14m
|
|
||||||
return ElementTree.fromstring(src.rstrip("\r")) if src else None
|
|
||||||
except SyntaxError:
|
|
||||||
src = self._loadUrl(url, params=params, language=language)
|
src = self._loadUrl(url, params=params, language=language)
|
||||||
try:
|
src = [src[item] for item in src][0]
|
||||||
return ElementTree.fromstring(src.rstrip("\r")) if src else None
|
except:
|
||||||
except SyntaxError, exceptionmsg:
|
errormsg = "There was an error with the XML retrieved from thetvdb.com:"
|
||||||
errormsg = "There was an error with the XML retrieved from thetvdb.com:\n%s" % (
|
|
||||||
exceptionmsg
|
if self.config['cache_enabled']:
|
||||||
|
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
||||||
|
self.config['cache_location']
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.config['cache_enabled']:
|
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
|
||||||
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
|
||||||
self.config['cache_location']
|
raise tvdb_error(errormsg)
|
||||||
)
|
|
||||||
|
|
||||||
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on"
|
return src
|
||||||
errormsg += "\nhttp://dbr.lighthouseapp.com/projects/13342-tvdb_api/overview\n"
|
|
||||||
raise tvdb_error(errormsg)
|
|
||||||
|
|
||||||
def _setItem(self, sid, seas, ep, attrib, value):
|
def _setItem(self, sid, seas, ep, attrib, value):
|
||||||
"""Creates a new episode, creating Show(), Season() and
|
"""Creates a new episode, creating Show(), Season() and
|
||||||
|
@ -649,9 +680,8 @@ class Tvdb:
|
||||||
log().debug("Searching for show %s" % series)
|
log().debug("Searching for show %s" % series)
|
||||||
self.config['params_getSeries']['seriesname'] = series
|
self.config['params_getSeries']['seriesname'] = series
|
||||||
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
||||||
allSeries = list(dict((s.tag.lower(), s.text) for s in x.getchildren()) for x in seriesEt)
|
|
||||||
|
|
||||||
return allSeries
|
return [seriesEt[item] for item in seriesEt][0]
|
||||||
|
|
||||||
def _getSeries(self, series):
|
def _getSeries(self, series):
|
||||||
"""This searches TheTVDB.com for the series name,
|
"""This searches TheTVDB.com for the series name,
|
||||||
|
@ -798,24 +828,13 @@ class Tvdb:
|
||||||
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
|
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
|
||||||
)
|
)
|
||||||
|
|
||||||
if seriesInfoEt is None: return False
|
# check and make sure we have data to process and that it contains a series name
|
||||||
for curInfo in seriesInfoEt.findall("Series")[0]:
|
if seriesInfoEt is None or 'seriesname' not in seriesInfoEt['series']:
|
||||||
tag = curInfo.tag.lower()
|
return False
|
||||||
value = curInfo.text
|
|
||||||
|
|
||||||
if tag == 'seriesname' and value is None:
|
for k, v in seriesInfoEt['series'].items():
|
||||||
return False
|
self._setShowData(sid, k, v)
|
||||||
|
|
||||||
if value is not None:
|
|
||||||
if tag == 'id':
|
|
||||||
value = int(value)
|
|
||||||
|
|
||||||
if tag in ['banner', 'fanart', 'poster']:
|
|
||||||
value = self.config['url_artworkPrefix'] % (value)
|
|
||||||
else:
|
|
||||||
value = self._cleanData(value)
|
|
||||||
|
|
||||||
self._setShowData(sid, tag, value)
|
|
||||||
if seriesSearch:
|
if seriesSearch:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -837,63 +856,40 @@ class Tvdb:
|
||||||
|
|
||||||
epsEt = self._getetsrc(url, language=language)
|
epsEt = self._getetsrc(url, language=language)
|
||||||
|
|
||||||
for cur_ep in epsEt.findall("Episode"):
|
for cur_ep in epsEt["episode"]:
|
||||||
|
|
||||||
if self.config['dvdorder']:
|
if self.config['dvdorder']:
|
||||||
log().debug('Using DVD ordering.')
|
log().debug('Using DVD ordering.')
|
||||||
use_dvd = cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None
|
use_dvd = cur_ep['dvd_season'] != None and cur_ep['dvd_episodenumber'] != None
|
||||||
else:
|
else:
|
||||||
use_dvd = False
|
use_dvd = False
|
||||||
|
|
||||||
if use_dvd:
|
if use_dvd:
|
||||||
elem_seasnum, elem_epno = cur_ep.find('DVD_season'), cur_ep.find('DVD_episodenumber')
|
seasnum, epno = cur_ep['dvd_season'], cur_ep['dvd_episodenumber']
|
||||||
else:
|
else:
|
||||||
elem_seasnum, elem_epno = cur_ep.find('SeasonNumber'), cur_ep.find('EpisodeNumber')
|
seasnum, epno = cur_ep['seasonnumber'], cur_ep['episodenumber']
|
||||||
|
|
||||||
if elem_seasnum is None or elem_epno is None:
|
|
||||||
|
|
||||||
|
if seasnum is None or epno is None:
|
||||||
log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
|
log().warning("An episode has incomplete season/episode number (season: %r, episode: %r)" % (
|
||||||
elem_seasnum, elem_epno))
|
seasnum, epno))
|
||||||
log().debug(
|
|
||||||
" ".join(
|
|
||||||
"%r is %r" % (child.tag, child.text) for child in cur_ep.getchildren()))
|
|
||||||
# TODO: Should this happen?
|
|
||||||
continue # Skip to next episode
|
continue # Skip to next episode
|
||||||
|
|
||||||
|
|
||||||
# float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
|
# float() is because https://github.com/dbr/tvnamer/issues/95 - should probably be fixed in TVDB data
|
||||||
seas_no = int(float(elem_seasnum.text))
|
seas_no = int(float(seasnum))
|
||||||
ep_no = int(float(elem_epno.text))
|
ep_no = int(float(epno))
|
||||||
|
|
||||||
useDVD = False
|
for k,v in cur_ep.items():
|
||||||
|
k = k.lower()
|
||||||
|
|
||||||
if (self.config['dvdorder']):
|
if v is not None:
|
||||||
log().debug('DVD Order? Yes')
|
if k == 'id':
|
||||||
useDVD = (cur_ep.find('DVD_season').text != None and cur_ep.find('DVD_episodenumber').text != None)
|
v = int(v)
|
||||||
else:
|
|
||||||
log().debug('DVD Order? No')
|
|
||||||
|
|
||||||
if (useDVD):
|
if k == 'filename':
|
||||||
log().debug('Use DVD Order? Yes')
|
v = self.config['url_artworkPrefix'] % (v)
|
||||||
seas_no = int(cur_ep.find('DVD_season').text)
|
|
||||||
ep_no = int(float(cur_ep.find('DVD_episodenumber').text))
|
|
||||||
else:
|
|
||||||
log().debug('Use DVD Order? No')
|
|
||||||
seas_no = int(cur_ep.find('SeasonNumber').text)
|
|
||||||
ep_no = int(cur_ep.find('EpisodeNumber').text)
|
|
||||||
|
|
||||||
for cur_item in cur_ep.getchildren():
|
|
||||||
tag = cur_item.tag.lower()
|
|
||||||
value = cur_item.text
|
|
||||||
if value is not None:
|
|
||||||
if tag == 'id':
|
|
||||||
value = int(value)
|
|
||||||
|
|
||||||
if tag == 'filename':
|
|
||||||
value = self.config['url_artworkPrefix'] % (value)
|
|
||||||
else:
|
else:
|
||||||
value = self._cleanData(value)
|
v = self._cleanData(v)
|
||||||
self._setItem(sid, seas_no, ep_no, tag, value)
|
|
||||||
|
self._setItem(sid, seas_no, ep_no, k, v)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python2
|
# !/usr/bin/env python2
|
||||||
#encoding:utf-8
|
#encoding:utf-8
|
||||||
#author:echel0n
|
#author:echel0n
|
||||||
#project:tvrage_api
|
#project:tvrage_api
|
||||||
|
@ -24,6 +24,7 @@ import logging
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
import requests
|
import requests
|
||||||
import cachecontrol
|
import cachecontrol
|
||||||
|
import xmltodict
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import xml.etree.cElementTree as ElementTree
|
import xml.etree.cElementTree as ElementTree
|
||||||
|
@ -35,11 +36,13 @@ from cachecontrol import caches
|
||||||
|
|
||||||
from tvrage_ui import BaseUI
|
from tvrage_ui import BaseUI
|
||||||
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
|
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
|
||||||
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
|
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
|
||||||
|
|
||||||
|
|
||||||
def log():
|
def log():
|
||||||
return logging.getLogger("tvrage_api")
|
return logging.getLogger("tvrage_api")
|
||||||
|
|
||||||
|
|
||||||
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
|
def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
|
||||||
"""Retry calling the decorated function using an exponential backoff.
|
"""Retry calling the decorated function using an exponential backoff.
|
||||||
|
|
||||||
|
@ -83,6 +86,7 @@ def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
|
||||||
|
|
||||||
return deco_retry
|
return deco_retry
|
||||||
|
|
||||||
|
|
||||||
class ShowContainer(dict):
|
class ShowContainer(dict):
|
||||||
"""Simple dict that holds a series of Show instances
|
"""Simple dict that holds a series of Show instances
|
||||||
"""
|
"""
|
||||||
|
@ -105,13 +109,14 @@ class ShowContainer(dict):
|
||||||
|
|
||||||
_lastgc = time.time()
|
_lastgc = time.time()
|
||||||
del tbd
|
del tbd
|
||||||
|
|
||||||
super(ShowContainer, self).__setitem__(key, value)
|
super(ShowContainer, self).__setitem__(key, value)
|
||||||
|
|
||||||
|
|
||||||
class Show(dict):
|
class Show(dict):
|
||||||
"""Holds a dict of seasons, and show data.
|
"""Holds a dict of seasons, and show data.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
dict.__init__(self)
|
dict.__init__(self)
|
||||||
self.data = {}
|
self.data = {}
|
||||||
|
@ -157,7 +162,7 @@ class Show(dict):
|
||||||
raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
|
raise tvrage_episodenotfound("Could not find any episodes that aired on %s" % date)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def search(self, term = None, key = None):
|
def search(self, term=None, key=None):
|
||||||
"""
|
"""
|
||||||
Search all episodes in show. Can search all data, or a specific key (for
|
Search all episodes in show. Can search all data, or a specific key (for
|
||||||
example, episodename)
|
example, episodename)
|
||||||
|
@ -173,7 +178,7 @@ class Show(dict):
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
for cur_season in self.values():
|
for cur_season in self.values():
|
||||||
searchresult = cur_season.search(term = term, key = key)
|
searchresult = cur_season.search(term=term, key=key)
|
||||||
if len(searchresult) != 0:
|
if len(searchresult) != 0:
|
||||||
results.extend(searchresult)
|
results.extend(searchresult)
|
||||||
|
|
||||||
|
@ -181,7 +186,7 @@ class Show(dict):
|
||||||
|
|
||||||
|
|
||||||
class Season(dict):
|
class Season(dict):
|
||||||
def __init__(self, show = None):
|
def __init__(self, show=None):
|
||||||
"""The show attribute points to the parent show
|
"""The show attribute points to the parent show
|
||||||
"""
|
"""
|
||||||
self.show = show
|
self.show = show
|
||||||
|
@ -202,13 +207,13 @@ class Season(dict):
|
||||||
else:
|
else:
|
||||||
return dict.__getitem__(self, episode_number)
|
return dict.__getitem__(self, episode_number)
|
||||||
|
|
||||||
def search(self, term = None, key = None):
|
def search(self, term=None, key=None):
|
||||||
"""Search all episodes in season, returns a list of matching Episode
|
"""Search all episodes in season, returns a list of matching Episode
|
||||||
instances.
|
instances.
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
for ep in self.values():
|
for ep in self.values():
|
||||||
searchresult = ep.search(term = term, key = key)
|
searchresult = ep.search(term=term, key=key)
|
||||||
if searchresult is not None:
|
if searchresult is not None:
|
||||||
results.append(
|
results.append(
|
||||||
searchresult
|
searchresult
|
||||||
|
@ -217,7 +222,7 @@ class Season(dict):
|
||||||
|
|
||||||
|
|
||||||
class Episode(dict):
|
class Episode(dict):
|
||||||
def __init__(self, season = None):
|
def __init__(self, season=None):
|
||||||
"""The season attribute points to the parent season
|
"""The season attribute points to the parent season
|
||||||
"""
|
"""
|
||||||
self.season = season
|
self.season = season
|
||||||
|
@ -242,7 +247,7 @@ class Episode(dict):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))
|
raise tvrage_attributenotfound("Cannot find attribute %s" % (repr(key)))
|
||||||
|
|
||||||
def search(self, term = None, key = None):
|
def search(self, term=None, key=None):
|
||||||
"""Search episode data for term, if it matches, return the Episode (self).
|
"""Search episode data for term, if it matches, return the Episode (self).
|
||||||
The key parameter can be used to limit the search to a specific element,
|
The key parameter can be used to limit the search to a specific element,
|
||||||
for example, episodename.
|
for example, episodename.
|
||||||
|
@ -258,25 +263,27 @@ class Episode(dict):
|
||||||
if key is not None and cur_key != key:
|
if key is not None and cur_key != key:
|
||||||
# Do not search this key
|
# Do not search this key
|
||||||
continue
|
continue
|
||||||
if cur_value.find( unicode(term).lower() ) > -1:
|
if cur_value.find(unicode(term).lower()) > -1:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
class TVRage:
|
class TVRage:
|
||||||
"""Create easy-to-use interface to name of season/episode name"""
|
"""Create easy-to-use interface to name of season/episode name"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
interactive = False,
|
interactive=False,
|
||||||
select_first = False,
|
select_first=False,
|
||||||
debug = False,
|
debug=False,
|
||||||
cache = True,
|
cache=True,
|
||||||
banners = False,
|
banners=False,
|
||||||
actors = False,
|
actors=False,
|
||||||
custom_ui = None,
|
custom_ui=None,
|
||||||
language = None,
|
language=None,
|
||||||
search_all_languages = False,
|
search_all_languages=False,
|
||||||
apikey = None,
|
apikey=None,
|
||||||
forceConnect=False,
|
forceConnect=False,
|
||||||
useZip=False,
|
useZip=False,
|
||||||
dvdorder=False):
|
dvdorder=False):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
cache (True/False/str/unicode/urllib2 opener):
|
cache (True/False/str/unicode/urllib2 opener):
|
||||||
|
@ -294,18 +301,18 @@ class TVRage:
|
||||||
return an exception immediately.
|
return an exception immediately.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.shows = ShowContainer() # Holds all Show classes
|
self.shows = ShowContainer() # Holds all Show classes
|
||||||
self.corrections = {} # Holds show-name to show_id mapping
|
self.corrections = {} # Holds show-name to show_id mapping
|
||||||
self.sess = requests.session() # HTTP Session
|
self.sess = requests.session() # HTTP Session
|
||||||
|
|
||||||
self.config = {}
|
self.config = {}
|
||||||
|
|
||||||
if apikey is not None:
|
if apikey is not None:
|
||||||
self.config['apikey'] = apikey
|
self.config['apikey'] = apikey
|
||||||
else:
|
else:
|
||||||
self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key
|
self.config['apikey'] = "Uhewg1Rr0o62fvZvUIZt" # tvdb_api's API key
|
||||||
|
|
||||||
self.config['debug_enabled'] = debug # show debugging messages
|
self.config['debug_enabled'] = debug # show debugging messages
|
||||||
|
|
||||||
self.config['custom_ui'] = custom_ui
|
self.config['custom_ui'] = custom_ui
|
||||||
|
|
||||||
|
@ -322,8 +329,8 @@ class TVRage:
|
||||||
|
|
||||||
if self.config['debug_enabled']:
|
if self.config['debug_enabled']:
|
||||||
warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
|
warnings.warn("The debug argument to tvrage_api.__init__ will be removed in the next version. "
|
||||||
"To enable debug messages, use the following code before importing: "
|
"To enable debug messages, use the following code before importing: "
|
||||||
"import logging; logging.basicConfig(level=logging.DEBUG)")
|
"import logging; logging.basicConfig(level=logging.DEBUG)")
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
@ -331,8 +338,8 @@ class TVRage:
|
||||||
# Hard-coded here as it is realtively static, and saves another HTTP request, as
|
# Hard-coded here as it is realtively static, and saves another HTTP request, as
|
||||||
# recommended on http://tvrage.com/wiki/index.php/API:languages.xml
|
# recommended on http://tvrage.com/wiki/index.php/API:languages.xml
|
||||||
self.config['valid_languages'] = [
|
self.config['valid_languages'] = [
|
||||||
"da", "fi", "nl", "de", "it", "es", "fr","pl", "hu","el","tr",
|
"da", "fi", "nl", "de", "it", "es", "fr", "pl", "hu", "el", "tr",
|
||||||
"ru","he","ja","pt","zh","cs","sl", "hr","ko","en","sv","no"
|
"ru", "he", "ja", "pt", "zh", "cs", "sl", "hr", "ko", "en", "sv", "no"
|
||||||
]
|
]
|
||||||
|
|
||||||
# tvrage.com should be based around numeric language codes,
|
# tvrage.com should be based around numeric language codes,
|
||||||
|
@ -340,9 +347,9 @@ class TVRage:
|
||||||
# requires the language ID, thus this mapping is required (mainly
|
# requires the language ID, thus this mapping is required (mainly
|
||||||
# for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
|
# for usage in tvrage_ui - internally tvrage_api will use the language abbreviations)
|
||||||
self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
|
self.config['langabbv_to_id'] = {'el': 20, 'en': 7, 'zh': 27,
|
||||||
'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
|
'it': 15, 'cs': 28, 'es': 16, 'ru': 22, 'nl': 13, 'pt': 26, 'no': 9,
|
||||||
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
|
'tr': 21, 'pl': 18, 'fr': 17, 'hr': 31, 'de': 14, 'da': 10, 'fi': 11,
|
||||||
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
|
'hu': 19, 'ja': 25, 'he': 24, 'ko': 32, 'sv': 8, 'sl': 30}
|
||||||
|
|
||||||
if language is None:
|
if language is None:
|
||||||
self.config['language'] = 'en'
|
self.config['language'] = 'en'
|
||||||
|
@ -390,9 +397,9 @@ class TVRage:
|
||||||
|
|
||||||
# get response from TVRage
|
# get response from TVRage
|
||||||
if self.config['cache_enabled']:
|
if self.config['cache_enabled']:
|
||||||
resp = self.sess.get(url, cache_auto=True, params=params)
|
resp = self.sess.get(url.strip(), cache_auto=True, params=params)
|
||||||
else:
|
else:
|
||||||
resp = requests.get(url, params=params)
|
resp = requests.get(url.strip(), params=params)
|
||||||
|
|
||||||
except requests.HTTPError, e:
|
except requests.HTTPError, e:
|
||||||
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
|
raise tvrage_error("HTTP error " + str(e.errno) + " while loading URL " + str(url))
|
||||||
|
@ -403,81 +410,84 @@ class TVRage:
|
||||||
except requests.Timeout, e:
|
except requests.Timeout, e:
|
||||||
raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
raise tvrage_error("Connection timed out " + str(e.message) + " while loading URL " + str(url))
|
||||||
|
|
||||||
return resp.content if resp.ok else None
|
def remap_keys(path, key, value):
|
||||||
|
name_map = {
|
||||||
|
'showid': 'id',
|
||||||
|
'showname': 'seriesname',
|
||||||
|
'name': 'seriesname',
|
||||||
|
'summary': 'overview',
|
||||||
|
'started': 'firstaired',
|
||||||
|
'genres': 'genre',
|
||||||
|
'airtime': 'airs_time',
|
||||||
|
'airday': 'airs_dayofweek',
|
||||||
|
'image': 'fanart',
|
||||||
|
'epnum': 'absolute_number',
|
||||||
|
'title': 'episodename',
|
||||||
|
'airdate': 'firstaired',
|
||||||
|
'screencap': 'filename',
|
||||||
|
'seasonnum': 'episodenumber'
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
key = name_map[key.lower()]
|
||||||
|
except (ValueError, TypeError, KeyError):
|
||||||
|
key.lower()
|
||||||
|
|
||||||
|
# clean up value and do type changes
|
||||||
|
if value:
|
||||||
|
if isinstance(value, dict):
|
||||||
|
if key == 'network':
|
||||||
|
value = value['#text']
|
||||||
|
if key == 'genre':
|
||||||
|
value = value['genre']
|
||||||
|
if not isinstance(value, list):
|
||||||
|
value = [value]
|
||||||
|
value = '|' + '|'.join(value) + '|'
|
||||||
|
|
||||||
|
try:
|
||||||
|
# convert to integer if needed
|
||||||
|
if value.isdigit():
|
||||||
|
value = int(value)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
if key == 'firstaired' and value in "0000-00-00":
|
||||||
|
new_value = str(dt.date.fromordinal(1))
|
||||||
|
new_value = re.sub("([-]0{2}){1,}", "", new_value)
|
||||||
|
fixDate = parse(new_value, fuzzy=True).date()
|
||||||
|
value = fixDate.strftime("%Y-%m-%d")
|
||||||
|
elif key == 'firstaired':
|
||||||
|
value = parse(value, fuzzy=True).date()
|
||||||
|
value = value.strftime("%Y-%m-%d")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
value = self._cleanData(value)
|
||||||
|
return (key, value)
|
||||||
|
|
||||||
|
if resp.ok:
|
||||||
|
return xmltodict.parse(resp.text.strip(), postprocessor=remap_keys)
|
||||||
|
|
||||||
def _getetsrc(self, url, params=None):
|
def _getetsrc(self, url, params=None):
|
||||||
"""Loads a URL using caching, returns an ElementTree of the source
|
"""Loads a URL using caching, returns an ElementTree of the source
|
||||||
"""
|
"""
|
||||||
reDict = {
|
|
||||||
'showid': 'id',
|
|
||||||
'showname': 'seriesname',
|
|
||||||
'name': 'seriesname',
|
|
||||||
'summary': 'overview',
|
|
||||||
'started': 'firstaired',
|
|
||||||
'genres': 'genre',
|
|
||||||
'airtime': 'airs_time',
|
|
||||||
'airday': 'airs_dayofweek',
|
|
||||||
'image': 'fanart',
|
|
||||||
'epnum': 'absolute_number',
|
|
||||||
'title': 'episodename',
|
|
||||||
'airdate': 'firstaired',
|
|
||||||
'screencap': 'filename',
|
|
||||||
'seasonnum': 'episodenumber',
|
|
||||||
}
|
|
||||||
|
|
||||||
robj = re.compile('|'.join(reDict.keys()))
|
|
||||||
src = self._loadUrl(url, params)
|
|
||||||
try:
|
try:
|
||||||
# TVRAGE doesn't sanitize \r (CR) from user input in some fields,
|
|
||||||
# remove it to avoid errors. Change from SickBeard, from will14m
|
|
||||||
xml = ElementTree.fromstring(src.rstrip("\r"))
|
|
||||||
tree = ElementTree.ElementTree(xml)
|
|
||||||
for elm in tree.findall('.//*'):
|
|
||||||
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
|
|
||||||
|
|
||||||
if elm.tag in 'firstaired':
|
|
||||||
try:
|
|
||||||
if elm.text in "0000-00-00":
|
|
||||||
elm.text = str(dt.date.fromordinal(1))
|
|
||||||
elm.text = re.sub("([-]0{2}){1,}", "", elm.text)
|
|
||||||
fixDate = parse(elm.text, fuzzy=True).date()
|
|
||||||
elm.text = fixDate.strftime("%Y-%m-%d")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return ElementTree.fromstring(ElementTree.tostring(xml))
|
|
||||||
except SyntaxError:
|
|
||||||
src = self._loadUrl(url, params)
|
src = self._loadUrl(url, params)
|
||||||
try:
|
src = [src[item] for item in src][0]
|
||||||
xml = ElementTree.fromstring(src.rstrip("\r"))
|
except:
|
||||||
tree = ElementTree.ElementTree(xml)
|
errormsg = "There was an error with the XML retrieved from tvrage.com"
|
||||||
for elm in tree.findall('.//*'):
|
|
||||||
elm.tag = robj.sub(lambda m: reDict[m.group(0)], elm.tag)
|
|
||||||
|
|
||||||
if elm.tag in 'firstaired' and elm.text:
|
if self.config['cache_enabled']:
|
||||||
if elm.text == "0000-00-00":
|
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
||||||
elm.text = str(dt.date.fromordinal(1))
|
self.config['cache_location']
|
||||||
try:
|
|
||||||
#month = strptime(match.group('air_month')[:3],'%b').tm_mon
|
|
||||||
#day = re.sub("(st|nd|rd|th)", "", match.group('air_day'))
|
|
||||||
#dtStr = '%s/%s/%s' % (year, month, day)
|
|
||||||
|
|
||||||
fixDate = parse(elm.text, fuzzy=True)
|
|
||||||
elm.text = fixDate.strftime("%Y-%m-%d")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return ElementTree.fromstring(ElementTree.tostring(xml))
|
|
||||||
except SyntaxError, exceptionmsg:
|
|
||||||
errormsg = "There was an error with the XML retrieved from tvrage.com:\n%s" % (
|
|
||||||
exceptionmsg
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.config['cache_enabled']:
|
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
|
||||||
errormsg += "\nFirst try emptying the cache folder at..\n%s" % (
|
raise tvrage_error(errormsg)
|
||||||
self.config['cache_location']
|
|
||||||
)
|
|
||||||
|
|
||||||
errormsg += "\nIf this does not resolve the issue, please try again later. If the error persists, report a bug on\n"
|
return src
|
||||||
raise tvrage_error(errormsg)
|
|
||||||
|
|
||||||
def _setItem(self, sid, seas, ep, attrib, value):
|
def _setItem(self, sid, seas, ep, attrib, value):
|
||||||
"""Creates a new episode, creating Show(), Season() and
|
"""Creates a new episode, creating Show(), Season() and
|
||||||
|
@ -497,9 +507,9 @@ class TVRage:
|
||||||
if sid not in self.shows:
|
if sid not in self.shows:
|
||||||
self.shows[sid] = Show()
|
self.shows[sid] = Show()
|
||||||
if seas not in self.shows[sid]:
|
if seas not in self.shows[sid]:
|
||||||
self.shows[sid][seas] = Season(show = self.shows[sid])
|
self.shows[sid][seas] = Season(show=self.shows[sid])
|
||||||
if ep not in self.shows[sid][seas]:
|
if ep not in self.shows[sid][seas]:
|
||||||
self.shows[sid][seas][ep] = Episode(season = self.shows[sid][seas])
|
self.shows[sid][seas][ep] = Episode(season=self.shows[sid][seas])
|
||||||
self.shows[sid][seas][ep][attrib] = value
|
self.shows[sid][seas][ep][attrib] = value
|
||||||
|
|
||||||
def _setShowData(self, sid, key, value):
|
def _setShowData(self, sid, key, value):
|
||||||
|
@ -529,9 +539,8 @@ class TVRage:
|
||||||
log().debug("Searching for show %s" % series)
|
log().debug("Searching for show %s" % series)
|
||||||
self.config['params_getSeries']['show'] = series
|
self.config['params_getSeries']['show'] = series
|
||||||
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
seriesEt = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries'])
|
||||||
allSeries = list(dict((s.tag.lower(),s.text) for s in x.getchildren()) for x in seriesEt)
|
|
||||||
|
|
||||||
return allSeries
|
return [seriesEt[item] for item in seriesEt][0]
|
||||||
|
|
||||||
def _getSeries(self, series):
|
def _getSeries(self, series):
|
||||||
"""This searches tvrage.com for the series name,
|
"""This searches tvrage.com for the series name,
|
||||||
|
@ -547,10 +556,10 @@ class TVRage:
|
||||||
|
|
||||||
if self.config['custom_ui'] is not None:
|
if self.config['custom_ui'] is not None:
|
||||||
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
|
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
|
||||||
ui = self.config['custom_ui'](config = self.config)
|
ui = self.config['custom_ui'](config=self.config)
|
||||||
else:
|
else:
|
||||||
log().debug('Auto-selecting first search result using BaseUI')
|
log().debug('Auto-selecting first search result using BaseUI')
|
||||||
ui = BaseUI(config = self.config)
|
ui = BaseUI(config=self.config)
|
||||||
|
|
||||||
return ui.selectSeries(allSeries)
|
return ui.selectSeries(allSeries)
|
||||||
|
|
||||||
|
@ -568,62 +577,49 @@ class TVRage:
|
||||||
self.config['params_seriesInfo']
|
self.config['params_seriesInfo']
|
||||||
)
|
)
|
||||||
|
|
||||||
if seriesInfoEt is None: return False
|
# check and make sure we have data to process and that it contains a series name
|
||||||
for curInfo in seriesInfoEt:
|
if seriesInfoEt is None or 'seriesname' not in seriesInfoEt:
|
||||||
tag = curInfo.tag.lower()
|
return False
|
||||||
value = curInfo.text
|
|
||||||
|
|
||||||
if tag == 'seriesname' and value is None:
|
for k, v in seriesInfoEt.items():
|
||||||
return False
|
self._setShowData(sid, k, v)
|
||||||
|
|
||||||
if tag == 'id':
|
# series search ends here
|
||||||
value = int(value)
|
if seriesSearch:
|
||||||
|
return True
|
||||||
if value is not None:
|
|
||||||
value = self._cleanData(value)
|
|
||||||
|
|
||||||
self._setShowData(sid, tag, value)
|
|
||||||
if seriesSearch: return True
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Parse genre data
|
|
||||||
log().debug('Getting genres of %s' % (sid))
|
|
||||||
for genre in seriesInfoEt.find('genres'):
|
|
||||||
tag = genre.tag.lower()
|
|
||||||
|
|
||||||
value = genre.text
|
|
||||||
if value is not None:
|
|
||||||
value = self._cleanData(value)
|
|
||||||
|
|
||||||
self._setShowData(sid, tag, value)
|
|
||||||
except Exception:
|
|
||||||
log().debug('No genres for %s' % (sid))
|
|
||||||
|
|
||||||
# Parse episode data
|
# Parse episode data
|
||||||
log().debug('Getting all episodes of %s' % (sid))
|
log().debug('Getting all episodes of %s' % (sid))
|
||||||
|
|
||||||
self.config['params_epInfo']['sid'] = sid
|
self.config['params_epInfo']['sid'] = sid
|
||||||
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
|
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
|
||||||
for cur_list in epsEt.findall("Episodelist"):
|
|
||||||
for cur_seas in cur_list:
|
|
||||||
try:
|
|
||||||
seas_no = int(cur_seas.attrib['no'])
|
|
||||||
for cur_ep in cur_seas:
|
|
||||||
ep_no = int(cur_ep.find('episodenumber').text)
|
|
||||||
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
|
|
||||||
for cur_item in cur_ep:
|
|
||||||
tag = cur_item.tag.lower()
|
|
||||||
|
|
||||||
value = cur_item.text
|
for season in epsEt['Episodelist']['Season']:
|
||||||
if value is not None:
|
episodes = season['episode']
|
||||||
if tag == 'id':
|
if not isinstance(episodes, list):
|
||||||
value = int(value)
|
episodes = [episodes]
|
||||||
|
|
||||||
value = self._cleanData(value)
|
for episode in episodes:
|
||||||
|
seas_no = int(season['@no'])
|
||||||
|
ep_no = int(episode['episodenumber'])
|
||||||
|
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
|
||||||
|
|
||||||
self._setItem(sid, seas_no, ep_no, tag, value)
|
for k,v in episode.items():
|
||||||
except:
|
try:
|
||||||
continue
|
k = k.lower()
|
||||||
|
if v is not None:
|
||||||
|
if k == 'link':
|
||||||
|
v = v.rsplit('/', 1)[1]
|
||||||
|
k = 'id'
|
||||||
|
|
||||||
|
if k == 'id':
|
||||||
|
v = int(v)
|
||||||
|
|
||||||
|
v = self._cleanData(v)
|
||||||
|
|
||||||
|
self._setItem(sid, seas_no, ep_no, k, v)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _nameToSid(self, name):
|
def _nameToSid(self, name):
|
||||||
|
@ -632,7 +628,7 @@ class TVRage:
|
||||||
the correct SID.
|
the correct SID.
|
||||||
"""
|
"""
|
||||||
if name in self.corrections:
|
if name in self.corrections:
|
||||||
log().debug('Correcting %s to %s' % (name, self.corrections[name]) )
|
log().debug('Correcting %s to %s' % (name, self.corrections[name]))
|
||||||
return self.corrections[name]
|
return self.corrections[name]
|
||||||
else:
|
else:
|
||||||
log().debug('Getting show %s' % (name))
|
log().debug('Getting show %s' % (name))
|
||||||
|
@ -673,11 +669,13 @@ def main():
|
||||||
grabs an episode name interactively.
|
grabs an episode name interactively.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
tvrage_instance = TVRage(cache=False)
|
tvrage_instance = TVRage(cache=False)
|
||||||
print tvrage_instance['Lost']['seriesname']
|
print tvrage_instance['Lost']['seriesname']
|
||||||
print tvrage_instance['Lost'][1][4]['episodename']
|
print tvrage_instance['Lost'][1][4]['episodename']
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
359
lib/xmltodict.py
Normal file
359
lib/xmltodict.py
Normal file
|
@ -0,0 +1,359 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
"Makes working with XML feel like you are working with JSON"
|
||||||
|
|
||||||
|
from xml.parsers import expat
|
||||||
|
from xml.sax.saxutils import XMLGenerator
|
||||||
|
from xml.sax.xmlreader import AttributesImpl
|
||||||
|
try: # pragma no cover
|
||||||
|
from cStringIO import StringIO
|
||||||
|
except ImportError: # pragma no cover
|
||||||
|
try:
|
||||||
|
from StringIO import StringIO
|
||||||
|
except ImportError:
|
||||||
|
from io import StringIO
|
||||||
|
try: # pragma no cover
|
||||||
|
from collections import OrderedDict
|
||||||
|
except ImportError: # pragma no cover
|
||||||
|
try:
|
||||||
|
from ordereddict import OrderedDict
|
||||||
|
except ImportError:
|
||||||
|
OrderedDict = dict
|
||||||
|
|
||||||
|
try: # pragma no cover
|
||||||
|
_basestring = basestring
|
||||||
|
except NameError: # pragma no cover
|
||||||
|
_basestring = str
|
||||||
|
try: # pragma no cover
|
||||||
|
_unicode = unicode
|
||||||
|
except NameError: # pragma no cover
|
||||||
|
_unicode = str
|
||||||
|
|
||||||
|
__author__ = 'Martin Blech'
|
||||||
|
__version__ = '0.9.0'
|
||||||
|
__license__ = 'MIT'
|
||||||
|
|
||||||
|
|
||||||
|
class ParsingInterrupted(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class _DictSAXHandler(object):
|
||||||
|
def __init__(self,
|
||||||
|
item_depth=0,
|
||||||
|
item_callback=lambda *args: True,
|
||||||
|
xml_attribs=True,
|
||||||
|
attr_prefix='@',
|
||||||
|
cdata_key='#text',
|
||||||
|
force_cdata=False,
|
||||||
|
cdata_separator='',
|
||||||
|
postprocessor=None,
|
||||||
|
dict_constructor=OrderedDict,
|
||||||
|
strip_whitespace=True,
|
||||||
|
namespace_separator=':',
|
||||||
|
namespaces=None):
|
||||||
|
self.path = []
|
||||||
|
self.stack = []
|
||||||
|
self.data = None
|
||||||
|
self.item = None
|
||||||
|
self.item_depth = item_depth
|
||||||
|
self.xml_attribs = xml_attribs
|
||||||
|
self.item_callback = item_callback
|
||||||
|
self.attr_prefix = attr_prefix
|
||||||
|
self.cdata_key = cdata_key
|
||||||
|
self.force_cdata = force_cdata
|
||||||
|
self.cdata_separator = cdata_separator
|
||||||
|
self.postprocessor = postprocessor
|
||||||
|
self.dict_constructor = dict_constructor
|
||||||
|
self.strip_whitespace = strip_whitespace
|
||||||
|
self.namespace_separator = namespace_separator
|
||||||
|
self.namespaces = namespaces
|
||||||
|
|
||||||
|
def _build_name(self, full_name):
|
||||||
|
if not self.namespaces:
|
||||||
|
return full_name
|
||||||
|
i = full_name.rfind(self.namespace_separator)
|
||||||
|
if i == -1:
|
||||||
|
return full_name
|
||||||
|
namespace, name = full_name[:i], full_name[i+1:]
|
||||||
|
short_namespace = self.namespaces.get(namespace, namespace)
|
||||||
|
if not short_namespace:
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
return self.namespace_separator.join((short_namespace, name))
|
||||||
|
|
||||||
|
def _attrs_to_dict(self, attrs):
|
||||||
|
if isinstance(attrs, dict):
|
||||||
|
return attrs
|
||||||
|
return self.dict_constructor(zip(attrs[0::2], attrs[1::2]))
|
||||||
|
|
||||||
|
def startElement(self, full_name, attrs):
|
||||||
|
name = self._build_name(full_name)
|
||||||
|
attrs = self._attrs_to_dict(attrs)
|
||||||
|
self.path.append((name, attrs or None))
|
||||||
|
if len(self.path) > self.item_depth:
|
||||||
|
self.stack.append((self.item, self.data))
|
||||||
|
if self.xml_attribs:
|
||||||
|
attrs = self.dict_constructor(
|
||||||
|
(self.attr_prefix+key, value)
|
||||||
|
for (key, value) in attrs.items())
|
||||||
|
else:
|
||||||
|
attrs = None
|
||||||
|
self.item = attrs or None
|
||||||
|
self.data = None
|
||||||
|
|
||||||
|
def endElement(self, full_name):
|
||||||
|
name = self._build_name(full_name)
|
||||||
|
if len(self.path) == self.item_depth:
|
||||||
|
item = self.item
|
||||||
|
if item is None:
|
||||||
|
item = self.data
|
||||||
|
should_continue = self.item_callback(self.path, item)
|
||||||
|
if not should_continue:
|
||||||
|
raise ParsingInterrupted()
|
||||||
|
if len(self.stack):
|
||||||
|
item, data = self.item, self.data
|
||||||
|
self.item, self.data = self.stack.pop()
|
||||||
|
if self.strip_whitespace and data is not None:
|
||||||
|
data = data.strip() or None
|
||||||
|
if data and self.force_cdata and item is None:
|
||||||
|
item = self.dict_constructor()
|
||||||
|
if item is not None:
|
||||||
|
if data:
|
||||||
|
self.push_data(item, self.cdata_key, data)
|
||||||
|
self.item = self.push_data(self.item, name, item)
|
||||||
|
else:
|
||||||
|
self.item = self.push_data(self.item, name, data)
|
||||||
|
else:
|
||||||
|
self.item = self.data = None
|
||||||
|
self.path.pop()
|
||||||
|
|
||||||
|
def characters(self, data):
|
||||||
|
if not self.data:
|
||||||
|
self.data = data
|
||||||
|
else:
|
||||||
|
self.data += self.cdata_separator + data
|
||||||
|
|
||||||
|
def push_data(self, item, key, data):
|
||||||
|
if self.postprocessor is not None:
|
||||||
|
result = self.postprocessor(self.path, key, data)
|
||||||
|
if result is None:
|
||||||
|
return item
|
||||||
|
key, data = result
|
||||||
|
if item is None:
|
||||||
|
item = self.dict_constructor()
|
||||||
|
try:
|
||||||
|
value = item[key]
|
||||||
|
if isinstance(value, list):
|
||||||
|
value.append(data)
|
||||||
|
else:
|
||||||
|
item[key] = [value, data]
|
||||||
|
except KeyError:
|
||||||
|
item[key] = data
|
||||||
|
return item
|
||||||
|
|
||||||
|
|
||||||
|
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
|
||||||
|
namespace_separator=':', **kwargs):
|
||||||
|
"""Parse the given XML input and convert it into a dictionary.
|
||||||
|
|
||||||
|
`xml_input` can either be a `string` or a file-like object.
|
||||||
|
|
||||||
|
If `xml_attribs` is `True`, element attributes are put in the dictionary
|
||||||
|
among regular child elements, using `@` as a prefix to avoid collisions. If
|
||||||
|
set to `False`, they are just ignored.
|
||||||
|
|
||||||
|
Simple example::
|
||||||
|
|
||||||
|
>>> import xmltodict
|
||||||
|
>>> doc = xmltodict.parse(\"\"\"
|
||||||
|
... <a prop="x">
|
||||||
|
... <b>1</b>
|
||||||
|
... <b>2</b>
|
||||||
|
... </a>
|
||||||
|
... \"\"\")
|
||||||
|
>>> doc['a']['@prop']
|
||||||
|
u'x'
|
||||||
|
>>> doc['a']['b']
|
||||||
|
[u'1', u'2']
|
||||||
|
|
||||||
|
If `item_depth` is `0`, the function returns a dictionary for the root
|
||||||
|
element (default behavior). Otherwise, it calls `item_callback` every time
|
||||||
|
an item at the specified depth is found and returns `None` in the end
|
||||||
|
(streaming mode).
|
||||||
|
|
||||||
|
The callback function receives two parameters: the `path` from the document
|
||||||
|
root to the item (name-attribs pairs), and the `item` (dict). If the
|
||||||
|
callback's return value is false-ish, parsing will be stopped with the
|
||||||
|
:class:`ParsingInterrupted` exception.
|
||||||
|
|
||||||
|
Streaming example::
|
||||||
|
|
||||||
|
>>> def handle(path, item):
|
||||||
|
... print 'path:%s item:%s' % (path, item)
|
||||||
|
... return True
|
||||||
|
...
|
||||||
|
>>> xmltodict.parse(\"\"\"
|
||||||
|
... <a prop="x">
|
||||||
|
... <b>1</b>
|
||||||
|
... <b>2</b>
|
||||||
|
... </a>\"\"\", item_depth=2, item_callback=handle)
|
||||||
|
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
|
||||||
|
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
|
||||||
|
|
||||||
|
The optional argument `postprocessor` is a function that takes `path`,
|
||||||
|
`key` and `value` as positional arguments and returns a new `(key, value)`
|
||||||
|
pair where both `key` and `value` may have changed. Usage example::
|
||||||
|
|
||||||
|
>>> def postprocessor(path, key, value):
|
||||||
|
... try:
|
||||||
|
... return key + ':int', int(value)
|
||||||
|
... except (ValueError, TypeError):
|
||||||
|
... return key, value
|
||||||
|
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
|
||||||
|
... postprocessor=postprocessor)
|
||||||
|
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
|
||||||
|
|
||||||
|
You can pass an alternate version of `expat` (such as `defusedexpat`) by
|
||||||
|
using the `expat` parameter. E.g:
|
||||||
|
|
||||||
|
>>> import defusedexpat
|
||||||
|
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
|
||||||
|
OrderedDict([(u'a', u'hello')])
|
||||||
|
|
||||||
|
"""
|
||||||
|
handler = _DictSAXHandler(namespace_separator=namespace_separator,
|
||||||
|
**kwargs)
|
||||||
|
if isinstance(xml_input, _unicode):
|
||||||
|
if not encoding:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
xml_input = xml_input.encode(encoding)
|
||||||
|
if not process_namespaces:
|
||||||
|
namespace_separator = None
|
||||||
|
parser = expat.ParserCreate(
|
||||||
|
encoding,
|
||||||
|
namespace_separator
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
parser.ordered_attributes = True
|
||||||
|
except AttributeError:
|
||||||
|
# Jython's expat does not support ordered_attributes
|
||||||
|
pass
|
||||||
|
parser.StartElementHandler = handler.startElement
|
||||||
|
parser.EndElementHandler = handler.endElement
|
||||||
|
parser.CharacterDataHandler = handler.characters
|
||||||
|
parser.buffer_text = True
|
||||||
|
try:
|
||||||
|
parser.ParseFile(xml_input)
|
||||||
|
except (TypeError, AttributeError):
|
||||||
|
parser.Parse(xml_input, True)
|
||||||
|
return handler.item
|
||||||
|
|
||||||
|
|
||||||
|
def _emit(key, value, content_handler,
|
||||||
|
attr_prefix='@',
|
||||||
|
cdata_key='#text',
|
||||||
|
depth=0,
|
||||||
|
preprocessor=None,
|
||||||
|
pretty=False,
|
||||||
|
newl='\n',
|
||||||
|
indent='\t'):
|
||||||
|
if preprocessor is not None:
|
||||||
|
result = preprocessor(key, value)
|
||||||
|
if result is None:
|
||||||
|
return
|
||||||
|
key, value = result
|
||||||
|
if not isinstance(value, (list, tuple)):
|
||||||
|
value = [value]
|
||||||
|
if depth == 0 and len(value) > 1:
|
||||||
|
raise ValueError('document with multiple roots')
|
||||||
|
for v in value:
|
||||||
|
if v is None:
|
||||||
|
v = OrderedDict()
|
||||||
|
elif not isinstance(v, dict):
|
||||||
|
v = _unicode(v)
|
||||||
|
if isinstance(v, _basestring):
|
||||||
|
v = OrderedDict(((cdata_key, v),))
|
||||||
|
cdata = None
|
||||||
|
attrs = OrderedDict()
|
||||||
|
children = []
|
||||||
|
for ik, iv in v.items():
|
||||||
|
if ik == cdata_key:
|
||||||
|
cdata = iv
|
||||||
|
continue
|
||||||
|
if ik.startswith(attr_prefix):
|
||||||
|
attrs[ik[len(attr_prefix):]] = iv
|
||||||
|
continue
|
||||||
|
children.append((ik, iv))
|
||||||
|
if pretty:
|
||||||
|
content_handler.ignorableWhitespace(depth * indent)
|
||||||
|
content_handler.startElement(key, AttributesImpl(attrs))
|
||||||
|
if pretty and children:
|
||||||
|
content_handler.ignorableWhitespace(newl)
|
||||||
|
for child_key, child_value in children:
|
||||||
|
_emit(child_key, child_value, content_handler,
|
||||||
|
attr_prefix, cdata_key, depth+1, preprocessor,
|
||||||
|
pretty, newl, indent)
|
||||||
|
if cdata is not None:
|
||||||
|
content_handler.characters(cdata)
|
||||||
|
if pretty and children:
|
||||||
|
content_handler.ignorableWhitespace(depth * indent)
|
||||||
|
content_handler.endElement(key)
|
||||||
|
if pretty and depth:
|
||||||
|
content_handler.ignorableWhitespace(newl)
|
||||||
|
|
||||||
|
|
||||||
|
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
|
||||||
|
**kwargs):
|
||||||
|
"""Emit an XML document for the given `input_dict` (reverse of `parse`).
|
||||||
|
|
||||||
|
The resulting XML document is returned as a string, but if `output` (a
|
||||||
|
file-like object) is specified, it is written there instead.
|
||||||
|
|
||||||
|
Dictionary keys prefixed with `attr_prefix` (default=`'@'`) are interpreted
|
||||||
|
as XML node attributes, whereas keys equal to `cdata_key`
|
||||||
|
(default=`'#text'`) are treated as character data.
|
||||||
|
|
||||||
|
The `pretty` parameter (default=`False`) enables pretty-printing. In this
|
||||||
|
mode, lines are terminated with `'\n'` and indented with `'\t'`, but this
|
||||||
|
can be customized with the `newl` and `indent` parameters.
|
||||||
|
|
||||||
|
"""
|
||||||
|
((key, value),) = input_dict.items()
|
||||||
|
must_return = False
|
||||||
|
if output is None:
|
||||||
|
output = StringIO()
|
||||||
|
must_return = True
|
||||||
|
content_handler = XMLGenerator(output, encoding)
|
||||||
|
if full_document:
|
||||||
|
content_handler.startDocument()
|
||||||
|
_emit(key, value, content_handler, **kwargs)
|
||||||
|
if full_document:
|
||||||
|
content_handler.endDocument()
|
||||||
|
if must_return:
|
||||||
|
value = output.getvalue()
|
||||||
|
try: # pragma no cover
|
||||||
|
value = value.decode(encoding)
|
||||||
|
except AttributeError: # pragma no cover
|
||||||
|
pass
|
||||||
|
return value
|
||||||
|
|
||||||
|
if __name__ == '__main__': # pragma: no cover
|
||||||
|
import sys
|
||||||
|
import marshal
|
||||||
|
|
||||||
|
(item_depth,) = sys.argv[1:]
|
||||||
|
item_depth = int(item_depth)
|
||||||
|
|
||||||
|
def handle_item(path, item):
|
||||||
|
marshal.dump((path, item), sys.stdout)
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
root = parse(sys.stdin,
|
||||||
|
item_depth=item_depth,
|
||||||
|
item_callback=handle_item,
|
||||||
|
dict_constructor=dict)
|
||||||
|
if item_depth == 0:
|
||||||
|
handle_item([], root)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pass
|
|
@ -782,14 +782,10 @@ class GenericMetadata():
|
||||||
|
|
||||||
# Try and get posters and fanart from TMDB
|
# Try and get posters and fanart from TMDB
|
||||||
if image_url is None:
|
if image_url is None:
|
||||||
for show_name in set(allPossibleShowNames(show_obj)):
|
if image_type in ('poster', 'poster_thumb'):
|
||||||
if image_type in ('poster', 'poster_thumb'):
|
image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
|
||||||
image_url = self._retrieve_show_images_from_tmdb(show_obj, poster=True)
|
elif image_type == 'fanart':
|
||||||
elif image_type == 'fanart':
|
image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)
|
||||||
image_url = self._retrieve_show_images_from_tmdb(show_obj, backdrop=True)
|
|
||||||
|
|
||||||
if image_url:
|
|
||||||
break
|
|
||||||
|
|
||||||
if image_url:
|
if image_url:
|
||||||
image_data = metadata_helpers.getShowImage(image_url, which)
|
image_data = metadata_helpers.getShowImage(image_url, which)
|
||||||
|
@ -965,8 +961,6 @@ class GenericMetadata():
|
||||||
return (indexer_id, name, indexer)
|
return (indexer_id, name, indexer)
|
||||||
|
|
||||||
def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
|
def _retrieve_show_images_from_tmdb(self, show, backdrop=False, poster=False):
|
||||||
tmdb_id = None
|
|
||||||
|
|
||||||
# get TMDB configuration info
|
# get TMDB configuration info
|
||||||
tmdb = TMDB(sickbeard.TMDB_API_KEY)
|
tmdb = TMDB(sickbeard.TMDB_API_KEY)
|
||||||
config = tmdb.Configuration()
|
config = tmdb.Configuration()
|
||||||
|
@ -981,27 +975,14 @@ class GenericMetadata():
|
||||||
|
|
||||||
try:
|
try:
|
||||||
search = tmdb.Search()
|
search = tmdb.Search()
|
||||||
for result in search.collection({'query': show.name}) + search.tv({'query': show.name}):
|
for show_name in set(allPossibleShowNames(show)):
|
||||||
tmdb_id = result['id']
|
for result in search.collection({'query': show_name})['results'] + search.tv({'query': show_name})['results']:
|
||||||
external_ids = tmdb.TV(tmdb_id).external_ids()
|
if backdrop and result['backdrop_path']:
|
||||||
if show.indexerid in [external_ids['tvdb_id'], external_ids['tvrage_id']]:
|
return "{0}{1}{2}".format(base_url, max_size, result['backdrop_path'])
|
||||||
break
|
elif poster and result['poster_path']:
|
||||||
|
return "{0}{1}{2}".format(base_url, max_size, result['poster_path'])
|
||||||
|
|
||||||
if tmdb_id:
|
except Exception, e:
|
||||||
images = tmdb.Collections(tmdb_id).images()
|
|
||||||
if len(images) > 0:
|
|
||||||
# get backdrop urls
|
|
||||||
if backdrop:
|
|
||||||
rel_path = images['backdrops'][0]['file_path']
|
|
||||||
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
|
|
||||||
return url
|
|
||||||
|
|
||||||
# get poster urls
|
|
||||||
if poster:
|
|
||||||
rel_path = images['posters'][0]['file_path']
|
|
||||||
url = "{0}{1}{2}".format(base_url, max_size, rel_path)
|
|
||||||
return url
|
|
||||||
except:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
|
logger.log(u"Could not find any posters or background for " + show.name, logger.DEBUG)
|
|
@ -829,7 +829,7 @@ class TVShow(object):
|
||||||
self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]
|
self.airs = myEp["airs_dayofweek"] + " " + myEp["airs_time"]
|
||||||
|
|
||||||
if getattr(myEp, 'firstaired', None) is not None:
|
if getattr(myEp, 'firstaired', None) is not None:
|
||||||
self.startyear = int(myEp["firstaired"].split('-')[0])
|
self.startyear = int(str(myEp["firstaired"]).split('-')[0])
|
||||||
|
|
||||||
self.status = getattr(myEp, 'status', '')
|
self.status = getattr(myEp, 'status', '')
|
||||||
|
|
||||||
|
@ -855,7 +855,6 @@ class TVShow(object):
|
||||||
i = imdb.IMDb()
|
i = imdb.IMDb()
|
||||||
imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))
|
imdbTv = i.get_movie(str(re.sub("[^0-9]", "", self.imdbid)))
|
||||||
|
|
||||||
test = imdbTv.keys()
|
|
||||||
for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
|
for key in filter(lambda x: x.replace('_', ' ') in imdbTv.keys(), imdb_info.keys()):
|
||||||
# Store only the first value for string type
|
# Store only the first value for string type
|
||||||
if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):
|
if type(imdb_info[key]) == type('') and type(imdbTv.get(key)) == type([]):
|
||||||
|
@ -1556,7 +1555,7 @@ class TVEpisode(object):
|
||||||
self.deleteEpisode()
|
self.deleteEpisode()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if myEp["absolute_number"] == None or myEp["absolute_number"] == "":
|
if getattr(myEp, 'absolute_number', None) is None:
|
||||||
logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
|
logger.log(u"This episode (" + self.show.name + " - " + str(season) + "x" + str(
|
||||||
episode) + ") has no absolute number on " + sickbeard.indexerApi(
|
episode) + ") has no absolute number on " + sickbeard.indexerApi(
|
||||||
self.indexer).name
|
self.indexer).name
|
||||||
|
@ -1564,7 +1563,7 @@ class TVEpisode(object):
|
||||||
else:
|
else:
|
||||||
logger.log(
|
logger.log(
|
||||||
str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
|
str(self.show.indexerid) + ": The absolute_number for " + str(season) + "x" + str(episode) + " is : " +
|
||||||
myEp["absolute_number"], logger.DEBUG)
|
str(myEp["absolute_number"]), logger.DEBUG)
|
||||||
self.absolute_number = int(myEp["absolute_number"])
|
self.absolute_number = int(myEp["absolute_number"])
|
||||||
|
|
||||||
self.name = getattr(myEp, 'episodename', "")
|
self.name = getattr(myEp, 'episodename', "")
|
||||||
|
@ -1603,8 +1602,9 @@ class TVEpisode(object):
|
||||||
u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
|
u"The show dir is missing, not bothering to change the episode statuses since it'd probably be invalid")
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
|
if self.location:
|
||||||
episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)
|
logger.log(str(self.show.indexerid) + u": Setting status for " + str(season) + "x" + str(
|
||||||
|
episode) + " based on status " + str(self.status) + " and existence of " + self.location, logger.DEBUG)
|
||||||
|
|
||||||
if not ek.ek(os.path.isfile, self.location):
|
if not ek.ek(os.path.isfile, self.location):
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue