Mirror of https://github.com/SickGear/SickGear.git (synced 2025-01-05 17:43:37 +00:00)
Update IMDb 5.1 (r907) → 5.2.1dev20171113 (f640595).
Thanks to the backport by @MasterMind2k
This commit is contained in: parent 18c400acec, commit 78026584eb
24 changed files with 1992 additions and 1184 deletions
@@ -7,6 +7,7 @@
 * Update feedparser library 5.2.1 (f1dd1bb) to 5.2.1 (5646f4c) - Uses the faster cchardet if installed
 * Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo
 * Update html5lib 0.99999999/1.0b9 (1a28d72) to 1.1-dev (e9ef538)
+* Update IMDb 5.1 (r907) to 5.2.1dev20171113 (f640595)
 
 [develop changelog]
 

@@ -23,8 +23,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
 from copy import deepcopy
 
-from imdb.utils import analyze_company_name, build_company_name, \
-    flatten, _Container, cmpCompanies
+from imdb.utils import _Container
+from imdb.utils import analyze_company_name, build_company_name, cmpCompanies, flatten
 
 
 class Company(_Container):

@@ -24,8 +24,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 from copy import deepcopy
 
 from imdb import linguistics
-from imdb.utils import analyze_title, build_title, canonicalTitle, \
-    flatten, _Container, cmpMovies
+from imdb.utils import _Container
+from imdb.utils import analyze_title, build_title, canonicalTitle, cmpMovies, flatten
 
 
 class Movie(_Container):

@@ -6,7 +6,7 @@ a person from the IMDb database.
 It can fetch data through different media (e.g.: the IMDb web pages,
 a SQL database, etc.)
 
-Copyright 2004-2016 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2018 Davide Alberani <da@erlug.linux.it>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

@@ -25,12 +25,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
 __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
            'available_access_systems']
-__version__ = VERSION = '5.1'
+__version__ = VERSION = '5.2.1dev20171113'
+
+VERSION_NOTICE = """This is the imdbpy-legacy branch of IMDbPY, and requires Python 2.
+Please notice that this version is mostly unsupported.
+
+For a version compatible with Python 3, see the master branch:
+https://github.com/alberanid/imdbpy/
+"""
+
+import sys
+
+if sys.hexversion >= 0x3000000:
+    print(VERSION_NOTICE)
+    sys.exit(1)
 
 # Import compatibility module (importing it is enough).
 import _compat
 
-import sys, os, ConfigParser, logging
+import os, ConfigParser, logging
 from types import MethodType
 
 from imdb import Movie, Person, Character, Company
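Note on the version gate added above: sys.hexversion packs the interpreter version into a single integer (major is the highest byte), so any Python 3 interpreter reports a value of at least 0x3000000 and the comparison is a cheap "running under Python 3?" test. A minimal sketch of the same idea (the exit message is illustrative, not from the commit):

    import sys

    # 3.0.0 final reports 0x30000f0, so every 3.x value is >= 0x3000000
    if sys.hexversion >= 0x3000000:
        sys.exit('this code requires Python 2')
    print('running under Python %d.%d' % sys.version_info[:2])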
@@ -38,38 +51,39 @@ import imdb._logging
 from imdb._exceptions import IMDbError, IMDbDataAccessError, IMDbParserError
 from imdb.utils import build_title, build_name, build_company_name
 
+_imdb_logger = logging.getLogger('imdbpy')
 _aux_logger = logging.getLogger('imdbpy.aux')
 
 
 # URLs of the main pages for movies, persons, characters and queries.
-imdbURL_base = 'http://akas.imdb.com/'
+imdbURL_base = 'http://www.imdb.com/'
 
 # NOTE: the urls below will be removed in a future version.
 # please use the values in the 'urls' attribute
 # of the IMDbBase subclass instance.
-# http://akas.imdb.com/title/
+# http://www.imdb.com/title/
 imdbURL_movie_base = '%stitle/' % imdbURL_base
-# http://akas.imdb.com/title/tt%s/
+# http://www.imdb.com/title/tt%s/
 imdbURL_movie_main = imdbURL_movie_base + 'tt%s/'
-# http://akas.imdb.com/name/
+# http://www.imdb.com/name/
 imdbURL_person_base = '%sname/' % imdbURL_base
-# http://akas.imdb.com/name/nm%s/
+# http://www.imdb.com/name/nm%s/
 imdbURL_person_main = imdbURL_person_base + 'nm%s/'
-# http://akas.imdb.com/character/
+# http://www.imdb.com/character/
 imdbURL_character_base = '%scharacter/' % imdbURL_base
-# http://akas.imdb.com/character/ch%s/
+# http://www.imdb.com/character/ch%s/
 imdbURL_character_main = imdbURL_character_base + 'ch%s/'
-# http://akas.imdb.com/company/
+# http://www.imdb.com/company/
 imdbURL_company_base = '%scompany/' % imdbURL_base
-# http://akas.imdb.com/company/co%s/
+# http://www.imdb.com/company/co%s/
 imdbURL_company_main = imdbURL_company_base + 'co%s/'
-# http://akas.imdb.com/keyword/%s/
+# http://www.imdb.com/keyword/%s/
 imdbURL_keyword_main = imdbURL_base + 'keyword/%s/'
-# http://akas.imdb.com/chart/top
+# http://www.imdb.com/chart/top
 imdbURL_top250 = imdbURL_base + 'chart/top'
-# http://akas.imdb.com/chart/bottom
+# http://www.imdb.com/chart/bottom
 imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
-# http://akas.imdb.com/find?%s
+# http://www.imdb.com/find?%s
 imdbURL_find = imdbURL_base + 'find?%s'
 
 # Name of the configuration file.
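Every endpoint above moves from the retired akas.imdb.com alias to www.imdb.com. Each URL is derived from imdbURL_base with % formatting, so changing the base string once rewires all derived URLs. A short sketch of that derivation (variable names here are illustrative):

    base = 'http://www.imdb.com/'
    movie_main = '%stitle/' % base + 'tt%s/'  # 'http://www.imdb.com/title/tt%s/'
    print(movie_main % '0133093')             # http://www.imdb.com/title/tt0133093/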
@@ -103,7 +117,7 @@ class ConfigParserWithCase(ConfigParser.ConfigParser):
         try:
             self.read(fname)
         except (ConfigParser.MissingSectionHeaderError,
-                ConfigParser.ParsingError), e:
+                ConfigParser.ParsingError) as e:
             _aux_logger.warn('Troubles reading config file: %s' % e)
         # Stop at the first valid file.
         if self.has_section('imdbpy'):
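This hunk is the first of many in the commit that replace the Python 2-only comma form of an except clause with the `as` form, which every interpreter from 2.6 onward accepts. A minimal sketch of the modern syntax:

    try:
        int('not a number')
    except (TypeError, ValueError) as e:  # 'as' works on Python 2.6+ and 3.x
        print('caught: %s' % e)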
@@ -159,10 +173,8 @@ def IMDb(accessSystem=None, *arguments, **keywords):
             accessSystem = 'http'
         kwds.update(keywords)
         keywords = kwds
-    except Exception, e:
-        import logging
-        logging.getLogger('imdbpy').warn('Unable to read configuration' \
-                                         ' file; complete error: %s' % e)
+    except Exception as e:
+        _imdb_logger.warn('Unable to read configuration file; complete error: %s' % e)
         # It just LOOKS LIKE a bad habit: we tried to read config
         # options from some files, but something is gone horribly
         # wrong: ignore everything and pretend we were called with

@@ -177,9 +189,8 @@ def IMDb(accessSystem=None, *arguments, **keywords):
         try:
             import logging.config
             logging.config.fileConfig(os.path.expanduser(logCfg))
-        except Exception, e:
-            logging.getLogger('imdbpy').warn('unable to read logger ' \
-                                             'config: %s' % e)
+        except Exception as e:
+            _imdb_logger.warn('unable to read logger config: %s' % e)
     if accessSystem in ('httpThin', 'webThin', 'htmlThin'):
         logging.warn('httpThin was removed since IMDbPY 4.8')
         accessSystem = 'http'

@@ -244,9 +255,6 @@ class IMDbBase:
     # in the subclasses).
     accessSystem = 'UNKNOWN'
 
-    # Top-level logger for IMDbPY.
-    _imdb_logger = logging.getLogger('imdbpy')
-
     # Whether to re-raise caught exceptions or not.
     _reraise_exceptions = False
 

@@ -285,30 +293,30 @@ class IMDbBase:
             imdbURL_base = 'http://%s' % imdbURL_base
         if not imdbURL_base.endswith('/'):
             imdbURL_base = '%s/' % imdbURL_base
-        # http://akas.imdb.com/title/
-        imdbURL_movie_base='%stitle/' % imdbURL_base
-        # http://akas.imdb.com/title/tt%s/
-        imdbURL_movie_main=imdbURL_movie_base + 'tt%s/'
-        # http://akas.imdb.com/name/
-        imdbURL_person_base='%sname/' % imdbURL_base
-        # http://akas.imdb.com/name/nm%s/
-        imdbURL_person_main=imdbURL_person_base + 'nm%s/'
-        # http://akas.imdb.com/character/
-        imdbURL_character_base='%scharacter/' % imdbURL_base
-        # http://akas.imdb.com/character/ch%s/
-        imdbURL_character_main=imdbURL_character_base + 'ch%s/'
-        # http://akas.imdb.com/company/
-        imdbURL_company_base='%scompany/' % imdbURL_base
-        # http://akas.imdb.com/company/co%s/
-        imdbURL_company_main=imdbURL_company_base + 'co%s/'
-        # http://akas.imdb.com/keyword/%s/
-        imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
-        # http://akas.imdb.com/chart/top
-        imdbURL_top250=imdbURL_base + 'chart/top'
-        # http://akas.imdb.com/chart/bottom
-        imdbURL_bottom100=imdbURL_base + 'chart/bottom'
-        # http://akas.imdb.com/find?%s
-        imdbURL_find=imdbURL_base + 'find?%s'
+        # http://www.imdb.com/title/
+        imdbURL_movie_base = '%stitle/' % imdbURL_base
+        # http://www.imdb.com/title/tt%s/
+        imdbURL_movie_main = imdbURL_movie_base + 'tt%s/'
+        # http://www.imdb.com/name/
+        imdbURL_person_base = '%sname/' % imdbURL_base
+        # http://www.imdb.com/name/nm%s/
+        imdbURL_person_main = imdbURL_person_base + 'nm%s/'
+        # http://www.imdb.com/character/
+        imdbURL_character_base = '%scharacter/' % imdbURL_base
+        # http://www.imdb.com/character/ch%s/
+        imdbURL_character_main = imdbURL_character_base + 'ch%s/'
+        # http://www.imdb.com/company/
+        imdbURL_company_base = '%scompany/' % imdbURL_base
+        # http://www.imdb.com/company/co%s/
+        imdbURL_company_main = imdbURL_company_base + 'co%s/'
+        # http://www.imdb.com/keyword/%s/
+        imdbURL_keyword_main = imdbURL_base + 'keyword/%s/'
+        # http://www.imdb.com/chart/top
+        imdbURL_top250 = imdbURL_base + 'chart/top'
+        # http://www.imdb.com/chart/bottom
+        imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
+        # http://www.imdb.com/find?%s
+        imdbURL_find = imdbURL_base + 'find?%s'
         self.urls = dict(
             movie_base=imdbURL_movie_base,
             movie_main=imdbURL_movie_main,
@@ -727,16 +735,15 @@ class IMDbBase:
             mopID = mop.companyID
             prefix = 'company'
         else:
-            raise IMDbError('object ' + repr(mop) + \
-                        ' is not a Movie, Person, Character or Company instance')
+            raise IMDbError('object ' + repr(mop) +
+                            ' is not a Movie, Person, Character or Company instance')
         if mopID is None:
             # XXX: enough? It's obvious that there are Characters
             # objects without characterID, so I think they should
             # just do nothing, when an i.update(character) is tried.
             if prefix == 'character':
                 return
-            raise IMDbDataAccessError( \
-                'the supplied object has null movieID, personID or companyID')
+            raise IMDbDataAccessError('supplied object has null movieID, personID or companyID')
         if mop.accessSystem == self.accessSystem:
             aSystem = self
         else:

@@ -760,21 +767,22 @@ class IMDbBase:
                 continue
             if not i:
                 continue
-            self._imdb_logger.debug('retrieving "%s" info set', i)
+            _imdb_logger.debug('retrieving "%s" info set', i)
             try:
                 method = getattr(aSystem, 'get_%s_%s' %
                                  (prefix, i.replace(' ', '_')))
             except AttributeError:
-                self._imdb_logger.error('unknown information set "%s"', i)
+                _imdb_logger.error('unknown information set "%s"', i)
                 # Keeps going.
                 method = lambda *x: {}
             try:
                 ret = method(mopID)
-            except Exception, e:
-                self._imdb_logger.critical('caught an exception retrieving ' \
-                                    'or parsing "%s" info set for mopID ' \
-                                    '"%s" (accessSystem: %s)',
-                                    i, mopID, mop.accessSystem, exc_info=True)
+            except Exception:
+                _imdb_logger.critical(
+                    'caught an exception retrieving or parsing "%s" info set'
+                    ' for mopID "%s" (accessSystem: %s)',
+                    i, mopID, mop.accessSystem, exc_info=True
+                )
                 ret = {}
                 # If requested by the user, reraise the exception.
                 if self._reraise_exceptions:
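The loop above builds the accessor name from the object prefix and the info-set name, looks it up with getattr(), and substitutes a lambda returning an empty dict when the set is unknown, so one bad info set cannot abort the rest of the update. A self-contained sketch of that dispatch (class and names invented for illustration):

    class Backend(object):
        def get_movie_plot(self, movieID):
            return {'plot': '...'}

    def fetch(backend, prefix, info_set, mopID):
        # e.g. ('movie', 'plot summary') -> 'get_movie_plot_summary'
        name = 'get_%s_%s' % (prefix, info_set.replace(' ', '_'))
        method = getattr(backend, name, lambda *x: {})  # unknown set -> {}
        return method(mopID)

    print(fetch(Backend(), 'movie', 'plot', '0133093'))  # {'plot': '...'}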
@@ -826,9 +834,7 @@ class IMDbBase:
         raise NotImplementedError('override this method')
 
     def _searchIMDb(self, kind, ton, title_kind=None):
-        """Search the IMDb akas server for the given title or name."""
-        # The Exact Primary search system has gone AWOL, so we resort
-        # to the mobile search. :-/
+        """Search the IMDb www server for the given title or name."""
         if not ton:
             return None
         ton = ton.strip('"')

@@ -935,8 +941,8 @@ class IMDbBase:
             else:
                 imdbID = aSystem.company2imdbID(build_company_name(mop))
         else:
-            raise IMDbError('object ' + repr(mop) + \
-                        ' is not a Movie, Person or Character instance')
+            raise IMDbError('object ' + repr(mop) +
+                            ' is not a Movie, Person or Character instance')
         return imdbID
 
     def get_imdbURL(self, mop):

@@ -954,8 +960,8 @@ class IMDbBase:
         elif isinstance(mop, Company.Company):
             url_firstPart = imdbURL_company_main
         else:
-            raise IMDbError('object ' + repr(mop) + \
-                        ' is not a Movie, Person, Character or Company instance')
+            raise IMDbError('object ' + repr(mop) +
+                            ' is not a Movie, Person, Character or Company instance')
         return url_firstPart % imdbID
 
     def get_special_methods(self):

@@ -32,8 +32,9 @@ LEVELS = {'debug': logging.DEBUG,
 
 imdbpyLogger = logging.getLogger('imdbpy')
 imdbpyStreamHandler = logging.StreamHandler()
-imdbpyFormatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]' \
-                                    ' %(pathname)s:%(lineno)d: %(message)s')
+imdbpyFormatter = logging.Formatter(
+    '%(asctime)s %(levelname)s [%(name)s] %(pathname)s:%(lineno)d: %(message)s'
+)
 imdbpyStreamHandler.setFormatter(imdbpyFormatter)
 imdbpyLogger.addHandler(imdbpyStreamHandler)
 
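The formatter string is only reflowed here, but the surrounding lines show the standard logging recipe: fetch a named logger, attach a StreamHandler, and give the handler a Formatter. A runnable sketch of the same setup:

    import logging

    logger = logging.getLogger('imdbpy')
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '%(asctime)s %(levelname)s [%(name)s] %(pathname)s:%(lineno)d: %(message)s'
    ))
    logger.addHandler(handler)
    logger.warning('hello')  # prints a timestamped, located warning to stderr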
@@ -269,8 +269,8 @@ for k, v in {'lt':u'<','gt':u'>','amp':u'&','quot':u'"','apos':u'\''}.items():
     everyentcharrefs[k] = v
     everyentcharrefs['#%s' % ord(v)] = v
 everyentcharrefsget = everyentcharrefs.get
-re_everyentcharrefs = re.compile('&(%s|\#160|\#\d{1,5});' %
-                                 '|'.join(map(re.escape, everyentcharrefs)))
+re_everyentcharrefs = re.compile('&(%s|\#160|\#\d{1,5});' % '|'.join(map(re.escape,
+                                 everyentcharrefs)))
 re_everyentcharrefssub = re_everyentcharrefs.sub
 
 def _replAllXMLRef(match):

@@ -408,7 +408,7 @@ def _valueWithType(tag, tagValue):
 
 # Extra tags to get (if values were not already read from title/name).
 _titleTags = ('imdbindex', 'kind', 'year')
-_nameTags = ('imdbindex')
+_nameTags = ('imdbindex',)
 _companyTags = ('imdbindex', 'country')
 
 def parseTags(tag, _topLevel=True, _as=None, _infoset2keys=None,
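The _nameTags fix above is easy to miss: ('imdbindex') is just a parenthesized string, so iterating over it yields single characters, while the trailing comma makes a real one-element tuple. A quick demonstration:

    not_a_tuple = ('imdbindex')   # parentheses only group: this is a str
    real_tuple = ('imdbindex',)   # the comma makes the tuple

    print(type(not_a_tuple).__name__)  # str
    print(list(not_a_tuple)[:3])       # ['i', 'm', 'd']
    print(list(real_tuple))            # ['imdbindex']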
@@ -7,7 +7,7 @@ the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "http" or "web"
 or "html" (this is the default).
 
-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2017 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify

@@ -26,6 +26,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 """
 
 import sys
+import ssl
 import socket
 import logging
 from urllib import FancyURLopener, quote_plus

@@ -68,8 +69,8 @@ class _ModuleProxy:
         """Initialize a proxy for the given module; defaultKeys, if set,
         muste be a dictionary of values to set for instanced objects."""
         if oldParsers or fallBackToNew:
-            _aux_logger.warn('The old set of parsers was removed; falling ' \
-                             'back to the new parsers.')
+            _aux_logger.warn('The old set of parsers was removed;'
+                             ' falling back to the new parsers.')
         self.useModule = useModule
         if defaultKeys is None:
             defaultKeys = {}

@@ -142,6 +143,7 @@ class IMDbURLopener(FancyURLopener):
 
     def __init__(self, *args, **kwargs):
         self._last_url = u''
+        kwargs['context'] = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         FancyURLopener.__init__(self, *args, **kwargs)
         # Headers to add to every request.
         # XXX: IMDb's web server doesn't like urllib-based programs,
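The added kwargs['context'] line hands the opener an SSLContext so HTTPS requests can be made; despite its name, ssl.PROTOCOL_SSLv23 negotiates the highest TLS/SSL version both peers support. A hedged sketch of the same idea (note: urllib's URLopener family only grew a context argument in Python 2.7.9, so this assumes a recent 2.7 interpreter):

    import ssl
    from urllib import FancyURLopener  # Python 2 only

    ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)  # negotiate best available version
    opener = FancyURLopener(context=ctx)       # context kwarg: Python 2.7.9+
    # handle = opener.open('https://www.imdb.com/')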
@@ -211,9 +213,9 @@ class IMDbURLopener(FancyURLopener):
         if server_encode is None and content:
             begin_h = content.find('text/html; charset=')
             if begin_h != -1:
-                end_h = content[19+begin_h:].find('"')
+                end_h = content[19 + begin_h:].find('"')
                 if end_h != -1:
-                    server_encode = content[19+begin_h:19+begin_h+end_h]
+                    server_encode = content[19 + begin_h:19 + begin_h + end_h]
         if server_encode:
             try:
                 if lookup(server_encode):

@@ -237,9 +239,10 @@ class IMDbURLopener(FancyURLopener):
         if encode is None:
             encode = 'latin_1'
             # The detection of the encoding is error prone...
-            self._logger.warn('Unable to detect the encoding of the retrieved '
-                        'page [%s]; falling back to default latin1.', encode)
-        ##print unicode(content, encode, 'replace').encode('utf8')
+            self._logger.warn('Unable to detect the encoding of the retrieved page [%s];'
+                              ' falling back to default utf8.', encode)
         if isinstance(content, unicode):
             return content
         return unicode(content, encode, 'replace')
 
     def http_error_default(self, url, fp, errcode, errmsg, headers):
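The two hunks above touch the charset handling: the raw body is scanned for a 'text/html; charset=' declaration (the 19 in the slices is the length of that marker), and when nothing usable is found the body is decoded with a fallback codec and 'replace' so undecodable bytes cannot raise. A sketch of that logic (Python 2; the helper name is invented):

    def sniff_charset(content, default='latin_1'):
        marker = 'text/html; charset='
        begin = content.find(marker)
        if begin != -1:
            start = begin + len(marker)  # same offset as 19 + begin_h
            end = content[start:].find('"')
            if end != -1:
                return content[start:start + end]
        return default

    raw = '<meta content="text/html; charset=utf-8">'
    print(sniff_charset(raw))                                # utf-8
    print(unicode('caf\xe9', sniff_charset(''), 'replace'))  # decoded via latin_1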
@@ -288,8 +291,8 @@ class IMDbHTTPAccessSystem(IMDbBase):
         self._getRefs = True
         self._mdparse = False
         if isThin:
-            self._http_logger.warn('"httpThin" access system no longer ' +
-                                   'supported; "http" used automatically', exc_info=False)
+            self._http_logger.warn('"httpThin" access system no longer supported;'
+                                   ' "http" used automatically', exc_info=False)
             self.isThin = 0
             if self.accessSystem in ('httpThin', 'webThin', 'htmlThin'):
                 self.accessSystem = 'http'

@@ -503,7 +506,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
         return self.smProxy.search_movie_parser.parse(cont, results=results)['data']
 
     def get_movie_main(self, movieID):
-        cont = self._retrieve(self.urls['movie_main'] % movieID + 'combined')
+        cont = self._retrieve(self.urls['movie_main'] % movieID + 'reference')
         return self.mProxy.movie_parser.parse(cont, mdparse=self._mdparse)
 
     def get_movie_full_credits(self, movieID):

@@ -811,7 +814,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
     def _search_keyword(self, keyword, results):
         # XXX: the IMDb web server seems to have some serious problem with
         # non-ascii keyword.
-        # E.g.: http://akas.imdb.com/keyword/fianc%E9/
+        # E.g.: http://www.imdb.com/keyword/fianc%E9/
         # will return a 500 Internal Server Error: Redirect Recursion.
         keyword = keyword.encode('utf8', 'ignore')
         try:
@@ -171,7 +171,7 @@ class PageElement:
         return self
 
     def _lastRecursiveChild(self):
-        "Finds the last element beneath this object to be parsed."
+        """Finds the last element beneath this object to be parsed."""
         lastChild = self
         while hasattr(lastChild, 'contents') and lastChild.contents:
             lastChild = lastChild.contents[-1]

@@ -184,7 +184,7 @@ class PageElement:
             newChild = NavigableString(newChild)
 
         position = min(position, len(self.contents))
-        if hasattr(newChild, 'parent') and newChild.parent != None:
+        if hasattr(newChild, 'parent') and newChild.parent is not None:
             # We're 'inserting' an element that's already one
             # of this object's children.
             if newChild.parent == self:

@@ -323,7 +323,7 @@ class PageElement:
         return r
 
     def _findAll(self, name, attrs, text, limit, generator, **kwargs):
-        "Iterates over a generator looking for things that match."
+        """Iterates over a generator looking for things that match."""
 
         if isinstance(name, SoupStrainer):
             strainer = name

@@ -415,7 +415,7 @@ class NavigableString(unicode, PageElement):
         return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
 
     def __getnewargs__(self):
-        return (NavigableString.__str__(self),)
+        return NavigableString.__str__(self),
 
     def __getattr__(self, attr):
         """text.string gives you text. This is for backwards

@@ -460,7 +460,7 @@ class Tag(PageElement):
     """Represents a found HTML tag with its attributes and contents."""
 
     def _invert(h):
-        "Cheap function to invert a hash."
+        """Cheap function to invert a hash."""
         i = {}
         for k,v in h.items():
             i[v] = k

@@ -501,14 +501,14 @@ class Tag(PageElement):
 
     def __init__(self, parser, name, attrs=None, parent=None,
                  previous=None):
-        "Basic constructor."
+        """Basic constructor."""
 
         # We don't actually store the parser object: that lets extracted
         # chunks be garbage-collected
         self.parserClass = parser.__class__
         self.isSelfClosing = parser.isSelfClosingTag(name)
         self.name = name
-        if attrs == None:
+        if attrs is None:
             attrs = []
         self.attrs = attrs
         self.contents = []

@@ -541,18 +541,18 @@ class Tag(PageElement):
         return self._getAttrMap()[key]
 
     def __iter__(self):
-        "Iterating over a tag iterates over its contents."
+        """Iterating over a tag iterates over its contents."""
         return iter(self.contents)
 
     def __len__(self):
-        "The length of a tag is the length of its list of contents."
+        """The length of a tag is the length of its list of contents."""
         return len(self.contents)
 
     def __contains__(self, x):
         return x in self.contents
 
     def __nonzero__(self):
-        "A tag is non-None even if it has no contents."
+        """A tag is non-None even if it has no contents."""
         return True
 
     def __setitem__(self, key, value):

@@ -570,7 +570,7 @@ class Tag(PageElement):
         self._getAttrMap()[key] = value
 
     def __delitem__(self, key):
-        "Deleting tag[key] deletes all 'key' attributes for the tag."
+        """Deleting tag[key] deletes all 'key' attributes for the tag."""
         for item in self.attrs:
             if item[0] == key:
                 self.attrs.remove(item)

@@ -911,7 +911,7 @@ class SoupStrainer:
         #print "Matching %s against %s" % (markup, matchAgainst)
         result = False
         if matchAgainst == True and type(matchAgainst) == types.BooleanType:
-            result = markup != None
+            result = markup is not None
         elif callable(matchAgainst):
             result = matchAgainst(markup)
         else:

@@ -1130,7 +1130,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
             # Python installations can't copy regexes. If anyone
             # was relying on the existence of markupMassage, this
             # might cause problems.
-            del(self.markupMassage)
+            del self.markupMassage
         self.reset()
 
         SGMLParser.feed(self, markup)

@@ -1253,7 +1253,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
         """
 
         nestingResetTriggers = self.NESTABLE_TAGS.get(name)
-        isNestable = nestingResetTriggers != None
+        isNestable = nestingResetTriggers is not None
         isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
         popTo = None
         inclusive = True

@@ -1264,9 +1264,9 @@ class BeautifulStoneSoup(Tag, SGMLParser):
                 #last occurance.
                 popTo = name
                 break
-            if (nestingResetTriggers != None
+            if (nestingResetTriggers is not None
                 and p.name in nestingResetTriggers) \
-                or (nestingResetTriggers == None and isResetNesting
+                or (nestingResetTriggers is None and isResetNesting
                     and self.RESET_NESTING_TAGS.has_key(p.name)):
 
                 #If we encounter one of the nesting reset triggers

@@ -1342,11 +1342,11 @@ class BeautifulStoneSoup(Tag, SGMLParser):
         self._toStringSubclass(text, ProcessingInstruction)
 
     def handle_comment(self, text):
-        "Handle comments as Comment objects."
+        """Handle comments as Comment objects."""
         self._toStringSubclass(text, Comment)
 
     def handle_charref(self, ref):
-        "Handle character references as data."
+        """Handle character references as data."""
         if self.convertEntities:
             data = unichr(int(ref))
         else:

@@ -1397,7 +1397,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
         self.handle_data(data)
 
     def handle_decl(self, data):
-        "Handle DOCTYPEs and the like as Declaration objects."
+        """Handle DOCTYPEs and the like as Declaration objects."""
         self._toStringSubclass(data, Declaration)
 
     def parse_declaration(self, i):

@@ -1793,8 +1793,8 @@ class UnicodeDammit:
         return self.markup
 
     def _toUnicode(self, data, encoding):
-        '''Given a string and its encoding, decodes the string into Unicode.
-        %encoding is a string recognized by encodings.aliases'''
+        """Given a string and its encoding, decodes the string into Unicode.
+        %encoding is a string recognized by encodings.aliases"""
 
         # strip Byte Order Mark (if present)
         if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
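The docstring change above sits at the start of UnicodeDammit._toUnicode, which strips a byte order mark before decoding; a BOM is a fixed byte signature that identifies the encoding of the data that follows. A small sketch of BOM detection with the codecs constants (helper name invented):

    import codecs

    def strip_bom(data):
        for sig, enc in ((codecs.BOM_UTF8, 'utf-8'),
                         (codecs.BOM_UTF16_BE, 'utf-16-be'),
                         (codecs.BOM_UTF16_LE, 'utf-16-le')):
            if data.startswith(sig):
                return data[len(sig):], enc
        return data, None

    print(strip_bom(codecs.BOM_UTF8 + 'hi'))  # ('hi', 'utf-8')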
@@ -67,7 +67,7 @@ def tokenize_path(path):
         if path[i] == '/':
             if i > 0:
                 separators.append((last_position, i))
-            if (path[i+1] == '/'):
+            if path[i+1] == '/':
                 last_position = i
                 i = i + 1
             else:
@@ -2,7 +2,7 @@
 parser.http.characterParser module (imdb package).
 
 This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a character.
+the IMDb pages on the www.imdb.com server about a character.
 E.g., for "Jesse James" the referred pages would be:
     main details: http://www.imdb.com/character/ch0000001/
     biography: http://www.imdb.com/character/ch0000001/bio

@@ -37,7 +37,7 @@ _personIDs = re.compile(r'/name/nm([0-9]{7})')
 class DOMHTMLCharacterMaindetailsParser(DOMHTMLMaindetailsParser):
     """Parser for the "filmography" page of a given character.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -101,7 +101,7 @@ class DOMHTMLCharacterMaindetailsParser(DOMHTMLMaindetailsParser):
 class DOMHTMLCharacterBioParser(DOMParserBase):
     """Parser for the "biography" page of a given character.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -146,7 +146,7 @@ class DOMHTMLCharacterBioParser(DOMParserBase):
 class DOMHTMLCharacterQuotesParser(DOMParserBase):
     """Parser for the "quotes" page of a given character.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:
@@ -2,12 +2,12 @@
 parser.http.companyParser module (imdb package).
 
 This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a company.
+the IMDb pages on the www.imdb.com server about a company.
 E.g., for "Columbia Pictures [us]" the referred page would be:
-    main details: http://akas.imdb.com/company/co0071509/
+    main details: http://www.imdb.com/company/co0071509/
 
-Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
-               2008 H. Turgut Uyar <uyar@tekir.org>
+Copyright 2008-2017 Davide Alberani <da@erlug.linux.it>
+          2008-2017 H. Turgut Uyar <uyar@tekir.org>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

@@ -34,7 +34,7 @@ from imdb.utils import analyze_company_name
 class DOMCompanyParser(DOMParserBase):
     """Parser for the main page of a given company.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -44,31 +44,38 @@ class DOMCompanyParser(DOMParserBase):
     _containsObjects = True
 
     extractors = [
-        Extractor(label='name',
-                  path="//title",
-                  attrs=Attribute(key='name',
-                                  path="./text()",
-                                  postprocess=lambda x: \
-                                  analyze_company_name(x, stripNotes=True))),
+        Extractor(
+            label='name',
+            path="//h1/span[@class='display-title ']",  # note the extra trailing space in class
+            attrs=Attribute(
+                key='name',
+                path="./text()",
+                postprocess=lambda x: analyze_company_name(x, stripNotes=True)
+            )
+        ),
 
-        Extractor(label='filmography',
-                  group="//b/a[@name]",
-                  group_key="./text()",
-                  group_key_normalize=lambda x: x.lower(),
-                  path="../following-sibling::ol[1]/li",
-                  attrs=Attribute(key=None,
-                                  multi=True,
-                                  path={
-                                      'link': "./a[1]/@href",
-                                      'title': "./a[1]/text()",
-                                      'year': "./text()[1]"
-                                  },
-                                  postprocess=lambda x:
-                                  build_movie(u'%s %s' % \
-                                  (x.get('title'), x.get('year').strip()),
-                                  movieID=analyze_imdbid(x.get('link') or u''),
-                                  _parsingCompany=True))),
-    ]
+        Extractor(
+            label='filmography',
+            group="//b/a[@name]",
+            group_key="./text()",
+            group_key_normalize=lambda x: x.lower(),
+            path="../following-sibling::ol[1]/li",
+            attrs=Attribute(
+                key=None,
+                multi=True,
+                path={
+                    'link': "./a[1]/@href",
+                    'title': "./a[1]/text()",
+                    'year': "./text()[1]"
+                },
+                postprocess=lambda x: build_movie(
+                    '%s %s' % (x.get('title'), x.get('year').strip()),
+                    movieID=analyze_imdbid(x.get('link') or u''),
+                    _parsingCompany=True
+                )
+            )
+        )
+    ]
 
     preprocessors = [
         (re.compile('(<b><a name=)', re.I), r'</p>\1')
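The restructured 'name' extractor now reads the company name from an h1 span whose class attribute is literally 'display-title ' (with a trailing space) instead of the page title. For a sense of what such an XPath matches, here is a hedged lxml sketch; the markup is invented:

    from lxml import html

    page = html.fromstring(
        '<html><body><h1>'
        '<span class="display-title ">Columbia Pictures [us]</span>'
        '</h1></body></html>'
    )
    # @class must match exactly, including the trailing space
    print(page.xpath("//h1/span[@class='display-title ']/text()"))
    # ['Columbia Pictures [us]']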
File diff suppressed because it is too large
@@ -2,10 +2,10 @@
 parser.http.personParser module (imdb package).
 
 This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a person.
+the IMDb pages on the www.imdb.com server about a person.
 E.g., for "Mel Gibson" the referred pages would be:
-    categorized: http://akas.imdb.com/name/nm0000154/maindetails
-    biography: http://akas.imdb.com/name/nm0000154/bio
+    categorized: http://www.imdb.com/name/nm0000154/maindetails
+    biography: http://www.imdb.com/name/nm0000154/bio
 ...and so on...
 
 Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>

@@ -52,7 +52,7 @@ def build_date(date):
 class DOMHTMLMaindetailsParser(DOMParserBase):
     """Parser for the "categorized" (maindetails) page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -192,7 +192,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
 class DOMHTMLBioParser(DOMParserBase):
     """Parser for the "biography" page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -225,92 +225,157 @@ class DOMHTMLBioParser(DOMParserBase):
             # TODO: check if this slicing is always correct
             postprocess=lambda x: u''.join(x).strip()[2:])]
     extractors = [
-        Extractor(label='headshot',
-                  path="//a[@name='headshot']",
-                  attrs=Attribute(key='headshot',
-                                  path="./img/@src")),
-        Extractor(label='birth info',
-                  path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
-                  attrs=_birth_attrs),
-        Extractor(label='death info',
-                  path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
-                  attrs=_death_attrs),
-        Extractor(label='nick names',
-                  path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
-                  attrs=Attribute(key='nick names',
-                                  path="./text()",
-                                  joiner='|',
-                                  postprocess=lambda x: [n.strip().replace(' (',
-                                  '::(', 1) for n in x.split('|')
-                                  if n.strip()])),
-        Extractor(label='birth name',
-                  path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
-                  attrs=Attribute(key='birth name',
-                                  path="./text()",
-                                  postprocess=lambda x: canonicalName(x.strip()))),
-        Extractor(label='height',
-                  path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
-                  attrs=Attribute(key='height',
-                                  path="./text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='mini biography',
-                  path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
-                  attrs=Attribute(key='mini biography',
-                                  multi=True,
-                                  path={
-                                      'bio': ".//text()",
-                                      'by': ".//a[@name='ba']//text()"
-                                  },
-                                  postprocess=lambda x: "%s::%s" % \
-                                  ((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
-                                  (x.get('by') or u'').strip() or u'Anonymous'))),
-        Extractor(label='spouse',
-                  path="//div[h5='Spouse']/table/tr",
-                  attrs=Attribute(key='spouse',
-                                  multi=True,
-                                  path={
-                                      'name': "./td[1]//text()",
-                                      'info': "./td[2]//text()"
-                                  },
-                                  postprocess=lambda x: ("%s::%s" % \
-                                  (x.get('name').strip(),
-                                  (x.get('info') or u'').strip())).strip(':'))),
-        Extractor(label='trade mark',
-                  path="//div[h5='Trade Mark']/p",
-                  attrs=Attribute(key='trade mark',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='trivia',
-                  path="//div[h5='Trivia']/p",
-                  attrs=Attribute(key='trivia',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='quotes',
-                  path="//div[h5='Personal Quotes']/p",
-                  attrs=Attribute(key='quotes',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='salary',
-                  path="//div[h5='Salary']/table/tr",
-                  attrs=Attribute(key='salary history',
-                                  multi=True,
-                                  path={
-                                      'title': "./td[1]//text()",
-                                      'info': "./td[2]/text()",
-                                  },
-                                  postprocess=lambda x: "%s::%s" % \
-                                  (x.get('title').strip(),
-                                  x.get('info').strip()))),
-        Extractor(label='where now',
-                  path="//div[h5='Where Are They Now']/p",
-                  attrs=Attribute(key='where now',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-    ]
+        Extractor(
+            label='headshot',
+            path="//a[@name='headshot']",
+            attrs=Attribute(
+                key='headshot',
+                path="./img/@src"
+            )
+        ),
+
+        Extractor(
+            label='birth info',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Date of Birth']/following-sibling::td[1]",
+            attrs=_birth_attrs
+        ),
+
+        Extractor(
+            label='death info',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Date of Death']/following-sibling::td[1]",
+            attrs=_death_attrs
+        ),
+
+        Extractor(
+            label='nick names',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Nickenames']/following-sibling::td[1]",
+            attrs=Attribute(
+                key='nick names',
+                path="./text()",
+                joiner='|',
+                postprocess=lambda x: [n.strip().replace(' (', '::(', 1) for n in x.split('|')
+                                       if n.strip()]
+            )
+        ),
+
+        Extractor(
+            label='birth name',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Birth Name']/following-sibling::td[1]",
+            attrs=Attribute(
+                key='birth name',
+                path="./text()",
+                postprocess=lambda x: canonicalName(x.strip())
+            )
+        ),
+
+        Extractor(
+            label='height',
+            path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
+            attrs=Attribute(
+                key='height',
+                path="./text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='mini biography',
+            path="//a[@name='mini_bio']/following-sibling::"
+                 "div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
+            attrs=Attribute(
+                key='mini biography',
+                multi=True,
+                path={
+                    'bio': ".//text()",
+                    'by': ".//a[@name='ba']//text()"
+                },
+                postprocess=lambda x: "%s::%s" % (
+                    (x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
+                    (x.get('by') or u'').strip() or u'Anonymous'
+                )
+            )
+        ),
+
+        Extractor(
+            label='spouse',
+            path="//div[h5='Spouse']/table/tr",
+            attrs=Attribute(
+                key='spouse',
+                multi=True,
+                path={
+                    'name': "./td[1]//text()",
+                    'info': "./td[2]//text()"
+                },
+                postprocess=lambda x: ("%s::%s" % (
+                    x.get('name').strip(),
+                    (x.get('info') or u'').strip())).strip(':')
+            )
+        ),
+
+        Extractor(
+            label='trade mark',
+            path="//div[h5='Trade Mark']/p",
+            attrs=Attribute(
+                key='trade mark',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='trivia',
+            path="//div[h5='Trivia']/p",
+            attrs=Attribute(
+                key='trivia',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='quotes',
+            path="//div[h5='Personal Quotes']/p",
+            attrs=Attribute(
+                key='quotes',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='salary',
+            path="//div[h5='Salary']/table/tr",
+            attrs=Attribute(
+                key='salary history',
+                multi=True,
+                path={
+                    'title': "./td[1]//text()",
+                    'info': "./td[2]/text()",
+                },
+                postprocess=lambda x: "%s::%s" % (
+                    x.get('title').strip(),
+                    x.get('info').strip())
+            )
+        ),
+
+        Extractor(
+            label='where now',
+            path="//div[h5='Where Are They Now']/p",
+            attrs=Attribute(
+                key='where now',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        )
+    ]
 
     preprocessors = [
         (re.compile('(<h5>)', re.I), r'</div><div class="_imdbpy">\1'),

@@ -329,7 +394,7 @@ class DOMHTMLBioParser(DOMParserBase):
 class DOMHTMLResumeParser(DOMParserBase):
     """Parser for the "resume" page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -406,13 +471,13 @@ class DOMHTMLResumeParser(DOMParserBase):
                 continue
             if len(data[key][0]) == 3:
                 for item in data[key]:
-                    item[:] = [x for x in item if not x == None]
+                    item[:] = [x for x in item if not x is None]
                 continue
 
             if len(data[key][0]) == 2:
                 new_key = {}
                 for item in data[key]:
-                    if item[0] == None:
+                    if item[0] is None:
                         continue
                     if ':' in item[0]:
                         if item[1].replace(item[0], '')[1:].strip() == '':

@@ -422,15 +487,14 @@ class DOMHTMLResumeParser(DOMParserBase):
                     new_key[item[0]] = item[1]
                 data[key] = new_key
 
-        new_data = {}
-        new_data['resume'] = data
+        new_data = {'resume': data}
         return new_data
 
 
 class DOMHTMLOtherWorksParser(DOMParserBase):
     """Parser for the "other works" and "agent" pages of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -466,7 +530,7 @@ def _build_episode(link, title, minfo, role, roleA, roleAID):
     minidx = minfo.find(' -')
     # Sometimes, for some unknown reason, the role is left in minfo.
     if minidx != -1:
-        slfRole = minfo[minidx+3:].lstrip()
+        slfRole = minfo[minidx + 3:].lstrip()
         minfo = minfo[:minidx].rstrip()
     if slfRole.endswith(')'):
         commidx = slfRole.rfind('(')

@@ -504,7 +568,7 @@ def _build_episode(link, title, minfo, role, roleA, roleAID):
 class DOMHTMLSeriesParser(DOMParserBase):
     """Parser for the "by TV series" page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -559,7 +623,7 @@ class DOMHTMLSeriesParser(DOMParserBase):
 class DOMHTMLPersonGenresParser(DOMParserBase):
     """Parser for the "by genre" and "by keywords" pages of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:
@@ -5,7 +5,7 @@ This module provides the HTMLSearchCharacterParser class (and the
 search_character_parser instance), used to parse the results of a search
 for a given character.
 E.g., when searching for the name "Jesse James", the parsed page would be:
-    http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
+    http://www.imdb.com/find?s=ch;mx=20;q=Jesse+James
 
 Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>

@@ -5,7 +5,7 @@ This module provides the HTMLSearchCompanyParser class (and the
 search_company_parser instance), used to parse the results of a search
 for a given company.
 E.g., when searching for the name "Columbia Pictures", the parsed page would be:
-    http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
+    http://www.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
 
 Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>

@@ -46,22 +46,29 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
     _titleBuilder = lambda self, x: build_company_name(x)
     _linkPrefix = '/company/co'
 
-    _attrs = [Attribute(key='data',
-                        multi=True,
-                        path={
-                            'link': "./a[1]/@href",
-                            'name': "./a[1]/text()",
-                            'notes': "./text()[1]"
-                        },
-                        postprocess=lambda x: (
-                            analyze_imdbid(x.get('link')),
-                            analyze_company_name(x.get('name')+(x.get('notes')
-                                                 or u''), stripNotes=True)
-                        ))]
-    extractors = [Extractor(label='search',
-                            path="//td[@class='result_text']/a[starts-with(@href, " \
-                                 "'/company/co')]/..",
-                            attrs=_attrs)]
+    _attrs = [
+        Attribute(
+            key='data',
+            multi=True,
+            path={
+                'link': "./a[1]/@href",
+                'name': "./a[1]/text()",
+                'notes': "./text()[1]"
+            },
+            postprocess=lambda x: (
+                analyze_imdbid(x.get('link')),
+                analyze_company_name(x.get('name') + (x.get('notes') or u''), stripNotes=True)
+            )
+        )
+    ]
+
+    extractors = [
+        Extractor(
+            label='search',
+            path="//td[@class='result_text']/a[starts-with(@href, '/company/co')]/..",
+            attrs=_attrs
+        )
+    ]
 
 
 _OBJECTS = {
@@ -5,7 +5,7 @@ This module provides the HTMLSearchKeywordParser class (and the
 search_company_parser instance), used to parse the results of a search
 for a given keyword.
 E.g., when searching for the keyword "alabama", the parsed page would be:
-    http://akas.imdb.com/find?s=kw;mx=20;q=alabama
+    http://www.imdb.com/find?s=kw;mx=20;q=alabama
 
 Copyright 2009 Davide Alberani <da@erlug.linux.it>
 

@@ -6,7 +6,7 @@ search_movie_parser instance), used to parse the results of a search
 for a given title.
 E.g., for when searching for the title "the passion", the parsed
 page would be:
-    http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
+    http://www.imdb.com/find?q=the+passion&tt=on&mx=20
 
 Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>

@@ -67,7 +67,7 @@ class DOMBasicMovieParser(DOMParserBase):
             data = []
         else:
             link = data.pop('link')
-            if (link and data):
+            if link and data:
                 data = [(link, data)]
             else:
                 data = []
@@ -5,7 +5,7 @@ This module provides the HTMLSearchPersonParser class (and the
 search_person_parser instance), used to parse the results of a search
 for a given person.
 E.g., when searching for the name "Mel Gibson", the parsed page would be:
-    http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
+    http://www.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
 
 Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>
@@ -4,8 +4,8 @@ parser.http.topBottomParser module (imdb package).
 This module provides the classes (and the instances), used to parse the
 lists of top 250 and bottom 100 movies.
 E.g.:
-    http://akas.imdb.com/chart/top
-    http://akas.imdb.com/chart/bottom
+    http://www.imdb.com/chart/top
+    http://www.imdb.com/chart/bottom
 
 Copyright 2009-2015 Davide Alberani <da@erlug.linux.it>
 

@@ -31,7 +31,7 @@ from utils import DOMParserBase, Attribute, Extractor, analyze_imdbid
 class DOMHTMLTop250Parser(DOMParserBase):
     """Parser for the "top 250" page.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -42,17 +42,24 @@ class DOMHTMLTop250Parser(DOMParserBase):
     ranktext = 'top 250 rank'
 
     def _init(self):
-        self.extractors = [Extractor(label=self.label,
-                           path="//div[@id='main']//div[1]//div//table//tbody//tr",
-                           attrs=Attribute(key=None,
-                                           multi=True,
-                                           path={self.ranktext: "./td[2]//text()",
-                                                 'rating': "./td[3]//strong//text()",
-                                                 'title': "./td[2]//a//text()",
-                                                 'year': "./td[2]//span//text()",
-                                                 'movieID': "./td[2]//a/@href",
-                                                 'votes': "./td[3]//strong/@title"
-                                                 }))]
+        self.extractors = [
+            Extractor(
+                label=self.label,
+                path="//div[@id='main']//div[1]//div//table//tbody//tr",
+                attrs=Attribute(
+                    key=None,
+                    multi=True,
+                    path={
+                        self.ranktext: "./td[2]/text()",
+                        'rating': "./td[3]//strong//text()",
+                        'title': "./td[2]//a//text()",
+                        'year': "./td[2]//span//text()",
+                        'movieID': "./td[2]//a/@href",
+                        'votes': "./td[3]//strong/@title"
+                    }
+                )
+            )
+        ]
 
     def postprocess_data(self, data):
         if not data or self.label not in data:

@@ -73,9 +80,11 @@ class DOMHTMLTop250Parser(DOMParserBase):
             if theID in seenIDs:
                 continue
             seenIDs.append(theID)
-            minfo = analyze_title(d['title']+" "+d['year'])
-            try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
-            except: pass
+            minfo = analyze_title(d['title'] + ' ' + d['year'])
+            try:
+                minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
+            except:
+                pass
             if 'votes' in d:
                 try:
                     votes = d['votes'].replace(' votes','')
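The postprocessing rewrite above also splits the one-line try/except; the parse itself turns rank strings such as u'250.' into integers by dropping the dot. Isolated sketch of that conversion (function name invented):

    def parse_rank(text):
        try:
            return int(text.replace('.', ''))  # u'250.' -> 250
        except (ValueError, AttributeError):
            return None

    print(parse_rank(u'250.'))  # 250
    print(parse_rank(u'n/a'))   # None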
@@ -93,7 +102,7 @@ class DOMHTMLTop250Parser(DOMParserBase):
 class DOMHTMLBottom100Parser(DOMHTMLTop250Parser):
     """Parser for the "bottom 100" page.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server. The final result will be a
+    the www.imdb.com server. The final result will be a
     dictionary, with a key for every relevant section.
 
     Example:

@@ -35,7 +35,9 @@ from imdb.Character import Character
 
 
 # Year, imdbIndex and kind.
-re_yearKind_index = re.compile(r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)')
+re_yearKind_index = re.compile(
+    r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)'
+)
 
 # Match imdb ids in href tags
 re_imdbid = re.compile(r'(title/tt|name/nm|character/ch|company/co)([0-9]+)')
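The reflowed re_yearKind_index pattern matches a parenthesized year (digits or '?'), an optional roman-numeral index, and an optional kind marker. What it captures, demonstrated:

    import re

    re_yearKind_index = re.compile(
        r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)'
    )

    for s in ('(1999)', '(1999/II) (TV)', '(????) (VG)'):
        print(re_yearKind_index.match(s).group(1))
    # (1999)
    # (1999/II) (TV)
    # (????) (VG)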
@@ -304,7 +306,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
     elif title[-14:] == 'TV mini-series':
         title = title[:-14] + ' (mini)'
     if title and title.endswith(_defSep.rstrip()):
-        title = title[:-len(_defSep)+1]
+        title = title[:-len(_defSep) + 1]
     # Try to understand where the movie title ends.
     while True:
         if year:

@@ -320,18 +322,17 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
         # Try to match paired parentheses; yes: sometimes there are
         # parentheses inside comments...
         nidx = title.rfind('(')
-        while (nidx != -1 and \
-                    title[nidx:].count('(') != title[nidx:].count(')')):
+        while nidx != -1 and title[nidx:].count('(') != title[nidx:].count(')'):
             nidx = title[:nidx].rfind('(')
         # Unbalanced parentheses: stop here.
         if nidx == -1: break
         # The last item in parentheses seems to be a year: stop here.
-        first4 = title[nidx+1:nidx+5]
-        if (first4.isdigit() or first4 == '????') and \
-                title[nidx+5:nidx+6] in (')', '/'): break
+        first4 = title[nidx + 1:nidx + 5]
+        if (first4.isdigit() or first4 == '????') and title[nidx + 5:nidx + 6] in (')', '/'):
+            break
         # The last item in parentheses is a known kind: stop here.
-        if title[nidx+1:-1] in ('TV', 'V', 'mini', 'VG', 'TV movie',
-                                'TV series', 'short'): break
+        if title[nidx + 1:-1] in ('TV', 'V', 'mini', 'VG', 'TV movie', 'TV series', 'short'):
+            break
         # Else, in parentheses there are some notes.
         # XXX: should the notes in the role half be kept separated
         # from the notes in the movie title half?
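The while loop that was reflowed above walks backwards from the last '(' until the tail of the string has balanced parentheses, so a note containing nested parentheses is skipped as one unit. Standalone sketch of that scan (function name invented):

    def last_balanced_paren(title):
        # index of the '(' opening the last balanced group, or -1
        nidx = title.rfind('(')
        while nidx != -1 and title[nidx:].count('(') != title[nidx:].count(')'):
            nidx = title[:nidx].rfind('(')
        return nidx

    t = 'An Example ((VG))'
    print(t[last_balanced_paren(t):])  # ((VG)) - the scan steps past the inner '('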
@@ -471,8 +472,8 @@ class DOMParserBase(object):
                 if _gotError:
                     warnings.warn('falling back to "%s"' % mod)
                 break
-            except ImportError, e:
-                if idx+1 >= nrMods:
+            except ImportError as e:
+                if idx + 1 >= nrMods:
                     # Raise the exception, if we don't have any more
                     # options to try.
                     raise IMDbError('unable to use any parser in %s: %s' % \

@@ -786,10 +787,10 @@ class Extractor(object):
 
     def __repr__(self):
         """String representation of an Extractor object."""
-        r = '<Extractor id:%s (label=%s, path=%s, attrs=%s, group=%s, ' \
-            'group_key=%s group_key_normalize=%s)>' % (id(self),
-            self.label, self.path, repr(self.attrs), self.group,
-            self.group_key, self.group_key_normalize)
+        t = '<Extractor id:%s (label=%s, path=%s, attrs=%s, group=%s, group_key=%s' + \
+            ', group_key_normalize=%s)>'
+        r = t % (id(self), self.label, self.path, repr(self.attrs), self.group,
+                 self.group_key, self.group_key_normalize)
         return r
 
 

@@ -825,7 +826,7 @@ def _parse_ref(text, link, info):
     yearK = re_yearKind_index.match(info)
     if yearK and yearK.start() == 0:
         text += ' %s' % info[:yearK.end()]
-    return (text.replace('\n', ' '), link)
+    return text.replace('\n', ' '), link
 
 
 class GatherRefs(DOMParserBase):
@@ -687,7 +687,7 @@ class IMDbSqlAccessSystem(IMDbBase):
         elif isinstance(o, dict):
             for value in o.values():
                 self._findRefs(value, trefs, nrefs)
-        return (trefs, nrefs)
+        return trefs, nrefs
 
     def _extractRefs(self, o):
         """Scan for titles or names references in strings."""

@@ -702,7 +702,7 @@ class IMDbSqlAccessSystem(IMDbBase):
                 "imdb.parser.sql.IMDbSqlAccessSystem; "
                 "if it's not a recursion limit exceeded and we're not "
                 "running in a Symbian environment, it's a bug:\n%s" % e)
-        return (trefs, nrefs)
+        return trefs, nrefs
 
     def _changeAKAencoding(self, akanotes, akatitle):
         """Return akatitle in the correct charset, as specified in

@@ -437,11 +437,13 @@ def ISNULL(x):
     """Emulate SQLObject's ISNULL."""
     # XXX: Should we use null()? Can null() be a global instance?
     # XXX: Is it safe to test None with the == operator, in this case?
-    return x == None
+    return x is None
 
+
 def ISNOTNULL(x):
     """Emulate SQLObject's ISNOTNULL."""
-    return x != None
+    return x is not None
 
+
 def CONTAINSSTRING(expr, pattern):
     """Emulate SQLObject's CONTAINSSTRING."""
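Switching these helpers from == to identity tests is more than style: they emulate SQLObject operators, and == can be overloaded (SQLObject column expressions do exactly that), so `is None` is the only reliable null check. The pitfall, demonstrated with an invented class:

    class Column(object):
        def __eq__(self, other):
            return 'col = %r' % other  # overloaded: builds SQL text, not a bool

    c = Column()
    print(c == None)  # 'col = None' -> a truthy string, not a real comparison
    print(c is None)  # False: identity can never be overloaded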
|
@@ -122,53 +122,80 @@ class DBTable(object):


 # Default values to insert in some tables: {'column': (list, of, values, ...)}
-kindTypeDefs = {'kind': ('movie', 'tv series', 'tv movie', 'video movie',
-    'tv mini series', 'video game', 'episode')}
-companyTypeDefs = {'kind': ('distributors', 'production companies',
-    'special effects companies', 'miscellaneous companies')}
-infoTypeDefs = {'info': ('runtimes', 'color info', 'genres', 'languages',
-    'certificates', 'sound mix', 'tech info', 'countries', 'taglines',
-    'keywords', 'alternate versions', 'crazy credits', 'goofs',
-    'soundtrack', 'quotes', 'release dates', 'trivia', 'locations',
-    'mini biography', 'birth notes', 'birth date', 'height',
-    'death date', 'spouse', 'other works', 'birth name',
-    'salary history', 'nick names', 'books', 'agent address',
-    'biographical movies', 'portrayed in', 'where now', 'trade mark',
-    'interviews', 'article', 'magazine cover photo', 'pictorial',
-    'death notes', 'LD disc format', 'LD year', 'LD digital sound',
-    'LD official retail price', 'LD frequency response', 'LD pressing plant',
-    'LD length', 'LD language', 'LD review', 'LD spaciality', 'LD release date',
-    'LD production country', 'LD contrast', 'LD color rendition',
-    'LD picture format', 'LD video noise', 'LD video artifacts',
-    'LD release country', 'LD sharpness', 'LD dynamic range',
-    'LD audio noise', 'LD color information', 'LD group genre',
-    'LD quality program', 'LD close captions-teletext-ld-g',
-    'LD category', 'LD analog left', 'LD certification',
-    'LD audio quality', 'LD video quality', 'LD aspect ratio',
-    'LD analog right', 'LD additional information',
-    'LD number of chapter stops', 'LD dialogue intellegibility',
-    'LD disc size', 'LD master format', 'LD subtitles',
-    'LD status of availablility', 'LD quality of source',
-    'LD number of sides', 'LD video standard', 'LD supplement',
-    'LD original title', 'LD sound encoding', 'LD number', 'LD label',
-    'LD catalog number', 'LD laserdisc title', 'screenplay-teleplay',
-    'novel', 'adaption', 'book', 'production process protocol',
-    'printed media reviews', 'essays', 'other literature', 'mpaa',
-    'plot', 'votes distribution', 'votes', 'rating',
-    'production dates', 'copyright holder', 'filming dates', 'budget',
-    'weekend gross', 'gross', 'opening weekend', 'rentals',
-    'admissions', 'studios', 'top 250 rank', 'bottom 10 rank')}
-compCastTypeDefs = {'kind': ('cast', 'crew', 'complete', 'complete+verified')}
-linkTypeDefs = {'link': ('follows', 'followed by', 'remake of', 'remade as',
-    'references', 'referenced in', 'spoofs', 'spoofed in',
-    'features', 'featured in', 'spin off from', 'spin off',
-    'version of', 'similar to', 'edited into',
-    'edited from', 'alternate language version of',
-    'unknown link')}
-roleTypeDefs = {'role': ('actor', 'actress', 'producer', 'writer',
-    'cinematographer', 'composer', 'costume designer',
-    'director', 'editor', 'miscellaneous crew',
-    'production designer', 'guest')}
+kindTypeDefs = {
+    'kind': (
+        'movie', 'tv series', 'tv movie', 'video movie',
+        'tv mini series', 'video game', 'episode', 'short', 'tv short'
+    )
+}
+
+companyTypeDefs = {
+    'kind': (
+        'distributors', 'production companies',
+        'special effects companies', 'miscellaneous companies'
+    )
+}
+
+infoTypeDefs = {
+    'info': (
+        'runtimes', 'color info', 'genres', 'languages',
+        'certificates', 'sound mix', 'tech info', 'countries', 'taglines',
+        'keywords', 'alternate versions', 'crazy credits', 'goofs',
+        'soundtrack', 'quotes', 'release dates', 'trivia', 'locations',
+        'mini biography', 'birth notes', 'birth date', 'height',
+        'death date', 'spouse', 'other works', 'birth name',
+        'salary history', 'nick names', 'books', 'agent address',
+        'biographical movies', 'portrayed in', 'where now', 'trade mark',
+        'interviews', 'article', 'magazine cover photo', 'pictorial',
+        'death notes', 'LD disc format', 'LD year', 'LD digital sound',
+        'LD official retail price', 'LD frequency response', 'LD pressing plant',
+        'LD length', 'LD language', 'LD review', 'LD spaciality', 'LD release date',
+        'LD production country', 'LD contrast', 'LD color rendition',
+        'LD picture format', 'LD video noise', 'LD video artifacts',
+        'LD release country', 'LD sharpness', 'LD dynamic range',
+        'LD audio noise', 'LD color information', 'LD group genre',
+        'LD quality program', 'LD close captions-teletext-ld-g',
+        'LD category', 'LD analog left', 'LD certification',
+        'LD audio quality', 'LD video quality', 'LD aspect ratio',
+        'LD analog right', 'LD additional information',
+        'LD number of chapter stops', 'LD dialogue intellegibility',
+        'LD disc size', 'LD master format', 'LD subtitles',
+        'LD status of availablility', 'LD quality of source',
+        'LD number of sides', 'LD video standard', 'LD supplement',
+        'LD original title', 'LD sound encoding', 'LD number', 'LD label',
+        'LD catalog number', 'LD laserdisc title', 'screenplay-teleplay',
+        'novel', 'adaption', 'book', 'production process protocol',
+        'printed media reviews', 'essays', 'other literature', 'mpaa',
+        'plot', 'votes distribution', 'votes', 'rating',
+        'production dates', 'copyright holder', 'filming dates', 'budget',
+        'weekend gross', 'gross', 'opening weekend', 'rentals',
+        'admissions', 'studios', 'top 250 rank', 'bottom 10 rank'
+    )
+}
+
+compCastTypeDefs = {
+    'kind': ('cast', 'crew', 'complete', 'complete+verified')
+}
+
+linkTypeDefs = {
+    'link': (
+        'follows', 'followed by', 'remake of', 'remade as',
+        'references', 'referenced in', 'spoofs', 'spoofed in',
+        'features', 'featured in', 'spin off from', 'spin off',
+        'version of', 'similar to', 'edited into',
+        'edited from', 'alternate language version of',
+        'unknown link'
+    )
+}
+
+roleTypeDefs = {
+    'role': (
+        'actor', 'actress', 'producer', 'writer',
+        'cinematographer', 'composer', 'costume designer',
+        'director', 'editor', 'miscellaneous crew',
+        'production designer', 'guest'
+    )
+}

 # Schema of tables in our database.
 # XXX: Foreign keys can be used to create constrains between tables,
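Beyond the reformatting, kindTypeDefs gains the new 'short' and 'tv short' kinds. Each {'column': (values, ...)} mapping seeds one lookup table; a hedged sketch of how such a mapping expands into rows (the helper name is invented for illustration, not the project's loader):

    def iter_default_rows(defaults):
        # Expand {'column': (v1, v2, ...)} into one (column, value) pair per row.
        for column, values in defaults.items():
            for value in values:
                yield column, value

    kindTypeDefs = {'kind': ('movie', 'tv series', 'short', 'tv short')}
    for column, value in iter_default_rows(kindTypeDefs):
        print("INSERT INTO kind_type (%s) VALUES ('%s');" % (column, value))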
@@ -186,7 +213,7 @@ DB_SCHEMA = [
     # the alternateID attribute here will be ignored by SQLAlchemy.
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('imdbID', INTCOL, default=None, index='idx_imdb_id'),
     DBCol('gender', STRINGCOL, length=1, default=None),
     DBCol('namePcodeCf', STRINGCOL, length=5, default=None,
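This hunk and the five that follow apply one change: imdbIndex (and, in one table, countryCode) switches from UNICODECOL to STRINGCOL. That is presumably safe because imdbIndex only ever carries an ASCII roman-numeral disambiguator such as 'I' or 'XIV', so a plain 12-character string column suffices; a quick check of that assumption (not project code):

    import re

    roman = re.compile(r'^[IVXLCDM]+$')
    for value in ('I', 'II', 'XIV', 'not-roman'):
        print('%s -> %s' % (value, bool(roman.match(value))))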
@@ -204,7 +231,7 @@ DB_SCHEMA = [
     # from namePcodeNf.
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('imdbID', INTCOL, default=None),
     DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
           index='idx_pcodenf'),
@@ -218,7 +245,7 @@ DB_SCHEMA = [
     # namePcodeSf is the soundex of the name plus the country code.
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
-    DBCol('countryCode', UNICODECOL, length=255, default=None),
+    DBCol('countryCode', STRINGCOL, length=255, default=None),
     DBCol('imdbID', INTCOL, default=None),
     DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
           index='idx_pcodenf'),
@@ -237,7 +264,7 @@ DB_SCHEMA = [
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('title', UNICODECOL, notNone=True,
           index='idx_title', indexLen=10),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'),
     DBCol('productionYear', INTCOL, default=None),
     DBCol('imdbID', INTCOL, default=None, index="idx_imdb_id"),
@@ -264,7 +291,7 @@ DB_SCHEMA = [
     DBCol('personID', INTCOL, notNone=True, index='idx_person',
           foreignKey='Name'),
     DBCol('name', UNICODECOL, notNone=True),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('namePcodeCf', STRINGCOL, length=5, default=None,
           index='idx_pcodecf'),
     DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
@@ -291,7 +318,7 @@ DB_SCHEMA = [
     DBCol('movieID', INTCOL, notNone=True, index='idx_movieid',
           foreignKey='Title'),
     DBCol('title', UNICODECOL, notNone=True),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'),
     DBCol('productionYear', INTCOL, default=None),
     DBCol('phoneticCode', STRINGCOL, length=5, default=None,
@@ -42,8 +42,22 @@ _utils_logger = logging.getLogger('imdbpy.utils')
 # and year of release.
 # XXX: probably L, C, D and M are far too much! ;-)
 re_year_index = re.compile(r'\(([0-9\?]{4}(/[IVXLCDM]+)?)\)')
 re_extended_year_index = re.compile(r'\((TV episode|TV Series|TV mini-series|TV|Video|Video Game)? ?((?:[0-9\?]{4})(?:-[0-9\?]{4})?)(?:/([IVXLCDM]+)?)?\)')
 re_remove_kind = re.compile(r'\((TV episode|TV Series|TV mini-series|TV|Video|Video Game)? ?')
+re_m_episode = re.compile(r'\(TV Episode\)\s+-\s+', re.I)
+re_m_series = re.compile(r'Season\s+\d+\s+\|\s+Episode\s+\d+\s+-', re.I)
+re_m_imdbIndex = re.compile(r'\(([IVXLCDM]+)\)')
+re_m_kind = re.compile(
+    r'\((TV episode|TV Series|TV mini-series|mini|TV|Video|Video Game|VG|Short|TV Movie|TV Short|V)\)',
+    re.I)
+
+KIND_MAP = {
+    'tv': 'tv movie',
+    'tv episode': 'episode',
+    'v': 'video movie',
+    'video': 'video movie',
+    'vg': 'video game',
+    'mini': 'tv mini series',
+    'tv mini-series': 'tv mini series'
+}

 # Match only the imdbIndex (for name strings).
 re_index = re.compile(r'^\(([IVXLCDM]+)\)$')
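To see how the new patterns cooperate, here is a hedged walk-through (regex and mapping copied from the hunk above; the title and expected prints are illustrative): re_m_kind extracts the parenthesised kind marker, and KIND_MAP normalizes its spelling to the canonical kind names.

    import re

    re_m_kind = re.compile(
        r'\((TV episode|TV Series|TV mini-series|mini|TV|Video|Video Game|VG|Short|TV Movie|TV Short|V)\)',
        re.I)
    KIND_MAP = {'tv': 'tv movie', 'tv episode': 'episode', 'v': 'video movie',
                'video': 'video movie', 'vg': 'video game',
                'mini': 'tv mini series', 'tv mini-series': 'tv mini series'}

    title = 'Troopers (Video) (1997)'
    detected = re_m_kind.findall(title)
    kind = KIND_MAP.get(detected[-1].lower(), detected[-1].lower())
    print(kind)                               # video movie
    print(re_m_kind.sub('', title).strip())   # 'Troopers  (1997)' (year parsed later)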
@@ -283,13 +297,6 @@ def _split_series_episode(title):
             # that means this is an episode title, as returned by
             # the web server.
             series_title = title[:second_quot]
-        ##elif episode_or_year[-1:] == '}':
-        ##    # Title of the episode, as in the plain text data files.
-        ##    begin_eps = episode_or_year.find('{')
-        ##    if begin_eps == -1: return series_title, episode_or_year
-        ##    series_title = title[:second_quot+begin_eps].rstrip()
-        ##    # episode_or_year is returned with the {...}
-        ##    episode_or_year = episode_or_year[begin_eps:]
     return series_title, episode_or_year


@@ -383,65 +390,24 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
     # tv mini series: 5,497
     # video game: 5,490
     # More up-to-date statistics: http://us.imdb.com/database_statistics
-    if title.endswith('(TV)'):
-        kind = u'tv movie'
-        title = title[:-4].rstrip()
-    elif title.endswith('(TV Movie)'):
-        kind = u'tv movie'
-        title = title[:-10].rstrip()
-    elif title.endswith('(V)'):
-        kind = u'video movie'
-        title = title[:-3].rstrip()
-    elif title.lower().endswith('(video)'):
-        kind = u'video movie'
-        title = title[:-7].rstrip()
-    elif title.endswith('(TV Short)'):
-        kind = u'tv short'
-        title = title[:-10].rstrip()
-    elif title.endswith('(TV Mini-Series)'):
-        kind = u'tv mini series'
-        title = title[:-16].rstrip()
-    elif title.endswith('(mini)'):
-        kind = u'tv mini series'
-        title = title[:-6].rstrip()
-    elif title.endswith('(VG)'):
-        kind = u'video game'
-        title = title[:-4].rstrip()
-    elif title.endswith('(Video Game)'):
-        kind = u'video game'
-        title = title[:-12].rstrip()
-    elif title.endswith('(TV Series)'):
-        epindex = title.find('(TV Episode) - ')
-        if epindex >= 0:
-            # It's an episode of a series.
-            kind = u'episode'
-            series_info = analyze_title(title[epindex + 15:])
-            result['episode of'] = series_info.get('title')
-            result['series year'] = series_info.get('year')
-            title = title[:epindex]
-        else:
-            kind = u'tv series'
-            title = title[:-11].rstrip()
+    epindex = re_m_episode.search(title)
+    if epindex:
+        # It's an episode of a series.
+        kind = 'episode'
+        series_title = title[epindex.end():]
+        series_title = re_m_series.sub('', series_title)
+        series_info = analyze_title(series_title)
+        result['episode of'] = series_info.get('title')
+        result['series year'] = series_info.get('year')
+        title = title[:epindex.start()].strip()
+    else:
+        detected_kind = re_m_kind.findall(title)
+        if detected_kind:
+            kind = detected_kind[-1].lower().replace('-', '')
+            kind = KIND_MAP.get(kind, kind)
+            title = re_m_kind.sub('', title).strip()
     # Search for the year and the optional imdbIndex (a roman number).
     yi = re_year_index.findall(title)
     if not yi:
         yi = re_extended_year_index.findall(title)
         if yi:
             yk, yiy, yii = yi[-1]
             yi = [(yiy, yii)]
             if yk == 'TV episode':
                 kind = u'episode'
             elif yk in ('TV', 'TV Movie'):
                 kind = u'tv movie'
             elif yk == 'TV Series':
                 kind = u'tv series'
             elif yk == 'Video':
                 kind = u'video movie'
             elif yk in ('TV mini-series', 'TV Mini-Series'):
                 kind = u'tv mini series'
             elif yk == 'Video Game':
                 kind = u'video game'
             title = re_remove_kind.sub('(', title)
     if yi:
         last_yi = yi[-1]
         year = last_yi[0]
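The net effect of this hunk: the long endswith() ladder collapses into the re_m_kind/KIND_MAP pair, and an episode is now recognized by the '(TV Episode) - ' separator anywhere in the string instead of only under a trailing '(TV Series)'. A standalone sketch of the split performed by the new branch (title invented; the real code then recurses into analyze_title() on the series part to fill 'episode of' and 'series year'):

    import re

    re_m_episode = re.compile(r'\(TV Episode\)\s+-\s+', re.I)

    title = 'Pilot (TV Episode) - Some Show (TV Series 2010-2012)'
    m = re_m_episode.search(title)
    if m:
        print(title[:m.start()].strip())   # Pilot
        print(title[m.end():])             # Some Show (TV Series 2010-2012)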
@@ -450,7 +416,12 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
             year = year[:-len(imdbIndex)-1]
         i = title.rfind('(%s)' % last_yi[0])
         if i != -1:
-            title = title[:i-1].rstrip()
+            title = title[:i - 1].rstrip()
+    if not imdbIndex:
+        detect_imdbIndex = re_m_imdbIndex.findall(title)
+        if detect_imdbIndex:
+            imdbIndex = detect_imdbIndex[-1]
+            title = re_m_imdbIndex.sub('', title).strip()
     # This is a tv (mini) series: strip the '"' at the begin and at the end.
     # XXX: strip('"') is not used for compatibility with Python 2.0.
     if title and title[0] == title[-1] == '"':
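The added fallback recovers a roman-numeral imdbIndex from whatever remains of the title when the year regexes did not capture one. A small sketch with an invented title:

    import re

    re_m_imdbIndex = re.compile(r'\(([IVXLCDM]+)\)')

    title = 'Hamlet (II)'
    found = re_m_imdbIndex.findall(title)
    if found:
        imdbIndex = found[-1]
        title = re_m_imdbIndex.sub('', title).strip()
    print('%s / %s' % (imdbIndex, title))   # II / Hamlet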
@@ -464,8 +435,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
             title = canonicalTitle(title)
         else:
             title = normalizeTitle(title)
-    # 'kind' is one in ('movie', 'episode', 'tv series', 'tv mini series',
-    # 'tv movie', 'video movie', 'video game')
     result['title'] = title
     result['kind'] = kind or u'movie'
     if year and year != '????':
@@ -832,7 +801,7 @@ def date_and_notes(s):
     """Parse (birth|death) date and notes; returns a tuple in the
     form (date, notes)."""
     s = s.strip()
-    if not s: return (u'', u'')
+    if not s: return u'', u''
     notes = u''
     if s[0].isdigit() or s.split()[0].lower() in ('c.', 'january', 'february',
                                                   'march', 'april', 'may', 'june',
@@ -990,7 +959,7 @@ def _tag4TON(ton, addAccessSystem=False, _containerOnly=False):
         beginTag += extras
     if ton.notes:
         beginTag += u'<notes>%s</notes>' % _normalizeValue(ton.notes)
-    return (beginTag, u'</%s>' % tag)
+    return beginTag, u'</%s>' % tag


 TAGS_TO_MODIFY = {
@@ -1264,8 +1233,8 @@ class _Container(object):
         self.__role = role

     currentRole = property(_get_currentRole, _set_currentRole,
-                           doc="The role of a Person in a Movie" + \
-                           " or the interpreter of a Character in a Movie.")
+                           doc="The role of a Person in a Movie"
+                               " or the interpreter of a Character in a Movie.")

     def _init(self, **kwds): pass
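The doc string fix drops the explicit '+' and backslash continuation in favour of adjacent string literals, which the parser joins at compile time and which need no continuation character inside parentheses:

    doc = ("The role of a Person in a Movie"
           " or the interpreter of a Character in a Movie.")
    assert doc == "The role of a Person in a Movie or the interpreter of a Character in a Movie."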
@@ -1478,10 +1447,10 @@ class _Container(object):
         except RuntimeError, e:
             # Symbian/python 2.2 has a poor regexp implementation.
             import warnings
-            warnings.warn('RuntimeError in '
-                    "imdb.utils._Container.__getitem__; if it's not "
-                    "a recursion limit exceeded and we're not running "
-                    "in a Symbian environment, it's a bug:\n%s" % e)
+            warnings.warn("RuntimeError in imdb.utils._Container.__getitem__;"
+                          " if it's not a recursion limit exceeded and we're"
+                          " not running in a Symbian environment, it's a"
+                          " bug:\n%s" % e)
         return rawData

     def __setitem__(self, key, item):