Merge branch 'feature/UpdateIMDb' into develop

commit 655b8e422a
Author: JackDandy
Date:   2018-03-28 00:45:35 +01:00
24 changed files with 1992 additions and 1184 deletions


@@ -7,6 +7,7 @@
 * Update feedparser library 5.2.1 (f1dd1bb) to 5.2.1 (5646f4c) - Uses the faster cchardet if installed
 * Change Hachoir can't support PY2 so backport their PY3 to prevent a need for system dependant external binaries like mediainfo
 * Update html5lib 0.99999999/1.0b9 (1a28d72) to 1.1-dev (e9ef538)
+* Update IMDb 5.1 (r907) to 5.2.1dev20171113 (f640595)
 [develop changelog]


@@ -23,8 +23,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 from copy import deepcopy
-from imdb.utils import analyze_company_name, build_company_name, \
-                       flatten, _Container, cmpCompanies
+from imdb.utils import _Container
+from imdb.utils import analyze_company_name, build_company_name, cmpCompanies, flatten
 class Company(_Container):


@@ -24,8 +24,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 from copy import deepcopy
 from imdb import linguistics
-from imdb.utils import analyze_title, build_title, canonicalTitle, \
-                       flatten, _Container, cmpMovies
+from imdb.utils import _Container
+from imdb.utils import analyze_title, build_title, canonicalTitle, cmpMovies, flatten
 class Movie(_Container):


@@ -6,7 +6,7 @@ a person from the IMDb database.
 It can fetch data through different media (e.g.: the IMDb web pages,
 a SQL database, etc.)
-Copyright 2004-2016 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2018 Davide Alberani <da@erlug.linux.it>
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -25,12 +25,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 __all__ = ['IMDb', 'IMDbError', 'Movie', 'Person', 'Character', 'Company',
            'available_access_systems']
-__version__ = VERSION = '5.1'
+__version__ = VERSION = '5.2.1dev20171113'
+
+VERSION_NOTICE = """This is the imdbpy-legacy branch of IMDbPY, and requires Python 2.
+Please notice that this version is mostly unsupported.
+For a version compatible with Python 3, see the master branch:
+https://github.com/alberanid/imdbpy/
+"""
+
+import sys
+if sys.hexversion >= 0x3000000:
+    print(VERSION_NOTICE)
+    sys.exit(1)
 # Import compatibility module (importing it is enough).
 import _compat
-import sys, os, ConfigParser, logging
+import os, ConfigParser, logging
 from types import MethodType
 from imdb import Movie, Person, Character, Company
@@ -38,38 +51,39 @@ import imdb._logging
 from imdb._exceptions import IMDbError, IMDbDataAccessError, IMDbParserError
 from imdb.utils import build_title, build_name, build_company_name

+_imdb_logger = logging.getLogger('imdbpy')
 _aux_logger = logging.getLogger('imdbpy.aux')

 # URLs of the main pages for movies, persons, characters and queries.
-imdbURL_base = 'http://akas.imdb.com/'
+imdbURL_base = 'http://www.imdb.com/'
 # NOTE: the urls below will be removed in a future version.
 # please use the values in the 'urls' attribute
 # of the IMDbBase subclass instance.
-# http://akas.imdb.com/title/
+# http://www.imdb.com/title/
 imdbURL_movie_base = '%stitle/' % imdbURL_base
-# http://akas.imdb.com/title/tt%s/
+# http://www.imdb.com/title/tt%s/
 imdbURL_movie_main = imdbURL_movie_base + 'tt%s/'
-# http://akas.imdb.com/name/
+# http://www.imdb.com/name/
 imdbURL_person_base = '%sname/' % imdbURL_base
-# http://akas.imdb.com/name/nm%s/
+# http://www.imdb.com/name/nm%s/
 imdbURL_person_main = imdbURL_person_base + 'nm%s/'
-# http://akas.imdb.com/character/
+# http://www.imdb.com/character/
 imdbURL_character_base = '%scharacter/' % imdbURL_base
-# http://akas.imdb.com/character/ch%s/
+# http://www.imdb.com/character/ch%s/
 imdbURL_character_main = imdbURL_character_base + 'ch%s/'
-# http://akas.imdb.com/company/
+# http://www.imdb.com/company/
 imdbURL_company_base = '%scompany/' % imdbURL_base
-# http://akas.imdb.com/company/co%s/
+# http://www.imdb.com/company/co%s/
 imdbURL_company_main = imdbURL_company_base + 'co%s/'
-# http://akas.imdb.com/keyword/%s/
+# http://www.imdb.com/keyword/%s/
 imdbURL_keyword_main = imdbURL_base + 'keyword/%s/'
-# http://akas.imdb.com/chart/top
+# http://www.imdb.com/chart/top
 imdbURL_top250 = imdbURL_base + 'chart/top'
-# http://akas.imdb.com/chart/bottom
+# http://www.imdb.com/chart/bottom
 imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
-# http://akas.imdb.com/find?%s
+# http://www.imdb.com/find?%s
 imdbURL_find = imdbURL_base + 'find?%s'

 # Name of the configuration file.
@@ -103,7 +117,7 @@ class ConfigParserWithCase(ConfigParser.ConfigParser):
             try:
                 self.read(fname)
             except (ConfigParser.MissingSectionHeaderError,
-                    ConfigParser.ParsingError), e:
+                    ConfigParser.ParsingError) as e:
                 _aux_logger.warn('Troubles reading config file: %s' % e)
             # Stop at the first valid file.
             if self.has_section('imdbpy'):
@@ -159,10 +173,8 @@ def IMDb(accessSystem=None, *arguments, **keywords):
                 accessSystem = 'http'
             kwds.update(keywords)
             keywords = kwds
-    except Exception, e:
-        import logging
-        logging.getLogger('imdbpy').warn('Unable to read configuration' \
-                                         ' file; complete error: %s' % e)
+    except Exception as e:
+        _imdb_logger.warn('Unable to read configuration file; complete error: %s' % e)
         # It just LOOKS LIKE a bad habit: we tried to read config
         # options from some files, but something is gone horribly
         # wrong: ignore everything and pretend we were called with
@@ -177,9 +189,8 @@ def IMDb(accessSystem=None, *arguments, **keywords):
        try:
            import logging.config
            logging.config.fileConfig(os.path.expanduser(logCfg))
-       except Exception, e:
-           logging.getLogger('imdbpy').warn('unable to read logger ' \
-                                            'config: %s' % e)
+       except Exception as e:
+           _imdb_logger.warn('unable to read logger config: %s' % e)
    if accessSystem in ('httpThin', 'webThin', 'htmlThin'):
        logging.warn('httpThin was removed since IMDbPY 4.8')
        accessSystem = 'http'
@@ -244,9 +255,6 @@ class IMDbBase:
     # in the subclasses).
     accessSystem = 'UNKNOWN'

-    # Top-level logger for IMDbPY.
-    _imdb_logger = logging.getLogger('imdbpy')
-
     # Whether to re-raise caught exceptions or not.
     _reraise_exceptions = False
@@ -285,30 +293,30 @@ class IMDbBase:
             imdbURL_base = 'http://%s' % imdbURL_base
         if not imdbURL_base.endswith('/'):
             imdbURL_base = '%s/' % imdbURL_base
-        # http://akas.imdb.com/title/
-        imdbURL_movie_base='%stitle/' % imdbURL_base
-        # http://akas.imdb.com/title/tt%s/
-        imdbURL_movie_main=imdbURL_movie_base + 'tt%s/'
-        # http://akas.imdb.com/name/
-        imdbURL_person_base='%sname/' % imdbURL_base
-        # http://akas.imdb.com/name/nm%s/
-        imdbURL_person_main=imdbURL_person_base + 'nm%s/'
-        # http://akas.imdb.com/character/
-        imdbURL_character_base='%scharacter/' % imdbURL_base
-        # http://akas.imdb.com/character/ch%s/
-        imdbURL_character_main=imdbURL_character_base + 'ch%s/'
-        # http://akas.imdb.com/company/
-        imdbURL_company_base='%scompany/' % imdbURL_base
-        # http://akas.imdb.com/company/co%s/
-        imdbURL_company_main=imdbURL_company_base + 'co%s/'
-        # http://akas.imdb.com/keyword/%s/
-        imdbURL_keyword_main=imdbURL_base + 'keyword/%s/'
-        # http://akas.imdb.com/chart/top
-        imdbURL_top250=imdbURL_base + 'chart/top'
-        # http://akas.imdb.com/chart/bottom
-        imdbURL_bottom100=imdbURL_base + 'chart/bottom'
-        # http://akas.imdb.com/find?%s
-        imdbURL_find=imdbURL_base + 'find?%s'
+        # http://www.imdb.com/title/
+        imdbURL_movie_base = '%stitle/' % imdbURL_base
+        # http://www.imdb.com/title/tt%s/
+        imdbURL_movie_main = imdbURL_movie_base + 'tt%s/'
+        # http://www.imdb.com/name/
+        imdbURL_person_base = '%sname/' % imdbURL_base
+        # http://www.imdb.com/name/nm%s/
+        imdbURL_person_main = imdbURL_person_base + 'nm%s/'
+        # http://www.imdb.com/character/
+        imdbURL_character_base = '%scharacter/' % imdbURL_base
+        # http://www.imdb.com/character/ch%s/
+        imdbURL_character_main = imdbURL_character_base + 'ch%s/'
+        # http://www.imdb.com/company/
+        imdbURL_company_base = '%scompany/' % imdbURL_base
+        # http://www.imdb.com/company/co%s/
+        imdbURL_company_main = imdbURL_company_base + 'co%s/'
+        # http://www.imdb.com/keyword/%s/
+        imdbURL_keyword_main = imdbURL_base + 'keyword/%s/'
+        # http://www.imdb.com/chart/top
+        imdbURL_top250 = imdbURL_base + 'chart/top'
+        # http://www.imdb.com/chart/bottom
+        imdbURL_bottom100 = imdbURL_base + 'chart/bottom'
+        # http://www.imdb.com/find?%s
+        imdbURL_find = imdbURL_base + 'find?%s'
         self.urls = dict(
             movie_base=imdbURL_movie_base,
             movie_main=imdbURL_movie_main,
@@ -727,16 +735,15 @@ class IMDbBase:
             mopID = mop.companyID
             prefix = 'company'
         else:
-            raise IMDbError('object ' + repr(mop) + \
-                    ' is not a Movie, Person, Character or Company instance')
+            raise IMDbError('object ' + repr(mop) +
+                            ' is not a Movie, Person, Character or Company instance')
         if mopID is None:
             # XXX: enough? It's obvious that there are Characters
             #      objects without characterID, so I think they should
             #      just do nothing, when an i.update(character) is tried.
             if prefix == 'character':
                 return
-            raise IMDbDataAccessError( \
-                'the supplied object has null movieID, personID or companyID')
+            raise IMDbDataAccessError('supplied object has null movieID, personID or companyID')
         if mop.accessSystem == self.accessSystem:
             aSystem = self
         else:
@@ -760,21 +767,22 @@ class IMDbBase:
                 continue
             if not i:
                 continue
-            self._imdb_logger.debug('retrieving "%s" info set', i)
+            _imdb_logger.debug('retrieving "%s" info set', i)
             try:
                 method = getattr(aSystem, 'get_%s_%s' %
                                  (prefix, i.replace(' ', '_')))
             except AttributeError:
-                self._imdb_logger.error('unknown information set "%s"', i)
+                _imdb_logger.error('unknown information set "%s"', i)
                 # Keeps going.
                 method = lambda *x: {}
             try:
                 ret = method(mopID)
-            except Exception, e:
-                self._imdb_logger.critical('caught an exception retrieving ' \
-                    'or parsing "%s" info set for mopID ' \
-                    '"%s" (accessSystem: %s)',
-                    i, mopID, mop.accessSystem, exc_info=True)
+            except Exception:
+                _imdb_logger.critical(
+                    'caught an exception retrieving or parsing "%s" info set'
+                    ' for mopID "%s" (accessSystem: %s)',
+                    i, mopID, mop.accessSystem, exc_info=True
+                )
                 ret = {}
                 # If requested by the user, reraise the exception.
                 if self._reraise_exceptions:
@@ -826,9 +834,7 @@ class IMDbBase:
         raise NotImplementedError('override this method')

     def _searchIMDb(self, kind, ton, title_kind=None):
-        """Search the IMDb akas server for the given title or name."""
-        # The Exact Primary search system has gone AWOL, so we resort
-        # to the mobile search. :-/
+        """Search the IMDb www server for the given title or name."""
         if not ton:
             return None
         ton = ton.strip('"')
@@ -935,8 +941,8 @@ class IMDbBase:
             else:
                 imdbID = aSystem.company2imdbID(build_company_name(mop))
         else:
-            raise IMDbError('object ' + repr(mop) + \
-                    ' is not a Movie, Person or Character instance')
+            raise IMDbError('object ' + repr(mop) +
+                            ' is not a Movie, Person or Character instance')
         return imdbID

     def get_imdbURL(self, mop):
@@ -954,8 +960,8 @@ class IMDbBase:
         elif isinstance(mop, Company.Company):
             url_firstPart = imdbURL_company_main
         else:
-            raise IMDbError('object ' + repr(mop) + \
-                    ' is not a Movie, Person, Character or Company instance')
+            raise IMDbError('object ' + repr(mop) +
+                            ' is not a Movie, Person, Character or Company instance')
         return url_firstPart % imdbID

     def get_special_methods(self):


@@ -32,8 +32,9 @@ LEVELS = {'debug': logging.DEBUG,
 imdbpyLogger = logging.getLogger('imdbpy')
 imdbpyStreamHandler = logging.StreamHandler()
-imdbpyFormatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]' \
-    ' %(pathname)s:%(lineno)d: %(message)s')
+imdbpyFormatter = logging.Formatter(
+    '%(asctime)s %(levelname)s [%(name)s] %(pathname)s:%(lineno)d: %(message)s'
+)
 imdbpyStreamHandler.setFormatter(imdbpyFormatter)
 imdbpyLogger.addHandler(imdbpyStreamHandler)


@@ -269,8 +269,8 @@ for k, v in {'lt':u'<','gt':u'>','amp':u'&','quot':u'"','apos':u'\''}.items():
     everyentcharrefs[k] = v
     everyentcharrefs['#%s' % ord(v)] = v
 everyentcharrefsget = everyentcharrefs.get
-re_everyentcharrefs = re.compile('&(%s|\#160|\#\d{1,5});' %
-        '|'.join(map(re.escape, everyentcharrefs)))
+re_everyentcharrefs = re.compile('&(%s|\#160|\#\d{1,5});' % '|'.join(map(re.escape,
+                                                                         everyentcharrefs)))
 re_everyentcharrefssub = re_everyentcharrefs.sub

 def _replAllXMLRef(match):

@@ -408,7 +408,7 @@ def _valueWithType(tag, tagValue):
 # Extra tags to get (if values were not already read from title/name).
 _titleTags = ('imdbindex', 'kind', 'year')
-_nameTags = ('imdbindex')
+_nameTags = ('imdbindex',)
 _companyTags = ('imdbindex', 'country')

 def parseTags(tag, _topLevel=True, _as=None, _infoset2keys=None,


@@ -7,7 +7,7 @@ the imdb.IMDb function will return an instance of this class when
 called with the 'accessSystem' argument set to "http" or "web"
 or "html" (this is the default).

-Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
+Copyright 2004-2017 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
@@ -26,6 +26,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 """

 import sys
+import ssl
 import socket
 import logging
 from urllib import FancyURLopener, quote_plus
@@ -68,8 +69,8 @@ class _ModuleProxy:
         """Initialize a proxy for the given module; defaultKeys, if set,
         muste be a dictionary of values to set for instanced objects."""
         if oldParsers or fallBackToNew:
-            _aux_logger.warn('The old set of parsers was removed; falling ' \
-                    'back to the new parsers.')
+            _aux_logger.warn('The old set of parsers was removed;'
+                             ' falling back to the new parsers.')
         self.useModule = useModule
         if defaultKeys is None:
             defaultKeys = {}
@@ -142,6 +143,7 @@ class IMDbURLopener(FancyURLopener):
     def __init__(self, *args, **kwargs):
         self._last_url = u''
+        kwargs['context'] = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         FancyURLopener.__init__(self, *args, **kwargs)
         # Headers to add to every request.
         # XXX: IMDb's web server doesn't like urllib-based programs,
@@ -211,9 +213,9 @@ class IMDbURLopener(FancyURLopener):
             if server_encode is None and content:
                 begin_h = content.find('text/html; charset=')
                 if begin_h != -1:
-                    end_h = content[19+begin_h:].find('"')
+                    end_h = content[19 + begin_h:].find('"')
                     if end_h != -1:
-                        server_encode = content[19+begin_h:19+begin_h+end_h]
+                        server_encode = content[19 + begin_h:19 + begin_h + end_h]
             if server_encode:
                 try:
                     if lookup(server_encode):
@@ -237,9 +239,10 @@ class IMDbURLopener(FancyURLopener):
         if encode is None:
             encode = 'latin_1'
             # The detection of the encoding is error prone...
-            self._logger.warn('Unable to detect the encoding of the retrieved '
-                        'page [%s]; falling back to default latin1.', encode)
-        ##print unicode(content, encode, 'replace').encode('utf8')
+            self._logger.warn('Unable to detect the encoding of the retrieved page [%s];'
+                              ' falling back to default utf8.', encode)
+        if isinstance(content, unicode):
+            return content
         return unicode(content, encode, 'replace')

     def http_error_default(self, url, fp, errcode, errmsg, headers):
@@ -288,8 +291,8 @@ class IMDbHTTPAccessSystem(IMDbBase):
         self._getRefs = True
         self._mdparse = False
         if isThin:
-            self._http_logger.warn('"httpThin" access system no longer ' +
-                    'supported; "http" used automatically', exc_info=False)
+            self._http_logger.warn('"httpThin" access system no longer supported;'
+                                   ' "http" used automatically', exc_info=False)
             self.isThin = 0
             if self.accessSystem in ('httpThin', 'webThin', 'htmlThin'):
                 self.accessSystem = 'http'
@@ -503,7 +506,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
         return self.smProxy.search_movie_parser.parse(cont, results=results)['data']

     def get_movie_main(self, movieID):
-        cont = self._retrieve(self.urls['movie_main'] % movieID + 'combined')
+        cont = self._retrieve(self.urls['movie_main'] % movieID + 'reference')
         return self.mProxy.movie_parser.parse(cont, mdparse=self._mdparse)

     def get_movie_full_credits(self, movieID):
@@ -811,7 +814,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
     def _search_keyword(self, keyword, results):
         # XXX: the IMDb web server seems to have some serious problem with
         #      non-ascii keyword.
-        #      E.g.: http://akas.imdb.com/keyword/fianc%E9/
+        #      E.g.: http://www.imdb.com/keyword/fianc%E9/
         #      will return a 500 Internal Server Error: Redirect Recursion.
         keyword = keyword.encode('utf8', 'ignore')
         try:


@@ -171,7 +171,7 @@ class PageElement:
         return self

     def _lastRecursiveChild(self):
-        "Finds the last element beneath this object to be parsed."
+        """Finds the last element beneath this object to be parsed."""
        lastChild = self
        while hasattr(lastChild, 'contents') and lastChild.contents:
            lastChild = lastChild.contents[-1]

@@ -184,7 +184,7 @@ class PageElement:
             newChild = NavigableString(newChild)

         position = min(position, len(self.contents))
-        if hasattr(newChild, 'parent') and newChild.parent != None:
+        if hasattr(newChild, 'parent') and newChild.parent is not None:
             # We're 'inserting' an element that's already one
             # of this object's children.
             if newChild.parent == self:

@@ -323,7 +323,7 @@ class PageElement:
         return r

     def _findAll(self, name, attrs, text, limit, generator, **kwargs):
-        "Iterates over a generator looking for things that match."
+        """Iterates over a generator looking for things that match."""

         if isinstance(name, SoupStrainer):
             strainer = name

@@ -415,7 +415,7 @@ class NavigableString(unicode, PageElement):
         return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)

     def __getnewargs__(self):
-        return (NavigableString.__str__(self),)
+        return NavigableString.__str__(self),

     def __getattr__(self, attr):
         """text.string gives you text. This is for backwards
@@ -460,7 +460,7 @@ class Tag(PageElement):
     """Represents a found HTML tag with its attributes and contents."""

     def _invert(h):
-        "Cheap function to invert a hash."
+        """Cheap function to invert a hash."""
         i = {}
         for k,v in h.items():
             i[v] = k

@@ -501,14 +501,14 @@ class Tag(PageElement):

     def __init__(self, parser, name, attrs=None, parent=None,
                  previous=None):
-        "Basic constructor."
+        """Basic constructor."""

         # We don't actually store the parser object: that lets extracted
         # chunks be garbage-collected
         self.parserClass = parser.__class__
         self.isSelfClosing = parser.isSelfClosingTag(name)
         self.name = name
-        if attrs == None:
+        if attrs is None:
             attrs = []
         self.attrs = attrs
         self.contents = []

@@ -541,18 +541,18 @@ class Tag(PageElement):
         return self._getAttrMap()[key]

     def __iter__(self):
-        "Iterating over a tag iterates over its contents."
+        """Iterating over a tag iterates over its contents."""
         return iter(self.contents)

     def __len__(self):
-        "The length of a tag is the length of its list of contents."
+        """The length of a tag is the length of its list of contents."""
         return len(self.contents)

     def __contains__(self, x):
         return x in self.contents

     def __nonzero__(self):
-        "A tag is non-None even if it has no contents."
+        """A tag is non-None even if it has no contents."""
         return True

     def __setitem__(self, key, value):

@@ -570,7 +570,7 @@ class Tag(PageElement):
         self._getAttrMap()[key] = value

     def __delitem__(self, key):
-        "Deleting tag[key] deletes all 'key' attributes for the tag."
+        """Deleting tag[key] deletes all 'key' attributes for the tag."""
         for item in self.attrs:
             if item[0] == key:
                 self.attrs.remove(item)
@@ -911,7 +911,7 @@ class SoupStrainer:
         #print "Matching %s against %s" % (markup, matchAgainst)
         result = False
         if matchAgainst == True and type(matchAgainst) == types.BooleanType:
-            result = markup != None
+            result = markup is not None
         elif callable(matchAgainst):
             result = matchAgainst(markup)
         else:

@@ -1130,7 +1130,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
             # Python installations can't copy regexes. If anyone
             # was relying on the existence of markupMassage, this
             # might cause problems.
-            del(self.markupMassage)
+            del self.markupMassage
         self.reset()

         SGMLParser.feed(self, markup)

@@ -1253,7 +1253,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
         """
         nestingResetTriggers = self.NESTABLE_TAGS.get(name)
-        isNestable = nestingResetTriggers != None
+        isNestable = nestingResetTriggers is not None
         isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
         popTo = None
         inclusive = True

@@ -1264,9 +1264,9 @@ class BeautifulStoneSoup(Tag, SGMLParser):
                 #last occurance.
                 popTo = name
                 break
-            if (nestingResetTriggers != None
+            if (nestingResetTriggers is not None
                 and p.name in nestingResetTriggers) \
-                or (nestingResetTriggers == None and isResetNesting
+                or (nestingResetTriggers is None and isResetNesting
                     and self.RESET_NESTING_TAGS.has_key(p.name)):

                 #If we encounter one of the nesting reset triggers
@@ -1342,11 +1342,11 @@ class BeautifulStoneSoup(Tag, SGMLParser):
             self._toStringSubclass(text, ProcessingInstruction)

     def handle_comment(self, text):
-        "Handle comments as Comment objects."
+        """Handle comments as Comment objects."""
         self._toStringSubclass(text, Comment)

     def handle_charref(self, ref):
-        "Handle character references as data."
+        """Handle character references as data."""
         if self.convertEntities:
             data = unichr(int(ref))
         else:

@@ -1397,7 +1397,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
             self.handle_data(data)

     def handle_decl(self, data):
-        "Handle DOCTYPEs and the like as Declaration objects."
+        """Handle DOCTYPEs and the like as Declaration objects."""
         self._toStringSubclass(data, Declaration)

     def parse_declaration(self, i):

@@ -1793,8 +1793,8 @@ class UnicodeDammit:
         return self.markup

     def _toUnicode(self, data, encoding):
-        '''Given a string and its encoding, decodes the string into Unicode.
-        %encoding is a string recognized by encodings.aliases'''
+        """Given a string and its encoding, decodes the string into Unicode.
+        %encoding is a string recognized by encodings.aliases"""
         # strip Byte Order Mark (if present)
         if (len(data) >= 4) and (data[:2] == '\xfe\xff') \


@@ -67,7 +67,7 @@ def tokenize_path(path):
         if path[i] == '/':
             if i > 0:
                 separators.append((last_position, i))
-            if (path[i+1] == '/'):
+            if path[i+1] == '/':
                 last_position = i
                 i = i + 1
             else:


@@ -2,7 +2,7 @@
 parser.http.characterParser module (imdb package).

 This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a character.
+the IMDb pages on the www.imdb.com server about a character.
 E.g., for "Jesse James" the referred pages would be:
     main details:   http://www.imdb.com/character/ch0000001/
     biography:      http://www.imdb.com/character/ch0000001/bio

@@ -37,7 +37,7 @@ _personIDs = re.compile(r'/name/nm([0-9]{7})')
 class DOMHTMLCharacterMaindetailsParser(DOMHTMLMaindetailsParser):
     """Parser for the "filmography" page of a given character.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:

@@ -101,7 +101,7 @@ class DOMHTMLCharacterMaindetailsParser(DOMHTMLMaindetailsParser):
 class DOMHTMLCharacterBioParser(DOMParserBase):
     """Parser for the "biography" page of a given character.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:

@@ -146,7 +146,7 @@ class DOMHTMLCharacterBioParser(DOMParserBase):
 class DOMHTMLCharacterQuotesParser(DOMParserBase):
     """Parser for the "quotes" page of a given character.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:


@@ -2,12 +2,12 @@
 parser.http.companyParser module (imdb package).

 This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a company.
+the IMDb pages on the www.imdb.com server about a company.
 E.g., for "Columbia Pictures [us]" the referred page would be:
-    main details:   http://akas.imdb.com/company/co0071509/
+    main details:   http://www.imdb.com/company/co0071509/

-Copyright 2008-2009 Davide Alberani <da@erlug.linux.it>
-               2008 H. Turgut Uyar <uyar@tekir.org>
+Copyright 2008-2017 Davide Alberani <da@erlug.linux.it>
+          2008-2017 H. Turgut Uyar <uyar@tekir.org>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

@@ -34,7 +34,7 @@ from imdb.utils import analyze_company_name
 class DOMCompanyParser(DOMParserBase):
     """Parser for the main page of a given company.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:

@@ -44,31 +44,38 @@ class DOMCompanyParser(DOMParserBase):
     _containsObjects = True

     extractors = [
-        Extractor(label='name',
-                path="//title",
-                attrs=Attribute(key='name',
-                    path="./text()",
-                    postprocess=lambda x: \
-                        analyze_company_name(x, stripNotes=True))),
-
-        Extractor(label='filmography',
-                group="//b/a[@name]",
-                group_key="./text()",
-                group_key_normalize=lambda x: x.lower(),
-                path="../following-sibling::ol[1]/li",
-                attrs=Attribute(key=None,
-                    multi=True,
-                    path={
-                        'link': "./a[1]/@href",
-                        'title': "./a[1]/text()",
-                        'year': "./text()[1]"
-                        },
-                    postprocess=lambda x:
-                        build_movie(u'%s %s' % \
-                        (x.get('title'), x.get('year').strip()),
-                        movieID=analyze_imdbid(x.get('link') or u''),
-                        _parsingCompany=True))),
-        ]
+        Extractor(
+            label='name',
+            path="//h1/span[@class='display-title ']",  # note the extra trailing space in class
+            attrs=Attribute(
+                key='name',
+                path="./text()",
+                postprocess=lambda x: analyze_company_name(x, stripNotes=True)
+            )
+        ),
+
+        Extractor(
+            label='filmography',
+            group="//b/a[@name]",
+            group_key="./text()",
+            group_key_normalize=lambda x: x.lower(),
+            path="../following-sibling::ol[1]/li",
+            attrs=Attribute(
+                key=None,
+                multi=True,
+                path={
+                    'link': "./a[1]/@href",
+                    'title': "./a[1]/text()",
+                    'year': "./text()[1]"
+                },
+                postprocess=lambda x: build_movie(
+                    '%s %s' % (x.get('title'), x.get('year').strip()),
+                    movieID=analyze_imdbid(x.get('link') or u''),
+                    _parsingCompany=True
+                )
+            )
+        )
+    ]

     preprocessors = [
         (re.compile('(<b><a name=)', re.I), r'</p>\1')

File diff suppressed because it is too large


@@ -2,10 +2,10 @@
 parser.http.personParser module (imdb package).

 This module provides the classes (and the instances), used to parse
-the IMDb pages on the akas.imdb.com server about a person.
+the IMDb pages on the www.imdb.com server about a person.
 E.g., for "Mel Gibson" the referred pages would be:
-    categorized:    http://akas.imdb.com/name/nm0000154/maindetails
-    biography:      http://akas.imdb.com/name/nm0000154/bio
+    categorized:    http://www.imdb.com/name/nm0000154/maindetails
+    biography:      http://www.imdb.com/name/nm0000154/bio
     ...and so on...

 Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>

@@ -52,7 +52,7 @@ def build_date(date):
 class DOMHTMLMaindetailsParser(DOMParserBase):
     """Parser for the "categorized" (maindetails) page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:

@@ -192,7 +192,7 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
 class DOMHTMLBioParser(DOMParserBase):
     """Parser for the "biography" page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:
@@ -225,92 +225,157 @@ class DOMHTMLBioParser(DOMParserBase):
                     # TODO: check if this slicing is always correct
                     postprocess=lambda x: u''.join(x).strip()[2:])]

     extractors = [
-        Extractor(label='headshot',
-                  path="//a[@name='headshot']",
-                  attrs=Attribute(key='headshot',
-                                  path="./img/@src")),
-        Extractor(label='birth info',
-                  path="//table[@id='overviewTable']//td[text()='Date of Birth']/following-sibling::td[1]",
-                  attrs=_birth_attrs),
-        Extractor(label='death info',
-                  path="//table[@id='overviewTable']//td[text()='Date of Death']/following-sibling::td[1]",
-                  attrs=_death_attrs),
-        Extractor(label='nick names',
-                  path="//table[@id='overviewTable']//td[text()='Nickenames']/following-sibling::td[1]",
-                  attrs=Attribute(key='nick names',
-                                  path="./text()",
-                                  joiner='|',
-                                  postprocess=lambda x: [n.strip().replace(' (',
-                                      '::(', 1) for n in x.split('|')
-                                      if n.strip()])),
-        Extractor(label='birth name',
-                  path="//table[@id='overviewTable']//td[text()='Birth Name']/following-sibling::td[1]",
-                  attrs=Attribute(key='birth name',
-                                  path="./text()",
-                                  postprocess=lambda x: canonicalName(x.strip()))),
-        Extractor(label='height',
-                  path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
-                  attrs=Attribute(key='height',
-                                  path="./text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='mini biography',
-                  path="//a[@name='mini_bio']/following-sibling::div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
-                  attrs=Attribute(key='mini biography',
-                                  multi=True,
-                                  path={
-                                      'bio': ".//text()",
-                                      'by': ".//a[@name='ba']//text()"
-                                  },
-                                  postprocess=lambda x: "%s::%s" % \
-                                      ((x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
-                                       (x.get('by') or u'').strip() or u'Anonymous'))),
-        Extractor(label='spouse',
-                  path="//div[h5='Spouse']/table/tr",
-                  attrs=Attribute(key='spouse',
-                                  multi=True,
-                                  path={
-                                      'name': "./td[1]//text()",
-                                      'info': "./td[2]//text()"
-                                  },
-                                  postprocess=lambda x: ("%s::%s" % \
-                                      (x.get('name').strip(),
-                                       (x.get('info') or u'').strip())).strip(':'))),
-        Extractor(label='trade mark',
-                  path="//div[h5='Trade Mark']/p",
-                  attrs=Attribute(key='trade mark',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='trivia',
-                  path="//div[h5='Trivia']/p",
-                  attrs=Attribute(key='trivia',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='quotes',
-                  path="//div[h5='Personal Quotes']/p",
-                  attrs=Attribute(key='quotes',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-        Extractor(label='salary',
-                  path="//div[h5='Salary']/table/tr",
-                  attrs=Attribute(key='salary history',
-                                  multi=True,
-                                  path={
-                                      'title': "./td[1]//text()",
-                                      'info': "./td[2]/text()",
-                                  },
-                                  postprocess=lambda x: "%s::%s" % \
-                                      (x.get('title').strip(),
-                                       x.get('info').strip()))),
-        Extractor(label='where now',
-                  path="//div[h5='Where Are They Now']/p",
-                  attrs=Attribute(key='where now',
-                                  multi=True,
-                                  path=".//text()",
-                                  postprocess=lambda x: x.strip())),
-    ]
+        Extractor(
+            label='headshot',
+            path="//a[@name='headshot']",
+            attrs=Attribute(
+                key='headshot',
+                path="./img/@src"
+            )
+        ),
+
+        Extractor(
+            label='birth info',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Date of Birth']/following-sibling::td[1]",
+            attrs=_birth_attrs
+        ),
+
+        Extractor(
+            label='death info',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Date of Death']/following-sibling::td[1]",
+            attrs=_death_attrs
+        ),
+
+        Extractor(
+            label='nick names',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Nickenames']/following-sibling::td[1]",
+            attrs=Attribute(
+                key='nick names',
+                path="./text()",
+                joiner='|',
+                postprocess=lambda x: [n.strip().replace(' (', '::(', 1) for n in x.split('|')
+                                       if n.strip()]
+            )
+        ),
+
+        Extractor(
+            label='birth name',
+            path="//table[@id='overviewTable']"
+                 "//td[text()='Birth Name']/following-sibling::td[1]",
+            attrs=Attribute(
+                key='birth name',
+                path="./text()",
+                postprocess=lambda x: canonicalName(x.strip())
+            )
+        ),
+
+        Extractor(
+            label='height',
+            path="//table[@id='overviewTable']//td[text()='Height']/following-sibling::td[1]",
+            attrs=Attribute(
+                key='height',
+                path="./text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='mini biography',
+            path="//a[@name='mini_bio']/following-sibling::"
+                 "div[1 = count(preceding-sibling::a[1] | ../a[@name='mini_bio'])]",
+            attrs=Attribute(
+                key='mini biography',
+                multi=True,
+                path={
+                    'bio': ".//text()",
+                    'by': ".//a[@name='ba']//text()"
+                },
+                postprocess=lambda x: "%s::%s" % (
+                    (x.get('bio') or u'').split('- IMDb Mini Biography By:')[0].strip(),
+                    (x.get('by') or u'').strip() or u'Anonymous'
+                )
+            )
+        ),
+
+        Extractor(
+            label='spouse',
+            path="//div[h5='Spouse']/table/tr",
+            attrs=Attribute(
+                key='spouse',
+                multi=True,
+                path={
+                    'name': "./td[1]//text()",
+                    'info': "./td[2]//text()"
+                },
+                postprocess=lambda x: ("%s::%s" % (
+                    x.get('name').strip(),
+                    (x.get('info') or u'').strip())).strip(':')
+            )
+        ),
+
+        Extractor(
+            label='trade mark',
+            path="//div[h5='Trade Mark']/p",
+            attrs=Attribute(
+                key='trade mark',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='trivia',
+            path="//div[h5='Trivia']/p",
+            attrs=Attribute(
+                key='trivia',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='quotes',
+            path="//div[h5='Personal Quotes']/p",
+            attrs=Attribute(
+                key='quotes',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        ),
+
+        Extractor(
+            label='salary',
+            path="//div[h5='Salary']/table/tr",
+            attrs=Attribute(
+                key='salary history',
+                multi=True,
+                path={
+                    'title': "./td[1]//text()",
+                    'info': "./td[2]/text()",
+                },
+                postprocess=lambda x: "%s::%s" % (
+                    x.get('title').strip(),
+                    x.get('info').strip())
+            )
+        ),
+
+        Extractor(
+            label='where now',
+            path="//div[h5='Where Are They Now']/p",
+            attrs=Attribute(
+                key='where now',
+                multi=True,
+                path=".//text()",
+                postprocess=lambda x: x.strip()
+            )
+        )
+    ]

     preprocessors = [
         (re.compile('(<h5>)', re.I), r'</div><div class="_imdbpy">\1'),
@@ -329,7 +394,7 @@ class DOMHTMLBioParser(DOMParserBase):
 class DOMHTMLResumeParser(DOMParserBase):
     """Parser for the "resume" page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:
@@ -406,13 +471,13 @@ class DOMHTMLResumeParser(DOMParserBase):
                 continue
             if len(data[key][0]) == 3:
                 for item in data[key]:
-                    item[:] = [x for x in item if not x == None]
+                    item[:] = [x for x in item if not x is None]
                 continue
             if len(data[key][0]) == 2:
                 new_key = {}
                 for item in data[key]:
-                    if item[0] == None:
+                    if item[0] is None:
                         continue
                     if ':' in item[0]:
                         if item[1].replace(item[0], '')[1:].strip() == '':

@@ -422,15 +487,14 @@ class DOMHTMLResumeParser(DOMParserBase):
                         new_key[item[0]] = item[1]
                 data[key] = new_key

-        new_data = {}
-        new_data['resume'] = data
+        new_data = {'resume': data}
         return new_data


 class DOMHTMLOtherWorksParser(DOMParserBase):
     """Parser for the "other works" and "agent" pages of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:
@@ -466,7 +530,7 @@ def _build_episode(link, title, minfo, role, roleA, roleAID):
     minidx = minfo.find(' -')
     # Sometimes, for some unknown reason, the role is left in minfo.
     if minidx != -1:
-        slfRole = minfo[minidx+3:].lstrip()
+        slfRole = minfo[minidx + 3:].lstrip()
         minfo = minfo[:minidx].rstrip()
     if slfRole.endswith(')'):
         commidx = slfRole.rfind('(')
@@ -504,7 +568,7 @@ def _build_episode(link, title, minfo, role, roleA, roleAID):
 class DOMHTMLSeriesParser(DOMParserBase):
     """Parser for the "by TV series" page of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:

@@ -559,7 +623,7 @@ class DOMHTMLSeriesParser(DOMParserBase):
 class DOMHTMLPersonGenresParser(DOMParserBase):
     """Parser for the "by genre" and "by keywords" pages of a given person.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:


@@ -5,7 +5,7 @@ This module provides the HTMLSearchCharacterParser class (and the
 search_character_parser instance), used to parse the results of a search
 for a given character.
 E.g., when searching for the name "Jesse James", the parsed page would be:
-    http://akas.imdb.com/find?s=ch;mx=20;q=Jesse+James
+    http://www.imdb.com/find?s=ch;mx=20;q=Jesse+James

 Copyright 2007-2012 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>


@@ -5,7 +5,7 @@ This module provides the HTMLSearchCompanyParser class (and the
 search_company_parser instance), used to parse the results of a search
 for a given company.
 E.g., when searching for the name "Columbia Pictures", the parsed page would be:
-    http://akas.imdb.com/find?s=co;mx=20;q=Columbia+Pictures
+    http://www.imdb.com/find?s=co;mx=20;q=Columbia+Pictures

 Copyright 2008-2012 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>

@@ -46,22 +46,29 @@ class DOMHTMLSearchCompanyParser(DOMHTMLSearchMovieParser):
     _titleBuilder = lambda self, x: build_company_name(x)
     _linkPrefix = '/company/co'

-    _attrs = [Attribute(key='data',
-                        multi=True,
-                        path={
-                            'link': "./a[1]/@href",
-                            'name': "./a[1]/text()",
-                            'notes': "./text()[1]"
-                        },
-                        postprocess=lambda x: (
-                            analyze_imdbid(x.get('link')),
-                            analyze_company_name(x.get('name')+(x.get('notes')
-                                                 or u''), stripNotes=True)
-                        ))]
-
-    extractors = [Extractor(label='search',
-                            path="//td[@class='result_text']/a[starts-with(@href, " \
-                                 "'/company/co')]/..",
-                            attrs=_attrs)]
+    _attrs = [
+        Attribute(
+            key='data',
+            multi=True,
+            path={
+                'link': "./a[1]/@href",
+                'name': "./a[1]/text()",
+                'notes': "./text()[1]"
+            },
+            postprocess=lambda x: (
+                analyze_imdbid(x.get('link')),
+                analyze_company_name(x.get('name') + (x.get('notes') or u''), stripNotes=True)
+            )
+        )
+    ]
+
+    extractors = [
+        Extractor(
+            label='search',
+            path="//td[@class='result_text']/a[starts-with(@href, '/company/co')]/..",
+            attrs=_attrs
+        )
+    ]

 _OBJECTS = {


@@ -5,7 +5,7 @@ This module provides the HTMLSearchKeywordParser class (and the
 search_company_parser instance), used to parse the results of a search
 for a given keyword.
 E.g., when searching for the keyword "alabama", the parsed page would be:
-    http://akas.imdb.com/find?s=kw;mx=20;q=alabama
+    http://www.imdb.com/find?s=kw;mx=20;q=alabama

 Copyright 2009 Davide Alberani <da@erlug.linux.it>


@@ -6,7 +6,7 @@ search_movie_parser instance), used to parse the results of a search
 for a given title.
 E.g., for when searching for the title "the passion", the parsed
 page would be:
-    http://akas.imdb.com/find?q=the+passion&tt=on&mx=20
+    http://www.imdb.com/find?q=the+passion&tt=on&mx=20

 Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>

@@ -67,7 +67,7 @@ class DOMBasicMovieParser(DOMParserBase):
             data = []
         else:
             link = data.pop('link')
-            if (link and data):
+            if link and data:
                 data = [(link, data)]
             else:
                 data = []


@@ -5,7 +5,7 @@ This module provides the HTMLSearchPersonParser class (and the
 search_person_parser instance), used to parse the results of a search
 for a given person.
 E.g., when searching for the name "Mel Gibson", the parsed page would be:
-    http://akas.imdb.com/find?q=Mel+Gibson&nm=on&mx=20
+    http://www.imdb.com/find?q=Mel+Gibson&nm=on&mx=20

 Copyright 2004-2013 Davide Alberani <da@erlug.linux.it>
                2008 H. Turgut Uyar <uyar@tekir.org>


@@ -4,8 +4,8 @@ parser.http.topBottomParser module (imdb package).
 This module provides the classes (and the instances), used to parse the
 lists of top 250 and bottom 100 movies.
 E.g.:
-    http://akas.imdb.com/chart/top
-    http://akas.imdb.com/chart/bottom
+    http://www.imdb.com/chart/top
+    http://www.imdb.com/chart/bottom

 Copyright 2009-2015 Davide Alberani <da@erlug.linux.it>

@@ -31,7 +31,7 @@ from utils import DOMParserBase, Attribute, Extractor, analyze_imdbid
 class DOMHTMLTop250Parser(DOMParserBase):
     """Parser for the "top 250" page.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:

@@ -42,17 +42,24 @@ class DOMHTMLTop250Parser(DOMParserBase):
     ranktext = 'top 250 rank'

     def _init(self):
-        self.extractors = [Extractor(label=self.label,
-                           path="//div[@id='main']//div[1]//div//table//tbody//tr",
-                           attrs=Attribute(key=None,
-                               multi=True,
-                               path={self.ranktext: "./td[2]//text()",
-                                     'rating': "./td[3]//strong//text()",
-                                     'title': "./td[2]//a//text()",
-                                     'year': "./td[2]//span//text()",
-                                     'movieID': "./td[2]//a/@href",
-                                     'votes': "./td[3]//strong/@title"
-                                     }))]
+        self.extractors = [
+            Extractor(
+                label=self.label,
+                path="//div[@id='main']//div[1]//div//table//tbody//tr",
+                attrs=Attribute(
+                    key=None,
+                    multi=True,
+                    path={
+                        self.ranktext: "./td[2]/text()",
+                        'rating': "./td[3]//strong//text()",
+                        'title': "./td[2]//a//text()",
+                        'year': "./td[2]//span//text()",
+                        'movieID': "./td[2]//a/@href",
+                        'votes': "./td[3]//strong/@title"
+                    }
+                )
+            )
+        ]

     def postprocess_data(self, data):
         if not data or self.label not in data:

@@ -73,9 +80,11 @@ class DOMHTMLTop250Parser(DOMParserBase):
             if theID in seenIDs:
                 continue
             seenIDs.append(theID)
-            minfo = analyze_title(d['title']+" "+d['year'])
-            try: minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
-            except: pass
+            minfo = analyze_title(d['title'] + ' ' + d['year'])
+            try:
+                minfo[self.ranktext] = int(d[self.ranktext].replace('.', ''))
+            except:
+                pass
             if 'votes' in d:
                 try:
                     votes = d['votes'].replace(' votes','')

@@ -93,7 +102,7 @@ class DOMHTMLTop250Parser(DOMParserBase):
 class DOMHTMLBottom100Parser(DOMHTMLTop250Parser):
     """Parser for the "bottom 100" page.
     The page should be provided as a string, as taken from
-    the akas.imdb.com server.  The final result will be a
+    the www.imdb.com server.  The final result will be a
     dictionary, with a key for every relevant section.

     Example:


@@ -35,7 +35,9 @@ from imdb.Character import Character

 # Year, imdbIndex and kind.
-re_yearKind_index = re.compile(r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)')
+re_yearKind_index = re.compile(
+    r'(\([0-9\?]{4}(?:/[IVXLCDM]+)?\)(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)'
+)

 # Match imdb ids in href tags
 re_imdbid = re.compile(r'(title/tt|name/nm|character/ch|company/co)([0-9]+)')

@@ -304,7 +306,7 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
     elif title[-14:] == 'TV mini-series':
         title = title[:-14] + ' (mini)'
     if title and title.endswith(_defSep.rstrip()):
-        title = title[:-len(_defSep)+1]
+        title = title[:-len(_defSep) + 1]
     # Try to understand where the movie title ends.
     while True:
         if year:
@@ -320,18 +322,17 @@ def build_movie(txt, movieID=None, roleID=None, status=None,
         # Try to match paired parentheses; yes: sometimes there are
         # parentheses inside comments...
         nidx = title.rfind('(')
-        while (nidx != -1 and \
-                title[nidx:].count('(') != title[nidx:].count(')')):
+        while nidx != -1 and title[nidx:].count('(') != title[nidx:].count(')'):
             nidx = title[:nidx].rfind('(')
         # Unbalanced parentheses: stop here.
         if nidx == -1: break
         # The last item in parentheses seems to be a year: stop here.
-        first4 = title[nidx+1:nidx+5]
-        if (first4.isdigit() or first4 == '????') and \
-                title[nidx+5:nidx+6] in (')', '/'): break
+        first4 = title[nidx + 1:nidx + 5]
+        if (first4.isdigit() or first4 == '????') and title[nidx + 5:nidx + 6] in (')', '/'):
+            break
         # The last item in parentheses is a known kind: stop here.
-        if title[nidx+1:-1] in ('TV', 'V', 'mini', 'VG', 'TV movie',
-                'TV series', 'short'): break
+        if title[nidx + 1:-1] in ('TV', 'V', 'mini', 'VG', 'TV movie', 'TV series', 'short'):
+            break
         # Else, in parentheses there are some notes.
         # XXX: should the notes in the role half be kept separated
         # from the notes in the movie title half?
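
The rewritten while condition above is the whole balanced-parentheses scan on one line: back up from the rightmost '(' until the tail of the string holds as many opening as closing parentheses. The same idea restated as a standalone sketch (hypothetical helper name, for illustration only):

    def last_balanced_group(title):
        """Index of the '(' opening the last balanced parenthesized
        group in title, or -1 when there is none."""
        nidx = title.rfind('(')
        # Back up while the tail from nidx holds unmatched parentheses.
        while nidx != -1 and title[nidx:].count('(') != title[nidx:].count(')'):
            nidx = title[:nidx].rfind('(')
        return nidx

    # last_balanced_group('Role (as X (uncredited)) (1999)') -> index of '(1999)'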
@@ -471,8 +472,8 @@ class DOMParserBase(object):
                 if _gotError:
                     warnings.warn('falling back to "%s"' % mod)
                 break
-            except ImportError, e:
-                if idx+1 >= nrMods:
+            except ImportError as e:
+                if idx + 1 >= nrMods:
                     # Raise the exception, if we don't have any more
                     # options to try.
                     raise IMDbError('unable to use any parser in %s: %s' % \
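
Switching to `except ImportError as e` is more than style: the comma form is a SyntaxError on Python 3, while the `as` form parses on Python 2.6+ and 3.x alike, which matters for a codebase whose master branch is Python 3. A minimal illustration:

    try:
        import lxml.html
    except ImportError as e:  # 'except ImportError, e:' would not parse on Python 3
        lxml_error, lxml = e, None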
@@ -786,10 +787,10 @@ class Extractor(object):

     def __repr__(self):
         """String representation of an Extractor object."""
-        r = '<Extractor id:%s (label=%s, path=%s, attrs=%s, group=%s, ' \
-            'group_key=%s group_key_normalize=%s)>' % (id(self),
-            self.label, self.path, repr(self.attrs), self.group,
-            self.group_key, self.group_key_normalize)
+        t = '<Extractor id:%s (label=%s, path=%s, attrs=%s, group=%s, group_key=%s' + \
+            ', group_key_normalize=%s)>'
+        r = t % (id(self), self.label, self.path, repr(self.attrs), self.group,
+                 self.group_key, self.group_key_normalize)
         return r
@@ -825,7 +826,7 @@ def _parse_ref(text, link, info):
     yearK = re_yearKind_index.match(info)
     if yearK and yearK.start() == 0:
         text += ' %s' % info[:yearK.end()]
-    return (text.replace('\n', ' '), link)
+    return text.replace('\n', ' '), link


 class GatherRefs(DOMParserBase):

View file

@@ -687,7 +687,7 @@ class IMDbSqlAccessSystem(IMDbBase):
         elif isinstance(o, dict):
             for value in o.values():
                 self._findRefs(value, trefs, nrefs)
-        return (trefs, nrefs)
+        return trefs, nrefs

     def _extractRefs(self, o):
         """Scan for titles or names references in strings."""
@@ -702,7 +702,7 @@ class IMDbSqlAccessSystem(IMDbBase):
                     "imdb.parser.sql.IMDbSqlAccessSystem; "
                     "if it's not a recursion limit exceeded and we're not "
                     "running in a Symbian environment, it's a bug:\n%s" % e)
-        return (trefs, nrefs)
+        return trefs, nrefs

     def _changeAKAencoding(self, akanotes, akatitle):
         """Return akatitle in the correct charset, as specified in

View file

@@ -437,11 +437,13 @@ def ISNULL(x):
     """Emulate SQLObject's ISNULL."""
     # XXX: Should we use null()? Can null() be a global instance?
     # XXX: Is it safe to test None with the == operator, in this case?
-    return x == None
+    return x is None
+

 def ISNOTNULL(x):
     """Emulate SQLObject's ISNOTNULL."""
-    return x != None
+    return x is not None
+

 def CONTAINSSTRING(expr, pattern):
     """Emulate SQLObject's CONTAINSSTRING."""

View file

@@ -122,53 +122,80 @@ class DBTable(object):

 # Default values to insert in some tables: {'column': (list, of, values, ...)}
-kindTypeDefs = {'kind': ('movie', 'tv series', 'tv movie', 'video movie',
-                        'tv mini series', 'video game', 'episode')}
-companyTypeDefs = {'kind': ('distributors', 'production companies',
-                        'special effects companies', 'miscellaneous companies')}
-infoTypeDefs = {'info': ('runtimes', 'color info', 'genres', 'languages',
-    'certificates', 'sound mix', 'tech info', 'countries', 'taglines',
-    'keywords', 'alternate versions', 'crazy credits', 'goofs',
-    'soundtrack', 'quotes', 'release dates', 'trivia', 'locations',
-    'mini biography', 'birth notes', 'birth date', 'height',
-    'death date', 'spouse', 'other works', 'birth name',
-    'salary history', 'nick names', 'books', 'agent address',
-    'biographical movies', 'portrayed in', 'where now', 'trade mark',
-    'interviews', 'article', 'magazine cover photo', 'pictorial',
-    'death notes', 'LD disc format', 'LD year', 'LD digital sound',
-    'LD official retail price', 'LD frequency response', 'LD pressing plant',
-    'LD length', 'LD language', 'LD review', 'LD spaciality', 'LD release date',
-    'LD production country', 'LD contrast', 'LD color rendition',
-    'LD picture format', 'LD video noise', 'LD video artifacts',
-    'LD release country', 'LD sharpness', 'LD dynamic range',
-    'LD audio noise', 'LD color information', 'LD group genre',
-    'LD quality program', 'LD close captions-teletext-ld-g',
-    'LD category', 'LD analog left', 'LD certification',
-    'LD audio quality', 'LD video quality', 'LD aspect ratio',
-    'LD analog right', 'LD additional information',
-    'LD number of chapter stops', 'LD dialogue intellegibility',
-    'LD disc size', 'LD master format', 'LD subtitles',
-    'LD status of availablility', 'LD quality of source',
-    'LD number of sides', 'LD video standard', 'LD supplement',
-    'LD original title', 'LD sound encoding', 'LD number', 'LD label',
-    'LD catalog number', 'LD laserdisc title', 'screenplay-teleplay',
-    'novel', 'adaption', 'book', 'production process protocol',
-    'printed media reviews', 'essays', 'other literature', 'mpaa',
-    'plot', 'votes distribution', 'votes', 'rating',
-    'production dates', 'copyright holder', 'filming dates', 'budget',
-    'weekend gross', 'gross', 'opening weekend', 'rentals',
-    'admissions', 'studios', 'top 250 rank', 'bottom 10 rank')}
-compCastTypeDefs = {'kind': ('cast', 'crew', 'complete', 'complete+verified')}
-linkTypeDefs = {'link': ('follows', 'followed by', 'remake of', 'remade as',
-                        'references', 'referenced in', 'spoofs', 'spoofed in',
-                        'features', 'featured in', 'spin off from', 'spin off',
-                        'version of', 'similar to', 'edited into',
-                        'edited from', 'alternate language version of',
-                        'unknown link')}
-roleTypeDefs = {'role': ('actor', 'actress', 'producer', 'writer',
-                        'cinematographer', 'composer', 'costume designer',
-                        'director', 'editor', 'miscellaneous crew',
-                        'production designer', 'guest')}
+kindTypeDefs = {
+    'kind': (
+        'movie', 'tv series', 'tv movie', 'video movie',
+        'tv mini series', 'video game', 'episode', 'short', 'tv short'
+    )
+}
+
+companyTypeDefs = {
+    'kind': (
+        'distributors', 'production companies',
+        'special effects companies', 'miscellaneous companies'
+    )
+}
+
+infoTypeDefs = {
+    'info': (
+        'runtimes', 'color info', 'genres', 'languages',
+        'certificates', 'sound mix', 'tech info', 'countries', 'taglines',
+        'keywords', 'alternate versions', 'crazy credits', 'goofs',
+        'soundtrack', 'quotes', 'release dates', 'trivia', 'locations',
+        'mini biography', 'birth notes', 'birth date', 'height',
+        'death date', 'spouse', 'other works', 'birth name',
+        'salary history', 'nick names', 'books', 'agent address',
+        'biographical movies', 'portrayed in', 'where now', 'trade mark',
+        'interviews', 'article', 'magazine cover photo', 'pictorial',
+        'death notes', 'LD disc format', 'LD year', 'LD digital sound',
+        'LD official retail price', 'LD frequency response', 'LD pressing plant',
+        'LD length', 'LD language', 'LD review', 'LD spaciality', 'LD release date',
+        'LD production country', 'LD contrast', 'LD color rendition',
+        'LD picture format', 'LD video noise', 'LD video artifacts',
+        'LD release country', 'LD sharpness', 'LD dynamic range',
+        'LD audio noise', 'LD color information', 'LD group genre',
+        'LD quality program', 'LD close captions-teletext-ld-g',
+        'LD category', 'LD analog left', 'LD certification',
+        'LD audio quality', 'LD video quality', 'LD aspect ratio',
+        'LD analog right', 'LD additional information',
+        'LD number of chapter stops', 'LD dialogue intellegibility',
+        'LD disc size', 'LD master format', 'LD subtitles',
+        'LD status of availablility', 'LD quality of source',
+        'LD number of sides', 'LD video standard', 'LD supplement',
+        'LD original title', 'LD sound encoding', 'LD number', 'LD label',
+        'LD catalog number', 'LD laserdisc title', 'screenplay-teleplay',
+        'novel', 'adaption', 'book', 'production process protocol',
+        'printed media reviews', 'essays', 'other literature', 'mpaa',
+        'plot', 'votes distribution', 'votes', 'rating',
+        'production dates', 'copyright holder', 'filming dates', 'budget',
+        'weekend gross', 'gross', 'opening weekend', 'rentals',
+        'admissions', 'studios', 'top 250 rank', 'bottom 10 rank'
+    )
+}
+
+compCastTypeDefs = {
+    'kind': ('cast', 'crew', 'complete', 'complete+verified')
+}
+
+linkTypeDefs = {
+    'link': (
+        'follows', 'followed by', 'remake of', 'remade as',
+        'references', 'referenced in', 'spoofs', 'spoofed in',
+        'features', 'featured in', 'spin off from', 'spin off',
+        'version of', 'similar to', 'edited into',
+        'edited from', 'alternate language version of',
+        'unknown link'
+    )
+}
+
+roleTypeDefs = {
+    'role': (
+        'actor', 'actress', 'producer', 'writer',
+        'cinematographer', 'composer', 'costume designer',
+        'director', 'editor', 'miscellaneous crew',
+        'production designer', 'guest'
+    )
+}

 # Schema of tables in our database.
 # XXX: Foreign keys can be used to create constrains between tables,
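
These *TypeDefs mappings are the seed rows for the small lookup tables (KindType, CompanyType, InfoType, ...); note that the kind list now also carries 'short' and 'tv short'. A rough sketch of how a {'column': (values, ...)} default expands into rows; the helper below is hypothetical, for illustration only, and not the module's actual insert logic:

    def rows_from_defs(defs):
        """Expand {'column': (v1, v2, ...)} into [{'column': v1}, ...]."""
        (column, values), = defs.items()
        return [{column: value} for value in values]

    # rows_from_defs(kindTypeDefs)[:2] -> [{'kind': 'movie'}, {'kind': 'tv series'}]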
@@ -186,7 +213,7 @@ DB_SCHEMA = [
     # the alternateID attribute here will be ignored by SQLAlchemy.
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('imdbID', INTCOL, default=None, index='idx_imdb_id'),
     DBCol('gender', STRINGCOL, length=1, default=None),
     DBCol('namePcodeCf', STRINGCOL, length=5, default=None,
@@ -204,7 +231,7 @@ DB_SCHEMA = [
     # from namePcodeNf.
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('imdbID', INTCOL, default=None),
     DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
           index='idx_pcodenf'),
@@ -218,7 +245,7 @@ DB_SCHEMA = [
     # namePcodeSf is the soundex of the name plus the country code.
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('name', UNICODECOL, notNone=True, index='idx_name', indexLen=6),
-    DBCol('countryCode', UNICODECOL, length=255, default=None),
+    DBCol('countryCode', STRINGCOL, length=255, default=None),
     DBCol('imdbID', INTCOL, default=None),
     DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
           index='idx_pcodenf'),
@@ -237,7 +264,7 @@ DB_SCHEMA = [
     DBCol('id', INTCOL, notNone=True, alternateID=True),
     DBCol('title', UNICODECOL, notNone=True,
           index='idx_title', indexLen=10),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'),
     DBCol('productionYear', INTCOL, default=None),
     DBCol('imdbID', INTCOL, default=None, index="idx_imdb_id"),
@@ -264,7 +291,7 @@ DB_SCHEMA = [
     DBCol('personID', INTCOL, notNone=True, index='idx_person',
           foreignKey='Name'),
     DBCol('name', UNICODECOL, notNone=True),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('namePcodeCf', STRINGCOL, length=5, default=None,
           index='idx_pcodecf'),
     DBCol('namePcodeNf', STRINGCOL, length=5, default=None,
@@ -291,7 +318,7 @@ DB_SCHEMA = [
     DBCol('movieID', INTCOL, notNone=True, index='idx_movieid',
           foreignKey='Title'),
     DBCol('title', UNICODECOL, notNone=True),
-    DBCol('imdbIndex', UNICODECOL, length=12, default=None),
+    DBCol('imdbIndex', STRINGCOL, length=12, default=None),
     DBCol('kindID', INTCOL, notNone=True, foreignKey='KindType'),
     DBCol('productionYear', INTCOL, default=None),
     DBCol('phoneticCode', STRINGCOL, length=5, default=None,

View file

@@ -42,8 +42,22 @@ _utils_logger = logging.getLogger('imdbpy.utils')
 # and year of release.
 # XXX: probably L, C, D and M are far too much! ;-)
 re_year_index = re.compile(r'\(([0-9\?]{4}(/[IVXLCDM]+)?)\)')
-re_extended_year_index = re.compile(r'\((TV episode|TV Series|TV mini-series|TV|Video|Video Game)? ?((?:[0-9\?]{4})(?:-[0-9\?]{4})?)(?:/([IVXLCDM]+)?)?\)')
-re_remove_kind = re.compile(r'\((TV episode|TV Series|TV mini-series|TV|Video|Video Game)? ?')
+re_m_episode = re.compile(r'\(TV Episode\)\s+-\s+', re.I)
+re_m_series = re.compile(r'Season\s+\d+\s+\|\s+Episode\s+\d+\s+-', re.I)
+re_m_imdbIndex = re.compile(r'\(([IVXLCDM]+)\)')
+re_m_kind = re.compile(
+    r'\((TV episode|TV Series|TV mini-series|mini|TV|Video|Video Game|VG|Short|TV Movie|TV Short|V)\)',
+    re.I)
+
+KIND_MAP = {
+    'tv': 'tv movie',
+    'tv episode': 'episode',
+    'v': 'video movie',
+    'video': 'video movie',
+    'vg': 'video game',
+    'mini': 'tv mini series',
+    'tv mini-series': 'tv mini series'
+}

 # Match only the imdbIndex (for name strings).
 re_index = re.compile(r'^\(([IVXLCDM]+)\)$')
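
re_m_kind pulls a parenthesized kind marker out of a title string, and KIND_MAP folds the short or mixed-case variants onto the canonical kind names used elsewhere in the library. Reusing the two definitions above, the normalization step boils down to:

    for sample in ('The Pentagon Papers (2003) (TV)', 'Myst (1993) (VG)'):
        detected = re_m_kind.findall(sample)
        if detected:
            kind = detected[-1].lower().replace('-', '')
            print KIND_MAP.get(kind, kind)
    # prints 'tv movie', then 'video game'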
@@ -283,13 +297,6 @@ def _split_series_episode(title):
             # that means this is an episode title, as returned by
             # the web server.
             series_title = title[:second_quot]
-        ##elif episode_or_year[-1:] == '}':
-        ##    # Title of the episode, as in the plain text data files.
-        ##    begin_eps = episode_or_year.find('{')
-        ##    if begin_eps == -1: return series_title, episode_or_year
-        ##    series_title = title[:second_quot+begin_eps].rstrip()
-        ##    # episode_or_year is returned with the {...}
-        ##    episode_or_year = episode_or_year[begin_eps:]
     return series_title, episode_or_year
@@ -383,65 +390,24 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
     # tv mini series: 5,497
     # video game: 5,490
     # More up-to-date statistics: http://us.imdb.com/database_statistics
-    if title.endswith('(TV)'):
-        kind = u'tv movie'
-        title = title[:-4].rstrip()
-    elif title.endswith('(TV Movie)'):
-        kind = u'tv movie'
-        title = title[:-10].rstrip()
-    elif title.endswith('(V)'):
-        kind = u'video movie'
-        title = title[:-3].rstrip()
-    elif title.lower().endswith('(video)'):
-        kind = u'video movie'
-        title = title[:-7].rstrip()
-    elif title.endswith('(TV Short)'):
-        kind = u'tv short'
-        title = title[:-10].rstrip()
-    elif title.endswith('(TV Mini-Series)'):
-        kind = u'tv mini series'
-        title = title[:-16].rstrip()
-    elif title.endswith('(mini)'):
-        kind = u'tv mini series'
-        title = title[:-6].rstrip()
-    elif title.endswith('(VG)'):
-        kind = u'video game'
-        title = title[:-4].rstrip()
-    elif title.endswith('(Video Game)'):
-        kind = u'video game'
-        title = title[:-12].rstrip()
-    elif title.endswith('(TV Series)'):
-        epindex = title.find('(TV Episode) - ')
-        if epindex >= 0:
-            # It's an episode of a series.
-            kind = u'episode'
-            series_info = analyze_title(title[epindex + 15:])
-            result['episode of'] = series_info.get('title')
-            result['series year'] = series_info.get('year')
-            title = title[:epindex]
-        else:
-            kind = u'tv series'
-            title = title[:-11].rstrip()
+    epindex = re_m_episode.search(title)
+    if epindex:
+        # It's an episode of a series.
+        kind = 'episode'
+        series_title = title[epindex.end():]
+        series_title = re_m_series.sub('', series_title)
+        series_info = analyze_title(series_title)
+        result['episode of'] = series_info.get('title')
+        result['series year'] = series_info.get('year')
+        title = title[:epindex.start()].strip()
+    else:
+        detected_kind = re_m_kind.findall(title)
+        if detected_kind:
+            kind = detected_kind[-1].lower().replace('-', '')
+            kind = KIND_MAP.get(kind, kind)
+            title = re_m_kind.sub('', title).strip()
     # Search for the year and the optional imdbIndex (a roman number).
     yi = re_year_index.findall(title)
-    if not yi:
-        yi = re_extended_year_index.findall(title)
-        if yi:
-            yk, yiy, yii = yi[-1]
-            yi = [(yiy, yii)]
-            if yk == 'TV episode':
-                kind = u'episode'
-            elif yk in ('TV', 'TV Movie'):
-                kind = u'tv movie'
-            elif yk == 'TV Series':
-                kind = u'tv series'
-            elif yk == 'Video':
-                kind = u'video movie'
-            elif yk in ('TV mini-series', 'TV Mini-Series'):
-                kind = u'tv mini series'
-            elif yk == 'Video Game':
-                kind = u'video game'
-            title = re_remove_kind.sub('(', title)
     if yi:
         last_yi = yi[-1]
         year = last_yi[0]
@@ -450,7 +416,12 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
             year = year[:-len(imdbIndex)-1]
         i = title.rfind('(%s)' % last_yi[0])
         if i != -1:
-            title = title[:i-1].rstrip()
+            title = title[:i - 1].rstrip()
+    if not imdbIndex:
+        detect_imdbIndex = re_m_imdbIndex.findall(title)
+        if detect_imdbIndex:
+            imdbIndex = detect_imdbIndex[-1]
+            title = re_m_imdbIndex.sub('', title).strip()
     # This is a tv (mini) series: strip the '"' at the begin and at the end.
     # XXX: strip('"') is not used for compatibility with Python 2.0.
     if title and title[0] == title[-1] == '"':
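
The rewritten flow first splits '(TV Episode) - ... Season N | Episode M - ...' strings into episode and series halves, then falls back to the generic re_m_kind marker, and finally recovers a roman-numeral imdbIndex that no longer sits inside the year parentheses. For a plain title the long-standing contract still holds; roughly (exact value types may differ by version):

    from imdb.utils import analyze_title

    analyze_title('The Untouchables (1987)')
    # roughly: {'title': 'The Untouchables', 'kind': 'movie', 'year': 1987}

    analyze_title('Twin Peaks (1990) (TV Series)')
    # the (TV Series) marker now drives the kind -> 'tv series'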
@@ -464,8 +435,6 @@ def analyze_title(title, canonical=None, canonicalSeries=None,
         title = canonicalTitle(title)
     else:
         title = normalizeTitle(title)
-    # 'kind' is one in ('movie', 'episode', 'tv series', 'tv mini series',
-    # 'tv movie', 'video movie', 'video game')
     result['title'] = title
     result['kind'] = kind or u'movie'
     if year and year != '????':
@@ -832,7 +801,7 @@ def date_and_notes(s):
     """Parse (birth|death) date and notes; returns a tuple in the
     form (date, notes)."""
     s = s.strip()
-    if not s: return (u'', u'')
+    if not s: return u'', u''
     notes = u''
     if s[0].isdigit() or s.split()[0].lower() in ('c.', 'january', 'february',
                                     'march', 'april', 'may', 'june',
@@ -990,7 +959,7 @@ def _tag4TON(ton, addAccessSystem=False, _containerOnly=False):
         beginTag += extras
     if ton.notes:
         beginTag += u'<notes>%s</notes>' % _normalizeValue(ton.notes)
-    return (beginTag, u'</%s>' % tag)
+    return beginTag, u'</%s>' % tag


 TAGS_TO_MODIFY = {
@@ -1264,8 +1233,8 @@ class _Container(object):
         self.__role = role

     currentRole = property(_get_currentRole, _set_currentRole,
-                           doc="The role of a Person in a Movie" + \
-                           " or the interpreter of a Character in a Movie.")
+                           doc="The role of a Person in a Movie"
+                               " or the interpreter of a Character in a Movie.")

     def _init(self, **kwds): pass
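
The doc-string tweak above swaps an explicit `+ \` continuation for implicit concatenation of adjacent string literals, which the compiler joins into one constant; the resulting string is identical. For example:

    doc = ("The role of a Person in a Movie"
           " or the interpreter of a Character in a Movie.")
    assert doc == ("The role of a Person in a Movie or the interpreter "
                   "of a Character in a Movie.")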
@@ -1478,10 +1447,10 @@ class _Container(object):
         except RuntimeError, e:
             # Symbian/python 2.2 has a poor regexp implementation.
             import warnings
-            warnings.warn('RuntimeError in '
-                    "imdb.utils._Container.__getitem__; if it's not "
-                    "a recursion limit exceeded and we're not running "
-                    "in a Symbian environment, it's a bug:\n%s" % e)
+            warnings.warn("RuntimeError in imdb.utils._Container.__getitem__;"
+                          " if it's not a recursion limit exceeded and we're"
+                          " not running in a Symbian environment, it's a"
+                          " bug:\n%s" % e)
         return rawData

     def __setitem__(self, key, item):