Merge branch 'feature/UpdateImdbpie' into dev

2024-11-22 04:45:05 +00:00 · 2024-06-07 13:28:21 +01:00 · 2024-06-07 13:28:21 +01:00 · 421022f4c4
commit 421022f4c4
parent 13a7cd8169 909fd3d24e
5 changed files with 124 additions and 92 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -7,6 +7,7 @@
 * Update feedparser 6.0.10 (9865dec) to 6.0.11 (efcb89b)
 * Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141)
 * Update idna library 3.4 (cab054c) to 3.7 (1d365e1)
+* Update imdbpie 5.6.4 (f695e87) to 5.6.5 (f8ed7a0)
 * Update Requests library 2.31.0 (8812812) to 2.32.3 (0e322af)
 * Update Tornado Web Server 6.4 (b3f2a4b) to 6.4.1 (2a0e1d1)
 * Update urllib3 2.0.7 (56f01e0) to 2.2.1 (54d6edf)
--- a/lib/imdbpie/auth.py
+++ b/lib/imdbpie/auth.py
@@ -50,7 +50,7 @@ class ZuluHmacAuthV3HTTPHandler(object):
        them into a string, separated by newlines.
        """
        vals = sorted(['%s:%s' % (n.lower().strip(),
-                                  headers_to_sign[n].strip()) for n in headers_to_sign])
+                    headers_to_sign[n].strip()) for n in headers_to_sign])
        return '\n'.join(vals)

    def headers_to_sign(self, http_request):
@@ -90,14 +90,16 @@ class ZuluHmacAuthV3HTTPHandler(object):
        headers_to_sign = self.headers_to_sign(http_request)
        canonical_qs = self.canonical_query_string(http_request)
        canonical_headers = self.canonical_headers(headers_to_sign)
-        string_to_sign = '\n'.join((
-            http_request.method,
-            http_request.path,
-            canonical_qs,
-            canonical_headers,
-            '',
-            http_request.body
-        ))
+        string_to_sign = '\n'.join(
+            (
+                http_request.method,
+                http_request.path,
+                canonical_qs,
+                canonical_headers,
+                '',
+                http_request.body,
+            )
+        )
        return string_to_sign, headers_to_sign

    def add_auth(self, req):
@@ -319,9 +321,15 @@ class Auth(object):
            key: val[0] for key, val in parse_qs(parsed_url.query).items()
        }
        request = HTTPRequest(
-            method='GET', protocol='https', host=HOST,
-            port=443, path=parsed_url.path, auth_path=None, params=params,
-            headers={'User-Agent': USER_AGENT}, body=''
+            method='GET',
+            protocol='https',
+            host=HOST,
+            port=443,
+            path=parsed_url.path,
+            auth_path=None,
+            params=params,
+            headers={'User-Agent': USER_AGENT},
+            body='',
        )
        handler.add_auth(req=request)
        headers = request.headers
--- a/lib/imdbpie/facade.py
+++ b/lib/imdbpie/facade.py
@@ -7,15 +7,20 @@ from dateutil.parser import parse

 from .imdbpie import Imdb
 from .objects import (
-    Title, TitleEpisodes, Name, TitleName, Image, TitleRelease,
-    TitleSearchResult, NameSearchResult,
+    Title,
+    TitleEpisodes,
+    Name,
+    TitleName,
+    Image,
+    TitleRelease,
+    TitleSearchResult,
+    NameSearchResult,
 )

 REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')


 class ImdbFacade(object):
-
    def __init__(self, client=None):
        self._client = client or Imdb()

@@ -36,8 +41,11 @@ class ImdbFacade(object):
            season = None
            episode = None
        return Title(
-            season=season, episode=episode, episodes=episodes,
-            runtime=runtime, **title_data
+            season=season,
+            episode=episode,
+            episodes=episodes,
+            runtime=runtime,
+            **title_data
        )

    def get_name(self, imdb_id):
@@ -68,8 +76,13 @@ class ImdbFacade(object):
            self._parse_id(f['id']) for f in filmography_data['filmography']
        )
        return Name(
-            name=name, imdb_id=imdb_id, date_of_birth=date_of_birth,
-            gender=gender, birth_place=birth_place, bios=bios, image=image,
+            name=name,
+            imdb_id=imdb_id,
+            date_of_birth=date_of_birth,
+            gender=gender,
+            birth_place=birth_place,
+            bios=bios,
+            image=image,
            filmography=filmography,
        )

@@ -77,7 +90,8 @@ class ImdbFacade(object):
        results = []
        for result in self._client.search_for_name(query):
            result = NameSearchResult(
-                imdb_id=result['imdb_id'], name=result['name'],
+                imdb_id=result['imdb_id'],
+                name=result['name'],
            )
            results.append(result)
        return tuple(results)
@@ -90,8 +104,10 @@ class ImdbFacade(object):
            else:
                year = None
            result = TitleSearchResult(
-                imdb_id=result['imdb_id'], title=result['title'],
-                type=result['type'], year=year,
+                imdb_id=result['imdb_id'],
+                title=result['title'],
+                type=result['type'],
+                year=year,
            )
            results.append(result)
        return tuple(results)
@@ -102,8 +118,9 @@ class ImdbFacade(object):
                name=i['name'],
                job=i.get('job'),
                category=i.get('category'),
-                imdb_id=self._parse_id(i['id'])
-            ) for i in top_crew_data['writers']
+                imdb_id=self._parse_id(i['id']),
+            )
+            for i in top_crew_data['writers']
        )

    def _get_stars(self, principals_data):
@@ -113,8 +130,9 @@ class ImdbFacade(object):
                job=i.get('job'),
                characters=tuple(i.get('characters', ())),
                category=i.get('category'),
-                imdb_id=self._parse_id(i['id'])
-            ) for i in principals_data
+                imdb_id=self._parse_id(i['id']),
+            )
+            for i in principals_data
        )

    def _get_creators(self, top_crew_data):
@@ -123,8 +141,9 @@ class ImdbFacade(object):
                name=i['name'],
                job=i.get('job'),
                category=i.get('category'),
-                imdb_id=self._parse_id(i['id'])
-            ) for i in top_crew_data['writers']
+                imdb_id=self._parse_id(i['id']),
+            )
+            for i in top_crew_data['writers']
            if i.get('job') == 'creator'
        )

@@ -134,20 +153,23 @@ class ImdbFacade(object):
                name=i['name'],
                job=i.get('job'),
                category=i.get('category'),
-                imdb_id=self._parse_id(i['id'])
-            ) for i in top_crew_data['directors']
+                imdb_id=self._parse_id(i['id']),
+            )
+            for i in top_crew_data['directors']
        )

    def _get_credits(self, credits_data):
        credits = []
        for category in credits_data.get('credits', ()):
            for item in credits_data['credits'][category]:
-                credits.append(TitleName(
-                    name=item['name'],
-                    category=item.get('category'),
-                    job=item.get('job'),
-                    imdb_id=self._parse_id(item['id'])
-                ))
+                credits.append(
+                    TitleName(
+                        name=item['name'],
+                        category=item.get('category'),
+                        job=item.get('job'),
+                        imdb_id=self._parse_id(item['id']),
+                    )
+                )
        return tuple(credits)

    def _parse_id(self, string):
@@ -212,22 +234,25 @@ class ImdbFacade(object):
            )
        except KeyError:
            image = None
-        return dict(
-            imdb_id=imdb_id,
-            title=title,
-            year=year,
-            rating=rating,
-            type=type_,
-            release_date=release_date,
-            releases=releases,
-            plot_outline=plot_outline,
-            rating_count=rating_count,
-            writers=writers,
-            directors=directors,
-            creators=creators,
-            genres=genres,
-            credits=credits,
-            certification=certification,
-            image=image,
-            stars=stars,
-        ), title_aux_data
+        return (
+            dict(
+                imdb_id=imdb_id,
+                title=title,
+                year=year,
+                rating=rating,
+                type=type_,
+                release_date=release_date,
+                releases=releases,
+                plot_outline=plot_outline,
+                rating_count=rating_count,
+                writers=writers,
+                directors=directors,
+                creators=creators,
+                genres=genres,
+                credits=credits,
+                certification=certification,
+                image=image,
+                stars=stars,
+            ),
+            title_aux_data,
+        )
--- a/lib/imdbpie/imdbpie.py
+++ b/lib/imdbpie/imdbpie.py
@@ -11,7 +11,7 @@ from trans import trans
 import requests
 from six import text_type
 from six.moves import http_client as httplib
-from six.moves.urllib.parse import urlencode, urljoin, quote, unquote
+from six.moves.urllib.parse import urlencode, urljoin, quote

 from .constants import BASE_URI, SEARCH_BASE_URI
 from .auth import Auth
@@ -52,7 +52,6 @@ _SIMPLE_GET_ENDPOINTS = {


 class Imdb(Auth):
-
    def __init__(self, locale=None, exclude_episodes=False, session=None, cachedir=None):
        self.locale = locale or 'en_US'
        self.region = self.locale.split('_')[-1].upper()
@@ -87,8 +86,8 @@ class Imdb(Auth):
            self._title_not_found()

        if (
-            self.exclude_episodes is True and
-            resource['base']['titleType'] == 'tvEpisode'
+            self.exclude_episodes is True
+            and resource['base']['titleType'] == 'tvEpisode'
        ):
            raise LookupError(
                'Title not found. Title was an episode and '
@@ -111,14 +110,14 @@ class Imdb(Auth):
                    'region': self.region,
                    'tconst': imdb_id,
                    'today': date.today().strftime('%Y-%m-%d'),
-                }
+                },
            )
        except LookupError:
            self._title_not_found()

        if (
-            self.exclude_episodes is True and
-            resource['titleType'].lower() == 'tvepisode'
+            self.exclude_episodes is True
+            and resource['titleType'].lower() == 'tvepisode'
        ):
            raise LookupError(
                'Title not found. Title was an episode and '
@@ -128,19 +127,24 @@ class Imdb(Auth):

    def _simple_get_method(self, method, path):
        """Return client method generated from ``_SIMPLE_GET_ENDPOINTS``."""
+
        def get(imdb_id):
            logger.info('called %s %s', method, imdb_id)
            self.validate_imdb_id(imdb_id)
            self._redirection_title_check(imdb_id)
            return self._get_resource(path.format(imdb_id=imdb_id))
+
        return get

    def title_exists(self, imdb_id):
        self.validate_imdb_id(imdb_id)
        page_url = 'https://www.imdb.com/title/{0}/'.format(imdb_id)

-        response = self.session.get(page_url, allow_redirects=False)
-
+        response = self.session.get(
+            page_url,
+            allow_redirects=False,
+            headers={'User-Agent': 'Mozilla/5.0'},
+        )
        if response.status_code == httplib.OK:
            return True
        elif response.status_code == httplib.NOT_FOUND:
@@ -165,7 +169,7 @@ class Imdb(Auth):

    def search_for_name(self, name):
        logger.info('called search_for_name %s', name)
-        name = re.sub(r'\W+', '_', name, flags=re.UNICODE).strip('_')
+        name = re.sub(r'\W+', '+', name).strip('+')
        search_results = self._suggest_search(name)
        results = []
        for result in search_results.get('d', ()):
@@ -181,7 +185,7 @@ class Imdb(Auth):

    def search_for_title(self, title):
        logger.info('called search_for_title %s', title)
-        title = re.sub(r'\W+', '_', title, flags=re.UNICODE).strip('_')
+        title = re.sub(r'\W+', '+', title).strip('+')
        search_results = self._suggest_search(title)
        results = []
        for result in search_results.get('d', ()):
@@ -238,9 +242,13 @@ class Imdb(Auth):
        if region:
            params.update({'region': region})

-        return self._get(urljoin(
-            BASE_URI, '/template/imdb-ios-writable/tv-episodes-v2.jstl/render'
-        ), params=params)
+        return self._get(
+            urljoin(
+                BASE_URI,
+                '/template/imdb-ios-writable/tv-episodes-v2.jstl/render',
+            ),
+            params=params,
+        )

    def get_title_top_crew(self, imdb_id):
        """
@@ -252,24 +260,21 @@ class Imdb(Auth):
        logger.info('called get_title_top_crew %s', imdb_id)
        self.validate_imdb_id(imdb_id)
        params = {'tconst': imdb_id}
-        return self._get(urljoin(
-            BASE_URI,
-            '/template/imdb-android-writable/7.3.top-crew.jstl/render'
-        ), params=params)
+        return self._get(
+            urljoin(
+                BASE_URI,
+                '/template/imdb-android-writable/7.3.top-crew.jstl/render',
+            ),
+            params=params,
+        )

    @staticmethod
    def _parse_dirty_json(data, query=None):
        if query is None:
            match_json_within_dirty_json = r'imdb\$.+\({1}(.+)\){1}'
        else:
-            query_match = ''.join(
-                char if char.isalnum() else '[{0}]'.format(char)
-                for char in unquote(query)
-            )
-            query_match = query_match.replace('[ ]', '.+')
-            match_json_within_dirty_json = (
-                r'imdb\${}\((.+)\)'.format(query_match)
-            )
+            # No need to unquote: the JSON response already contains the quoted query
+            match_json_within_dirty_json = r'imdb\${}\((.+)\)'.format(query)
        data_clean = re.match(
            match_json_within_dirty_json, data, re.IGNORECASE
        ).groups()[0]
@@ -290,9 +295,8 @@ class Imdb(Auth):
        Redirection results have no information of use.
        """
        imdb_id = response['data'].get('tconst')
-        if (
-            imdb_id and
-            imdb_id != response['data'].get('news', {}).get('channel')
+        if imdb_id and imdb_id != response['data'].get('news', {}).get(
+            'channel'
        ):
            return True
        return False
@@ -309,7 +313,6 @@ class Imdb(Auth):
            full_url = url
        headers.update(self.get_auth_headers(full_url))
        resp = self.session.get(url, headers=headers, params=params)
-
        if not resp.ok:
            if resp.status_code == httplib.NOT_FOUND:
                raise LookupError('Resource {0} not found'.format(url))
@@ -320,9 +323,7 @@ class Imdb(Auth):
        try:
            resp_dict = json.loads(resp_data)
        except ValueError:
-            resp_dict = self._parse_dirty_json(
-                data=resp_data, query=query
-            )
+            resp_dict = self._parse_dirty_json(data=resp_data, query=query)

        if resp_dict.get('error'):
            return None
--- a/lib/imdbpie/objects.py
+++ b/lib/imdbpie/objects.py
@@ -9,12 +9,9 @@ class Image(object):


 class TitleEpisodes(object):
-
    def __init__(self, facade, imdb_id):
        self._facade = facade
-        episodes = self._facade._client.get_title_episodes(
-            imdb_id=imdb_id
-        )
+        episodes = self._facade._client.get_title_episodes(imdb_id=imdb_id)
        self._episode_imdb_ids = []
        for season in episodes['seasons']:
            for episode in season['episodes']: