Merge branch 'feature/UpdateImdbpie' into dev

This commit is contained in:
JackDandy 2024-06-07 13:28:21 +01:00
commit 421022f4c4
5 changed files with 124 additions and 92 deletions

View file

@@ -7,6 +7,7 @@
* Update feedparser 6.0.10 (9865dec) to 6.0.11 (efcb89b)
* Update filelock 3.12.4 (c1163ae) to 3.14.0 (8556141)
* Update idna library 3.4 (cab054c) to 3.7 (1d365e1)
* Update imdbpie 5.6.4 (f695e87) to 5.6.5 (f8ed7a0)
* Update Requests library 2.31.0 (8812812) to 2.32.3 (0e322af)
* Update Tornado Web Server 6.4 (b3f2a4b) to 6.4.1 (2a0e1d1)
* Update urllib3 2.0.7 (56f01e0) to 2.2.1 (54d6edf)

View file

@@ -50,7 +50,7 @@ class ZuluHmacAuthV3HTTPHandler(object):
them into a string, separated by newlines.
"""
vals = sorted(['%s:%s' % (n.lower().strip(),
headers_to_sign[n].strip()) for n in headers_to_sign])
headers_to_sign[n].strip()) for n in headers_to_sign])
return '\n'.join(vals)
def headers_to_sign(self, http_request):
@@ -90,14 +90,16 @@ class ZuluHmacAuthV3HTTPHandler(object):
headers_to_sign = self.headers_to_sign(http_request)
canonical_qs = self.canonical_query_string(http_request)
canonical_headers = self.canonical_headers(headers_to_sign)
string_to_sign = '\n'.join((
http_request.method,
http_request.path,
canonical_qs,
canonical_headers,
'',
http_request.body
))
string_to_sign = '\n'.join(
(
http_request.method,
http_request.path,
canonical_qs,
canonical_headers,
'',
http_request.body,
)
)
return string_to_sign, headers_to_sign
def add_auth(self, req):
@@ -319,9 +321,15 @@ class Auth(object):
key: val[0] for key, val in parse_qs(parsed_url.query).items()
}
request = HTTPRequest(
method='GET', protocol='https', host=HOST,
port=443, path=parsed_url.path, auth_path=None, params=params,
headers={'User-Agent': USER_AGENT}, body=''
method='GET',
protocol='https',
host=HOST,
port=443,
path=parsed_url.path,
auth_path=None,
params=params,
headers={'User-Agent': USER_AGENT},
body='',
)
handler.add_auth(req=request)
headers = request.headers

View file

@@ -7,15 +7,20 @@ from dateutil.parser import parse
from .imdbpie import Imdb
from .objects import (
Title, TitleEpisodes, Name, TitleName, Image, TitleRelease,
TitleSearchResult, NameSearchResult,
Title,
TitleEpisodes,
Name,
TitleName,
Image,
TitleRelease,
TitleSearchResult,
NameSearchResult,
)
REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')
class ImdbFacade(object):
def __init__(self, client=None):
self._client = client or Imdb()
@@ -36,8 +41,11 @@ class ImdbFacade(object):
season = None
episode = None
return Title(
season=season, episode=episode, episodes=episodes,
runtime=runtime, **title_data
season=season,
episode=episode,
episodes=episodes,
runtime=runtime,
**title_data
)
def get_name(self, imdb_id):
@@ -68,8 +76,13 @@ class ImdbFacade(object):
self._parse_id(f['id']) for f in filmography_data['filmography']
)
return Name(
name=name, imdb_id=imdb_id, date_of_birth=date_of_birth,
gender=gender, birth_place=birth_place, bios=bios, image=image,
name=name,
imdb_id=imdb_id,
date_of_birth=date_of_birth,
gender=gender,
birth_place=birth_place,
bios=bios,
image=image,
filmography=filmography,
)
@@ -77,7 +90,8 @@ class ImdbFacade(object):
results = []
for result in self._client.search_for_name(query):
result = NameSearchResult(
imdb_id=result['imdb_id'], name=result['name'],
imdb_id=result['imdb_id'],
name=result['name'],
)
results.append(result)
return tuple(results)
@@ -90,8 +104,10 @@ class ImdbFacade(object):
else:
year = None
result = TitleSearchResult(
imdb_id=result['imdb_id'], title=result['title'],
type=result['type'], year=year,
imdb_id=result['imdb_id'],
title=result['title'],
type=result['type'],
year=year,
)
results.append(result)
return tuple(results)
@@ -102,8 +118,9 @@ class ImdbFacade(object):
name=i['name'],
job=i.get('job'),
category=i.get('category'),
imdb_id=self._parse_id(i['id'])
) for i in top_crew_data['writers']
imdb_id=self._parse_id(i['id']),
)
for i in top_crew_data['writers']
)
def _get_stars(self, principals_data):
@@ -113,8 +130,9 @@ class ImdbFacade(object):
job=i.get('job'),
characters=tuple(i.get('characters', ())),
category=i.get('category'),
imdb_id=self._parse_id(i['id'])
) for i in principals_data
imdb_id=self._parse_id(i['id']),
)
for i in principals_data
)
def _get_creators(self, top_crew_data):
@@ -123,8 +141,9 @@ class ImdbFacade(object):
name=i['name'],
job=i.get('job'),
category=i.get('category'),
imdb_id=self._parse_id(i['id'])
) for i in top_crew_data['writers']
imdb_id=self._parse_id(i['id']),
)
for i in top_crew_data['writers']
if i.get('job') == 'creator'
)
@@ -134,20 +153,23 @@ class ImdbFacade(object):
name=i['name'],
job=i.get('job'),
category=i.get('category'),
imdb_id=self._parse_id(i['id'])
) for i in top_crew_data['directors']
imdb_id=self._parse_id(i['id']),
)
for i in top_crew_data['directors']
)
def _get_credits(self, credits_data):
credits = []
for category in credits_data.get('credits', ()):
for item in credits_data['credits'][category]:
credits.append(TitleName(
name=item['name'],
category=item.get('category'),
job=item.get('job'),
imdb_id=self._parse_id(item['id'])
))
credits.append(
TitleName(
name=item['name'],
category=item.get('category'),
job=item.get('job'),
imdb_id=self._parse_id(item['id']),
)
)
return tuple(credits)
def _parse_id(self, string):
@@ -212,22 +234,25 @@ class ImdbFacade(object):
)
except KeyError:
image = None
return dict(
imdb_id=imdb_id,
title=title,
year=year,
rating=rating,
type=type_,
release_date=release_date,
releases=releases,
plot_outline=plot_outline,
rating_count=rating_count,
writers=writers,
directors=directors,
creators=creators,
genres=genres,
credits=credits,
certification=certification,
image=image,
stars=stars,
), title_aux_data
return (
dict(
imdb_id=imdb_id,
title=title,
year=year,
rating=rating,
type=type_,
release_date=release_date,
releases=releases,
plot_outline=plot_outline,
rating_count=rating_count,
writers=writers,
directors=directors,
creators=creators,
genres=genres,
credits=credits,
certification=certification,
image=image,
stars=stars,
),
title_aux_data,
)

View file

@@ -11,7 +11,7 @@ from trans import trans
import requests
from six import text_type
from six.moves import http_client as httplib
from six.moves.urllib.parse import urlencode, urljoin, quote, unquote
from six.moves.urllib.parse import urlencode, urljoin, quote
from .constants import BASE_URI, SEARCH_BASE_URI
from .auth import Auth
@@ -52,7 +52,6 @@ _SIMPLE_GET_ENDPOINTS = {
class Imdb(Auth):
def __init__(self, locale=None, exclude_episodes=False, session=None, cachedir=None):
self.locale = locale or 'en_US'
self.region = self.locale.split('_')[-1].upper()
@@ -87,8 +86,8 @@ class Imdb(Auth):
self._title_not_found()
if (
self.exclude_episodes is True and
resource['base']['titleType'] == 'tvEpisode'
self.exclude_episodes is True
and resource['base']['titleType'] == 'tvEpisode'
):
raise LookupError(
'Title not found. Title was an episode and '
@@ -111,14 +110,14 @@ class Imdb(Auth):
'region': self.region,
'tconst': imdb_id,
'today': date.today().strftime('%Y-%m-%d'),
}
},
)
except LookupError:
self._title_not_found()
if (
self.exclude_episodes is True and
resource['titleType'].lower() == 'tvepisode'
self.exclude_episodes is True
and resource['titleType'].lower() == 'tvepisode'
):
raise LookupError(
'Title not found. Title was an episode and '
@@ -128,19 +127,24 @@ class Imdb(Auth):
def _simple_get_method(self, method, path):
"""Return client method generated from ``_SIMPLE_GET_ENDPOINTS``."""
def get(imdb_id):
logger.info('called %s %s', method, imdb_id)
self.validate_imdb_id(imdb_id)
self._redirection_title_check(imdb_id)
return self._get_resource(path.format(imdb_id=imdb_id))
return get
def title_exists(self, imdb_id):
self.validate_imdb_id(imdb_id)
page_url = 'https://www.imdb.com/title/{0}/'.format(imdb_id)
response = self.session.get(page_url, allow_redirects=False)
response = self.session.get(
page_url,
allow_redirects=False,
headers={'User-Agent': 'Mozilla/5.0'},
)
if response.status_code == httplib.OK:
return True
elif response.status_code == httplib.NOT_FOUND:
@@ -165,7 +169,7 @@ class Imdb(Auth):
def search_for_name(self, name):
logger.info('called search_for_name %s', name)
name = re.sub(r'\W+', '_', name, flags=re.UNICODE).strip('_')
name = re.sub(r'\W+', '+', name).strip('+')
search_results = self._suggest_search(name)
results = []
for result in search_results.get('d', ()):
@@ -181,7 +185,7 @@ class Imdb(Auth):
def search_for_title(self, title):
logger.info('called search_for_title %s', title)
title = re.sub(r'\W+', '_', title, flags=re.UNICODE).strip('_')
title = re.sub(r'\W+', '+', title).strip('+')
search_results = self._suggest_search(title)
results = []
for result in search_results.get('d', ()):
@@ -238,9 +242,13 @@ class Imdb(Auth):
if region:
params.update({'region': region})
return self._get(urljoin(
BASE_URI, '/template/imdb-ios-writable/tv-episodes-v2.jstl/render'
), params=params)
return self._get(
urljoin(
BASE_URI,
'/template/imdb-ios-writable/tv-episodes-v2.jstl/render',
),
params=params,
)
def get_title_top_crew(self, imdb_id):
"""
@@ -252,24 +260,21 @@ class Imdb(Auth):
logger.info('called get_title_top_crew %s', imdb_id)
self.validate_imdb_id(imdb_id)
params = {'tconst': imdb_id}
return self._get(urljoin(
BASE_URI,
'/template/imdb-android-writable/7.3.top-crew.jstl/render'
), params=params)
return self._get(
urljoin(
BASE_URI,
'/template/imdb-android-writable/7.3.top-crew.jstl/render',
),
params=params,
)
@staticmethod
def _parse_dirty_json(data, query=None):
if query is None:
match_json_within_dirty_json = r'imdb\$.+\({1}(.+)\){1}'
else:
query_match = ''.join(
char if char.isalnum() else '[{0}]'.format(char)
for char in unquote(query)
)
query_match = query_match.replace('[ ]', '.+')
match_json_within_dirty_json = (
r'imdb\${}\((.+)\)'.format(query_match)
)
# No need to unquote as the json is containing quoted query
match_json_within_dirty_json = r'imdb\${}\((.+)\)'.format(query)
data_clean = re.match(
match_json_within_dirty_json, data, re.IGNORECASE
).groups()[0]
@@ -290,9 +295,8 @@ class Imdb(Auth):
Redirection results have no information of use.
"""
imdb_id = response['data'].get('tconst')
if (
imdb_id and
imdb_id != response['data'].get('news', {}).get('channel')
if imdb_id and imdb_id != response['data'].get('news', {}).get(
'channel'
):
return True
return False
@@ -309,7 +313,6 @@ class Imdb(Auth):
full_url = url
headers.update(self.get_auth_headers(full_url))
resp = self.session.get(url, headers=headers, params=params)
if not resp.ok:
if resp.status_code == httplib.NOT_FOUND:
raise LookupError('Resource {0} not found'.format(url))
@@ -320,9 +323,7 @@ class Imdb(Auth):
try:
resp_dict = json.loads(resp_data)
except ValueError:
resp_dict = self._parse_dirty_json(
data=resp_data, query=query
)
resp_dict = self._parse_dirty_json(data=resp_data, query=query)
if resp_dict.get('error'):
return None

View file

@@ -9,12 +9,9 @@ class Image(object):
class TitleEpisodes(object):
def __init__(self, facade, imdb_id):
self._facade = facade
episodes = self._facade._client.get_title_episodes(
imdb_id=imdb_id
)
episodes = self._facade._client.get_title_episodes(imdb_id=imdb_id)
self._episode_imdb_ids = []
for season in episodes['seasons']:
for episode in season['episodes']: