SickGear/sickgear/providers/btn.py

# coding=utf-8
#
# This file is part of SickGear.
#
# SickGear is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickGear is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickGear.  If not, see <http://www.gnu.org/licenses/>.

import math
import random
import re
import time
import traceback

from . import generic
from .. import logger, tvcache
from ..helpers import try_int
from ..indexers.indexer_config import TVINFO_TVDB
from ..show_name_helpers import get_show_names
from bs4_parser import BS4Parser
from exceptions_helper import AuthException
from json_helper import json_dumps

from six import iteritems


class BTNProvider(generic.TorrentProvider):

    def __init__(self):
        """Set up site urls, search categories, credential placeholders and the provider cache."""
        generic.TorrentProvider.__init__(self, 'BTN')

        self.url_base = 'https://broadcasthe.net/'
        self.url_api = 'https://api.broadcasthe.net'

        # the `search` entry keeps two %s slots: the search string, then the category filter
        self.urls = dict(
            config_provider_home_uri=self.url_base,
            login=f'{self.url_base}login.php',
            search=f'{self.url_base}torrents.php?searchstr=%s&action=basic&%s')

        self.proper_search_terms = ['%.proper.%', '%.repack.%']

        # `Cache` searches cover both the season and the episode category
        season_cats, episode_cats = [2], [1]
        self.categories = {'Season': season_cats, 'Episode': episode_cats, 'Cache': season_cats + episode_cats}

        self.url = self.urls['config_provider_home_uri']

        # user config values, unset until configured
        self.api_key = self.username = self.password = self.auth_html = self.minseed = self.minleech = None
        # keep the session user agent so that html() can send it with website requests
        self.ua = self.session.headers['User-Agent']
        self.reject_m2ts = False
        self.cache = BTNCache(self)
        self.has_limit = True

    def _authorised(self, **kwargs):

        return self._check_auth()

    def _check_auth(self, **kwargs):

        if not self.api_key and not (self.username and self.password):
            raise AuthException('Must set Api key or Username/Password for %s in config provider options' % self.name)
        return True

    def _check_response(self, data, url, post_data=None, post_json=None):
        if not self.should_skip(log_warning=False):
            if data and 'Call Limit' in data:
                self.tmr_limit_update('1', 'h', '150/hr %s' % data)
                self.log_failure_url(url, post_data, post_json)
            else:
                logger.warning(f'Action prematurely ended. {self.name} server error response = {data}')

    def _search_provider(self, search_params, age=0, **kwargs):
        """
        Search BTN through its JSON-RPC API, with the website search as a fallback.

        :param search_params: dict keyed by search mode where each value is a list of search items. In mode
            `Propers` an item is used as the `release` filter, in other modes a truthy item is merged into the
            API filter
        :param age: maximum torrent age in seconds, a falsy value adds no age filter (mode `Propers` sets four days)
        :param kwargs: unused
        :return: list of (title, url, seeders, size) tuples
        """
        self._authorised()
        self.auth_html = None

        results = []
        api_up = True

        # the API sends max 1000 results at a time
        results_per_page = 1000
        # max 150 requests per hour, so cap the follow-up requests of one search (150 was the old value and impractical)
        max_pages = 5

        def json_rpc(param_dct, items_per_page=1000, offset=0):
            # body of a getTorrents request, sent with a random 8 char request id
            return '{"jsonrpc": "2.0", "id": "%s", "method": "getTorrents", "params": ["%s", %s, %s, %s]}' % (
                ''.join(random.sample('abcdefghijklmnopqrstuvwxyz0123456789', 8)),
                self.api_key, json_dumps(param_dct), items_per_page, offset)

        for mode in search_params:
            for search_param in search_params[mode]:

                params = {}
                if 'Propers' == mode:
                    params.update({'release': search_param})
                    age = 4 * 24 * 60 * 60
                elif search_param:
                    params.update(search_param)
                if age:
                    params.update(dict(age='<=%i' % age))  # age in seconds
                # with a tvdb id in the filter, the series name is removed from it and only used for the website search
                search_string = '%s %s' % (params.pop('series'), params['name']) if 'tvdb' in params else ''

                response, error_text = None, None
                try:
                    if api_up and self.api_key:
                        self.session.headers['Content-Type'] = 'application/json-rpc'
                        response = self.get_url(self.url_api, post_data=json_rpc(params), parse_json=True)
                        # a response without an error message raises here and is then read as API data further down
                        error_text = response['error']['message']
                    # only reached when the API request was skipped or it returned an error message
                    api_up = False
                    if 'Propers' == mode:
                        return results
                    results = self.html(mode, search_string, results)
                    if not results:
                        self._check_response(error_text, self.url_api, post_data=json_rpc(params))
                        return results
                except AuthException:
                    logger.warning('API looks to be down, add un/pw config detail to be used as a fallback')
                except (KeyError, Exception):
                    pass

                data_json = response and 'result' in response and response['result'] or {}
                if data_json:
                    self.tmr_limit_count = 0
                    found_torrents = 'torrents' in data_json and data_json['torrents'] or {}

                    # We got something. If the query has more results than fit into one response
                    # then keep requesting the following pages until we've got everything.
                    total_results = try_int(data_json['results'], 0) if 'results' in data_json else 0

                    # Offset 0 is already fetched, so further items can only be on pages 1 to (total pages - 1).
                    # Requesting one page more than that would spend an API call on an offset past the last result.
                    last_page = min(int(math.ceil(total_results / results_per_page)) - 1, max_pages)

                    # +1 because range(1,4) = 1, 2, 3
                    for page in range(1, last_page + 1):

                        try:
                            post_data = json_rpc(params, results_per_page, page * results_per_page)
                            response = self.get_url(self.url_api, parse_json=True, post_data=post_data)
                            # as with the first request, only an error response gets past the next line
                            error_text = response['error']['message']
                            self._check_response(error_text, self.url_api, post_data=post_data)
                            return results
                        except (KeyError, Exception):
                            data_json = response and 'result' in response and response['result'] or {}

                        # Note that these are individual requests and might time out individually.
                        # This would result in 'gaps' in the results. There is no way to fix this though.
                        if 'torrents' in data_json:
                            self.tmr_limit_count = 0
                            found_torrents.update(data_json['torrents'])

                    cnt = len(results)
                    for _torrent_id, torrent_info in iteritems(found_torrents):
                        seeders, leechers, size = (try_int(n, n) for n in [torrent_info.get(x) for x in
                                                                           ('Seeders', 'Leechers', 'Size')])
                        # `or ''` keeps re.match from raising if the container value is None
                        if self._reject_item(seeders, leechers, container=self.reject_m2ts and (
                                re.match(r'(?i)m2?ts', torrent_info.get('Container') or ''))):
                            continue

                        title, url = self._get_title_and_url(torrent_info)
                        if title and url:
                            results.append((title, url, seeders, self._bytesizer(size)))

                    self._log_search(mode, len(results) - cnt,
                                     ('search_param: ' + str(search_param), self.name)['Cache' == mode])

                    results = self._sort_seeding(mode, results)
                    break   # search first tvdb item only

        return results

    def _authorised_html(self):
        """
        Log in to the website with the configured username and password.

        :return: result of the parent class `_authorised`
        :raises AuthException: when the username or the password is not set
        """
        if not (self.username and self.password):
            raise AuthException('Password or Username for %s is empty in config provider options' % self.name)

        def logged_in(y=''):
            # both markers must be found within the first 512 chars of the given text
            head = y[0:512]
            return 'casThe' in head and '<title>Index' in head

        return super(BTNProvider, self)._authorised(post_params={'login': 'Log In!'}, logged_in=logged_in)

    def html(self, mode, search_string, results):
        """
        Search the website and parse torrents from the returned results page.

        :param mode: search mode, used to select categories and for logging
        :param search_string: plain text to search for, it is percent-encoded here before it is put into the url
        :param results: list of result tuples found so far
        :return: `results` with any found (title, url, seeders, size) tuples appended
        """
        from urllib.parse import quote

        # the API content type must not be sent with website requests
        if 'Content-Type' in self.session.headers:
            del (self.session.headers['Content-Type'])
        setattr(self.session, 'reserved', {'headers': {
            'Accept': 'text/html, application/xhtml+xml, */*', 'Accept-Language': 'en-GB',
            'Cache-Control': 'no-cache', 'Referer': 'https://broadcasthe.net/login.php', 'User-Agent': self.ua}})
        self.headers = None

        if self.auth_html or self._authorised_html():
            # the login page referer is only wanted for the login request
            del (self.session.reserved['headers']['Referer'])
            if 'Referer' in self.session.headers:
                del (self.session.headers['Referer'])
            self.auth_html = True

            # Encode the search text, otherwise a reserved char in a show name (e.g. & or #) would end
            # the searchstr value early or start a url fragment.
            search_url = self.urls['search'] % (
                quote(search_string), self._categories_string(mode, 'filter_cat[%s]=1'))

            html = self.get_url(search_url, use_tmr_limit=False)
            if self.should_skip(log_warning=False, use_tmr_limit=False):
                return results

            cnt = len(results)
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html) as soup:
                    tbl = soup.find(id='torrent_table')
                    tbl_rows = [] if not tbl else tbl.find_all('tr')

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    rc = {
                        'cats': re.compile(
                            r'(?i)cat\[(?:%s)\]' % self._categories_string(mode, template='', delimiter='|')),
                        'get': re.compile('(?i)download')}

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 5 > len(cells):
                            continue
                        try:
                            # the column layout is resolved once and then reused for every row
                            head = head if None is not head else self._header_row(tr)
                            seeders, leechers, size = [try_int(n, n) for n in [
                                cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                            # skip rows without a link to a wanted category, and rows that are rejected
                            if not tr.find('a', href=rc['cats']) or self._reject_item(
                                    seeders, leechers, container=self.reject_m2ts and (
                                            re.search(r'(?i)\[.*?m2?ts.*?\]', tr.get_text('', strip=True)))):
                                continue

                            title = tr.select('td span[title]')[0].attrs.get('title').strip()
                            download_url = self._link(tr.find('a', href=rc['get'])['href'])
                        except (AttributeError, TypeError, ValueError, KeyError, IndexError):
                            # a row that cannot be parsed is skipped
                            continue

                        if title and download_url:
                            results.append((title, download_url, seeders, self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except (BaseException, Exception):
                # broad catch: any failure while parsing is logged rather than raised
                logger.error(f'Failed to parse. Traceback: {traceback.format_exc()}')

            self._log_search(mode, len(results) - cnt, search_url)

            results = self._sort_seeding(mode, results)

        return results

    @staticmethod
    def _get_title_and_url(data_json):

        # The BTN API gives a lot of information in response,
        # however SickGear is built mostly around Scene or
        # release names, which is why we are using them here.

        if 'ReleaseName' in data_json and data_json['ReleaseName']:
            title = data_json['ReleaseName']

        else:
            # If we don't have a release name we need to get creative
            title = ''
            keys = ['Series', 'GroupName', 'Resolution', 'Source', 'Codec']
            for key in keys:
                if key in data_json:
                    title += ('', '.')[any(title)] + data_json[key]

            if title:
                title = title.replace(' ', '.')

        url = None
        if 'DownloadURL' in data_json:
            url = data_json['DownloadURL']
            if url:
                # unescaped / is valid in JSON, but it can be escaped
                url = url.replace('\\/', '/')

        return title, url

    def _season_strings(self, ep_obj, **kwargs):

        base_params = dict(category='Season')

        # Search for entire seasons: no need to do special things for air by date or sports shows
        if ep_obj.show_obj.air_by_date or ep_obj.show_obj.is_sports:
            # Search for the year of the air by date show
            base_params['name'] = str(ep_obj.airdate).split('-')[0]
        elif ep_obj.show_obj.is_anime:
            base_params['name'] = '%s' % ep_obj.scene_absolute_number
        else:
            base_params['name'] = 'Season %s' % (ep_obj.season, ep_obj.scene_season)[bool(ep_obj.show_obj.is_scene)]

        return [dict(Season=self.search_params(ep_obj, base_params))]

    def _episode_strings(self, ep_obj, **kwargs):

        if not ep_obj:
            return [{}]

        base_params = dict(category='Episode')

        if ep_obj.show_obj.air_by_date or ep_obj.show_obj.is_sports:
            date_str = str(ep_obj.airdate)

            # BTN uses dots in dates, we just search for the date since that
            # combined with the series identifier should result in just one episode
            base_params['name'] = date_str.replace('-', '.')
        elif ep_obj.show_obj.is_anime:
            base_params['name'] = '%s' % ep_obj.scene_absolute_number
        else:
            # Do a general name search for the episode, formatted like SXXEYY
            season, episode = ((ep_obj.season, ep_obj.episode),
                               (ep_obj.scene_season, ep_obj.scene_episode))[bool(ep_obj.show_obj.is_scene)]
            base_params['name'] = 'S%02dE%02d' % (season, episode)

        return [dict(Episode=self.search_params(ep_obj, base_params))]

    @staticmethod
    def search_params(ep_obj, base_params):
        """
        Expand base parameters into one parameter dict per way of identifying the show.

        :param ep_obj: episode object, its show is used for the TVDB id and for the show names
        :param base_params: dict of parameters common to every search, it is updated in place for a TVDB show
        :return: list of parameter dicts, a TVDB id based dict comes first when the show is a TVDB show
        """
        show_obj = ep_obj.show_obj
        param_sets = []

        if TVINFO_TVDB == show_obj.tvid:
            base_params.update(tvdb=show_obj.prodid, series=show_obj.name)
            param_sets.append(base_params)

        # one copy of the base parameters for every name returned by get_show_names
        param_sets.extend(dict(base_params, series=show_name) for show_name in get_show_names(ep_obj))

        return param_sets

    def cache_data(self, **kwargs):

        # Get the torrents uploaded since last check.
        seconds_since_last_update = int(math.ceil(time.time() - time.mktime(kwargs['age'])))

        # default to 15 minutes
        seconds_min_time = kwargs['min_time'] * 60
        if seconds_min_time > seconds_since_last_update:
            seconds_since_last_update = seconds_min_time

        # Set maximum to 24 hours (24 * 60 * 60 = 86400 seconds) of "RSS" data search,
        # older items will be done through backlog
        if 86400 < seconds_since_last_update:
            logger.warning(f'Only trying to fetch the last 24 hours even though the last known successful update on'
                           f' {self.name} was over 24 hours')
            seconds_since_last_update = 86400

        return self._search_provider(dict(Cache=['']), age=seconds_since_last_update)


class BTNCache(tvcache.TVCache):
    """Cache for the BTN provider, created with an interval of 15."""

    def __init__(self, this_provider):
        """
        :param this_provider: provider object that this cache belongs to
        """
        super(BTNCache, self).__init__(this_provider, interval=15)

    def _cache_data(self, **kwargs):
        """
        Fetch new items from the provider.

        :param kwargs: unused
        :return: output of the provider `cache_data`, called with the time of the last update and `update_iv`
        """
        last_update = self._get_last_update()
        return self.provider.cache_data(age=last_update.timetuple(), min_time=self.update_iv)


# The provider is instantiated once when this module is imported, and is exposed as `provider`.
provider = BTNProvider()