diff --git a/sickbeard/bs4_parser.py b/sickbeard/bs4_parser.py
new file mode 100644
index 00000000..0c3bf606
--- /dev/null
+++ b/sickbeard/bs4_parser.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup
+
+
+class BS4Parser:
+    """Context manager that hands back a BeautifulSoup object and frees the parse tree on exit."""
+
+    def __init__(self, *args, **kwargs):
+        self.soup = BeautifulSoup(*args, **kwargs)
+
+    def __enter__(self):
+        return self.soup
+
+    def __exit__(self, exc_ty, exc_val, tb):
+        # Decompose the tree and drop the reference so parsed pages do not linger in memory.
+        self.soup.clear(True)
+        self.soup = None
diff --git a/sickbeard/helpers.py b/sickbeard/helpers.py
index d1d3c23d..c51c01eb 100644
--- a/sickbeard/helpers.py
+++ b/sickbeard/helpers.py
@@ -31,7 +31,6 @@ import httplib
 import urlparse
 import uuid
 import base64
-import string
 import zipfile
 
 from lib import requests
@@ -1241,7 +1240,7 @@ def mapIndexersToShow(showObj):
     return mapped
 
 
-def touchFile(self, fname, atime=None):
+def touchFile(fname, atime=None):
     if None != atime:
         try:
             with file(fname, 'a'):
diff --git a/sickbeard/providers/bitsoup.py b/sickbeard/providers/bitsoup.py
index 1e2a869e..552a5b4e 100644
--- a/sickbeard/providers/bitsoup.py
+++ b/sickbeard/providers/bitsoup.py
@@ -22,7 +22,7 @@ import datetime
 import urlparse
 import sickbeard
 import generic
-from sickbeard.common import Quality, cpu_presets
+from sickbeard.common import Quality
 from sickbeard import logger
 from sickbeard import tvcache
 from sickbeard import db
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 
@@ -168,48 +168,45 @@ class BitSoupProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data, "html.parser")
-
-                    torrent_table = html.find('table', attrs={'class': 'koptekst'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+                    with BS4Parser(data, "html.parser") as html:
+                        torrent_table = html.find('table', attrs={'class': 'koptekst'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    html.clear(True)
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.DEBUG)
+                            continue
 
-                    #Continue only if one Release is found
-                    if len(torrent_rows) < 2:
-                        logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                   logger.DEBUG)
-                        continue
+                        for result in torrent_rows[1:]:
+                            cells = result.find_all('td')
 
-                    for result in torrent_rows[1:]:
-                        cells = result.find_all('td')
+                            link = cells[1].find('a')
+                            download_url = self.urls['download'] % cells[3].find('a')['href']
 
-                        link = cells[1].find('a')
-                        download_url = self.urls['download'] % cells[3].find('a')['href']
-
-                        id = link['href']
-                        id = id.replace('details.php?id=','')
-                        id = id.replace('&hit=1', '')
-
-                        try:
-                            title = link.getText()
-                            id = int(id)
-                            seeders = int(cells[9].getText())
-                            leechers = int(cells[10].getText())
-                        except (AttributeError, TypeError):
-                            continue
+                            id = link['href']
+                            id = id.replace('details.php?id=','')
+                            id = id.replace('&hit=1', '')
 
-                        #Filter unseeded torrent
-                        if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
-                            continue
+                            try:
+                                title = link.getText()
+                                id = int(id)
+                                seeders = int(cells[9].getText())
+                                leechers = int(cells[10].getText())
+                            except (AttributeError, TypeError):
+                                continue
 
-                        if not title or not download_url:
-                            continue
+                            #Filter unseeded torrent
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
 
-                        item = title, download_url, id, seeders, leechers
-                        logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+                            if not title or not download_url:
+                                continue
 
-                        items[mode].append(item)
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
diff --git a/sickbeard/providers/freshontv.py b/sickbeard/providers/freshontv.py
index de61c8e9..22427b6c 100755
--- a/sickbeard/providers/freshontv.py
+++ b/sickbeard/providers/freshontv.py
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 
@@ -175,7 +175,6 @@ class FreshOnTVProvider(generic.TorrentProvider):
 
         if not self._doLogin():
             return []
 
-
         for mode in search_params.keys():
             for search_string in search_params[mode]:
@@ -193,55 +192,52 @@ class FreshOnTVProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        torrent_table = html.find('table', attrs={'class': 'frame'})
+                        torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
 
-                    torrent_table = html.find('table', attrs={'class': 'frame'})
-                    torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
-
-                    html.clear(True)
-
-                    #Continue only if one Release is found
-                    if len(torrent_rows) < 2:
-                        logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                   logger.DEBUG)
-                        continue
-
-                    # skip colheader
-                    for result in torrent_rows[1:]:
-                        cells = result.findChildren('td')
-
-                        link = cells[1].find('a', attrs = {'class': 'torrent_name_link'})
-                        #skip if torrent has been nuked due to poor quality
-                        if cells[1].find('img', alt='Nuked') != None:
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.DEBUG)
                             continue
-                        torrent_id = link['href'].replace('/details.php?id=', '')
-
-
-                        try:
-                            if link.has_key('title'):
-                                title = cells[1].find('a', {'class': 'torrent_name_link'})['title']
-                            else:
-                                title = link.contents[0]
-                            download_url = self.urls['download'] % (torrent_id)
-                            id = int(torrent_id)
+                        # skip colheader
+                        for result in torrent_rows[1:]:
+                            cells = result.findChildren('td')
 
-                            seeders = int(cells[8].find('a', {'class': 'link'}).span.contents[0].strip())
-                            leechers = int(cells[9].find('a', {'class': 'link'}).contents[0].strip())
-                        except (AttributeError, TypeError):
-                            continue
+                            link = cells[1].find('a', attrs = {'class': 'torrent_name_link'})
+                            #skip if torrent has been nuked due to poor quality
+                            if cells[1].find('img', alt='Nuked') != None:
+                                continue
 
-                        #Filter unseeded torrent
-                        if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
-                            continue
+                            torrent_id = link['href'].replace('/details.php?id=', '')
 
-                        if not title or not download_url:
-                            continue
-                        item = title, download_url, id, seeders, leechers
-                        logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+                            try:
+                                if link.has_key('title'):
+                                    title = cells[1].find('a', {'class': 'torrent_name_link'})['title']
+                                else:
+                                    title = link.contents[0]
+                                download_url = self.urls['download'] % (torrent_id)
+                                id = int(torrent_id)
 
-                        items[mode].append(item)
+                                seeders = int(cells[8].find('a', {'class': 'link'}).span.contents[0].strip())
+                                leechers = int(cells[9].find('a', {'class': 'link'}).contents[0].strip())
+                            except (AttributeError, TypeError):
+                                continue
+
+                            #Filter unseeded torrent
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),
                                logger.ERROR)
diff --git a/sickbeard/providers/hdtorrents.py b/sickbeard/providers/hdtorrents.py
index d0b7d5f8..910a8e53 100644
--- a/sickbeard/providers/hdtorrents.py
+++ b/sickbeard/providers/hdtorrents.py
@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 
@@ -196,64 +196,22 @@ class HDTorrentsProvider(generic.TorrentProvider):
                     data = split_data[2]
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        #Get first entry in table
+                        entries = html.find_all('td', attrs={'align': 'center'})
 
-                    #Get first entry in table
-                    entries = html.find_all('td', attrs={'align': 'center'})
-
-                    html.clear(True)
-
-                    if not entries:
-                        logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                   logger.DEBUG)
-                        continue
-
-                    try:
-                        title = entries[22].find('a')['title'].strip('History - ').replace('Blu-ray', 'bd50')
-                        url = self.urls['home'] % entries[15].find('a')['href']
-                        download_url = self.urls['home'] % entries[15].find('a')['href']
-                        id = entries[23].find('div')['id']
-                        seeders = int(entries[20].get_text())
-                        leechers = int(entries[21].get_text())
-                    except (AttributeError, TypeError):
-                        continue
-
-                    if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
-                        continue
-
-                    if not title or not download_url:
-                        continue
-
-                    item = title, download_url, id, seeders, leechers
-                    logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
-
-                    items[mode].append(item)
-
-                    #Now attempt to get any others
-                    result_table = html.find('table', attrs={'class': 'mainblockcontenttt'})
-
-                    if not result_table:
-                        continue
-
-                    entries = result_table.find_all('td', attrs={'align': 'center', 'class': 'listas'})
-
-                    if not entries:
-                        continue
-
-                    for result in entries:
-                        block2 = result.find_parent('tr').find_next_sibling('tr')
-                        if not block2:
+                        if not entries:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.DEBUG)
                             continue
-                        cells = block2.find_all('td')
 
-                        try:
-                            title = cells[1].find('b').get_text().strip('\t ').replace('Blu-ray', 'bd50')
-                            url = self.urls['home'] % cells[4].find('a')['href']
-                            download_url = self.urls['home'] % cells[4].find('a')['href']
-                            detail = cells[1].find('a')['href']
-                            id = detail.replace('details.php?id=', '')
-                            seeders = int(cells[9].get_text())
-                            leechers = int(cells[10].get_text())
+                        try:
+                            title = entries[22].find('a')['title'].strip('History - ').replace('Blu-ray', 'bd50')
+                            url = self.urls['home'] % entries[15].find('a')['href']
+                            download_url = self.urls['home'] % entries[15].find('a')['href']
+                            id = entries[23].find('div')['id']
+                            seeders = int(entries[20].get_text())
+                            leechers = int(entries[21].get_text())
                         except (AttributeError, TypeError):
                             continue
 
@@ -268,6 +226,45 @@ class HDTorrentsProvider(generic.TorrentProvider):
 
                         items[mode].append(item)
 
+                        #Now attempt to get any others
+                        result_table = html.find('table', attrs={'class': 'mainblockcontenttt'})
+
+                        if not result_table:
+                            continue
+
+                        entries = result_table.find_all('td', attrs={'align': 'center', 'class': 'listas'})
+
+                        if not entries:
+                            continue
+
+                        for result in entries:
+                            block2 = result.find_parent('tr').find_next_sibling('tr')
+                            if not block2:
+                                continue
+                            cells = block2.find_all('td')
+
+                            try:
+                                title = cells[1].find('b').get_text().strip('\t ').replace('Blu-ray', 'bd50')
+                                url = self.urls['home'] % cells[4].find('a')['href']
+                                download_url = self.urls['home'] % cells[4].find('a')['href']
+                                detail = cells[1].find('a')['href']
+                                id = detail.replace('details.php?id=', '')
+                                seeders = int(cells[9].get_text())
+                                leechers = int(cells[10].get_text())
+                            except (AttributeError, TypeError):
+                                continue
+
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)
+
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),
                                logger.ERROR)
diff --git a/sickbeard/providers/iptorrents.py b/sickbeard/providers/iptorrents.py
index 84480d63..c88355ec 100644
--- a/sickbeard/providers/iptorrents.py
+++ b/sickbeard/providers/iptorrents.py
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 from sickbeard.show_name_helpers import allPossibleShowNames
@@ -167,51 +167,48 @@ class IPTorrentsProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
-
-                    if not html:
-                        logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
-                        continue
-
-                    if html.find(text='No Torrents Found!'):
-                        logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
-                        continue
-
-                    torrent_table = html.find('table', attrs={'class': 'torrents'})
-                    torrents = torrent_table.find_all('tr') if torrent_table else []
-
-                    html.clear(True)
-
-                    #Continue only if one Release is found
-                    if len(torrents) < 2:
-                        logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                   logger.WARNING)
-                        continue
-
-                    for result in torrents[1:]:
-
-                        try:
-                            torrent = result.find_all('td')[1].find('a')
-                            torrent_name = torrent.string
-                            torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
-                            torrent_details_url = self.urls['base_url'] + torrent['href']
-                            torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
-                            ## Not used, perhaps in the future ##
-                            #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
-                            #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
-                        except (AttributeError, TypeError):
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        if not html:
+                            logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
                             continue
 
-                        # Filter unseeded torrent and torrents with no name/url
-                        if mode != 'RSS' and torrent_seeders == 0:
+                        if html.find(text='No Torrents Found!'):
+                            logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
                             continue
 
-                        if not torrent_name or not torrent_download_url:
+                        torrent_table = html.find('table', attrs={'class': 'torrents'})
+                        torrents = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrents) < 2:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.WARNING)
                             continue
 
-                        item = torrent_name, torrent_download_url
-                        logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
-                        items[mode].append(item)
+                        for result in torrents[1:]:
+
+                            try:
+                                torrent = result.find_all('td')[1].find('a')
+                                torrent_name = torrent.string
+                                torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
+                                torrent_details_url = self.urls['base_url'] + torrent['href']
+                                torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
+                                ## Not used, perhaps in the future ##
+                                #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
+                                #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
+                            except (AttributeError, TypeError):
+                                continue
+
+                            # Filter unseeded torrent and torrents with no name/url
+                            if mode != 'RSS' and torrent_seeders == 0:
+                                continue
+
+                            if not torrent_name or not torrent_download_url:
+                                continue
+
+                            item = torrent_name, torrent_download_url
+                            logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
+                            items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
diff --git a/sickbeard/providers/kat.py b/sickbeard/providers/kat.py
index 18181f20..bc10769c 100644
--- a/sickbeard/providers/kat.py
+++ b/sickbeard/providers/kat.py
@@ -40,11 +40,9 @@ from sickbeard.show_name_helpers import allPossibleShowNames, sanitizeSceneName
 from sickbeard.exceptions import ex
 from sickbeard import encodingKludge as ek
 from sickbeard import clients
-from sickbeard import tv
-
+from sickbeard.bs4_parser import BS4Parser
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
 from lib.unidecode import unidecode
 
 
@@ -119,55 +117,52 @@ class KATProvider(generic.TorrentProvider):
                 return None
 
             try:
-                soup = BeautifulSoup(data, features=["html5lib", "permissive"])
-                file_table = soup.find('table', attrs={'class': 'torrentFileList'})
+                with BS4Parser(data, features=["html5lib", "permissive"]) as soup:
+                    file_table = soup.find('table', attrs={'class': 'torrentFileList'})
 
-                # cleanup memory
-                soup.clear(True)
+                    if not file_table:
+                        return None
 
-                if not file_table:
-                    return None
+                    files = [x.text for x in file_table.find_all('td', attrs={'class': 'torFileName'})]
+                    videoFiles = filter(lambda x: x.rpartition(".")[2].lower() in mediaExtensions, files)
 
-                files = [x.text for x in file_table.find_all('td', attrs={'class': 'torFileName'})]
-                videoFiles = filter(lambda x: x.rpartition(".")[2].lower() in mediaExtensions, files)
+                    #Filtering SingleEpisode/MultiSeason Torrent
+                    if len(videoFiles) < ep_number or len(videoFiles) > float(ep_number * 1.1):
+                        logger.log(u"Result " + title + " has " + str(
+                            ep_number) + " episodes and episodes retrieved in torrent are " + str(len(videoFiles)), logger.DEBUG)
+                        logger.log(
+                            u"Result " + title + " seems to be a Single Episode or MultiSeason torrent, skipping result...",
+                            logger.DEBUG)
+                        return None
 
-                #Filtering SingleEpisode/MultiSeason Torrent
-                if len(videoFiles) < ep_number or len(videoFiles) > float(ep_number * 1.1):
-                    logger.log(u"Result " + title + " have " + str(
-                        ep_number) + " episode and episodes retrived in torrent are " + str(len(videoFiles)), logger.DEBUG)
-                    logger.log(
-                        u"Result " + title + " Seem to be a Single Episode or MultiSeason torrent, skipping result...",
-                        logger.DEBUG)
-                    return None
+                    if Quality.sceneQuality(title) != Quality.UNKNOWN:
+                        return title
+
+                    for fileName in videoFiles:
+                        quality = Quality.sceneQuality(os.path.basename(fileName))
+                        if quality != Quality.UNKNOWN: break
+
+                    if fileName is not None and quality == Quality.UNKNOWN:
+                        quality = Quality.assumeQuality(os.path.basename(fileName))
+
+                    if quality == Quality.UNKNOWN:
+                        logger.log(u"Unable to obtain a Season Quality for " + title, logger.DEBUG)
+                        return None
+
+                    try:
+                        myParser = NameParser(showObj=self.show)
+                        parse_result = myParser.parse(fileName)
+                    except (InvalidNameException, InvalidShowException):
+                        return None
+
+                    logger.log(u"Season quality for " + title + " is " + Quality.qualityStrings[quality], logger.DEBUG)
+
+                    if parse_result.series_name and parse_result.season_number:
+                        title = parse_result.series_name + ' S%02d' % int(
+                            parse_result.season_number) + ' ' + self._reverseQuality(quality)
 
-                if Quality.sceneQuality(title) != Quality.UNKNOWN:
                     return title
 
-                for fileName in videoFiles:
-                    quality = Quality.sceneQuality(os.path.basename(fileName))
-                    if quality != Quality.UNKNOWN: break
-
-                if fileName is not None and quality == Quality.UNKNOWN:
-                    quality = Quality.assumeQuality(os.path.basename(fileName))
-
-                if quality == Quality.UNKNOWN:
-                    logger.log(u"Unable to obtain a Season Quality for " + title, logger.DEBUG)
-                    return None
-
-                try:
-                    myParser = NameParser(showObj=self.show)
-                    parse_result = myParser.parse(fileName)
-                except (InvalidNameException, InvalidShowException):
-                    return None
-
-                logger.log(u"Season quality for " + title + " is " + Quality.qualityStrings[quality], logger.DEBUG)
-
-                if parse_result.series_name and parse_result.season_number:
-                    title = parse_result.series_name + ' S%02d' % int(
-                        parse_result.season_number) + ' ' + self._reverseQuality(quality)
-
-                return title
-
             except Exception, e:
                 logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
@@ -230,6 +225,7 @@ class KATProvider(generic.TorrentProvider):
 
         results = []
         items = {'Season': [], 'Episode': [], 'RSS': []}
+        soup = None
 
         for mode in search_params.keys():
             for search_string in search_params[mode]:
@@ -250,54 +246,51 @@ class KATProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    soup = BeautifulSoup(html, features=["html5lib", "permissive"])
+                    with BS4Parser(html, features=["html5lib", "permissive"]) as soup:
+                        torrent_table = soup.find('table', attrs={'class': 'data'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    torrent_table = soup.find('table', attrs={'class': 'data'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    soup.clear(True)
-
-                    #Continue only if one Release is found
-                    if len(torrent_rows) < 2:
-                        logger.log(u"The data returned from " + self.name + " does not contain any torrents",
-                                   logger.WARNING)
-                        continue
-
-                    for tr in torrent_rows[1:]:
-                        try:
-                            link = urlparse.urljoin(self.url,
-                                                    (tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
-                            id = tr.get('id')[-7:]
-                            title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
-                                or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
-                            url = tr.find('a', 'imagnet')['href']
-                            verified = True if tr.find('a', 'iverify') else False
-                            trusted = True if tr.find('img', {'alt': 'verified'}) else False
-                            seeders = int(tr.find_all('td')[-2].text)
-                            leechers = int(tr.find_all('td')[-1].text)
-                        except (AttributeError, TypeError):
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.WARNING)
                             continue
 
-                        if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
-                            continue
+                        for tr in torrent_rows[1:]:
+                            try:
+                                link = urlparse.urljoin(self.url,
+                                                        (tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
+                                id = tr.get('id')[-7:]
+                                title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
+                                    or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
+                                url = tr.find('a', 'imagnet')['href']
+                                verified = True if tr.find('a', 'iverify') else False
+                                trusted = True if tr.find('img', {'alt': 'verified'}) else False
+                                seeders = int(tr.find_all('td')[-2].text)
+                                leechers = int(tr.find_all('td')[-1].text)
+                            except (AttributeError, TypeError):
+                                continue
 
-                        if self.confirmed and not verified:
-                            logger.log(
-                                u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it",
-                                logger.DEBUG)
-                            continue
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
 
-                        #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
-                        if mode == 'Season' and search_mode == 'sponly':
-                            ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
-                            title = self._find_season_quality(title, link, ep_number)
+                            if self.confirmed and not verified:
+                                logger.log(
+                                    u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it",
+                                    logger.DEBUG)
+                                continue
 
-                        if not title or not url:
-                            continue
+                            #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
+                            if mode == 'Season' and search_mode == 'sponly':
+                                ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
+                                title = self._find_season_quality(title, link, ep_number)
 
-                        item = title, url, id, seeders, leechers
+                            if not title or not url:
+                                continue
 
-                        items[mode].append(item)
+                            item = title, url, id, seeders, leechers
+
+                            items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),
diff --git a/sickbeard/providers/nextgen.py b/sickbeard/providers/nextgen.py
index 2eeaca89..33b738db 100644
--- a/sickbeard/providers/nextgen.py
+++ b/sickbeard/providers/nextgen.py
@@ -37,7 +37,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from sickbeard.helpers import sanitizeSceneName
 
 
@@ -118,16 +118,16 @@ class NextGenProvider(generic.TorrentProvider):
                 self.session.headers.update(
                     {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20130519 Firefox/24.0)'})
                 data = self.session.get(self.urls['login_page'], verify=False)
-                bs = BeautifulSoup(data.content.decode('iso-8859-1'))
-                csrfraw = bs.find('form', attrs={'id': 'login'})['action']
-                output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
+                with BS4Parser(data.content.decode('iso-8859-1')) as bs:
+                    csrfraw = bs.find('form', attrs={'id': 'login'})['action']
+                    output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
 
-                if self.loginSuccess(output):
-                    self.last_login_check = now
-                    self.login_opener = self.session
-                    return True
+                    if self.loginSuccess(output):
+                        self.last_login_check = now
+                        self.login_opener = self.session
+                        return True
 
-                error = 'unknown'
+                    error = 'unknown'
             except:
                 error = traceback.format_exc()
                 self.login_opener = None
@@ -204,59 +204,58 @@ class NextGenProvider(generic.TorrentProvider):
 
                 if data:
 
                     try:
-                        html = BeautifulSoup(data.decode('iso-8859-1'), features=["html5lib", "permissive"])
-                        resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})
+                        with BS4Parser(data.decode('iso-8859-1'), features=["html5lib", "permissive"]) as html:
+                            resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})
 
-                        if not resultsTable:
-                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                       logger.DEBUG)
-                            continue
-
-                        # Collecting entries
-                        entries_std = html.find_all('div', attrs={'id': 'torrent-std'})
-                        entries_sticky = html.find_all('div', attrs={'id': 'torrent-sticky'})
-
-                        entries = entries_std + entries_sticky
-
-                        #Xirg STANDARD TORRENTS
-                        #Continue only if one Release is found
-                        if len(entries) > 0:
-
-                            for result in entries:
-
-                                try:
-                                    torrentName = \
-                                        ((result.find('div', attrs={'id': 'torrent-udgivelse2-users'})).find('a'))['title']
-                                    torrentId = (
-                                        ((result.find('div', attrs={'id': 'torrent-download'})).find('a'))['href']).replace(
-                                        'download.php?id=', '')
-                                    torrent_name = str(torrentName)
-                                    torrent_download_url = (self.urls['download'] % torrentId).encode('utf8')
-                                    torrent_details_url = (self.urls['detail'] % torrentId).encode('utf8')
-                                    #torrent_seeders = int(result.find('div', attrs = {'id' : 'torrent-seeders'}).find('a')['class'][0])
-                                    ## Not used, perhaps in the future ##
-                                    #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
-                                    #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
-                                except (AttributeError, TypeError):
-                                    continue
-
-                                # Filter unseeded torrent and torrents with no name/url
-                                #if mode != 'RSS' and torrent_seeders == 0:
-                                #    continue
-
-                                if not torrent_name or not torrent_download_url:
-                                    continue
-
-                                item = torrent_name, torrent_download_url
-                                logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")",
+                            if not resultsTable:
+                                logger.log(u"The data returned from " + self.name + " does not contain any torrents",
                                            logger.DEBUG)
-                                items[mode].append(item)
+                                continue
 
-                        else:
-                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                       logger.WARNING)
-                            continue
+                            # Collecting entries
+                            entries_std = html.find_all('div', attrs={'id': 'torrent-std'})
+                            entries_sticky = html.find_all('div', attrs={'id': 'torrent-sticky'})
+                            entries = entries_std + entries_sticky
+
+                            #Xirg STANDARD TORRENTS
+                            #Continue only if one Release is found
+                            if len(entries) > 0:
+
+                                for result in entries:
+
+                                    try:
+                                        torrentName = \
+                                            ((result.find('div', attrs={'id': 'torrent-udgivelse2-users'})).find('a'))['title']
+                                        torrentId = (
+                                            ((result.find('div', attrs={'id': 'torrent-download'})).find('a'))['href']).replace(
+                                            'download.php?id=', '')
+                                        torrent_name = str(torrentName)
+                                        torrent_download_url = (self.urls['download'] % torrentId).encode('utf8')
+                                        torrent_details_url = (self.urls['detail'] % torrentId).encode('utf8')
+                                        #torrent_seeders = int(result.find('div', attrs = {'id' : 'torrent-seeders'}).find('a')['class'][0])
+                                        ## Not used, perhaps in the future ##
+                                        #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
+                                        #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
+                                    except (AttributeError, TypeError):
+                                        continue
+
+                                    # Filter unseeded torrent and torrents with no name/url
+                                    #if mode != 'RSS' and torrent_seeders == 0:
+                                    #    continue
+
+                                    if not torrent_name or not torrent_download_url:
+                                        continue
+
+                                    item = torrent_name, torrent_download_url
+                                    logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")",
+                                               logger.DEBUG)
+                                    items[mode].append(item)
+
+                            else:
+                                logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                           logger.WARNING)
+                                continue
 
                     except Exception, e:
                         logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),
diff --git a/sickbeard/providers/publichd.py b/sickbeard/providers/publichd.py
index 818e136a..dba0647d 100644
--- a/sickbeard/providers/publichd.py
+++ b/sickbeard/providers/publichd.py
@@ -40,7 +40,7 @@ from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
 
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 
 
@@ -150,39 +150,36 @@ class PublicHDProvider(generic.TorrentProvider):
                 html = os.linesep.join([s for s in html.splitlines() if not optreg.search(s)])
 
                 try:
-                    html = BeautifulSoup(html, features=["html5lib", "permissive"])
+                    with BS4Parser(html, features=["html5lib", "permissive"]) as html:
+                        torrent_table = html.find('table', attrs={'id': 'torrbg'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    torrent_table = html.find('table', attrs={'id': 'torrbg'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    html.clear(True)
-
-                    #Continue only if one Release is found
-                    if len(torrent_rows) < 2:
-                        logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                   logger.DEBUG)
-                        continue
-
-                    for tr in torrent_rows[1:]:
-
-                        try:
-                            link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
-                            title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
-                            url = tr.find(href=re.compile('magnet+'))['href']
-                            seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
-                            leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
-                        except (AttributeError, TypeError):
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.DEBUG)
                             continue
 
-                        if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
-                            continue
+                        for tr in torrent_rows[1:]:
 
-                        if not title or not url:
-                            continue
+                            try:
+                                link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
+                                title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
+                                url = tr.find(href=re.compile('magnet+'))['href']
+                                seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
+                                leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
+                            except (AttributeError, TypeError):
+                                continue
 
-                        item = title, url, link, seeders, leechers
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
 
-                        items[mode].append(item)
+                            if not title or not url:
+                                continue
+
+                            item = title, url, link, seeders, leechers
+
+                            items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),
diff --git a/sickbeard/providers/scc.py b/sickbeard/providers/scc.py
index 8c01a253..070bdc4a 100644
--- a/sickbeard/providers/scc.py
+++ b/sickbeard/providers/scc.py
@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 
@@ -196,62 +196,58 @@ class SCCProvider(generic.TorrentProvider):
 
                 try:
                     for dataItem in data:
-                        html = BeautifulSoup(dataItem, features=["html5lib", "permissive"])
+                        with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html:
+                            torrent_table = html.find('table', attrs={'id': 'torrents-table'})
+                            torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                        torrent_table = html.find('table', attrs={'id': 'torrents-table'})
-                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                        html.clear(True)
-
-                        #Continue only if at least one Release is found
-                        if len(torrent_rows) < 2:
-                            if html.title:
-                                source = self.name + " (" + html.title.string + ")"
-                            else:
-                                source = self.name
-                            logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
-                            continue
-
-                        for result in torrent_table.find_all('tr')[1:]:
-
-                            try:
-                                link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
-                                all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
-                                # Foreign section contain two links, the others one
-                                if self._isSection('Foreign', dataItem):
-                                    url = all_urls[1]
+                            #Continue only if at least one Release is found
+                            if len(torrent_rows) < 2:
+                                if html.title:
+                                    source = self.name + " (" + html.title.string + ")"
                                 else:
-                                    url = all_urls[0]
-
-                                title = link.string
-                                if re.search('\.\.\.', title):
-                                    details_html = BeautifulSoup(self.getURL(self.url + "/" + link['href']))
-                                    title = re.search('(?<=").+(?
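
Usage note (a sketch only, not part of the patch): every provider hunk above makes the same mechanical change, so a minimal standalone example of the new context manager may help review. The sample markup and the 'koptekst' selector are illustrative placeholders borrowed from the bitsoup hunk; the BS4Parser behaviour shown is what sickbeard/bs4_parser.py above implements.

    # Python 2, matching the codebase
    from sickbeard.bs4_parser import BS4Parser

    page = "<table class='koptekst'><tr><td>row</td></tr></table>"

    # __enter__ returns the BeautifulSoup object itself, so the body of the
    # with-block reads the same as the old soup-based code did.
    with BS4Parser(page, "html.parser") as html:
        table = html.find('table', attrs={'class': 'koptekst'})
        rows = table.find_all('tr') if table else []

    # On exit, BS4Parser calls soup.clear(True), decomposing the parse tree,
    # and drops its reference -- this replaces the scattered html.clear(True)
    # calls the patch removes, and the cleanup now also runs when the body
    # raises, which the old code only reached via its catch-all except blocks.

Since __exit__ returns None, exceptions from the parsing body still propagate to each provider's existing "except Exception" logging; the context manager only centralizes cleanup, it does not swallow parse errors.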