Created a context manager wrapper for BeautifulSoup4 so that parsed tags and content are cleaned up/cleared automatically on exit from with statements.

Fixed issues with torrent providers returning no results.
echel0n 2014-07-21 21:26:58 -07:00
parent 77feb5a74c
commit a317ff61c2
12 changed files with 445 additions and 469 deletions

sickbeard/bs4_parser.py (new file, +13)

@@ -0,0 +1,13 @@
+import sickbeard
+from bs4 import BeautifulSoup
+
+class BS4Parser:
+    def __init__(self, *args, **kwargs):
+        self.soup = BeautifulSoup(*args, **kwargs)
+
+    def __enter__(self):
+        return self.soup
+
+    def __exit__(self, exc_ty, exc_val, tb):
+        self.soup.clear(True)
+        self.soup = None
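
A minimal usage sketch of the wrapper (standalone; the markup string and variable names here are illustrative, not part of the commit):

    from sickbeard.bs4_parser import BS4Parser

    # Constructor arguments are forwarded unchanged to BeautifulSoup, so call
    # sites keep passing the same parser arguments as before, e.g.
    # "html.parser" or features=["html5lib", "permissive"].
    markup = "<table class='koptekst'><tr><td>example</td></tr></table>"

    with BS4Parser(markup, "html.parser") as html:
        table = html.find('table', attrs={'class': 'koptekst'})
        rows = table.find_all('tr') if table else []

__enter__ returns the BeautifulSoup object itself rather than the wrapper, so existing html.find(...) call sites work unchanged inside the block. On exit, __exit__ calls soup.clear(True), which decomposes the whole parse tree so large pages are freed promptly, and since __exit__ returns None, any exception raised while scraping still propagates to the surrounding try/except.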

sickbeard/helpers.py

@@ -31,7 +31,6 @@ import httplib
 import urlparse
 import uuid
 import base64
-import string
 import zipfile

 from lib import requests
@@ -1241,7 +1240,7 @@ def mapIndexersToShow(showObj):
     return mapped


-def touchFile(self, fname, atime=None):
+def touchFile(fname, atime=None):
     if None != atime:
         try:
             with file(fname, 'a'):

sickbeard/providers/bitsoup.py

@@ -22,7 +22,7 @@ import datetime
 import urlparse
 import sickbeard
 import generic
-from sickbeard.common import Quality, cpu_presets
+from sickbeard.common import Quality
 from sickbeard import logger
 from sickbeard import tvcache
 from sickbeard import db
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -168,48 +168,45 @@ class BitSoupProvider(generic.TorrentProvider):
                     continue

                 try:
-                    html = BeautifulSoup(data, "html.parser")
-
-                    torrent_table = html.find('table', attrs={'class': 'koptekst'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    html.clear(True)
+                    with BS4Parser(data, "html.parser") as html:
+                        torrent_table = html.find('table', attrs={'class': 'koptekst'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.DEBUG)
+                            continue
+
+                        for result in torrent_rows[1:]:
+                            cells = result.find_all('td')
+
+                            link = cells[1].find('a')
+                            download_url = self.urls['download'] % cells[3].find('a')['href']
+
+                            id = link['href']
+                            id = id.replace('details.php?id=','')
+                            id = id.replace('&hit=1', '')
+
+                            try:
+                                title = link.getText()
+                                id = int(id)
+                                seeders = int(cells[9].getText())
+                                leechers = int(cells[10].getText())
+                            except (AttributeError, TypeError):
+                                continue
+
+                            #Filter unseeded torrent
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

sickbeard/providers/freshontv.py

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -175,7 +175,6 @@ class FreshOnTVProvider(generic.TorrentProvider):
         if not self._doLogin():
             return []

-
         for mode in search_params.keys():
             for search_string in search_params[mode]:
@@ -193,55 +192,52 @@ class FreshOnTVProvider(generic.TorrentProvider):
                     continue

                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
-
-                    torrent_table = html.find('table', attrs={'class': 'frame'})
-                    torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
-
-                    html.clear(True)
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        torrent_table = html.find('table', attrs={'class': 'frame'})
+                        torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.DEBUG)
+                            continue
+
+                        # skip colheader
+                        for result in torrent_rows[1:]:
+                            cells = result.findChildren('td')
+
+                            link = cells[1].find('a', attrs = {'class': 'torrent_name_link'})
+                            #skip if torrent has been nuked due to poor quality
+                            if cells[1].find('img', alt='Nuked') != None:
+                                continue
+
+                            torrent_id = link['href'].replace('/details.php?id=', '')
+
+                            try:
+                                if link.has_key('title'):
+                                    title = cells[1].find('a', {'class': 'torrent_name_link'})['title']
+                                else:
+                                    title = link.contents[0]
+                                download_url = self.urls['download'] % (torrent_id)
+                                id = int(torrent_id)
+
+                                seeders = int(cells[8].find('a', {'class': 'link'}).span.contents[0].strip())
+                                leechers = int(cells[9].find('a', {'class': 'link'}).contents[0].strip())
+                            except (AttributeError, TypeError):
+                                continue
+
+                            #Filter unseeded torrent
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

sickbeard/providers/hdtorrents.py

@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -196,64 +196,22 @@ class HDTorrentsProvider(generic.TorrentProvider):
                     data = split_data[2]

                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
-
-                    #Get first entry in table
-                    entries = html.find_all('td', attrs={'align': 'center'})
-
-                    html.clear(True)
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        #Get first entry in table
+                        entries = html.find_all('td', attrs={'align': 'center'})
+
+                        if not entries:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.DEBUG)
+                            continue
+
+                        try:
+                            title = entries[22].find('a')['title'].strip('History - ').replace('Blu-ray', 'bd50')
+                            url = self.urls['home'] % entries[15].find('a')['href']
+                            download_url = self.urls['home'] % entries[15].find('a')['href']
+                            id = entries[23].find('div')['id']
+                            seeders = int(entries[20].get_text())
+                            leechers = int(entries[21].get_text())
+                        except (AttributeError, TypeError):
+                            continue
+
+                        if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                            continue
+
+                        if not title or not download_url:
+                            continue
+
+                        item = title, download_url, id, seeders, leechers
+                        logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
@@ -268,6 +226,45 @@ class HDTorrentsProvider(generic.TorrentProvider):
                         items[mode].append(item)

+                        #Now attempt to get any others
+                        result_table = html.find('table', attrs={'class': 'mainblockcontenttt'})
+
+                        if not result_table:
+                            continue
+
+                        entries = result_table.find_all('td', attrs={'align': 'center', 'class': 'listas'})
+
+                        if not entries:
+                            continue
+
+                        for result in entries:
+                            block2 = result.find_parent('tr').find_next_sibling('tr')
+                            if not block2:
+                                continue
+                            cells = block2.find_all('td')
+
+                            try:
+                                title = cells[1].find('b').get_text().strip('\t ').replace('Blu-ray', 'bd50')
+                                url = self.urls['home'] % cells[4].find('a')['href']
+                                download_url = self.urls['home'] % cells[4].find('a')['href']
+                                detail = cells[1].find('a')['href']
+                                id = detail.replace('details.php?id=', '')
+                                seeders = int(cells[9].get_text())
+                                leechers = int(cells[10].get_text())
+                            except (AttributeError, TypeError):
+                                continue
+
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

sickbeard/providers/iptorrents.py

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 from sickbeard.show_name_helpers import allPossibleShowNames
@@ -167,51 +167,48 @@ class IPTorrentsProvider(generic.TorrentProvider):
                     continue

                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
-
-                    if not html:
-                        logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
-                        continue
-
-                    if html.find(text='No Torrents Found!'):
-                        logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
-                        continue
-
-                    torrent_table = html.find('table', attrs={'class': 'torrents'})
-                    torrents = torrent_table.find_all('tr') if torrent_table else []
-
-                    html.clear(True)
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        if not html:
+                            logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
+                            continue
+
+                        if html.find(text='No Torrents Found!'):
+                            logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
+                            continue
+
+                        torrent_table = html.find('table', attrs={'class': 'torrents'})
+                        torrents = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrents) < 2:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.WARNING)
+                            continue
+
+                        for result in torrents[1:]:
+
+                            try:
+                                torrent = result.find_all('td')[1].find('a')
+                                torrent_name = torrent.string
+                                torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
+                                torrent_details_url = self.urls['base_url'] + torrent['href']
+                                torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
+                                ## Not used, perhaps in the future ##
+                                #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
+                                #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
+                            except (AttributeError, TypeError):
+                                continue
+
+                            # Filter unseeded torrent and torrents with no name/url
+                            if mode != 'RSS' and torrent_seeders == 0:
+                                continue
+
+                            if not torrent_name or not torrent_download_url:
+                                continue
+
+                            item = torrent_name, torrent_download_url
+                            logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

sickbeard/providers/kat.py

@@ -40,11 +40,9 @@ from sickbeard.show_name_helpers import allPossibleShowNames, sanitizeSceneName
 from sickbeard.exceptions import ex
 from sickbeard import encodingKludge as ek
 from sickbeard import clients
-from sickbeard import tv
+from sickbeard.bs4_parser import BS4Parser
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
 from lib.unidecode import unidecode
@@ -119,55 +117,52 @@ class KATProvider(generic.TorrentProvider):
                 return None

         try:
-            soup = BeautifulSoup(data, features=["html5lib", "permissive"])
-            file_table = soup.find('table', attrs={'class': 'torrentFileList'})
-
-            # cleanup memory
-            soup.clear(True)
+            with BS4Parser(data, features=["html5lib", "permissive"]) as soup:
+                file_table = soup.find('table', attrs={'class': 'torrentFileList'})

-            if not file_table:
-                return None
+                if not file_table:
+                    return None
+
+                files = [x.text for x in file_table.find_all('td', attrs={'class': 'torFileName'})]
+                videoFiles = filter(lambda x: x.rpartition(".")[2].lower() in mediaExtensions, files)
+
+                #Filtering SingleEpisode/MultiSeason Torrent
+                if len(videoFiles) < ep_number or len(videoFiles) > float(ep_number * 1.1):
+                    logger.log(u"Result " + title + " have " + str(
+                        ep_number) + " episode and episodes retrived in torrent are " + str(len(videoFiles)), logger.DEBUG)
+                    logger.log(
+                        u"Result " + title + " Seem to be a Single Episode or MultiSeason torrent, skipping result...",
+                        logger.DEBUG)
+                    return None
+
+                if Quality.sceneQuality(title) != Quality.UNKNOWN:
+                    return title
+
+                for fileName in videoFiles:
+                    quality = Quality.sceneQuality(os.path.basename(fileName))
+                    if quality != Quality.UNKNOWN: break
+
+                if fileName is not None and quality == Quality.UNKNOWN:
+                    quality = Quality.assumeQuality(os.path.basename(fileName))
+
+                if quality == Quality.UNKNOWN:
+                    logger.log(u"Unable to obtain a Season Quality for " + title, logger.DEBUG)
+                    return None
+
+                try:
+                    myParser = NameParser(showObj=self.show)
+                    parse_result = myParser.parse(fileName)
+                except (InvalidNameException, InvalidShowException):
+                    return None
+
+                logger.log(u"Season quality for " + title + " is " + Quality.qualityStrings[quality], logger.DEBUG)
+
+                if parse_result.series_name and parse_result.season_number:
+                    title = parse_result.series_name + ' S%02d' % int(
+                        parse_result.season_number) + ' ' + self._reverseQuality(quality)
+
+                return title

         except Exception, e:
             logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
@@ -230,6 +225,7 @@ class KATProvider(generic.TorrentProvider):
         results = []
         items = {'Season': [], 'Episode': [], 'RSS': []}

+        soup = None

         for mode in search_params.keys():
             for search_string in search_params[mode]:
@@ -250,54 +246,51 @@ class KATProvider(generic.TorrentProvider):
                     continue

                 try:
-                    soup = BeautifulSoup(html, features=["html5lib", "permissive"])
-
-                    torrent_table = soup.find('table', attrs={'class': 'data'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    soup.clear(True)
+                    with BS4Parser(html, features=["html5lib", "permissive"]) as soup:
+                        torrent_table = soup.find('table', attrs={'class': 'data'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The data returned from " + self.name + " does not contain any torrents",
+                                       logger.WARNING)
+                            continue
+
+                        for tr in torrent_rows[1:]:
+
+                            try:
+                                link = urlparse.urljoin(self.url,
+                                                        (tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
+                                id = tr.get('id')[-7:]
+                                title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
+                                        or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
+                                url = tr.find('a', 'imagnet')['href']
+                                verified = True if tr.find('a', 'iverify') else False
+                                trusted = True if tr.find('img', {'alt': 'verified'}) else False
+                                seeders = int(tr.find_all('td')[-2].text)
+                                leechers = int(tr.find_all('td')[-1].text)
+                            except (AttributeError, TypeError):
+                                continue
+
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if self.confirmed and not verified:
+                                logger.log(
+                                    u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it",
+                                    logger.DEBUG)
+                                continue
+
+                            #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
+                            if mode == 'Season' and search_mode == 'sponly':
+                                ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
+                                title = self._find_season_quality(title, link, ep_number)
+
+                            if not title or not url:
+                                continue
+
+                            item = title, url, id, seeders, leechers
+
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),

sickbeard/providers/nextgen.py

@@ -37,7 +37,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from sickbeard.helpers import sanitizeSceneName
@@ -118,16 +118,16 @@ class NextGenProvider(generic.TorrentProvider):
                 self.session.headers.update(
                     {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20130519 Firefox/24.0)'})
                 data = self.session.get(self.urls['login_page'], verify=False)
-                bs = BeautifulSoup(data.content.decode('iso-8859-1'))
-                csrfraw = bs.find('form', attrs={'id': 'login'})['action']
-                output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
-
-                if self.loginSuccess(output):
-                    self.last_login_check = now
-                    self.login_opener = self.session
-                    return True
-
-                error = 'unknown'
+                with BS4Parser(data.content.decode('iso-8859-1')) as bs:
+                    csrfraw = bs.find('form', attrs={'id': 'login'})['action']
+                    output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
+
+                    if self.loginSuccess(output):
+                        self.last_login_check = now
+                        self.login_opener = self.session
+                        return True
+
+                    error = 'unknown'

             except:
                 error = traceback.format_exc()
                 self.login_opener = None
@@ -204,59 +204,58 @@ class NextGenProvider(generic.TorrentProvider):
                 if data:

                     try:
-                        html = BeautifulSoup(data.decode('iso-8859-1'), features=["html5lib", "permissive"])
+                        with BS4Parser(data.decode('iso-8859-1'), features=["html5lib", "permissive"]) as html:
+                            resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})

-                        resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})
-
-                        if not resultsTable:
-                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
-                                       logger.DEBUG)
-                            continue
+                            if not resultsTable:
+                                logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                           logger.DEBUG)
+                                continue
+
+                            # Collecting entries
+                            entries_std = html.find_all('div', attrs={'id': 'torrent-std'})
+                            entries_sticky = html.find_all('div', attrs={'id': 'torrent-sticky'})
+
+                            entries = entries_std + entries_sticky
+
+                            #Xirg STANDARD TORRENTS
+                            #Continue only if one Release is found
+                            if len(entries) > 0:
+
+                                for result in entries:
+
+                                    try:
+                                        torrentName = \
+                                            ((result.find('div', attrs={'id': 'torrent-udgivelse2-users'})).find('a'))['title']
+                                        torrentId = (
+                                            ((result.find('div', attrs={'id': 'torrent-download'})).find('a'))['href']).replace(
+                                            'download.php?id=', '')
+                                        torrent_name = str(torrentName)
+                                        torrent_download_url = (self.urls['download'] % torrentId).encode('utf8')
+                                        torrent_details_url = (self.urls['detail'] % torrentId).encode('utf8')
+                                        #torrent_seeders = int(result.find('div', attrs = {'id' : 'torrent-seeders'}).find('a')['class'][0])
+                                        ## Not used, perhaps in the future ##
+                                        #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
+                                        #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
+                                    except (AttributeError, TypeError):
+                                        continue
+
+                                    # Filter unseeded torrent and torrents with no name/url
+                                    #if mode != 'RSS' and torrent_seeders == 0:
+                                    #    continue
+
+                                    if not torrent_name or not torrent_download_url:
+                                        continue
+
+                                    item = torrent_name, torrent_download_url
+                                    logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")",
+                                               logger.DEBUG)
+                                    items[mode].append(item)
+
+                            else:
+                                logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                           logger.WARNING)
+                                continue

                     except Exception, e:
                         logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),

sickbeard/providers/publichd.py

@@ -40,7 +40,7 @@ from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
@@ -150,39 +150,36 @@ class PublicHDProvider(generic.TorrentProvider):
                 html = os.linesep.join([s for s in html.splitlines() if not optreg.search(s)])

                 try:
-                    html = BeautifulSoup(html, features=["html5lib", "permissive"])
-
-                    torrent_table = html.find('table', attrs={'id': 'torrbg'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    html.clear(True)
+                    with BS4Parser(html, features=["html5lib", "permissive"]) as html:
+                        torrent_table = html.find('table', attrs={'id': 'torrbg'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.DEBUG)
+                            continue
+
+                        for tr in torrent_rows[1:]:
+
+                            try:
+                                link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
+                                title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
+                                url = tr.find(href=re.compile('magnet+'))['href']
+                                seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
+                                leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
+                            except (AttributeError, TypeError):
+                                continue
+
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not url:
+                                continue
+
+                            item = title, url, link, seeders, leechers
+
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),

sickbeard/providers/scc.py

@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -196,62 +196,58 @@ class SCCProvider(generic.TorrentProvider):
                 try:
                     for dataItem in data:
-                        html = BeautifulSoup(dataItem, features=["html5lib", "permissive"])
-
-                        torrent_table = html.find('table', attrs={'id': 'torrents-table'})
-                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                        html.clear(True)
+                        with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html:
+                            torrent_table = html.find('table', attrs={'id': 'torrents-table'})
+                            torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+
+                            #Continue only if at least one Release is found
+                            if len(torrent_rows) < 2:
+                                if html.title:
+                                    source = self.name + " (" + html.title.string + ")"
+                                else:
+                                    source = self.name
+                                logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
+                                continue
+
+                            for result in torrent_table.find_all('tr')[1:]:
+
+                                try:
+                                    link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
+                                    all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
+                                    # Foreign section contain two links, the others one
+                                    if self._isSection('Foreign', dataItem):
+                                        url = all_urls[1]
+                                    else:
+                                        url = all_urls[0]
+
+                                    title = link.string
+                                    if re.search('\.\.\.', title):
-                                        details_html = BeautifulSoup(self.getURL(self.url + "/" + link['href']))
-                                        title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
-                                        details_html.clear(True)
+                                        with BS4Parser(self.getURL(self.url + "/" + link['href'])) as details_html:
+                                            title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
+
+                                    download_url = self.urls['download'] % url['href']
+                                    id = int(link['href'].replace('details?id=', ''))
+                                    seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
+                                    leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
+                                except (AttributeError, TypeError):
+                                    continue
+
+                                if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                    continue
+
+                                if not title or not download_url:
+                                    continue
+
+                                item = title, download_url, id, seeders, leechers
+
+                                if self._isSection('Non-Scene', dataItem):
+                                    logger.log(u"Found result: " + title + "(" + nonsceneSearchURL + ")", logger.DEBUG)
+                                elif self._isSection('Foreign', dataItem):
+                                    logger.log(u"Found result: " + title + "(" + foreignSearchURL + ")", logger.DEBUG)
+                                else:
+                                    logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                                items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

sickbeard/providers/torrentbytes.py

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -168,51 +168,47 @@ class TorrentBytesProvider(generic.TorrentProvider):
                     continue

                 try:
-                    html = BeautifulSoup(data)
-
-                    torrent_table = html.find('table', attrs={'border': '1'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    # cleanup memory
-                    html.clear(True)
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        torrent_table = html.find('table', attrs={'border': '1'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.DEBUG)
+                            continue
+
+                        for result in torrent_rows[1:]:
+                            cells = result.find_all('td')
+
+                            link = cells[1].find('a', attrs={'class': 'index'})
+
+                            full_id = link['href'].replace('details.php?id=', '')
+                            torrent_id = full_id[:6]
+
+                            try:
+                                if link.has_key('title'):
+                                    title = cells[1].find('a', {'class': 'index'})['title']
+                                else:
+                                    title = link.contents[0]
+                                download_url = self.urls['download'] % (torrent_id, link.contents[0])
+                                id = int(torrent_id)
+                                seeders = int(cells[8].find('span').contents[0])
+                                leechers = int(cells[9].find('span').contents[0])
+                            except (AttributeError, TypeError):
+                                continue
+
+                            #Filter unseeded torrent
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

sickbeard/providers/torrentleech.py

@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -172,44 +172,40 @@ class TorrentLeechProvider(generic.TorrentProvider):
                     continue

                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
-
-                    torrent_table = html.find('table', attrs={'id': 'torrenttable'})
-                    torrent_rows = torrent_table.find_all('tr') if torrent_table else []
-
-                    # cleanup memory
-                    html.clear(True)
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
+                        torrent_table = html.find('table', attrs={'id': 'torrenttable'})
+                        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
+
+                        #Continue only if one Release is found
+                        if len(torrent_rows) < 2:
+                            logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
+                                       logger.DEBUG)
+                            continue
+
+                        for result in torrent_table.find_all('tr')[1:]:
+
+                            try:
+                                link = result.find('td', attrs={'class': 'name'}).find('a')
+                                url = result.find('td', attrs={'class': 'quickdownload'}).find('a')
+                                title = link.string
+                                download_url = self.urls['download'] % url['href']
+                                id = int(link['href'].replace('/torrent/', ''))
+                                seeders = int(result.find('td', attrs={'class': 'seeders'}).string)
+                                leechers = int(result.find('td', attrs={'class': 'leechers'}).string)
+                            except (AttributeError, TypeError):
+                                continue
+
+                            #Filter unseeded torrent
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)

                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)