Created a context manager wrapper for BeautifulSoup4 so that we can clean up and clear tags/context on exit via `with` statements.

Fixed issues with torrent providers returning no results.
This commit is contained in:
echel0n 2014-07-21 21:26:58 -07:00
parent 77feb5a74c
commit a317ff61c2
12 changed files with 445 additions and 469 deletions

13
sickbeard/bs4_parser.py Normal file
View file

@ -0,0 +1,13 @@
import sickbeard
from bs4 import BeautifulSoup
class BS4Parser(object):
    """Context manager wrapping a BeautifulSoup parse tree.

    Typical use::

        with BS4Parser(data, "html.parser") as soup:
            table = soup.find('table')
            ...

    The parse tree is cleared when the ``with`` block exits, so any tags
    taken from the soup must be consumed inside the block.
    """

    def __init__(self, *args, **kwargs):
        """Build the soup; all arguments are forwarded to ``BeautifulSoup``."""
        self.soup = BeautifulSoup(*args, **kwargs)

    def __enter__(self):
        """Return the underlying soup object for use inside the block."""
        return self.soup

    def __exit__(self, exc_ty, exc_val, tb):
        """Clear the parse tree and drop the reference to it.

        Returns None, so an exception raised inside the ``with`` block is
        not suppressed.
        """
        # Guard so that a repeated exit (or an exit after the soup has
        # already been released) is a no-op instead of an AttributeError.
        if self.soup is not None:
            # The positional True is the "decompose" flag of Tag.clear().
            self.soup.clear(True)
            self.soup = None

View file

@ -31,7 +31,6 @@ import httplib
import urlparse
import uuid
import base64
import string
import zipfile
from lib import requests
@ -1241,7 +1240,7 @@ def mapIndexersToShow(showObj):
return mapped
def touchFile(self, fname, atime=None):
def touchFile(fname, atime=None):
if None != atime:
try:
with file(fname, 'a'):

View file

@ -22,7 +22,7 @@ import datetime
import urlparse
import sickbeard
import generic
from sickbeard.common import Quality, cpu_presets
from sickbeard.common import Quality
from sickbeard import logger
from sickbeard import tvcache
from sickbeard import db
@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
@ -168,48 +168,45 @@ class BitSoupProvider(generic.TorrentProvider):
continue
try:
html = BeautifulSoup(data, "html.parser")
with BS4Parser(data, "html.parser") as html:
torrent_table = html.find('table', attrs={'class': 'koptekst'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
torrent_table = html.find('table', attrs={'class': 'koptekst'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
html.clear(True)
for result in torrent_rows[1:]:
cells = result.find_all('td')
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
link = cells[1].find('a')
download_url = self.urls['download'] % cells[3].find('a')['href']
for result in torrent_rows[1:]:
cells = result.find_all('td')
id = link['href']
id = id.replace('details.php?id=','')
id = id.replace('&hit=1', '')
link = cells[1].find('a')
download_url = self.urls['download'] % cells[3].find('a')['href']
try:
title = link.getText()
id = int(id)
seeders = int(cells[9].getText())
leechers = int(cells[10].getText())
except (AttributeError, TypeError):
continue
id = link['href']
id = id.replace('details.php?id=','')
id = id.replace('&hit=1', '')
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
try:
title = link.getText()
id = int(id)
seeders = int(cells[9].getText())
leechers = int(cells[10].getText())
except (AttributeError, TypeError):
continue
if not title or not download_url:
continue
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

View file

@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
@ -175,7 +175,6 @@ class FreshOnTVProvider(generic.TorrentProvider):
if not self._doLogin():
return []
for mode in search_params.keys():
for search_string in search_params[mode]:
@ -193,55 +192,52 @@ class FreshOnTVProvider(generic.TorrentProvider):
continue
try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])
with BS4Parser(data, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'class': 'frame'})
torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
torrent_table = html.find('table', attrs={'class': 'frame'})
torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
html.clear(True)
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
# skip colheader
for result in torrent_rows[1:]:
cells = result.findChildren('td')
link = cells[1].find('a', attrs = {'class': 'torrent_name_link'})
#skip if torrent has been nuked due to poor quality
if cells[1].find('img', alt='Nuked') != None:
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
torrent_id = link['href'].replace('/details.php?id=', '')
# skip colheader
for result in torrent_rows[1:]:
cells = result.findChildren('td')
link = cells[1].find('a', attrs = {'class': 'torrent_name_link'})
#skip if torrent has been nuked due to poor quality
if cells[1].find('img', alt='Nuked') != None:
continue
torrent_id = link['href'].replace('/details.php?id=', '')
try:
if link.has_key('title'):
title = cells[1].find('a', {'class': 'torrent_name_link'})['title']
else:
title = link.contents[0]
download_url = self.urls['download'] % (torrent_id)
id = int(torrent_id)
try:
if link.has_key('title'):
title = cells[1].find('a', {'class': 'torrent_name_link'})['title']
else:
title = link.contents[0]
download_url = self.urls['download'] % (torrent_id)
id = int(torrent_id)
seeders = int(cells[8].find('a', {'class': 'link'}).span.contents[0].strip())
leechers = int(cells[9].find('a', {'class': 'link'}).contents[0].strip())
except (AttributeError, TypeError):
continue
seeders = int(cells[8].find('a', {'class': 'link'}).span.contents[0].strip())
leechers = int(cells[9].find('a', {'class': 'link'}).contents[0].strip())
except (AttributeError, TypeError):
continue
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
if not title or not download_url:
continue
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

View file

@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
@ -196,64 +196,22 @@ class HDTorrentsProvider(generic.TorrentProvider):
data = split_data[2]
try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])
with BS4Parser(data, features=["html5lib", "permissive"]) as html:
#Get first entry in table
entries = html.find_all('td', attrs={'align': 'center'})
#Get first entry in table
entries = html.find_all('td', attrs={'align': 'center'})
html.clear(True)
if not entries:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
try:
title = entries[22].find('a')['title'].strip('History - ').replace('Blu-ray', 'bd50')
url = self.urls['home'] % entries[15].find('a')['href']
download_url = self.urls['home'] % entries[15].find('a')['href']
id = entries[23].find('div')['id']
seeders = int(entries[20].get_text())
leechers = int(entries[21].get_text())
except (AttributeError, TypeError):
continue
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
#Now attempt to get any others
result_table = html.find('table', attrs={'class': 'mainblockcontenttt'})
if not result_table:
continue
entries = result_table.find_all('td', attrs={'align': 'center', 'class': 'listas'})
if not entries:
continue
for result in entries:
block2 = result.find_parent('tr').find_next_sibling('tr')
if not block2:
if not entries:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
cells = block2.find_all('td')
try:
title = cells[1].find('b').get_text().strip('\t ').replace('Blu-ray', 'bd50')
url = self.urls['home'] % cells[4].find('a')['href']
download_url = self.urls['home'] % cells[4].find('a')['href']
detail = cells[1].find('a')['href']
id = detail.replace('details.php?id=', '')
seeders = int(cells[9].get_text())
leechers = int(cells[10].get_text())
title = entries[22].find('a')['title'].strip('History - ').replace('Blu-ray', 'bd50')
url = self.urls['home'] % entries[15].find('a')['href']
download_url = self.urls['home'] % entries[15].find('a')['href']
id = entries[23].find('div')['id']
seeders = int(entries[20].get_text())
leechers = int(entries[21].get_text())
except (AttributeError, TypeError):
continue
@ -268,6 +226,45 @@ class HDTorrentsProvider(generic.TorrentProvider):
items[mode].append(item)
#Now attempt to get any others
result_table = html.find('table', attrs={'class': 'mainblockcontenttt'})
if not result_table:
continue
entries = result_table.find_all('td', attrs={'align': 'center', 'class': 'listas'})
if not entries:
continue
for result in entries:
block2 = result.find_parent('tr').find_next_sibling('tr')
if not block2:
continue
cells = block2.find_all('td')
try:
title = cells[1].find('b').get_text().strip('\t ').replace('Blu-ray', 'bd50')
url = self.urls['home'] % cells[4].find('a')['href']
download_url = self.urls['home'] % cells[4].find('a')['href']
detail = cells[1].find('a')['href']
id = detail.replace('details.php?id=', '')
seeders = int(cells[9].get_text())
leechers = int(cells[10].get_text())
except (AttributeError, TypeError):
continue
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

View file

@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
from sickbeard.show_name_helpers import allPossibleShowNames
@ -167,51 +167,48 @@ class IPTorrentsProvider(generic.TorrentProvider):
continue
try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])
if not html:
logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
continue
if html.find(text='No Torrents Found!'):
logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
continue
torrent_table = html.find('table', attrs={'class': 'torrents'})
torrents = torrent_table.find_all('tr') if torrent_table else []
html.clear(True)
#Continue only if one Release is found
if len(torrents) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.WARNING)
continue
for result in torrents[1:]:
try:
torrent = result.find_all('td')[1].find('a')
torrent_name = torrent.string
torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
torrent_details_url = self.urls['base_url'] + torrent['href']
torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
## Not used, perhaps in the future ##
#torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
#torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
except (AttributeError, TypeError):
with BS4Parser(data, features=["html5lib", "permissive"]) as html:
if not html:
logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
continue
# Filter unseeded torrent and torrents with no name/url
if mode != 'RSS' and torrent_seeders == 0:
if html.find(text='No Torrents Found!'):
logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
continue
if not torrent_name or not torrent_download_url:
torrent_table = html.find('table', attrs={'class': 'torrents'})
torrents = torrent_table.find_all('tr') if torrent_table else []
#Continue only if one Release is found
if len(torrents) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.WARNING)
continue
item = torrent_name, torrent_download_url
logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
items[mode].append(item)
for result in torrents[1:]:
try:
torrent = result.find_all('td')[1].find('a')
torrent_name = torrent.string
torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
torrent_details_url = self.urls['base_url'] + torrent['href']
torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
## Not used, perhaps in the future ##
#torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
#torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
except (AttributeError, TypeError):
continue
# Filter unseeded torrent and torrents with no name/url
if mode != 'RSS' and torrent_seeders == 0:
continue
if not torrent_name or not torrent_download_url:
continue
item = torrent_name, torrent_download_url
logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

View file

@ -40,11 +40,9 @@ from sickbeard.show_name_helpers import allPossibleShowNames, sanitizeSceneName
from sickbeard.exceptions import ex
from sickbeard import encodingKludge as ek
from sickbeard import clients
from sickbeard import tv
from sickbeard.bs4_parser import BS4Parser
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from lib.unidecode import unidecode
@ -119,55 +117,52 @@ class KATProvider(generic.TorrentProvider):
return None
try:
soup = BeautifulSoup(data, features=["html5lib", "permissive"])
file_table = soup.find('table', attrs={'class': 'torrentFileList'})
with BS4Parser(data, features=["html5lib", "permissive"]) as soup:
file_table = soup.find('table', attrs={'class': 'torrentFileList'})
# cleanup memory
soup.clear(True)
if not file_table:
return None
if not file_table:
return None
files = [x.text for x in file_table.find_all('td', attrs={'class': 'torFileName'})]
videoFiles = filter(lambda x: x.rpartition(".")[2].lower() in mediaExtensions, files)
files = [x.text for x in file_table.find_all('td', attrs={'class': 'torFileName'})]
videoFiles = filter(lambda x: x.rpartition(".")[2].lower() in mediaExtensions, files)
#Filtering SingleEpisode/MultiSeason Torrent
if len(videoFiles) < ep_number or len(videoFiles) > float(ep_number * 1.1):
logger.log(u"Result " + title + " have " + str(
ep_number) + " episode and episodes retrived in torrent are " + str(len(videoFiles)), logger.DEBUG)
logger.log(
u"Result " + title + " Seem to be a Single Episode or MultiSeason torrent, skipping result...",
logger.DEBUG)
return None
#Filtering SingleEpisode/MultiSeason Torrent
if len(videoFiles) < ep_number or len(videoFiles) > float(ep_number * 1.1):
logger.log(u"Result " + title + " have " + str(
ep_number) + " episode and episodes retrived in torrent are " + str(len(videoFiles)), logger.DEBUG)
logger.log(
u"Result " + title + " Seem to be a Single Episode or MultiSeason torrent, skipping result...",
logger.DEBUG)
return None
if Quality.sceneQuality(title) != Quality.UNKNOWN:
return title
for fileName in videoFiles:
quality = Quality.sceneQuality(os.path.basename(fileName))
if quality != Quality.UNKNOWN: break
if fileName is not None and quality == Quality.UNKNOWN:
quality = Quality.assumeQuality(os.path.basename(fileName))
if quality == Quality.UNKNOWN:
logger.log(u"Unable to obtain a Season Quality for " + title, logger.DEBUG)
return None
try:
myParser = NameParser(showObj=self.show)
parse_result = myParser.parse(fileName)
except (InvalidNameException, InvalidShowException):
return None
logger.log(u"Season quality for " + title + " is " + Quality.qualityStrings[quality], logger.DEBUG)
if parse_result.series_name and parse_result.season_number:
title = parse_result.series_name + ' S%02d' % int(
parse_result.season_number) + ' ' + self._reverseQuality(quality)
if Quality.sceneQuality(title) != Quality.UNKNOWN:
return title
for fileName in videoFiles:
quality = Quality.sceneQuality(os.path.basename(fileName))
if quality != Quality.UNKNOWN: break
if fileName is not None and quality == Quality.UNKNOWN:
quality = Quality.assumeQuality(os.path.basename(fileName))
if quality == Quality.UNKNOWN:
logger.log(u"Unable to obtain a Season Quality for " + title, logger.DEBUG)
return None
try:
myParser = NameParser(showObj=self.show)
parse_result = myParser.parse(fileName)
except (InvalidNameException, InvalidShowException):
return None
logger.log(u"Season quality for " + title + " is " + Quality.qualityStrings[quality], logger.DEBUG)
if parse_result.series_name and parse_result.season_number:
title = parse_result.series_name + ' S%02d' % int(
parse_result.season_number) + ' ' + self._reverseQuality(quality)
return title
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
@ -230,6 +225,7 @@ class KATProvider(generic.TorrentProvider):
results = []
items = {'Season': [], 'Episode': [], 'RSS': []}
soup = None
for mode in search_params.keys():
for search_string in search_params[mode]:
@ -250,54 +246,51 @@ class KATProvider(generic.TorrentProvider):
continue
try:
soup = BeautifulSoup(html, features=["html5lib", "permissive"])
with BS4Parser(html, features=["html5lib", "permissive"]) as soup:
torrent_table = soup.find('table', attrs={'class': 'data'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
torrent_table = soup.find('table', attrs={'class': 'data'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
soup.clear(True)
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The data returned from " + self.name + " does not contain any torrents",
logger.WARNING)
continue
for tr in torrent_rows[1:]:
try:
link = urlparse.urljoin(self.url,
(tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
id = tr.get('id')[-7:]
title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
url = tr.find('a', 'imagnet')['href']
verified = True if tr.find('a', 'iverify') else False
trusted = True if tr.find('img', {'alt': 'verified'}) else False
seeders = int(tr.find_all('td')[-2].text)
leechers = int(tr.find_all('td')[-1].text)
except (AttributeError, TypeError):
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The data returned from " + self.name + " does not contain any torrents",
logger.WARNING)
continue
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
for tr in torrent_rows[1:]:
try:
link = urlparse.urljoin(self.url,
(tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
id = tr.get('id')[-7:]
title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
url = tr.find('a', 'imagnet')['href']
verified = True if tr.find('a', 'iverify') else False
trusted = True if tr.find('img', {'alt': 'verified'}) else False
seeders = int(tr.find_all('td')[-2].text)
leechers = int(tr.find_all('td')[-1].text)
except (AttributeError, TypeError):
continue
if self.confirmed and not verified:
logger.log(
u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it",
logger.DEBUG)
continue
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
#Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
if mode == 'Season' and search_mode == 'sponly':
ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
title = self._find_season_quality(title, link, ep_number)
if self.confirmed and not verified:
logger.log(
u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it",
logger.DEBUG)
continue
if not title or not url:
continue
#Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
if mode == 'Season' and search_mode == 'sponly':
ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
title = self._find_season_quality(title, link, ep_number)
item = title, url, id, seeders, leechers
if not title or not url:
continue
items[mode].append(item)
item = title, url, id, seeders, leechers
items[mode].append(item)
except Exception, e:
logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),

View file

@ -37,7 +37,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from sickbeard.helpers import sanitizeSceneName
@ -118,16 +118,16 @@ class NextGenProvider(generic.TorrentProvider):
self.session.headers.update(
{'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20130519 Firefox/24.0)'})
data = self.session.get(self.urls['login_page'], verify=False)
bs = BeautifulSoup(data.content.decode('iso-8859-1'))
csrfraw = bs.find('form', attrs={'id': 'login'})['action']
output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
with BS4Parser(data.content.decode('iso-8859-1')) as bs:
csrfraw = bs.find('form', attrs={'id': 'login'})['action']
output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
if self.loginSuccess(output):
self.last_login_check = now
self.login_opener = self.session
return True
if self.loginSuccess(output):
self.last_login_check = now
self.login_opener = self.session
return True
error = 'unknown'
error = 'unknown'
except:
error = traceback.format_exc()
self.login_opener = None
@ -204,59 +204,58 @@ class NextGenProvider(generic.TorrentProvider):
if data:
try:
html = BeautifulSoup(data.decode('iso-8859-1'), features=["html5lib", "permissive"])
resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})
with BS4Parser(data.decode('iso-8859-1'), features=["html5lib", "permissive"]) as html:
resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})
if not resultsTable:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
# Collecting entries
entries_std = html.find_all('div', attrs={'id': 'torrent-std'})
entries_sticky = html.find_all('div', attrs={'id': 'torrent-sticky'})
entries = entries_std + entries_sticky
#Xirg STANDARD TORRENTS
#Continue only if one Release is found
if len(entries) > 0:
for result in entries:
try:
torrentName = \
((result.find('div', attrs={'id': 'torrent-udgivelse2-users'})).find('a'))['title']
torrentId = (
((result.find('div', attrs={'id': 'torrent-download'})).find('a'))['href']).replace(
'download.php?id=', '')
torrent_name = str(torrentName)
torrent_download_url = (self.urls['download'] % torrentId).encode('utf8')
torrent_details_url = (self.urls['detail'] % torrentId).encode('utf8')
#torrent_seeders = int(result.find('div', attrs = {'id' : 'torrent-seeders'}).find('a')['class'][0])
## Not used, perhaps in the future ##
#torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
#torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
except (AttributeError, TypeError):
continue
# Filter unseeded torrent and torrents with no name/url
#if mode != 'RSS' and torrent_seeders == 0:
# continue
if not torrent_name or not torrent_download_url:
continue
item = torrent_name, torrent_download_url
logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")",
if not resultsTable:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
items[mode].append(item)
continue
else:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.WARNING)
continue
# Collecting entries
entries_std = html.find_all('div', attrs={'id': 'torrent-std'})
entries_sticky = html.find_all('div', attrs={'id': 'torrent-sticky'})
entries = entries_std + entries_sticky
#Xirg STANDARD TORRENTS
#Continue only if one Release is found
if len(entries) > 0:
for result in entries:
try:
torrentName = \
((result.find('div', attrs={'id': 'torrent-udgivelse2-users'})).find('a'))['title']
torrentId = (
((result.find('div', attrs={'id': 'torrent-download'})).find('a'))['href']).replace(
'download.php?id=', '')
torrent_name = str(torrentName)
torrent_download_url = (self.urls['download'] % torrentId).encode('utf8')
torrent_details_url = (self.urls['detail'] % torrentId).encode('utf8')
#torrent_seeders = int(result.find('div', attrs = {'id' : 'torrent-seeders'}).find('a')['class'][0])
## Not used, perhaps in the future ##
#torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
#torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
except (AttributeError, TypeError):
continue
# Filter unseeded torrent and torrents with no name/url
#if mode != 'RSS' and torrent_seeders == 0:
# continue
if not torrent_name or not torrent_download_url:
continue
item = torrent_name, torrent_download_url
logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")",
logger.DEBUG)
items[mode].append(item)
else:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.WARNING)
continue
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),

View file

@ -40,7 +40,7 @@ from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
@ -150,39 +150,36 @@ class PublicHDProvider(generic.TorrentProvider):
html = os.linesep.join([s for s in html.splitlines() if not optreg.search(s)])
try:
html = BeautifulSoup(html, features=["html5lib", "permissive"])
with BS4Parser(html, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'id': 'torrbg'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
torrent_table = html.find('table', attrs={'id': 'torrbg'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
html.clear(True)
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
for tr in torrent_rows[1:]:
try:
link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
url = tr.find(href=re.compile('magnet+'))['href']
seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
except (AttributeError, TypeError):
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
for tr in torrent_rows[1:]:
if not title or not url:
continue
try:
link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
url = tr.find(href=re.compile('magnet+'))['href']
seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
except (AttributeError, TypeError):
continue
item = title, url, link, seeders, leechers
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
items[mode].append(item)
if not title or not url:
continue
item = title, url, link, seeders, leechers
items[mode].append(item)
except Exception, e:
logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),

View file

@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
@ -196,62 +196,58 @@ class SCCProvider(generic.TorrentProvider):
try:
for dataItem in data:
html = BeautifulSoup(dataItem, features=["html5lib", "permissive"])
with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'id': 'torrents-table'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
torrent_table = html.find('table', attrs={'id': 'torrents-table'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
html.clear(True)
#Continue only if at least one Release is found
if len(torrent_rows) < 2:
if html.title:
source = self.name + " (" + html.title.string + ")"
else:
source = self.name
logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
continue
for result in torrent_table.find_all('tr')[1:]:
try:
link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
# Foreign section contain two links, the others one
if self._isSection('Foreign', dataItem):
url = all_urls[1]
#Continue only if at least one Release is found
if len(torrent_rows) < 2:
if html.title:
source = self.name + " (" + html.title.string + ")"
else:
url = all_urls[0]
title = link.string
if re.search('\.\.\.', title):
details_html = BeautifulSoup(self.getURL(self.url + "/" + link['href']))
title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
details_html.clear(True)
download_url = self.urls['download'] % url['href']
id = int(link['href'].replace('details?id=', ''))
seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
except (AttributeError, TypeError):
source = self.name
logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
continue
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
for result in torrent_table.find_all('tr')[1:]:
if not title or not download_url:
continue
try:
link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
# Foreign section contain two links, the others one
if self._isSection('Foreign', dataItem):
url = all_urls[1]
else:
url = all_urls[0]
item = title, download_url, id, seeders, leechers
title = link.string
if re.search('\.\.\.', title):
with BS4Parser(self.getURL(self.url + "/" + link['href'])) as details_html:
title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
if self._isSection('Non-Scene', dataItem):
logger.log(u"Found result: " + title + "(" + nonsceneSearchURL + ")", logger.DEBUG)
elif self._isSection('Foreign', dataItem):
logger.log(u"Found result: " + title + "(" + foreignSearchURL + ")", logger.DEBUG)
else:
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
download_url = self.urls['download'] % url['href']
id = int(link['href'].replace('details?id=', ''))
seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
except (AttributeError, TypeError):
continue
items[mode].append(item)
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
if self._isSection('Non-Scene', dataItem):
logger.log(u"Found result: " + title + "(" + nonsceneSearchURL + ")", logger.DEBUG)
elif self._isSection('Foreign', dataItem):
logger.log(u"Found result: " + title + "(" + foreignSearchURL + ")", logger.DEBUG)
else:
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

View file

@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
@ -168,51 +168,47 @@ class TorrentBytesProvider(generic.TorrentProvider):
continue
try:
html = BeautifulSoup(data)
with BS4Parser(data, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'border': '1'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
torrent_table = html.find('table', attrs={'border': '1'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
# cleanup memory
html.clear(True)
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
for result in torrent_rows[1:]:
cells = result.find_all('td')
link = cells[1].find('a', attrs={'class': 'index'})
full_id = link['href'].replace('details.php?id=', '')
torrent_id = full_id[:6]
try:
if link.has_key('title'):
title = cells[1].find('a', {'class': 'index'})['title']
else:
title = link.contents[0]
download_url = self.urls['download'] % (torrent_id, link.contents[0])
id = int(torrent_id)
seeders = int(cells[8].find('span').contents[0])
leechers = int(cells[9].find('span').contents[0])
except (AttributeError, TypeError):
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
for result in torrent_rows[1:]:
cells = result.find_all('td')
if not title or not download_url:
continue
link = cells[1].find('a', attrs={'class': 'index'})
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
full_id = link['href'].replace('details.php?id=', '')
torrent_id = full_id[:6]
items[mode].append(item)
try:
if link.has_key('title'):
title = cells[1].find('a', {'class': 'index'})['title']
else:
title = link.contents[0]
download_url = self.urls['download'] % (torrent_id, link.contents[0])
id = int(torrent_id)
seeders = int(cells[8].find('span').contents[0])
leechers = int(cells[9].find('span').contents[0])
except (AttributeError, TypeError):
continue
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)

View file

@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
@ -172,44 +172,40 @@ class TorrentLeechProvider(generic.TorrentProvider):
continue
try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])
with BS4Parser(data, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'id': 'torrenttable'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
torrent_table = html.find('table', attrs={'id': 'torrenttable'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []
# cleanup memory
html.clear(True)
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
for result in torrent_table.find_all('tr')[1:]:
try:
link = result.find('td', attrs={'class': 'name'}).find('a')
url = result.find('td', attrs={'class': 'quickdownload'}).find('a')
title = link.string
download_url = self.urls['download'] % url['href']
id = int(link['href'].replace('/torrent/', ''))
seeders = int(result.find('td', attrs={'class': 'seeders'}).string)
leechers = int(result.find('td', attrs={'class': 'leechers'}).string)
except (AttributeError, TypeError):
#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
continue
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
for result in torrent_table.find_all('tr')[1:]:
if not title or not download_url:
continue
try:
link = result.find('td', attrs={'class': 'name'}).find('a')
url = result.find('td', attrs={'class': 'quickdownload'}).find('a')
title = link.string
download_url = self.urls['download'] % url['href']
id = int(link['href'].replace('/torrent/', ''))
seeders = int(result.find('td', attrs={'class': 'seeders'}).string)
leechers = int(result.find('td', attrs={'class': 'leechers'}).string)
except (AttributeError, TypeError):
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
#Filter unseeded torrent
if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
continue
items[mode].append(item)
if not title or not download_url:
continue
item = title, download_url, id, seeders, leechers
logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
items[mode].append(item)
except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)