mirror of https://github.com/SickGear/SickGear.git (synced 2024-12-01 00:43:37 +00:00)
Created a context manager wrapper for BeautifulSoup4 so that tags/content can be cleaned up and cleared on exit via with statements.
Fixed issues with torrent providers returning no results.
parent 77feb5a74c
commit a317ff61c2

12 changed files with 445 additions and 469 deletions
sickbeard/bs4_parser.py | 13 (new file)
@@ -0,0 +1,13 @@
import sickbeard
from bs4 import BeautifulSoup

class BS4Parser:
    def __init__(self, *args, **kwargs):
        self.soup = BeautifulSoup(*args, **kwargs)

    def __enter__(self):
        return self.soup

    def __exit__(self, exc_ty, exc_val, tb):
        self.soup.clear(True)
        self.soup = None
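For reference, a minimal usage sketch of the wrapper defined above; the markup string and the table selector are placeholders, but the pattern matches the provider changes below:

    from sickbeard.bs4_parser import BS4Parser

    data = "<table class='koptekst'><tr><td>example</td></tr></table>"  # placeholder markup

    with BS4Parser(data, "html.parser") as html:
        torrent_table = html.find('table', attrs={'class': 'koptekst'})
        torrent_rows = torrent_table.find_all('tr') if torrent_table else []
    # on exit, BS4Parser.__exit__ calls soup.clear(True) and drops the reference,
    # so the providers no longer need an explicit html.clear(True)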
@@ -31,7 +31,6 @@ import httplib
import urlparse
import uuid
import base64
import string
import zipfile

from lib import requests

@@ -1241,7 +1240,7 @@ def mapIndexersToShow(showObj):
return mapped


def touchFile(self, fname, atime=None):
def touchFile(fname, atime=None):
if None != atime:
try:
with file(fname, 'a'):
@@ -22,7 +22,7 @@ import datetime
import urlparse
import sickbeard
import generic
from sickbeard.common import Quality, cpu_presets
from sickbeard.common import Quality
from sickbeard import logger
from sickbeard import tvcache
from sickbeard import db

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName

@@ -168,13 +168,10 @@ class BitSoupProvider(generic.TorrentProvider):
continue

try:
html = BeautifulSoup(data, "html.parser")

with BS4Parser(data, "html.parser") as html:
torrent_table = html.find('table', attrs={'class': 'koptekst'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []

html.clear(True)

#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName

@@ -175,7 +175,6 @@ class FreshOnTVProvider(generic.TorrentProvider):
if not self._doLogin():
return []


for mode in search_params.keys():
for search_string in search_params[mode]:

@@ -193,13 +192,10 @@ class FreshOnTVProvider(generic.TorrentProvider):
continue

try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])

with BS4Parser(data, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'class': 'frame'})
torrent_rows = torrent_table.findChildren('tr') if torrent_table else []

html.clear(True)

#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName

@@ -196,13 +196,10 @@ class HDTorrentsProvider(generic.TorrentProvider):
data = split_data[2]

try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])

with BS4Parser(data, features=["html5lib", "permissive"]) as html:
#Get first entry in table
entries = html.find_all('td', attrs={'align': 'center'})

html.clear(True)

if not entries:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
logger.DEBUG)
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName
from sickbeard.show_name_helpers import allPossibleShowNames

@@ -167,8 +167,7 @@ class IPTorrentsProvider(generic.TorrentProvider):
continue

try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])

with BS4Parser(data, features=["html5lib", "permissive"]) as html:
if not html:
logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
continue

@@ -180,8 +179,6 @@ class IPTorrentsProvider(generic.TorrentProvider):
torrent_table = html.find('table', attrs={'class': 'torrents'})
torrents = torrent_table.find_all('tr') if torrent_table else []

html.clear(True)

#Continue only if one Release is found
if len(torrents) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
@@ -40,11 +40,9 @@ from sickbeard.show_name_helpers import allPossibleShowNames, sanitizeSceneName
from sickbeard.exceptions import ex
from sickbeard import encodingKludge as ek
from sickbeard import clients
from sickbeard import tv

from sickbeard.bs4_parser import BS4Parser
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from lib.unidecode import unidecode


@@ -119,12 +117,9 @@ class KATProvider(generic.TorrentProvider):
return None

try:
soup = BeautifulSoup(data, features=["html5lib", "permissive"])
with BS4Parser(data, features=["html5lib", "permissive"]) as soup:
file_table = soup.find('table', attrs={'class': 'torrentFileList'})

# cleanup memory
soup.clear(True)

if not file_table:
return None

@@ -230,6 +225,7 @@ class KATProvider(generic.TorrentProvider):
results = []
items = {'Season': [], 'Episode': [], 'RSS': []}

soup = None
for mode in search_params.keys():
for search_string in search_params[mode]:

@@ -250,13 +246,10 @@ class KATProvider(generic.TorrentProvider):
continue

try:
soup = BeautifulSoup(html, features=["html5lib", "permissive"])

with BS4Parser(html, features=["html5lib", "permissive"]) as soup:
torrent_table = soup.find('table', attrs={'class': 'data'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []

soup.clear(True)

#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The data returned from " + self.name + " does not contain any torrents",
@@ -37,7 +37,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from sickbeard.helpers import sanitizeSceneName


@@ -118,7 +118,7 @@ class NextGenProvider(generic.TorrentProvider):
self.session.headers.update(
{'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20130519 Firefox/24.0)'})
data = self.session.get(self.urls['login_page'], verify=False)
bs = BeautifulSoup(data.content.decode('iso-8859-1'))
with BS4Parser(data.content.decode('iso-8859-1')) as bs:
csrfraw = bs.find('form', attrs={'id': 'login'})['action']
output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)

@@ -204,7 +204,7 @@ class NextGenProvider(generic.TorrentProvider):
if data:

try:
html = BeautifulSoup(data.decode('iso-8859-1'), features=["html5lib", "permissive"])
with BS4Parser(data.decode('iso-8859-1'), features=["html5lib", "permissive"]) as html:
resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})

if not resultsTable:

@@ -257,7 +257,6 @@ class NextGenProvider(generic.TorrentProvider):
logger.WARNING)
continue


except Exception, e:
logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),
logger.ERROR)
@@ -40,7 +40,7 @@ from sickbeard import clients

from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode


@@ -150,13 +150,10 @@ class PublicHDProvider(generic.TorrentProvider):
html = os.linesep.join([s for s in html.splitlines() if not optreg.search(s)])

try:
html = BeautifulSoup(html, features=["html5lib", "permissive"])

with BS4Parser(html, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'id': 'torrbg'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []

html.clear(True)

#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName

@@ -196,13 +196,10 @@ class SCCProvider(generic.TorrentProvider):

try:
for dataItem in data:
html = BeautifulSoup(dataItem, features=["html5lib", "permissive"])

with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'id': 'torrents-table'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []

html.clear(True)

#Continue only if at least one Release is found
if len(torrent_rows) < 2:
if html.title:

@@ -225,9 +222,8 @@ class SCCProvider(generic.TorrentProvider):

title = link.string
if re.search('\.\.\.', title):
details_html = BeautifulSoup(self.getURL(self.url + "/" + link['href']))
with BS4Parser(self.getURL(self.url + "/" + link['href'])) as details_html:
title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
details_html.clear(True)

download_url = self.urls['download'] % url['href']
id = int(link['href'].replace('details?id=', ''))
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName

@@ -168,14 +168,10 @@ class TorrentBytesProvider(generic.TorrentProvider):
continue

try:
html = BeautifulSoup(data)

with BS4Parser(data, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'border': '1'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []

# cleanup memory
html.clear(True)

#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
from sickbeard import clients
from lib import requests
from lib.requests import exceptions
from bs4 import BeautifulSoup
from sickbeard.bs4_parser import BS4Parser
from lib.unidecode import unidecode
from sickbeard.helpers import sanitizeSceneName

@@ -172,14 +172,10 @@ class TorrentLeechProvider(generic.TorrentProvider):
continue

try:
html = BeautifulSoup(data, features=["html5lib", "permissive"])

with BS4Parser(data, features=["html5lib", "permissive"]) as html:
torrent_table = html.find('table', attrs={'id': 'torrenttable'})
torrent_rows = torrent_table.find_all('tr') if torrent_table else []

# cleanup memory
html.clear(True)

#Continue only if one Release is found
if len(torrent_rows) < 2:
logger.log(u"The Data returned from " + self.name + " do not contains any torrent",