mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-11 13:43:37 +00:00
0d9fbc1ad7
This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to access and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also, this edition is based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy!
236 lines
8.5 KiB
Python
236 lines
8.5 KiB
Python
import warnings
|
|
|
|
# Emitted once when this module is imported: the BeautifulSoup 3 backed
# tree builder is deprecated.
warnings.warn(
    "BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib "
    "and support will be removed in the future",
    DeprecationWarning)
|
|
|
|
from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
|
|
|
|
import _base
|
|
from html5lib.constants import namespaces, DataLossWarning
|
|
|
|
class AttrList(object):
    """Dict-like view over the attributes of a wrapped element.

    The element's ``attrs`` pairs are copied into a dict when the view is
    created.  Reads are served from that dict; writes go to both the wrapped
    element and the dict so the view stays consistent with what it wrote.
    """

    def __init__(self, element):
        self.element = element
        # Snapshot of the (name, value) pairs as a dict for keyed lookups.
        self.attrs = dict(self.element.attrs)

    def __iter__(self):
        """Iterate over ``(name, value)`` pairs."""
        return iter(self.attrs.items())

    def __setitem__(self, name, value):
        """Set attribute ``name`` on the wrapped element and in this view."""
        self.element[name] = value
        # Keep the snapshot in step; otherwise a read through this same
        # view would not see the attribute that was just written.
        self.attrs[name] = value

    def items(self):
        """Return the ``(name, value)`` pairs."""
        return self.attrs.items()

    def keys(self):
        """Return the attribute names."""
        return self.attrs.keys()

    def __getitem__(self, name):
        """Return the value of attribute ``name``; raises KeyError if absent."""
        return self.attrs[name]

    def __contains__(self, name):
        """Return True when attribute ``name`` is present."""
        return name in self.attrs

    def __eq__(self, other):
        """Compare against any object offering ``keys()``, ``in`` and ``[]``.

        Two attribute collections are equal when they hold the same names
        mapped to equal values.
        """
        if len(self.keys()) != len(other.keys()):
            return False
        for name in self.keys():
            if name not in other:
                return False
            if self[name] != other[name]:
                return False
        return True

    def __ne__(self, other):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        return not self == other
|
|
|
|
class Element(_base.Node):
    """Tree-builder node wrapping a BeautifulSoup object.

    ``element`` is the wrapped soup object (a ``Tag``, or the soup document
    itself), ``soup`` is the owning BeautifulSoup document and ``namespace``
    is the namespace recorded for the element (may be None).
    """

    def __init__(self, element, soup, namespace):
        _base.Node.__init__(self, element.name)
        self.element = element
        self.soup = soup
        self.namespace = namespace

    def _nodeIndex(self, node, refNode):
        """Return the index of ``refNode.element`` in this element's contents.

        The lookup is by identity rather than equality.  ``node`` is not
        used; it is kept so existing callers keep working.  Returns None
        when ``refNode.element`` is not a direct child.
        """
        for index, child in enumerate(self.element.contents):
            if child is refNode.element:
                return index
        return None

    def _mergeTextAt(self, index, text):
        """Replace the text child at ``index`` with that text plus ``text``."""
        contents = self.element.contents
        oldText = contents[index]
        # TODO: This has O(n^2) performance, for input like
        # "a</a>a</a>a</a>..."
        merged = NavigableString(oldText + text)

        # Remove the old text node by position.  (Can't simply use
        # .extract() by itself, because it fails if an equal text node
        # exists within the parent node.)
        del contents[index]
        oldText.parent = None
        oldText.extract()

        self.element.insert(index, merged)

    def appendChild(self, node):
        """Append ``node`` as the last child.

        Plain text appended after an existing plain text child is
        concatenated onto that child instead of being added as a new node.
        """
        contents = self.element.contents
        if (node.element.__class__ == NavigableString and contents
                and contents[-1].__class__ == NavigableString):
            self._mergeTextAt(len(contents) - 1, node.element)
        else:
            self.element.insert(len(contents), node.element)
            node.parent = self

    def getAttributes(self):
        """Return a fresh ``AttrList`` view of the wrapped element."""
        return AttrList(self.element)

    def setAttributes(self, attributes):
        """Copy every item of ``attributes`` onto the wrapped element.

        Falsy values (None, an empty mapping) are ignored without touching
        ``self.element``.
        NOTE(review): presumably this guard matters because the base class
        initialiser may assign ``attributes`` before ``self.element`` is
        set - confirm against ``_base.Node``.
        """
        if attributes:
            for name, value in attributes.items():
                self.element[name] = value

    attributes = property(getAttributes, setAttributes)

    def insertText(self, data, insertBefore=None):
        """Insert ``data`` as text, before ``insertBefore`` or at the end."""
        text = TextNode(NavigableString(data), self.soup)
        if insertBefore:
            self.insertBefore(text, insertBefore)
        else:
            self.appendChild(text)

    def insertBefore(self, node, refNode):
        """Insert ``node`` immediately before the child ``refNode``.

        Plain text inserted directly after an existing plain text child is
        concatenated onto that child.

        Raises ValueError when ``refNode`` is not a child of this element.
        """
        index = self._nodeIndex(node, refNode)
        if index is None:
            raise ValueError("refNode is not a child of this node")
        # ``index > 0`` matters: at index 0 there is no preceding sibling,
        # and ``contents[index - 1]`` would wrap around to the last child.
        if (index > 0 and node.element.__class__ == NavigableString
                and self.element.contents[index - 1].__class__
                == NavigableString):
            # (See comments in _mergeTextAt)
            self._mergeTextAt(index - 1, node.element)
        else:
            self.element.insert(index, node.element)
            node.parent = self

    def removeChild(self, node):
        """Detach ``node`` from its parent's contents.

        Raises ValueError when ``node`` is not found among this element's
        children.
        """
        index = self._nodeIndex(node.parent, node)
        if index is None:
            raise ValueError("node is not a child of this node")
        # Delete by position first, then let extract() fix up the links.
        del node.parent.element.contents[index]
        node.element.parent = None
        node.element.extract()
        node.parent = None

    def reparentChildren(self, newParent):
        """Move every child of this element, in order, to ``newParent``.

        Tag children are wrapped in ``Element`` (with the HTML namespace);
        anything else is wrapped in ``TextNode``.
        """
        while self.element.contents:
            child = self.element.contents[0]
            child.extract()
            if isinstance(child, Tag):
                newParent.appendChild(
                    Element(child, self.soup, namespaces["html"]))
            else:
                newParent.appendChild(TextNode(child, self.soup))

    def cloneNode(self):
        """Return a childless copy with the same name, namespace and attributes."""
        node = Element(Tag(self.soup, self.element.name), self.soup,
                       self.namespace)
        for key, value in self.attributes:
            node.attributes[key] = value
        return node

    def hasContent(self):
        """Return the wrapped element's contents list (truthy when non-empty)."""
        return self.element.contents

    def getNameTuple(self):
        """Return ``(namespace, name)``, defaulting to the HTML namespace."""
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name

    nameTuple = property(getNameTuple)
|
|
|
|
class TextNode(Element):
    """Node wrapper for non-tag soup objects such as text and comments.

    Unlike ``Element``, the node is created with a name of None and no
    ``namespace`` attribute is assigned.
    """

    def __init__(self, element, soup):
        # Deliberately bypasses Element.__init__: there is no tag name.
        _base.Node.__init__(self, None)
        self.soup = soup
        self.element = element

    def cloneNode(self):
        """Cloning is not supported for these nodes."""
        raise NotImplementedError
|
|
|
|
class TreeBuilder(_base.TreeBuilder):
    """Tree builder whose output document is a BeautifulSoup object."""

    def __init__(self, namespaceHTMLElements):
        """Warn if namespacing was requested, then defer to the base class."""
        if namespaceHTMLElements:
            warnings.warn(
                "BeautifulSoup cannot represent elements in any namespace",
                DataLossWarning)
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def documentClass(self):
        """Create a new empty soup and return it wrapped as the document node."""
        soup = BeautifulSoup("")
        self.soup = soup
        return Element(soup, soup, None)

    def insertDoctype(self, token):
        """Insert a DOCTYPE declaration at the very start of the soup.

        ``token`` must provide the keys "name", "publicId" and "systemId".
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if publicId:
            text = 'DOCTYPE %s PUBLIC "%s" "%s"' % (
                name, publicId, systemId or "")
        elif systemId:
            text = 'DOCTYPE %s SYSTEM "%s"' % (name, systemId)
        else:
            text = "DOCTYPE %s" % name
        self.soup.insert(0, Declaration(text))

    def elementClass(self, name, namespace):
        """Return a new ``Element`` wrapping a fresh ``Tag`` called ``name``.

        A DataLossWarning is issued whenever ``namespace`` is not None.
        """
        if namespace is not None:
            warnings.warn(
                "BeautifulSoup cannot represent elements in any namespace",
                DataLossWarning)
        tag = Tag(self.soup, name)
        return Element(tag, self.soup, namespace)

    def commentClass(self, data):
        """Return a ``TextNode`` wrapping a soup ``Comment`` holding ``data``."""
        comment = Comment(data)
        return TextNode(comment, self.soup)

    def fragmentClass(self):
        """Create a new empty soup named "[document_fragment]" and wrap it."""
        soup = BeautifulSoup("")
        soup.name = "[document_fragment]"
        self.soup = soup
        return Element(soup, soup, None)

    def appendChild(self, node):
        """Append ``node``'s soup object at the end of the soup's contents."""
        end = len(self.soup.contents)
        self.soup.insert(end, node.element)

    def testSerializer(self, element):
        """Delegate to the module-level ``testSerializer`` function."""
        return testSerializer(element)

    def getDocument(self):
        """Return the underlying soup."""
        return self.soup

    def getFragment(self):
        """Return the soup object wrapped by the base class's fragment node."""
        fragment = _base.TreeBuilder.getFragment(self)
        return fragment.element
|
|
|
|
def testSerializer(element):
    """Render a soup tree as indented, ``|``-prefixed lines of text.

    Each node contributes one line (attributes get one extra line each,
    sorted and indented two further spaces); children are indented two
    spaces deeper than their parent.  The document root is written as
    "#document" or "#document-fragment".  Returns the lines joined with
    newlines.
    """
    import re

    doctype_matcher = re.compile(
        r'DOCTYPE\s+(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?')
    lines = []

    def walk(node, depth=0):
        pad = ' ' * depth

        # Order matters: the most specific node types are tested first.
        if isinstance(node, Declaration):
            found = doctype_matcher.match(node.string)
            assert found is not None, "DOCTYPE did not match expected format"
            name = found.group('name')
            publicId = found.group('publicId')
            if publicId is None:
                systemId = found.group('systemId2')
            else:
                systemId = found.group('systemId1') or ""

            if publicId is None and systemId is None:
                lines.append("|%s<!DOCTYPE %s>" % (pad, name))
            else:
                lines.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                             (pad, name, publicId or "", systemId or ""))

        elif isinstance(node, BeautifulSoup):
            if node.name == "[document_fragment]":
                lines.append("#document-fragment")
            else:
                lines.append("#document")

        elif isinstance(node, Comment):
            lines.append("|%s<!-- %s -->" % (pad, node.string))

        elif isinstance(node, unicode):
            lines.append("|%s\"%s\"" % (pad, node))

        else:
            lines.append("|%s<%s>" % (pad, node.name))
            if node.attrs:
                attr_pad = ' ' * (depth + 2)
                for attr_name, attr_value in sorted(node.attrs):
                    lines.append('|%s%s="%s"' %
                                 (attr_pad, attr_name, attr_value))

        # Only nodes exposing ``contents`` are descended into.
        if hasattr(node, "contents"):
            for child in node.contents:
                walk(child, depth + 2)

    walk(element, 0)

    return "\n".join(lines)
|