mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-18 08:43:37 +00:00
Merge branch 'feature/UpdateHtml5lib' into dev
This commit is contained in:
commit
8239168a5a
11 changed files with 49 additions and 25 deletions
|
@ -1,5 +1,6 @@
|
||||||
### 3.28.0 (2023-xx-xx xx:xx:00 UTC)
|
### 3.28.0 (2023-xx-xx xx:xx:00 UTC)
|
||||||
|
|
||||||
|
* Update html5lib 1.1 (f87487a) to 1.2-dev (3e500bb)
|
||||||
* Update package resource API 63.2.0 (3ae44cd) to 67.3.2 (b9bf2ec)
|
* Update package resource API 63.2.0 (3ae44cd) to 67.3.2 (b9bf2ec)
|
||||||
* Change remove calls to legacy py2 fix encoding function
|
* Change remove calls to legacy py2 fix encoding function
|
||||||
* Change requirements for pure py3
|
* Change requirements for pure py3
|
||||||
|
|
|
@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||||
|
|
||||||
# this has to be at the top level, see how setup.py parses this
|
# this has to be at the top level, see how setup.py parses this
|
||||||
#: Distribution version number.
|
#: Distribution version number.
|
||||||
__version__ = "1.1"
|
__version__ = "1.2-dev"
|
||||||
|
|
|
@ -104,18 +104,15 @@ def charStringToList(chars):
|
||||||
charRanges = [item.strip() for item in chars.split(" | ")]
|
charRanges = [item.strip() for item in chars.split(" | ")]
|
||||||
rv = []
|
rv = []
|
||||||
for item in charRanges:
|
for item in charRanges:
|
||||||
foundMatch = False
|
|
||||||
for regexp in (reChar, reCharRange):
|
for regexp in (reChar, reCharRange):
|
||||||
match = regexp.match(item)
|
match = regexp.match(item)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
rv.append([hexToInt(item) for item in match.groups()])
|
rv.append([hexToInt(item) for item in match.groups()])
|
||||||
if len(rv[-1]) == 1:
|
if len(rv[-1]) == 1:
|
||||||
rv[-1] = rv[-1] * 2
|
rv[-1] = rv[-1] * 2
|
||||||
foundMatch = True
|
|
||||||
break
|
break
|
||||||
if not foundMatch:
|
else:
|
||||||
assert len(item) == 1
|
assert len(item) == 1
|
||||||
|
|
||||||
rv.append([ord(item)] * 2)
|
rv.append([ord(item)] * 2)
|
||||||
rv = normaliseCharList(rv)
|
rv = normaliseCharList(rv)
|
||||||
return rv
|
return rv
|
||||||
|
|
|
@ -324,7 +324,7 @@ class HTMLUnicodeInputStream(object):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if __debug__:
|
if __debug__:
|
||||||
for c in characters:
|
for c in characters:
|
||||||
assert(ord(c) < 128)
|
assert ord(c) < 128
|
||||||
regex = "".join(["\\x%02x" % ord(c) for c in characters])
|
regex = "".join(["\\x%02x" % ord(c) for c in characters])
|
||||||
if not opposite:
|
if not opposite:
|
||||||
regex = "^%s" % regex
|
regex = "^%s" % regex
|
||||||
|
|
|
@ -557,23 +557,36 @@ headingElements = (
|
||||||
)
|
)
|
||||||
|
|
||||||
voidElements = frozenset([
|
voidElements = frozenset([
|
||||||
|
"area",
|
||||||
"base",
|
"base",
|
||||||
"command",
|
"br",
|
||||||
"event-source",
|
"col",
|
||||||
|
"command", # removed ^1
|
||||||
|
"embed",
|
||||||
|
"event-source", # renamed and later removed ^2
|
||||||
|
"hr",
|
||||||
|
"img",
|
||||||
|
"input",
|
||||||
"link",
|
"link",
|
||||||
"meta",
|
"meta",
|
||||||
"hr",
|
"param", # deprecated ^3
|
||||||
"br",
|
|
||||||
"img",
|
|
||||||
"embed",
|
|
||||||
"param",
|
|
||||||
"area",
|
|
||||||
"col",
|
|
||||||
"input",
|
|
||||||
"source",
|
"source",
|
||||||
"track"
|
"track",
|
||||||
|
"wbr",
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# Removals and deprecations in the HTML 5 spec:
|
||||||
|
# ^1: command
|
||||||
|
# http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2012-December/038472.html
|
||||||
|
# https://github.com/whatwg/html/commit/9e2e25f4ae90969a7c64e0763c98548a35b50af8
|
||||||
|
# ^2: event-source
|
||||||
|
# renamed to eventsource in 7/2008:
|
||||||
|
# https://github.com/whatwg/html/commit/d157945d0285b4463a04b57318da0c4b300a99e7
|
||||||
|
# removed entirely in 2/2009:
|
||||||
|
# https://github.com/whatwg/html/commit/43cbdbfbb7eb74b0d65e0f4caab2020c0b2a16ff
|
||||||
|
# ^3: param
|
||||||
|
# https://developer.mozilla.org/en-US/docs/Web/HTML/Element/param
|
||||||
|
|
||||||
cdataElements = frozenset(['title', 'textarea'])
|
cdataElements = frozenset(['title', 'textarea'])
|
||||||
|
|
||||||
rcdataElements = frozenset([
|
rcdataElements = frozenset([
|
||||||
|
@ -604,6 +617,7 @@ booleanAttributes = {
|
||||||
"button": frozenset(["disabled", "autofocus"]),
|
"button": frozenset(["disabled", "autofocus"]),
|
||||||
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
|
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
|
||||||
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
|
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
|
||||||
|
"ol": frozenset(["reversed"]),
|
||||||
"output": frozenset(["disabled", "readonly"]),
|
"output": frozenset(["disabled", "readonly"]),
|
||||||
"iframe": frozenset(["seamless"]),
|
"iframe": frozenset(["seamless"]),
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,6 +113,7 @@ allowed_elements = frozenset((
|
||||||
(namespaces['html'], 'strike'),
|
(namespaces['html'], 'strike'),
|
||||||
(namespaces['html'], 'strong'),
|
(namespaces['html'], 'strong'),
|
||||||
(namespaces['html'], 'sub'),
|
(namespaces['html'], 'sub'),
|
||||||
|
(namespaces['html'], 'summary'),
|
||||||
(namespaces['html'], 'sup'),
|
(namespaces['html'], 'sup'),
|
||||||
(namespaces['html'], 'table'),
|
(namespaces['html'], 'table'),
|
||||||
(namespaces['html'], 'tbody'),
|
(namespaces['html'], 'tbody'),
|
||||||
|
@ -128,6 +129,7 @@ allowed_elements = frozenset((
|
||||||
(namespaces['html'], 'ul'),
|
(namespaces['html'], 'ul'),
|
||||||
(namespaces['html'], 'var'),
|
(namespaces['html'], 'var'),
|
||||||
(namespaces['html'], 'video'),
|
(namespaces['html'], 'video'),
|
||||||
|
(namespaces['html'], 'wbr'),
|
||||||
(namespaces['mathml'], 'maction'),
|
(namespaces['mathml'], 'maction'),
|
||||||
(namespaces['mathml'], 'math'),
|
(namespaces['mathml'], 'math'),
|
||||||
(namespaces['mathml'], 'merror'),
|
(namespaces['mathml'], 'merror'),
|
||||||
|
@ -363,6 +365,7 @@ allowed_attributes = frozenset((
|
||||||
(None, 'maxsize'),
|
(None, 'maxsize'),
|
||||||
(None, 'minsize'),
|
(None, 'minsize'),
|
||||||
(None, 'other'),
|
(None, 'other'),
|
||||||
|
(None, 'reversed'),
|
||||||
(None, 'rowalign'),
|
(None, 'rowalign'),
|
||||||
(None, 'rowalign'),
|
(None, 'rowalign'),
|
||||||
(None, 'rowalign'),
|
(None, 'rowalign'),
|
||||||
|
@ -373,6 +376,7 @@ allowed_attributes = frozenset((
|
||||||
(None, 'scriptlevel'),
|
(None, 'scriptlevel'),
|
||||||
(None, 'selection'),
|
(None, 'selection'),
|
||||||
(None, 'separator'),
|
(None, 'separator'),
|
||||||
|
(None, 'start'),
|
||||||
(None, 'stretchy'),
|
(None, 'stretchy'),
|
||||||
(None, 'width'),
|
(None, 'width'),
|
||||||
(None, 'width'),
|
(None, 'width'),
|
||||||
|
@ -594,6 +598,10 @@ allowed_css_properties = frozenset((
|
||||||
'height',
|
'height',
|
||||||
'letter-spacing',
|
'letter-spacing',
|
||||||
'line-height',
|
'line-height',
|
||||||
|
'max-height',
|
||||||
|
'min-height',
|
||||||
|
'max-width',
|
||||||
|
'min-width',
|
||||||
'overflow',
|
'overflow',
|
||||||
'pause',
|
'pause',
|
||||||
'pause-after',
|
'pause-after',
|
||||||
|
|
|
@ -115,6 +115,9 @@ class HTMLParser(object):
|
||||||
|
|
||||||
if tree is None:
|
if tree is None:
|
||||||
tree = treebuilders.getTreeBuilder("etree")
|
tree = treebuilders.getTreeBuilder("etree")
|
||||||
|
elif isinstance(tree, str):
|
||||||
|
tree = treebuilders.getTreeBuilder(tree)
|
||||||
|
|
||||||
self.tree = tree(namespaceHTMLElements)
|
self.tree = tree(namespaceHTMLElements)
|
||||||
self.errors = []
|
self.errors = []
|
||||||
|
|
||||||
|
@ -1002,8 +1005,8 @@ def getPhases(debug):
|
||||||
self.tree.insertText(token["data"])
|
self.tree.insertText(token["data"])
|
||||||
# This must be bad for performance
|
# This must be bad for performance
|
||||||
if (self.parser.framesetOK and
|
if (self.parser.framesetOK and
|
||||||
any([char not in spaceCharacters
|
any(char not in spaceCharacters
|
||||||
for char in token["data"]])):
|
for char in token["data"])):
|
||||||
self.parser.framesetOK = False
|
self.parser.framesetOK = False
|
||||||
|
|
||||||
def processSpaceCharactersNonPre(self, token):
|
def processSpaceCharactersNonPre(self, token):
|
||||||
|
@ -1850,7 +1853,7 @@ def getPhases(debug):
|
||||||
|
|
||||||
def flushCharacters(self):
|
def flushCharacters(self):
|
||||||
data = "".join([item["data"] for item in self.characterTokens])
|
data = "".join([item["data"] for item in self.characterTokens])
|
||||||
if any([item not in spaceCharacters for item in data]):
|
if any(item not in spaceCharacters for item in data):
|
||||||
token = {"type": tokenTypes["Characters"], "data": data}
|
token = {"type": tokenTypes["Characters"], "data": data}
|
||||||
self.parser.phases["inTable"].insertText(token)
|
self.parser.phases["inTable"].insertText(token)
|
||||||
elif data:
|
elif data:
|
||||||
|
|
|
@ -222,14 +222,14 @@ class HTMLSerializer(object):
|
||||||
self.strict = False
|
self.strict = False
|
||||||
|
|
||||||
def encode(self, string):
|
def encode(self, string):
|
||||||
assert(isinstance(string, text_type))
|
assert isinstance(string, text_type)
|
||||||
if self.encoding:
|
if self.encoding:
|
||||||
return string.encode(self.encoding, "htmlentityreplace")
|
return string.encode(self.encoding, "htmlentityreplace")
|
||||||
else:
|
else:
|
||||||
return string
|
return string
|
||||||
|
|
||||||
def encodeStrict(self, string):
|
def encodeStrict(self, string):
|
||||||
assert(isinstance(string, text_type))
|
assert isinstance(string, text_type)
|
||||||
if self.encoding:
|
if self.encoding:
|
||||||
return string.encode(self.encoding, "strict")
|
return string.encode(self.encoding, "strict")
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -121,6 +121,7 @@ class Node(object):
|
||||||
|
|
||||||
class ActiveFormattingElements(list):
|
class ActiveFormattingElements(list):
|
||||||
def append(self, node):
|
def append(self, node):
|
||||||
|
"""Append node to the end of the list."""
|
||||||
equalCount = 0
|
equalCount = 0
|
||||||
if node != Marker:
|
if node != Marker:
|
||||||
for element in self[::-1]:
|
for element in self[::-1]:
|
||||||
|
|
|
@ -108,7 +108,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
||||||
node.parent = None
|
node.parent = None
|
||||||
|
|
||||||
def insertText(self, data, insertBefore=None):
|
def insertText(self, data, insertBefore=None):
|
||||||
if not(len(self._element)):
|
if not len(self._element):
|
||||||
if not self._element.text:
|
if not self._element.text:
|
||||||
self._element.text = ""
|
self._element.text = ""
|
||||||
self._element.text += data
|
self._element.text += data
|
||||||
|
@ -201,7 +201,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
||||||
rv = []
|
rv = []
|
||||||
|
|
||||||
def serializeElement(element, indent=0):
|
def serializeElement(element, indent=0):
|
||||||
if not(hasattr(element, "tag")):
|
if not hasattr(element, "tag"):
|
||||||
element = element.getroot()
|
element = element.getroot()
|
||||||
if element.tag == "<!DOCTYPE>":
|
if element.tag == "<!DOCTYPE>":
|
||||||
if element.get("publicId") or element.get("systemId"):
|
if element.get("publicId") or element.get("systemId"):
|
||||||
|
|
|
@ -37,7 +37,7 @@ def getETreeBuilder(ElementTreeImplementation):
|
||||||
else:
|
else:
|
||||||
node = elt
|
node = elt
|
||||||
|
|
||||||
if not(hasattr(node, "tag")):
|
if not hasattr(node, "tag"):
|
||||||
node = node.getroot()
|
node = node.getroot()
|
||||||
|
|
||||||
if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
|
if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
|
||||||
|
|
Loading…
Reference in a new issue