mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-18 08:43:37 +00:00
Merge branch 'feature/UpdateHtml5lib' into dev
This commit is contained in:
commit
8239168a5a
11 changed files with 49 additions and 25 deletions
|
@ -1,5 +1,6 @@
|
|||
### 3.28.0 (2023-xx-xx xx:xx:00 UTC)
|
||||
|
||||
* Update html5lib 1.1 (f87487a) to 1.2-dev (3e500bb)
|
||||
* Update package resource API 63.2.0 (3ae44cd) to 67.3.2 (b9bf2ec)
|
||||
* Change remove calls to legacy py2 fix encoding function
|
||||
* Change requirements for pure py3
|
||||
|
|
|
@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
|||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.1"
|
||||
__version__ = "1.2-dev"
|
||||
|
|
|
@ -104,18 +104,15 @@ def charStringToList(chars):
|
|||
charRanges = [item.strip() for item in chars.split(" | ")]
|
||||
rv = []
|
||||
for item in charRanges:
|
||||
foundMatch = False
|
||||
for regexp in (reChar, reCharRange):
|
||||
match = regexp.match(item)
|
||||
if match is not None:
|
||||
rv.append([hexToInt(item) for item in match.groups()])
|
||||
if len(rv[-1]) == 1:
|
||||
rv[-1] = rv[-1] * 2
|
||||
foundMatch = True
|
||||
break
|
||||
if not foundMatch:
|
||||
else:
|
||||
assert len(item) == 1
|
||||
|
||||
rv.append([ord(item)] * 2)
|
||||
rv = normaliseCharList(rv)
|
||||
return rv
|
||||
|
|
|
@ -324,7 +324,7 @@ class HTMLUnicodeInputStream(object):
|
|||
except KeyError:
|
||||
if __debug__:
|
||||
for c in characters:
|
||||
assert(ord(c) < 128)
|
||||
assert ord(c) < 128
|
||||
regex = "".join(["\\x%02x" % ord(c) for c in characters])
|
||||
if not opposite:
|
||||
regex = "^%s" % regex
|
||||
|
|
|
@ -557,23 +557,36 @@ headingElements = (
|
|||
)
|
||||
|
||||
voidElements = frozenset([
|
||||
"area",
|
||||
"base",
|
||||
"command",
|
||||
"event-source",
|
||||
"br",
|
||||
"col",
|
||||
"command", # removed ^1
|
||||
"embed",
|
||||
"event-source", # renamed and later removed ^2
|
||||
"hr",
|
||||
"img",
|
||||
"input",
|
||||
"link",
|
||||
"meta",
|
||||
"hr",
|
||||
"br",
|
||||
"img",
|
||||
"embed",
|
||||
"param",
|
||||
"area",
|
||||
"col",
|
||||
"input",
|
||||
"param", # deprecated ^3
|
||||
"source",
|
||||
"track"
|
||||
"track",
|
||||
"wbr",
|
||||
])
|
||||
|
||||
# Removals and deprecations in the HTML 5 spec:
|
||||
# ^1: command
|
||||
# http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2012-December/038472.html
|
||||
# https://github.com/whatwg/html/commit/9e2e25f4ae90969a7c64e0763c98548a35b50af8
|
||||
# ^2: event-source
|
||||
# renamed to eventsource in 7/2008:
|
||||
# https://github.com/whatwg/html/commit/d157945d0285b4463a04b57318da0c4b300a99e7
|
||||
# removed entirely in 2/2009:
|
||||
# https://github.com/whatwg/html/commit/43cbdbfbb7eb74b0d65e0f4caab2020c0b2a16ff
|
||||
# ^3: param
|
||||
# https://developer.mozilla.org/en-US/docs/Web/HTML/Element/param
|
||||
|
||||
cdataElements = frozenset(['title', 'textarea'])
|
||||
|
||||
rcdataElements = frozenset([
|
||||
|
@ -604,6 +617,7 @@ booleanAttributes = {
|
|||
"button": frozenset(["disabled", "autofocus"]),
|
||||
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
|
||||
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
|
||||
"ol": frozenset(["reversed"]),
|
||||
"output": frozenset(["disabled", "readonly"]),
|
||||
"iframe": frozenset(["seamless"]),
|
||||
}
|
||||
|
|
|
@ -113,6 +113,7 @@ allowed_elements = frozenset((
|
|||
(namespaces['html'], 'strike'),
|
||||
(namespaces['html'], 'strong'),
|
||||
(namespaces['html'], 'sub'),
|
||||
(namespaces['html'], 'summary'),
|
||||
(namespaces['html'], 'sup'),
|
||||
(namespaces['html'], 'table'),
|
||||
(namespaces['html'], 'tbody'),
|
||||
|
@ -128,6 +129,7 @@ allowed_elements = frozenset((
|
|||
(namespaces['html'], 'ul'),
|
||||
(namespaces['html'], 'var'),
|
||||
(namespaces['html'], 'video'),
|
||||
(namespaces['html'], 'wbr'),
|
||||
(namespaces['mathml'], 'maction'),
|
||||
(namespaces['mathml'], 'math'),
|
||||
(namespaces['mathml'], 'merror'),
|
||||
|
@ -363,6 +365,7 @@ allowed_attributes = frozenset((
|
|||
(None, 'maxsize'),
|
||||
(None, 'minsize'),
|
||||
(None, 'other'),
|
||||
(None, 'reversed'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
|
@ -373,6 +376,7 @@ allowed_attributes = frozenset((
|
|||
(None, 'scriptlevel'),
|
||||
(None, 'selection'),
|
||||
(None, 'separator'),
|
||||
(None, 'start'),
|
||||
(None, 'stretchy'),
|
||||
(None, 'width'),
|
||||
(None, 'width'),
|
||||
|
@ -594,6 +598,10 @@ allowed_css_properties = frozenset((
|
|||
'height',
|
||||
'letter-spacing',
|
||||
'line-height',
|
||||
'max-height',
|
||||
'min-height',
|
||||
'max-width',
|
||||
'min-width',
|
||||
'overflow',
|
||||
'pause',
|
||||
'pause-after',
|
||||
|
|
|
@ -115,6 +115,9 @@ class HTMLParser(object):
|
|||
|
||||
if tree is None:
|
||||
tree = treebuilders.getTreeBuilder("etree")
|
||||
elif isinstance(tree, str):
|
||||
tree = treebuilders.getTreeBuilder(tree)
|
||||
|
||||
self.tree = tree(namespaceHTMLElements)
|
||||
self.errors = []
|
||||
|
||||
|
@ -1002,8 +1005,8 @@ def getPhases(debug):
|
|||
self.tree.insertText(token["data"])
|
||||
# This must be bad for performance
|
||||
if (self.parser.framesetOK and
|
||||
any([char not in spaceCharacters
|
||||
for char in token["data"]])):
|
||||
any(char not in spaceCharacters
|
||||
for char in token["data"])):
|
||||
self.parser.framesetOK = False
|
||||
|
||||
def processSpaceCharactersNonPre(self, token):
|
||||
|
@ -1850,7 +1853,7 @@ def getPhases(debug):
|
|||
|
||||
def flushCharacters(self):
|
||||
data = "".join([item["data"] for item in self.characterTokens])
|
||||
if any([item not in spaceCharacters for item in data]):
|
||||
if any(item not in spaceCharacters for item in data):
|
||||
token = {"type": tokenTypes["Characters"], "data": data}
|
||||
self.parser.phases["inTable"].insertText(token)
|
||||
elif data:
|
||||
|
|
|
@ -222,14 +222,14 @@ class HTMLSerializer(object):
|
|||
self.strict = False
|
||||
|
||||
def encode(self, string):
|
||||
assert(isinstance(string, text_type))
|
||||
assert isinstance(string, text_type)
|
||||
if self.encoding:
|
||||
return string.encode(self.encoding, "htmlentityreplace")
|
||||
else:
|
||||
return string
|
||||
|
||||
def encodeStrict(self, string):
|
||||
assert(isinstance(string, text_type))
|
||||
assert isinstance(string, text_type)
|
||||
if self.encoding:
|
||||
return string.encode(self.encoding, "strict")
|
||||
else:
|
||||
|
|
|
@ -121,6 +121,7 @@ class Node(object):
|
|||
|
||||
class ActiveFormattingElements(list):
|
||||
def append(self, node):
|
||||
"""Append node to the end of the list."""
|
||||
equalCount = 0
|
||||
if node != Marker:
|
||||
for element in self[::-1]:
|
||||
|
|
|
@ -108,7 +108,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
node.parent = None
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
if not(len(self._element)):
|
||||
if not len(self._element):
|
||||
if not self._element.text:
|
||||
self._element.text = ""
|
||||
self._element.text += data
|
||||
|
@ -201,7 +201,7 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|||
rv = []
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if not(hasattr(element, "tag")):
|
||||
if not hasattr(element, "tag"):
|
||||
element = element.getroot()
|
||||
if element.tag == "<!DOCTYPE>":
|
||||
if element.get("publicId") or element.get("systemId"):
|
||||
|
|
|
@ -37,7 +37,7 @@ def getETreeBuilder(ElementTreeImplementation):
|
|||
else:
|
||||
node = elt
|
||||
|
||||
if not(hasattr(node, "tag")):
|
||||
if not hasattr(node, "tag"):
|
||||
node = node.getroot()
|
||||
|
||||
if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
|
||||
|
|
Loading…
Reference in a new issue