mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-01 00:43:37 +00:00
76 lines
2.6 KiB
Python
76 lines
2.6 KiB
Python
from __future__ import absolute_import
|
|
|
|
import re
|
|
|
|
# Names exported by ``from <this module> import *``.
# NOTE(review): apart from '_SGML_AVAILABLE' and 'sgmllib', these names look
# like they are only bound when the real sgmllib can be imported -- confirm
# that star-importing callers cope when it cannot.
__all__ = [
    # Availability flag and the sgmllib module (or its stand-in).
    '_SGML_AVAILABLE', 'sgmllib',
    # Regular expressions compiled in this module.
    'charref', 'tagfind', 'attrfind',
    # Regular expressions taken over from sgmllib.
    'entityref', 'incomplete', 'interesting',
    'shorttag', 'shorttagopen', 'starttagopen',
    # Replacement for sgmllib's end-bracket matcher.
    'endbracket',
]
|
|
|
|
# sgmllib is not available by default in Python 3; if the end user doesn't have
# it available then we'll lose ill-formed XML parsing and content sanitizing.
try:
    import sgmllib
except ImportError:
    # This is probably Python 3, which doesn't include sgmllib anymore.
    _SGML_AVAILABLE = 0

    # Mock sgmllib enough to allow subclassing later on.
    class sgmllib(object):
        """Stand-in for the missing module; only provides ``SGMLParser``."""

        class SGMLParser(object):
            """Placeholder base class whose parser hooks do nothing."""

            def goahead(self, i):
                """Accept the argument and do nothing."""
                pass

            def parse_starttag(self, i):
                """Accept the argument and do nothing."""
                pass
else:
    _SGML_AVAILABLE = 1

    # Everything below reads attributes of the real sgmllib module, which the
    # stand-in class above does not provide, so it lives in this branch.

    # sgmllib defines a number of module-level regular expressions that are
    # insufficient for the XML parsing feedparser needs. Rather than modify
    # the variables directly in sgmllib, they're defined here using the same
    # names, and the compiled code objects of several sgmllib.SGMLParser
    # methods are copied into _BaseHTMLProcessor so that they execute in
    # feedparser's scope instead of sgmllib's scope.
    #
    # Raw string: ``\d`` in a plain literal is an invalid escape sequence.
    charref = re.compile(r'&#(\d+|[xX][0-9a-fA-F]+);')
    tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
    attrfind = re.compile(
        r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)[$]?(\s*=\s*'
        r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?'
    )

    # Unfortunately, these must be copied over to prevent NameError exceptions
    entityref = sgmllib.entityref
    incomplete = sgmllib.incomplete
    interesting = sgmllib.interesting
    shorttag = sgmllib.shorttag
    shorttagopen = sgmllib.shorttagopen
    starttagopen = sgmllib.starttagopen

    class _EndBracketRegEx:
        """Object offering a ``search()`` method backed by a custom pattern.

        The pattern consumes text up to, but not including, the next ``<``
        or ``>``; quoted attribute values followed by ``>``, ``/``,
        whitespace or ``name=`` are consumed whole, so brackets inside them
        are skipped.
        """

        def __init__(self):
            # Overriding the built-in sgmllib.endbracket regex allows the
            # parser to find angle brackets embedded in element attributes.
            # Raw string: ``\s`` and ``\w`` are invalid escapes otherwise.
            self.endbracket = re.compile(
                r'''([^'"<>]|"[^"]*"(?=>|/|\s|\w+=)|'[^']*'(?=>|/|\s|\w+=))*(?=[<>])|.*?(?=[<>])'''
            )

        def search(self, target, index=0):
            """Match the pattern against *target* starting at *index*.

            Despite the name, this uses ``match()``, so the pattern is
            anchored at *index* rather than scanned for.

            Returns an ``EndBracketMatch`` wrapping the match, or ``None``
            when the pattern does not match.
            """
            match = self.endbracket.match(target, index)
            if match is None:
                return None
            # Returning a new object in the calling thread's context
            # resolves a thread-safety issue.
            return EndBracketMatch(match)

    class EndBracketMatch:
        """Thin wrapper around a match produced by ``_EndBracketRegEx``."""

        def __init__(self, match):
            self.match = match

        def start(self, n):
            """Return the END offset of group *n* of the wrapped match.

            This is deliberate: the pattern stops just before the bracket
            (it is only asserted by a lookahead), so the end of the match is
            the bracket's position.
            NOTE(review): presumably callers use ``start(0)`` to locate the
            closing bracket -- confirm against the sgmllib call sites.
            """
            return self.match.end(n)

    endbracket = _EndBracketRegEx()
|