Merge branch 'feature/UpdateSoupsieve' into dev

This commit is contained in:
JackDandy 2023-02-09 14:42:00 +00:00
commit 062ccbeacc
8 changed files with 894 additions and 399 deletions

View file

@ -19,6 +19,7 @@
* Update UnRar x64 for Windows 6.11 to 6.20
* Update Send2Trash 1.5.0 (66afce7) to 1.8.1b0 (0ef9b32)
* Update SimpleJSON 3.16.1 (ce75e60) to 3.18.1 (c891b95)
* Update soupsieve 2.0.2.dev (05086ef) to 2.3.2.post1 (792d566)
* Update tmdbsimple 2.6.6 (679e343) to 2.9.1 (9da400a)
* Update torrent_parser 0.3.0 (2a4eecb) to 0.4.0 (23b9e11)
* Update unidecode module 1.1.1 (632af82) to 1.3.6 (4141992)

View file

@ -25,11 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from __future__ import annotations
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Optional, Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -40,15 +43,18 @@ __all__ = (
SoupSieve = cm.SoupSieve
def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
def compile( # noqa: A001
pattern: str,
namespaces: Optional[dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
if namespaces is not None:
namespaces = ct.Namespaces(**namespaces)
custom = kwargs.get('custom')
if custom is not None:
custom = ct.CustomSelectors(**custom)
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
if isinstance(pattern, SoupSieve):
if flags:
@ -59,53 +65,103 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern
return cp._cached_css_compile(pattern, namespaces, custom, flags)
return cp._cached_css_compile(pattern, ns, cs, flags)
def purge():
def purge() -> None:
    """
    Purge cached patterns.

    Thin public wrapper: delegates to `cp._purge_cache()` and returns nothing.
    """

    cp._purge_cache()
def closest(select, tag, namespaces=None, flags=0, **kwargs):
def closest(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Match closest ancestor.

    Compiles `select` via `compile` and returns the result of calling
    `closest(tag)` on the compiled selector. `namespaces`, `flags`, `custom`
    and any additional keyword arguments are handed to `compile` unchanged.
    """

    # `custom` is a named keyword-only parameter, so it is never part of `kwargs`.
    # It must be forwarded explicitly, otherwise custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).closest(tag)
def match(select, tag, namespaces=None, flags=0, **kwargs):
def match(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[dict[str, str]] = None,
    **kwargs: Any
) -> bool:
    """
    Match node.

    Compiles `select` via `compile` and returns the result of calling
    `match(tag)` on the compiled selector. `namespaces`, `flags`, `custom`
    and any additional keyword arguments are handed to `compile` unchanged.
    """

    # `custom` is a named keyword-only parameter, so it is never part of `kwargs`.
    # It must be forwarded explicitly, otherwise custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).match(tag)
def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
def filter(  # noqa: A001
    select: str,
    iterable: Iterable['bs4.Tag'],
    namespaces: Optional[dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[dict[str, str]] = None,
    **kwargs: Any
) -> list['bs4.Tag']:
    """
    Filter list of nodes.

    Compiles `select` via `compile` and returns the result of calling
    `filter(iterable)` on the compiled selector. `namespaces`, `flags`, `custom`
    and any additional keyword arguments are handed to `compile` unchanged.
    """

    # `custom` is a named keyword-only parameter, so it is never part of `kwargs`.
    # It must be forwarded explicitly, otherwise custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).filter(iterable)
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
def select_one(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Select a single tag.

    Compiles `select` via `compile` and returns the result of calling
    `select_one(tag)` on the compiled selector. `namespaces`, `flags`, `custom`
    and any additional keyword arguments are handed to `compile` unchanged.
    """

    # `custom` is a named keyword-only parameter, so it is never part of `kwargs`.
    # It must be forwarded explicitly, otherwise custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select_one(tag)
def select(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
def select(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[dict[str, str]] = None,
    **kwargs: Any
) -> list['bs4.Tag']:
    """
    Select the specified tags.

    Compiles `select` via `compile` and returns the result of calling
    `select(tag, limit)` on the compiled selector. `namespaces`, `flags`,
    `custom` and any additional keyword arguments are handed to `compile`
    unchanged; `limit` is passed to the compiled selector only.
    """

    # `custom` is a named keyword-only parameter, so it is never part of `kwargs`.
    # It must be forwarded explicitly, otherwise custom selectors are silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select(tag, limit)
def iselect(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
def iselect(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[dict[str, str]] = None,
    **kwargs: Any
) -> Iterator['bs4.Tag']:
    """
    Iterate the specified tags.

    Generator: compiles `select` via `compile` and yields each element produced
    by `iselect(tag, limit)` on the compiled selector. `namespaces`, `flags`,
    `custom` and any additional keyword arguments are handed to `compile`
    unchanged; `limit` is passed to the compiled selector only.
    """

    # `custom` is a named keyword-only parameter, so it is never part of `kwargs`.
    # It must be forwarded explicitly, otherwise custom selectors are silently dropped.
    yield from compile(select, namespaces, flags, custom=custom, **kwargs).iselect(tag, limit)
def escape(ident):
def escape(ident: str) -> str:
    """
    Escape identifier.

    Thin public wrapper: returns whatever `cp.escape(ident)` returns.
    """

    return cp.escape(ident)

View file

@ -1,4 +1,5 @@
"""Meta related things."""
from __future__ import annotations
from collections import namedtuple
import re
@ -79,7 +80,11 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
"""
def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0):
def __new__(
cls,
major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0
) -> Version:
"""Validate version info."""
# Ensure all parts are positive integers.
@ -115,27 +120,27 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self):
def _is_pre(self) -> bool:
"""Is prerelease."""
return self.pre > 0
return bool(self.pre > 0)
def _is_dev(self):
def _is_dev(self) -> bool:
"""Is development."""
return bool(self.release < "alpha")
def _is_post(self):
def _is_post(self) -> bool:
"""Is post."""
return self.post > 0
return bool(self.post > 0)
def _get_dev_status(self): # pragma: no cover
def _get_dev_status(self) -> str: # pragma: no cover
"""Get development status string."""
return DEV_STATUS[self.release]
def _get_canonical(self):
def _get_canonical(self) -> str:
"""Get the canonical output string."""
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
@ -153,11 +158,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
return ver
def parse_version(ver, pre=False):
def parse_version(ver: str) -> Version:
"""Parse version into a comparable Version tuple."""
m = RE_VER.match(ver)
if m is None:
raise ValueError("'{}' is not a valid version".format(ver))
# Handle major, minor, micro
major = int(m.group('major'))
minor = int(m.group('minor')) if m.group('minor') else 0
@ -185,5 +193,5 @@ def parse_version(ver, pre=False):
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 0, 2, ".dev")
__version_info__ = Version(2, 5, 0, "final", post=1)
__version__ = __version_info__._get_canonical()

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,13 @@
"""CSS selector parser."""
from __future__ import annotations
import re
from functools import lru_cache
from . import util
from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -59,6 +62,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
# Complex pseudo classes that take selector lists
PSEUDO_COMPLEX = {
':contains',
':-soup-contains',
':-soup-contains-own',
':has',
':is',
':matches',
@ -193,32 +198,42 @@ FLG_OPEN = 0x40
FLG_IN_RANGE = 0x80
FLG_OUT_OF_RANGE = 0x100
FLG_PLACEHOLDER_SHOWN = 0x200
FLG_FORGIVE = 0x400
# Maximum cached patterns to store
_MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(pattern, namespaces, custom, flags):
def _cached_css_compile(
pattern: str,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
custom_selectors = process_custom(custom)
return cm.SoupSieve(
pattern,
CSSParser(pattern, custom=custom_selectors, flags=flags).process_selectors(),
CSSParser(
pattern,
custom=custom_selectors,
flags=flags
).process_selectors(),
namespaces,
custom,
flags
)
def _purge_cache():
def _purge_cache() -> None:
    """
    Purge the cache.

    Clears the `lru_cache` that wraps `_cached_css_compile`.
    """

    _cached_css_compile.cache_clear()
def process_custom(custom):
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
@ -233,14 +248,14 @@ def process_custom(custom):
return custom_selectors
def css_unescape(content, string=False):
def css_unescape(content: str, string: bool = False) -> str:
"""
Unescape CSS value.
Strings allow for spanning the value on multiple strings by escaping a new line.
"""
def replace(m):
def replace(m: Match[str]) -> str:
"""Replace with the appropriate substitute."""
if m.group(1):
@ -260,7 +275,7 @@ def css_unescape(content, string=False):
return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
def escape(ident):
def escape(ident: str) -> str:
"""Escape identifier."""
string = []
@ -288,21 +303,21 @@ def escape(ident):
return ''.join(string)
class SelectorPattern(object):
class SelectorPattern:
"""Selector pattern."""
def __init__(self, name, pattern):
def __init__(self, name: str, pattern: str) -> None:
"""Initialize."""
self.name = name
self.re_pattern = re.compile(pattern, re.I | re.X | re.U)
def get_name(self):
def get_name(self) -> str:
"""Get name."""
return self.name
def match(self, selector, index, flags):
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
"""Match the selector."""
return self.re_pattern.match(selector, index)
@ -311,7 +326,7 @@ class SelectorPattern(object):
class SpecialPseudoPattern(SelectorPattern):
"""Selector pattern."""
def __init__(self, patterns):
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
"""Initialize."""
self.patterns = {}
@ -321,15 +336,15 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]:
self.patterns[pseudo] = pattern
self.matched_name = None
self.matched_name = None # type: Optional[SelectorPattern]
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self):
def get_name(self) -> str:
"""Get name."""
return self.matched_name.get_name()
return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector, index, flags):
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
"""Match the selector."""
pseudo = None
@ -345,7 +360,7 @@ class SpecialPseudoPattern(SelectorPattern):
return pseudo
class _Selector(object):
class _Selector:
"""
Intermediate selector class.
@ -354,23 +369,23 @@ class _Selector(object):
the data in an object that can be pickled and hashed.
"""
def __init__(self, **kwargs):
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""
self.tag = kwargs.get('tag', None)
self.ids = kwargs.get('ids', [])
self.classes = kwargs.get('classes', [])
self.attributes = kwargs.get('attributes', [])
self.nth = kwargs.get('nth', [])
self.selectors = kwargs.get('selectors', [])
self.relations = kwargs.get('relations', [])
self.rel_type = kwargs.get('rel_type', None)
self.contains = kwargs.get('contains', [])
self.lang = kwargs.get('lang', [])
self.flags = kwargs.get('flags', 0)
self.no_match = kwargs.get('no_match', False)
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
self.no_match = kwargs.get('no_match', False) # type: bool
def _freeze_relations(self, relations):
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
"""Freeze relation."""
if relations:
@ -380,7 +395,7 @@ class _Selector(object):
else:
return ct.SelectorList()
def freeze(self):
def freeze(self) -> ct.Selector | ct.SelectorNull:
"""Freeze self."""
if self.no_match:
@ -400,7 +415,7 @@ class _Selector(object):
self.flags
)
def __str__(self): # pragma: no cover
def __str__(self) -> str: # pragma: no cover
"""String representation."""
return (
@ -414,14 +429,19 @@ class _Selector(object):
__repr__ = __str__
class CSSParser(object):
class CSSParser:
"""Parse CSS selectors."""
css_tokens = (
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
SpecialPseudoPattern(
(
("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS, SelectorPattern),
(
"pseudo_contains",
(':contains', ':-soup-contains', ':-soup-contains-own'),
PAT_PSEUDO_CONTAINS,
SelectorPattern
),
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
@ -439,7 +459,12 @@ class CSSParser(object):
SelectorPattern("combine", PAT_COMBINE)
)
def __init__(self, selector, custom=None, flags=0):
def __init__(
self,
selector: str,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
flags: int = 0
) -> None:
"""Initialize."""
self.pattern = selector.replace('\x00', '\ufffd')
@ -447,7 +472,7 @@ class CSSParser(object):
self.debug = self.flags & util.DEBUG
self.custom = {} if custom is None else custom
def parse_attribute_selector(self, sel, m, has_selector):
def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Create attribute selector from the returned regex match."""
inverse = False
@ -457,22 +482,22 @@ class CSSParser(object):
attr = css_unescape(m.group('attr_name'))
is_type = False
pattern2 = None
value = ''
if case:
flags = re.I if case == 'i' else 0
flags = (re.I if case == 'i' else 0) | re.DOTALL
elif util.lower(attr) == 'type':
flags = re.I
flags = re.I | re.DOTALL
is_type = True
else:
flags = 0
flags = re.DOTALL
if op:
if m.group('value').startswith(('"', "'")):
value = css_unescape(m.group('value')[1:-1], True)
else:
value = css_unescape(m.group('value'))
else:
value = None
if not op:
# Attribute name
pattern = None
@ -517,7 +542,7 @@ class CSSParser(object):
has_selector = True
return has_selector
def parse_tag_pattern(self, sel, m, has_selector):
def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse tag pattern from regex match."""
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
@ -526,7 +551,7 @@ class CSSParser(object):
has_selector = True
return has_selector
def parse_pseudo_class_custom(self, sel, m, has_selector):
def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""
Parse custom pseudo class alias.
@ -538,13 +563,13 @@ class CSSParser(object):
selector = self.custom.get(pseudo)
if selector is None:
raise SelectorSyntaxError(
"Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)),
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
self.pattern,
m.end(0)
)
if not isinstance(selector, ct.SelectorList):
self.custom[pseudo] = None
del self.custom[pseudo]
selector = CSSParser(
selector, custom=self.custom, flags=self.flags
).process_selectors(flags=FLG_PSEUDO)
@ -554,7 +579,14 @@ class CSSParser(object):
has_selector = True
return has_selector
def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
def parse_pseudo_class(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[tuple[str, Match[str]]],
is_html: bool
) -> tuple[bool, bool]:
"""Parse pseudo class."""
complex_pseudo = False
@ -642,7 +674,13 @@ class CSSParser(object):
return has_selector, is_html
def parse_pseudo_nth(self, sel, m, has_selector, iselector):
def parse_pseudo_nth(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
"""Parse `nth` pseudo."""
mdict = m.groupdict()
@ -663,29 +701,29 @@ class CSSParser(object):
s2 = 1
var = True
else:
nth_parts = RE_NTH.match(content)
s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
nth_parts = cast(Match[str], RE_NTH.match(content))
_s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
a = nth_parts.group('a')
var = a.endswith('n')
if a.startswith('n'):
s1 += '1'
_s1 += '1'
elif var:
s1 += a[:-1]
_s1 += a[:-1]
else:
s1 += a
s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
_s1 += a
_s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
if nth_parts.group('b'):
s2 += nth_parts.group('b')
_s2 += nth_parts.group('b')
else:
s2 = '0'
s1 = int(s1, 10)
s2 = int(s2, 10)
_s2 = '0'
s1 = int(_s1, 10)
s2 = int(_s2, 10)
pseudo_sel = mdict['name']
if postfix == '_child':
if m.group('of'):
# Parse the rest of `of S`.
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN | FLG_FORGIVE)
else:
# Use default `*|*` for `of S`.
nth_sel = CSS_NTH_OF_S_DEFAULT
@ -701,20 +739,38 @@ class CSSParser(object):
has_selector = True
return has_selector
def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
def parse_pseudo_open(
self,
sel: _Selector,
name: str,
has_selector: bool,
iselector: Iterator[tuple[str, Match[str]]],
index: int
) -> bool:
"""Parse pseudo with opening bracket."""
flags = FLG_PSEUDO | FLG_OPEN
if name == ':not':
flags |= FLG_NOT
if name == ':has':
flags |= FLG_RELATIVE
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE
sel.selectors.append(self.parse_selectors(iselector, index, flags))
has_selector = True
return has_selector
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
def parse_has_combinator(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: list[_Selector],
rel_type: str,
index: int
) -> tuple[bool, _Selector, str]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
@ -723,12 +779,9 @@ class CSSParser(object):
if combinator == COMMA_COMBINATOR:
if not has_selector:
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
# or following another comma, both of which are unexpected. Commas must split selectors.
raise SelectorSyntaxError(
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
self.pattern,
index
)
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
sel.no_match = True
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR
@ -749,44 +802,63 @@ class CSSParser(object):
self.pattern,
index
)
# Set the leading combinator for the next selector.
rel_type = ':' + combinator
sel = _Selector()
sel = _Selector()
has_selector = False
return has_selector, sel, rel_type
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
def parse_combinator(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: list[_Selector],
relations: list[_Selector],
is_pseudo: bool,
is_forgive: bool,
index: int
) -> tuple[bool, _Selector]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
if not combinator:
combinator = WS_COMBINATOR
if not has_selector:
raise SelectorSyntaxError(
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
self.pattern,
index
)
if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError(
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
self.pattern,
index
)
if combinator == COMMA_COMBINATOR:
if not sel.tag and not is_pseudo:
# Implied `*`
sel.tag = ct.SelectorTag('*', None)
sel.relations.extend(relations)
selectors.append(sel)
del relations[:]
# If we are in a forgiving pseudo class, just make the selector a "no match"
if combinator == COMMA_COMBINATOR:
sel.no_match = True
del relations[:]
selectors.append(sel)
else:
sel.relations.extend(relations)
sel.rel_type = combinator
del relations[:]
relations.append(sel)
sel = _Selector()
if combinator == COMMA_COMBINATOR:
if not sel.tag and not is_pseudo:
# Implied `*`
sel.tag = ct.SelectorTag('*', None)
sel.relations.extend(relations)
selectors.append(sel)
del relations[:]
else:
sel.relations.extend(relations)
sel.rel_type = combinator
del relations[:]
relations.append(sel)
sel = _Selector()
has_selector = False
return has_selector, sel
def parse_class_id(self, sel, m, has_selector):
def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse HTML classes and ids."""
selector = m.group(0)
@ -797,10 +869,17 @@ class CSSParser(object):
has_selector = True
return has_selector
def parse_pseudo_contains(self, sel, m, has_selector):
def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse contains."""
values = m.group('values')
pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains":
warnings.warn(
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning
)
contains_own = pseudo == ":-soup-contains-own"
values = css_unescape(m.group('values'))
patterns = []
for token in RE_VALUES.finditer(values):
if token.group('split'):
@ -811,11 +890,11 @@ class CSSParser(object):
else:
value = css_unescape(value)
patterns.append(value)
sel.contains.append(ct.SelectorContains(tuple(patterns)))
sel.contains.append(ct.SelectorContains(patterns, contains_own))
has_selector = True
return has_selector
def parse_pseudo_lang(self, sel, m, has_selector):
def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse pseudo language."""
values = m.group('values')
@ -836,7 +915,7 @@ class CSSParser(object):
return has_selector
def parse_pseudo_dir(self, sel, m, has_selector):
def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse pseudo direction."""
value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
@ -844,15 +923,23 @@ class CSSParser(object):
has_selector = True
return has_selector
def parse_selectors(self, iselector, index=0, flags=0):
def parse_selectors(
self,
iselector: Iterator[tuple[str, Match[str]]],
index: int = 0,
flags: int = 0
) -> ct.SelectorList:
"""Parse selectors."""
# Initialize important variables
sel = _Selector()
selectors = []
has_selector = False
closed = False
relations = []
relations = [] # type: list[_Selector]
rel_type = ":" + WS_COMBINATOR
# Setup various flags
is_open = bool(flags & FLG_OPEN)
is_pseudo = bool(flags & FLG_PSEUDO)
is_relative = bool(flags & FLG_RELATIVE)
@ -863,7 +950,9 @@ class CSSParser(object):
is_in_range = bool(flags & FLG_IN_RANGE)
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
is_forgive = bool(flags & FLG_FORGIVE)
# Print out useful debug stuff
if self.debug: # pragma: no cover
if is_pseudo:
print(' is_pseudo: True')
@ -885,7 +974,10 @@ class CSSParser(object):
print(' is_out_of_range: True')
if is_placeholder_shown:
print(' is_placeholder_shown: True')
if is_forgive:
print(' is_forgive: True')
# The algorithm for relative selectors requires an initial selector in the selector list
if is_relative:
selectors.append(_Selector())
@ -914,17 +1006,19 @@ class CSSParser(object):
is_html = True
elif key == 'pseudo_close':
if not has_selector:
raise SelectorSyntaxError(
"Expected a selector at postion {}".format(m.start(0)),
self.pattern,
m.start(0)
)
if not is_forgive:
raise SelectorSyntaxError(
"Expected a selector at position {}".format(m.start(0)),
self.pattern,
m.start(0)
)
sel.no_match = True
if is_open:
closed = True
break
else:
raise SelectorSyntaxError(
"Unmatched pseudo-class close at postion {}".format(m.start(0)),
"Unmatched pseudo-class close at position {}".format(m.start(0)),
self.pattern,
m.start(0)
)
@ -935,7 +1029,7 @@ class CSSParser(object):
)
else:
has_selector, sel = self.parse_combinator(
sel, m, has_selector, selectors, relations, is_pseudo, index
sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
)
elif key == 'attribute':
has_selector = self.parse_attribute_selector(sel, m, has_selector)
@ -954,6 +1048,7 @@ class CSSParser(object):
except StopIteration:
pass
# Handle selectors that are not closed
if is_open and not closed:
raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index),
@ -961,6 +1056,7 @@ class CSSParser(object):
index
)
# Cleanup completed selector piece
if has_selector:
if not sel.tag and not is_pseudo:
# Implied `*`
@ -972,8 +1068,28 @@ class CSSParser(object):
sel.relations.extend(relations)
del relations[:]
selectors.append(sel)
else:
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive:
if is_relative:
# Handle relative selectors pseudo-classes with empty slots like `:has()`
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
sel.rel_type = rel_type
sel.no_match = True
selectors[-1].relations.append(sel)
has_selector = True
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
if not has_selector:
# We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well.
raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index),
self.pattern,
@ -994,9 +1110,10 @@ class CSSParser(object):
if is_placeholder_shown:
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
def selector_iter(self, pattern):
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
"""Iterate selector tokens."""
# Ignore whitespace and comments at start and end of pattern
@ -1037,7 +1154,7 @@ class CSSParser(object):
if self.debug: # pragma: no cover
print('## END PARSING')
def process_selectors(self, index=0, flags=0):
def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
"""Process selectors."""
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
@ -1048,7 +1165,7 @@ class CSSParser(object):
# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
'html|*:is(a, area, link)[href]'
'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
@ -1079,23 +1196,23 @@ CSS_INDETERMINATE = CSSParser(
This pattern must be at the end.
Special logic is applied to the last selector.
*/
html|input[type="radio"][name][name!='']:not([checked])
html|input[type="radio"][name]:not([name='']):not([checked])
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
CSS_DISABLED = CSSParser(
'''
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
html|optgroup[disabled] > html|option,
html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
html|fieldset[disabled] >
html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`
CSS_ENABLED = CSSParser(
'''
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
@ -1119,8 +1236,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser(
[type=email],
[type=password],
[type=number]
)[placeholder][placeholder!='']:is(:not([value]), [value=""]),
html|textarea[placeholder][placeholder!='']
)[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
html|textarea[placeholder]:not([placeholder=''])
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature

View file

@ -1,6 +1,8 @@
"""CSS selector structure items."""
from __future__ import annotations
import copyreg
from collections.abc import Hashable, Mapping
from .pretty import pretty
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
__all__ = (
'Selector',
@ -29,12 +31,14 @@ SEL_DEFINED = 0x200
SEL_PLACEHOLDER_SHOWN = 0x400
class Immutable(object):
class Immutable:
"""Immutable."""
__slots__ = ('_hash',)
__slots__: tuple[str, ...] = ('_hash',)
def __init__(self, **kwargs):
_hash: int
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""
temp = []
@ -45,12 +49,12 @@ class Immutable(object):
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
@classmethod
def __base__(cls):
def __base__(cls) -> "type[Immutable]":
"""Get base class."""
return cls
def __eq__(self, other):
def __eq__(self, other: Any) -> bool:
"""Equal."""
return (
@ -58,7 +62,7 @@ class Immutable(object):
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
)
def __ne__(self, other):
def __ne__(self, other: Any) -> bool:
"""Not equal."""
return (
@ -66,63 +70,74 @@ class Immutable(object):
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
)
def __hash__(self):
def __hash__(self) -> int:
"""Hash."""
return self._hash
def __setattr__(self, name, value):
def __setattr__(self, name: str, value: Any) -> None:
"""Prevent mutability."""
raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
def __repr__(self): # pragma: no cover
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "{}({})".format(
self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
)
__str__ = __repr__
def pretty(self) -> None: # pragma: no cover
"""Pretty print."""
class ImmutableDict(Mapping):
print(pretty(self))
class ImmutableDict(Mapping[Any, Any]):
"""Hashable, immutable dictionary."""
def __init__(self, *args, **kwargs):
def __init__(
self,
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
) -> None:
"""Initialize."""
arg = args[0] if args else kwargs
is_dict = isinstance(arg, dict)
if (
is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
):
raise TypeError('All values must be hashable')
self._d = dict(*args, **kwargs)
self._validate(arg)
self._d = dict(arg)
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
def __iter__(self):
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, Hashable) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
def __iter__(self) -> Iterator[Any]:
"""Iterator."""
return iter(self._d)
def __len__(self):
def __len__(self) -> int:
"""Length."""
return len(self._d)
def __getitem__(self, key):
def __getitem__(self, key: Any) -> Any:
"""Get item: `namespace['key']`."""
return self._d[key]
def __hash__(self):
def __hash__(self) -> int:
"""Hash."""
return self._hash
def __repr__(self): # pragma: no cover
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "{!r}".format(self._d)
@ -133,39 +148,37 @@ class ImmutableDict(Mapping):
class Namespaces(ImmutableDict):
"""Namespaces."""
def __init__(self, *args, **kwargs):
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize."""
# If there are arguments, check the first index.
# `super` should fail if the user gave multiple arguments,
# so don't bother checking that.
arg = args[0] if args else kwargs
is_dict = isinstance(arg, dict)
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
raise TypeError('Namespace keys and values must be Unicode strings')
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('Namespace keys and values must be Unicode strings')
super().__init__(arg)
super(Namespaces, self).__init__(*args, **kwargs)
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
    """
    Validate arguments.

    `arg` is either a `dict` or an iterable of `(key, value)` pairs.

    Raises `TypeError` when a `dict` holds a non-string value, or when an
    iterable of pairs holds a non-string key or value.
    """

    if isinstance(arg, dict):
        # Only the values are type-checked for a `dict`; the pair-iterable
        # branch below checks keys as well.
        # NOTE(review): confirm whether skipping the key check here is intended.
        if not all([isinstance(v, str) for v in arg.values()]):
            # The check is for `str`, so report that rather than "hashable".
            raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
    elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
        raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
class CustomSelectors(ImmutableDict):
"""Custom selectors."""
def __init__(self, *args, **kwargs):
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
"""Initialize."""
# If there are arguments, check the first index.
# `super` should fail if the user gave multiple arguments,
# so don't bother checking that.
arg = args[0] if args else kwargs
is_dict = isinstance(arg, dict)
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
raise TypeError('CustomSelectors keys and values must be Unicode strings')
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('CustomSelectors keys and values must be Unicode strings')
super().__init__(arg)
super(CustomSelectors, self).__init__(*args, **kwargs)
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
    """
    Validate arguments.

    `arg` is either a `dict` or an iterable of `(key, value)` pairs.

    Raises `TypeError` when a `dict` holds a non-string value, or when an
    iterable of pairs holds a non-string key or value.
    """

    if isinstance(arg, dict):
        # Only the values are type-checked for a `dict`; the pair-iterable
        # branch below checks keys as well.
        # NOTE(review): confirm whether skipping the key check here is intended.
        if not all([isinstance(v, str) for v in arg.values()]):
            # The check is for `str`, so report that rather than "hashable".
            raise TypeError('{} values must be Unicode strings'.format(self.__class__.__name__))
    elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
        raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
class Selector(Immutable):
@ -176,13 +189,35 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional[SelectorTag]
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self, tag, ids, classes, attributes, nth, selectors,
relation, rel_type, contains, lang, flags
self,
tag: Optional[SelectorTag],
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
):
"""Initialize."""
super(Selector, self).__init__(
super().__init__(
tag=tag,
ids=ids,
classes=classes,
@ -200,10 +235,10 @@ class Selector(Immutable):
class SelectorNull(Immutable):
"""Null Selector."""
def __init__(self):
def __init__(self) -> None:
"""Initialize."""
super(SelectorNull, self).__init__()
super().__init__()
class SelectorTag(Immutable):
@ -211,13 +246,13 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash")
def __init__(self, name, prefix):
name: str
prefix: Optional[str]
def __init__(self, name: str, prefix: Optional[str]) -> None:
"""Initialize."""
super(SelectorTag, self).__init__(
name=name,
prefix=prefix
)
super().__init__(name=name, prefix=prefix)
class SelectorAttribute(Immutable):
@ -225,10 +260,21 @@ class SelectorAttribute(Immutable):
__slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")
def __init__(self, attribute, prefix, pattern, xml_type_pattern):
attribute: str
prefix: str
pattern: Optional[Pattern[str]]
xml_type_pattern: Optional[Pattern[str]]
def __init__(
self,
attribute: str,
prefix: str,
pattern: Optional[Pattern[str]],
xml_type_pattern: Optional[Pattern[str]]
) -> None:
"""Initialize."""
super(SelectorAttribute, self).__init__(
super().__init__(
attribute=attribute,
prefix=prefix,
pattern=pattern,
@ -239,14 +285,15 @@ class SelectorAttribute(Immutable):
class SelectorContains(Immutable):
"""Selector contains rule."""
__slots__ = ("text", "_hash")
__slots__ = ("text", "own", "_hash")
def __init__(self, text):
text: tuple[str, ...]
own: bool
def __init__(self, text: Iterable[str], own: bool) -> None:
"""Initialize."""
super(SelectorContains, self).__init__(
text=text
)
super().__init__(text=tuple(text), own=own)
class SelectorNth(Immutable):
@ -254,10 +301,17 @@ class SelectorNth(Immutable):
__slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")
def __init__(self, a, n, b, of_type, last, selectors):
a: int
n: bool
b: int
of_type: bool
last: bool
selectors: SelectorList
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
"""Initialize."""
super(SelectorNth, self).__init__(
super().__init__(
a=a,
n=n,
b=b,
@ -272,24 +326,24 @@ class SelectorLang(Immutable):
__slots__ = ("languages", "_hash",)
def __init__(self, languages):
languages: tuple[str, ...]
def __init__(self, languages: Iterable[str]):
"""Initialize."""
super(SelectorLang, self).__init__(
languages=tuple(languages)
)
super().__init__(languages=tuple(languages))
def __iter__(self):
def __iter__(self) -> Iterator[str]:
"""Iterator."""
return iter(self.languages)
def __len__(self): # pragma: no cover
def __len__(self) -> int: # pragma: no cover
"""Length."""
return len(self.languages)
def __getitem__(self, index): # pragma: no cover
def __getitem__(self, index: int) -> str: # pragma: no cover
"""Get item."""
return self.languages[index]
@ -300,36 +354,45 @@ class SelectorList(Immutable):
__slots__ = ("selectors", "is_not", "is_html", "_hash")
def __init__(self, selectors=tuple(), is_not=False, is_html=False):
selectors: tuple[Selector | SelectorNull, ...]
is_not: bool
is_html: bool
def __init__(
self,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
is_not: bool = False,
is_html: bool = False
) -> None:
"""Initialize."""
super(SelectorList, self).__init__(
selectors=tuple(selectors),
super().__init__(
selectors=tuple(selectors) if selectors is not None else tuple(),
is_not=is_not,
is_html=is_html
)
def __iter__(self):
def __iter__(self) -> Iterator[Selector | SelectorNull]:
"""Iterator."""
return iter(self.selectors)
def __len__(self):
def __len__(self) -> int:
"""Length."""
return len(self.selectors)
def __getitem__(self, index):
def __getitem__(self, index: int) -> Selector | SelectorNull:
"""Get item."""
return self.selectors[index]
def _pickle(p):
def _pickle(p: Any) -> Any:
return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]])
def pickle_register(obj):
def pickle_register(obj: Any) -> None:
"""Allow object to be pickled."""
copyreg.pickle(obj, _pickle)

138
lib/soupsieve/pretty.py Normal file
View file

@ -0,0 +1,138 @@
"""
Format a pretty string of a `SoupSieve` object for easy debugging.
This won't necessarily support all types and such, and it definitely
won't support custom outputs.
It is mainly geared towards our types as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and the various output types are fairly well known (though this
hasn't been tested extensively to make sure we aren't missing corner cases).
Example:
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```
"""
from __future__ import annotations
import re
from typing import Any
# Token patterns used by `pretty` to split the `str()` of an object into pieces.
# Dotted name immediately followed by `(`, e.g. `SelectorList(` or `re.compile(`.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
# Keyword argument name immediately followed by `=`.
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
# Empty tuple, list, or dictionary: kept on one line.
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
# Opening brackets.
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
# Closing brackets.
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
# Unsigned run of digits.
RE_INT = re.compile(r'\d+')
# Bare word such as `None`, `True`, or `False` (two or more characters).
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
# Double and single quoted strings, allowing backslash escapes.
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
# Item separator and dictionary key/value separator, with surrounding whitespace.
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

# `pretty` tries these in insertion order and uses the first pattern that
# matches, so 'class' and 'param' must come before the more general 'kword',
# and 'empty' must come before the opening bracket tokens.
TOKENS = {
    'class': RE_CLASS,
    'param': RE_PARAM,
    'empty': RE_EMPTY,
    'lstrt': RE_LSTRT,
    'dstrt': RE_DSTRT,
    'tstrt': RE_TSTRT,
    'lend': RE_LEND,
    'dend': RE_DEND,
    'tend': RE_TEND,
    'sqstr': RE_SQSTR,
    'sep': RE_SEP,
    'dsep': RE_DSEP,
    'int': RE_INT,
    'kword': RE_KWORD,
    'dqstr': RE_DQSTR
}


def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    The result of `str(obj)` is split into tokens using `TOKENS`. A newline and
    four more spaces of indentation are added after every opening token
    (`name(`, `[`, `{`, `(`), a newline at the current indentation is added
    after every `,`, and every closing bracket reduces the indentation by four.

    Text that no token recognizes (for example a one letter name, a `-` sign,
    or a `.`) is copied through unchanged, one character at a time. Without
    this the scan position would never advance and the loop would not end.
    """

    sel = str(obj)
    length = len(sel)
    index = 0
    indent = 0
    output = []

    while index < length:
        for name, pattern in TOKENS.items():
            m = pattern.match(sel, index)
            if m is None:
                continue

            index = m.end(0)
            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                indent += 4
                output.append('{}\n{}'.format(m.group(0), " " * indent))
            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                output.append(m.group(0))
            elif name in ('lend', 'dend', 'tend'):
                indent -= 4
                output.append(m.group(0))
            elif name in ('sep',):
                output.append('{}\n{}'.format(m.group(1), " " * indent))
            elif name in ('dsep',):
                output.append('{} '.format(m.group(1)))
            break
        else:
            # No token matched here: emit the character as is and move on.
            output.append(sel[index])
            index += 1

    return ''.join(output)

View file

@ -1,7 +1,9 @@
"""Utility."""
from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional
DEBUG = 0x00001
@ -12,7 +14,7 @@ UC_Z = ord('Z')
@lru_cache(maxsize=512)
def lower(string):
def lower(string: str) -> str:
"""Lower."""
new_string = []
@ -25,7 +27,7 @@ def lower(string):
class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector."""
def __init__(self, msg, pattern=None, index=None):
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
"""Initialize."""
self.line = None
@ -37,30 +39,34 @@ class SelectorSyntaxError(Exception):
self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
super(SelectorSyntaxError, self).__init__(msg)
super().__init__(msg)
def deprecated(message, stacklevel=2): # pragma: no cover
def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]: # pragma: no cover
"""
Raise a `DeprecationWarning` when wrapped function/method is called.
Borrowed from https://stackoverflow.com/a/48632082/866026
Usage:
@deprecated("This method will be removed in version X; use Y instead.")
def some_method():
pass
"""
def _decorator(func):
def _wrapper(func: Callable[..., Any]) -> Callable[..., Any]:
@wraps(func)
def _func(*args, **kwargs):
def _deprecated_func(*args: Any, **kwargs: Any) -> Any:
warnings.warn(
"'{}' is deprecated. {}".format(func.__name__, message),
f"'{func.__name__}' is deprecated. {message}",
category=DeprecationWarning,
stacklevel=stacklevel
)
return func(*args, **kwargs)
return _func
return _decorator
return _deprecated_func
return _wrapper
def warn_deprecated(message, stacklevel=2): # pragma: no cover
def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no cover
"""Warn deprecated."""
warnings.warn(
@ -70,14 +76,15 @@ def warn_deprecated(message, stacklevel=2): # pragma: no cover
)
def get_pattern_context(pattern, index):
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
"""Get the pattern context."""
last = 0
current_line = 1
col = 1
text = []
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]
# Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):