mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-18 08:43:37 +00:00
Update soupsieve 2.0.2.dev (05086ef) → 2.3.2.post1 (792d566).
This commit is contained in:
parent
da008063f3
commit
5ea3ea8556
8 changed files with 894 additions and 399 deletions
|
@ -19,6 +19,7 @@
|
|||
* Update UnRar x64 for Windows 6.11 to 6.20
|
||||
* Update Send2Trash 1.5.0 (66afce7) to 1.8.1b0 (0ef9b32)
|
||||
* Update SimpleJSON 3.16.1 (ce75e60) to 3.18.1 (c891b95)
|
||||
* Update soupsieve 2.0.2.dev (05086ef) to 2.3.2.post1 (792d566)
|
||||
* Update tmdbsimple 2.6.6 (679e343) to 2.9.1 (9da400a)
|
||||
* Update torrent_parser 0.3.0 (2a4eecb) to 0.4.0 (23b9e11)
|
||||
* Update unidecode module 1.1.1 (632af82) to 1.3.6 (4141992)
|
||||
|
|
|
@ -25,11 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||
from . import css_parser as cp
|
||||
from . import css_match as cm
|
||||
from . import css_types as ct
|
||||
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
||||
import bs4 # type: ignore[import]
|
||||
from typing import Optional, Any, Iterator, Iterable
|
||||
|
||||
__all__ = (
|
||||
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
||||
|
@ -40,15 +43,18 @@ __all__ = (
|
|||
SoupSieve = cm.SoupSieve
|
||||
|
||||
|
||||
def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
|
||||
def compile( # noqa: A001
|
||||
pattern: str,
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> cm.SoupSieve:
|
||||
"""Compile CSS pattern."""
|
||||
|
||||
if namespaces is not None:
|
||||
namespaces = ct.Namespaces(**namespaces)
|
||||
|
||||
custom = kwargs.get('custom')
|
||||
if custom is not None:
|
||||
custom = ct.CustomSelectors(**custom)
|
||||
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
|
||||
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
|
||||
|
||||
if isinstance(pattern, SoupSieve):
|
||||
if flags:
|
||||
|
@ -59,53 +65,103 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
|
|||
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
|
||||
return pattern
|
||||
|
||||
return cp._cached_css_compile(pattern, namespaces, custom, flags)
|
||||
return cp._cached_css_compile(pattern, ns, cs, flags)
|
||||
|
||||
|
||||
def purge():
|
||||
def purge() -> None:
|
||||
"""Purge cached patterns."""
|
||||
|
||||
cp._purge_cache()
|
||||
|
||||
|
||||
def closest(select, tag, namespaces=None, flags=0, **kwargs):
|
||||
def closest(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> 'bs4.Tag':
|
||||
"""Match closest ancestor."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).closest(tag)
|
||||
|
||||
|
||||
def match(select, tag, namespaces=None, flags=0, **kwargs):
|
||||
def match(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> bool:
|
||||
"""Match node."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).match(tag)
|
||||
|
||||
|
||||
def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
|
||||
def filter( # noqa: A001
|
||||
select: str,
|
||||
iterable: Iterable['bs4.Tag'],
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> list['bs4.Tag']:
|
||||
"""Filter list of nodes."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
||||
|
||||
|
||||
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
|
||||
def select_one(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> 'bs4.Tag':
|
||||
"""Select a single tag."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).select_one(tag)
|
||||
|
||||
|
||||
def select(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
|
||||
def select(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
limit: int = 0,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> list['bs4.Tag']:
|
||||
"""Select the specified tags."""
|
||||
|
||||
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
|
||||
|
||||
|
||||
def iselect(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
|
||||
def iselect(
|
||||
select: str,
|
||||
tag: 'bs4.Tag',
|
||||
namespaces: Optional[dict[str, str]] = None,
|
||||
limit: int = 0,
|
||||
flags: int = 0,
|
||||
*,
|
||||
custom: Optional[dict[str, str]] = None,
|
||||
**kwargs: Any
|
||||
) -> Iterator['bs4.Tag']:
|
||||
"""Iterate the specified tags."""
|
||||
|
||||
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
|
||||
yield el
|
||||
|
||||
|
||||
def escape(ident):
|
||||
def escape(ident: str) -> str:
|
||||
"""Escape identifier."""
|
||||
|
||||
return cp.escape(ident)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
"""Meta related things."""
|
||||
from __future__ import annotations
|
||||
from collections import namedtuple
|
||||
import re
|
||||
|
||||
|
@ -79,7 +80,11 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
|||
|
||||
"""
|
||||
|
||||
def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0):
|
||||
def __new__(
|
||||
cls,
|
||||
major: int, minor: int, micro: int, release: str = "final",
|
||||
pre: int = 0, post: int = 0, dev: int = 0
|
||||
) -> Version:
|
||||
"""Validate version info."""
|
||||
|
||||
# Ensure all parts are positive integers.
|
||||
|
@ -115,27 +120,27 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
|||
|
||||
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
|
||||
|
||||
def _is_pre(self):
|
||||
def _is_pre(self) -> bool:
|
||||
"""Is prerelease."""
|
||||
|
||||
return self.pre > 0
|
||||
return bool(self.pre > 0)
|
||||
|
||||
def _is_dev(self):
|
||||
def _is_dev(self) -> bool:
|
||||
"""Is development."""
|
||||
|
||||
return bool(self.release < "alpha")
|
||||
|
||||
def _is_post(self):
|
||||
def _is_post(self) -> bool:
|
||||
"""Is post."""
|
||||
|
||||
return self.post > 0
|
||||
return bool(self.post > 0)
|
||||
|
||||
def _get_dev_status(self): # pragma: no cover
|
||||
def _get_dev_status(self) -> str: # pragma: no cover
|
||||
"""Get development status string."""
|
||||
|
||||
return DEV_STATUS[self.release]
|
||||
|
||||
def _get_canonical(self):
|
||||
def _get_canonical(self) -> str:
|
||||
"""Get the canonical output string."""
|
||||
|
||||
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
|
||||
|
@ -153,11 +158,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
|||
return ver
|
||||
|
||||
|
||||
def parse_version(ver, pre=False):
|
||||
def parse_version(ver: str) -> Version:
|
||||
"""Parse version into a comparable Version tuple."""
|
||||
|
||||
m = RE_VER.match(ver)
|
||||
|
||||
if m is None:
|
||||
raise ValueError("'{}' is not a valid version".format(ver))
|
||||
|
||||
# Handle major, minor, micro
|
||||
major = int(m.group('major'))
|
||||
minor = int(m.group('minor')) if m.group('minor') else 0
|
||||
|
@ -185,5 +193,5 @@ def parse_version(ver, pre=False):
|
|||
return Version(major, minor, micro, release, pre, post, dev)
|
||||
|
||||
|
||||
__version_info__ = Version(2, 0, 2, ".dev")
|
||||
__version_info__ = Version(2, 5, 0, "final", post=1)
|
||||
__version__ = __version_info__._get_canonical()
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,10 +1,13 @@
|
|||
"""CSS selector parser."""
|
||||
from __future__ import annotations
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from . import util
|
||||
from . import css_match as cm
|
||||
from . import css_types as ct
|
||||
from .util import SelectorSyntaxError
|
||||
import warnings
|
||||
from typing import Optional, Match, Any, Iterator, cast
|
||||
|
||||
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
||||
|
||||
|
@ -59,6 +62,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
|
|||
# Complex pseudo classes that take selector lists
|
||||
PSEUDO_COMPLEX = {
|
||||
':contains',
|
||||
':-soup-contains',
|
||||
':-soup-contains-own',
|
||||
':has',
|
||||
':is',
|
||||
':matches',
|
||||
|
@ -193,32 +198,42 @@ FLG_OPEN = 0x40
|
|||
FLG_IN_RANGE = 0x80
|
||||
FLG_OUT_OF_RANGE = 0x100
|
||||
FLG_PLACEHOLDER_SHOWN = 0x200
|
||||
FLG_FORGIVE = 0x400
|
||||
|
||||
# Maximum cached patterns to store
|
||||
_MAXCACHE = 500
|
||||
|
||||
|
||||
@lru_cache(maxsize=_MAXCACHE)
|
||||
def _cached_css_compile(pattern, namespaces, custom, flags):
|
||||
def _cached_css_compile(
|
||||
pattern: str,
|
||||
namespaces: Optional[ct.Namespaces],
|
||||
custom: Optional[ct.CustomSelectors],
|
||||
flags: int
|
||||
) -> cm.SoupSieve:
|
||||
"""Cached CSS compile."""
|
||||
|
||||
custom_selectors = process_custom(custom)
|
||||
return cm.SoupSieve(
|
||||
pattern,
|
||||
CSSParser(pattern, custom=custom_selectors, flags=flags).process_selectors(),
|
||||
CSSParser(
|
||||
pattern,
|
||||
custom=custom_selectors,
|
||||
flags=flags
|
||||
).process_selectors(),
|
||||
namespaces,
|
||||
custom,
|
||||
flags
|
||||
)
|
||||
|
||||
|
||||
def _purge_cache():
|
||||
def _purge_cache() -> None:
|
||||
"""Purge the cache."""
|
||||
|
||||
_cached_css_compile.cache_clear()
|
||||
|
||||
|
||||
def process_custom(custom):
|
||||
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
|
||||
"""Process custom."""
|
||||
|
||||
custom_selectors = {}
|
||||
|
@ -233,14 +248,14 @@ def process_custom(custom):
|
|||
return custom_selectors
|
||||
|
||||
|
||||
def css_unescape(content, string=False):
|
||||
def css_unescape(content: str, string: bool = False) -> str:
|
||||
"""
|
||||
Unescape CSS value.
|
||||
|
||||
Strings allow for spanning the value across multiple lines by escaping a new line.
|
||||
"""
|
||||
|
||||
def replace(m):
|
||||
def replace(m: Match[str]) -> str:
|
||||
"""Replace with the appropriate substitute."""
|
||||
|
||||
if m.group(1):
|
||||
|
@ -260,7 +275,7 @@ def css_unescape(content, string=False):
|
|||
return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
|
||||
|
||||
|
||||
def escape(ident):
|
||||
def escape(ident: str) -> str:
|
||||
"""Escape identifier."""
|
||||
|
||||
string = []
|
||||
|
@ -288,21 +303,21 @@ def escape(ident):
|
|||
return ''.join(string)
|
||||
|
||||
|
||||
class SelectorPattern(object):
|
||||
class SelectorPattern:
|
||||
"""Selector pattern."""
|
||||
|
||||
def __init__(self, name, pattern):
|
||||
def __init__(self, name: str, pattern: str) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.name = name
|
||||
self.re_pattern = re.compile(pattern, re.I | re.X | re.U)
|
||||
|
||||
def get_name(self):
|
||||
def get_name(self) -> str:
|
||||
"""Get name."""
|
||||
|
||||
return self.name
|
||||
|
||||
def match(self, selector, index, flags):
|
||||
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
|
||||
"""Match the selector."""
|
||||
|
||||
return self.re_pattern.match(selector, index)
|
||||
|
@ -311,7 +326,7 @@ class SelectorPattern(object):
|
|||
class SpecialPseudoPattern(SelectorPattern):
|
||||
"""Selector pattern."""
|
||||
|
||||
def __init__(self, patterns):
|
||||
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.patterns = {}
|
||||
|
@ -321,15 +336,15 @@ class SpecialPseudoPattern(SelectorPattern):
|
|||
for pseudo in p[1]:
|
||||
self.patterns[pseudo] = pattern
|
||||
|
||||
self.matched_name = None
|
||||
self.matched_name = None # type: Optional[SelectorPattern]
|
||||
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
|
||||
|
||||
def get_name(self):
|
||||
def get_name(self) -> str:
|
||||
"""Get name."""
|
||||
|
||||
return self.matched_name.get_name()
|
||||
return '' if self.matched_name is None else self.matched_name.get_name()
|
||||
|
||||
def match(self, selector, index, flags):
|
||||
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
|
||||
"""Match the selector."""
|
||||
|
||||
pseudo = None
|
||||
|
@ -345,7 +360,7 @@ class SpecialPseudoPattern(SelectorPattern):
|
|||
return pseudo
|
||||
|
||||
|
||||
class _Selector(object):
|
||||
class _Selector:
|
||||
"""
|
||||
Intermediate selector class.
|
||||
|
||||
|
@ -354,23 +369,23 @@ class _Selector(object):
|
|||
the data in an object that can be pickled and hashed.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.tag = kwargs.get('tag', None)
|
||||
self.ids = kwargs.get('ids', [])
|
||||
self.classes = kwargs.get('classes', [])
|
||||
self.attributes = kwargs.get('attributes', [])
|
||||
self.nth = kwargs.get('nth', [])
|
||||
self.selectors = kwargs.get('selectors', [])
|
||||
self.relations = kwargs.get('relations', [])
|
||||
self.rel_type = kwargs.get('rel_type', None)
|
||||
self.contains = kwargs.get('contains', [])
|
||||
self.lang = kwargs.get('lang', [])
|
||||
self.flags = kwargs.get('flags', 0)
|
||||
self.no_match = kwargs.get('no_match', False)
|
||||
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
|
||||
self.ids = kwargs.get('ids', []) # type: list[str]
|
||||
self.classes = kwargs.get('classes', []) # type: list[str]
|
||||
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
|
||||
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
|
||||
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
|
||||
self.relations = kwargs.get('relations', []) # type: list[_Selector]
|
||||
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
|
||||
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
|
||||
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
|
||||
self.flags = kwargs.get('flags', 0) # type: int
|
||||
self.no_match = kwargs.get('no_match', False) # type: bool
|
||||
|
||||
def _freeze_relations(self, relations):
|
||||
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
|
||||
"""Freeze relation."""
|
||||
|
||||
if relations:
|
||||
|
@ -380,7 +395,7 @@ class _Selector(object):
|
|||
else:
|
||||
return ct.SelectorList()
|
||||
|
||||
def freeze(self):
|
||||
def freeze(self) -> ct.Selector | ct.SelectorNull:
|
||||
"""Freeze self."""
|
||||
|
||||
if self.no_match:
|
||||
|
@ -400,7 +415,7 @@ class _Selector(object):
|
|||
self.flags
|
||||
)
|
||||
|
||||
def __str__(self): # pragma: no cover
|
||||
def __str__(self) -> str: # pragma: no cover
|
||||
"""String representation."""
|
||||
|
||||
return (
|
||||
|
@ -414,14 +429,19 @@ class _Selector(object):
|
|||
__repr__ = __str__
|
||||
|
||||
|
||||
class CSSParser(object):
|
||||
class CSSParser:
|
||||
"""Parse CSS selectors."""
|
||||
|
||||
css_tokens = (
|
||||
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
||||
SpecialPseudoPattern(
|
||||
(
|
||||
("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS, SelectorPattern),
|
||||
(
|
||||
"pseudo_contains",
|
||||
(':contains', ':-soup-contains', ':-soup-contains-own'),
|
||||
PAT_PSEUDO_CONTAINS,
|
||||
SelectorPattern
|
||||
),
|
||||
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
|
||||
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
|
||||
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
|
||||
|
@ -439,7 +459,12 @@ class CSSParser(object):
|
|||
SelectorPattern("combine", PAT_COMBINE)
|
||||
)
|
||||
|
||||
def __init__(self, selector, custom=None, flags=0):
|
||||
def __init__(
|
||||
self,
|
||||
selector: str,
|
||||
custom: Optional[dict[str, str | ct.SelectorList]] = None,
|
||||
flags: int = 0
|
||||
) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.pattern = selector.replace('\x00', '\ufffd')
|
||||
|
@ -447,7 +472,7 @@ class CSSParser(object):
|
|||
self.debug = self.flags & util.DEBUG
|
||||
self.custom = {} if custom is None else custom
|
||||
|
||||
def parse_attribute_selector(self, sel, m, has_selector):
|
||||
def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""Create attribute selector from the returned regex match."""
|
||||
|
||||
inverse = False
|
||||
|
@ -457,22 +482,22 @@ class CSSParser(object):
|
|||
attr = css_unescape(m.group('attr_name'))
|
||||
is_type = False
|
||||
pattern2 = None
|
||||
value = ''
|
||||
|
||||
if case:
|
||||
flags = re.I if case == 'i' else 0
|
||||
flags = (re.I if case == 'i' else 0) | re.DOTALL
|
||||
elif util.lower(attr) == 'type':
|
||||
flags = re.I
|
||||
flags = re.I | re.DOTALL
|
||||
is_type = True
|
||||
else:
|
||||
flags = 0
|
||||
flags = re.DOTALL
|
||||
|
||||
if op:
|
||||
if m.group('value').startswith(('"', "'")):
|
||||
value = css_unescape(m.group('value')[1:-1], True)
|
||||
else:
|
||||
value = css_unescape(m.group('value'))
|
||||
else:
|
||||
value = None
|
||||
|
||||
if not op:
|
||||
# Attribute name
|
||||
pattern = None
|
||||
|
@ -517,7 +542,7 @@ class CSSParser(object):
|
|||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_tag_pattern(self, sel, m, has_selector):
|
||||
def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""Parse tag pattern from regex match."""
|
||||
|
||||
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
|
||||
|
@ -526,7 +551,7 @@ class CSSParser(object):
|
|||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_pseudo_class_custom(self, sel, m, has_selector):
|
||||
def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""
|
||||
Parse custom pseudo class alias.
|
||||
|
||||
|
@ -538,13 +563,13 @@ class CSSParser(object):
|
|||
selector = self.custom.get(pseudo)
|
||||
if selector is None:
|
||||
raise SelectorSyntaxError(
|
||||
"Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)),
|
||||
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
|
||||
self.pattern,
|
||||
m.end(0)
|
||||
)
|
||||
|
||||
if not isinstance(selector, ct.SelectorList):
|
||||
self.custom[pseudo] = None
|
||||
del self.custom[pseudo]
|
||||
selector = CSSParser(
|
||||
selector, custom=self.custom, flags=self.flags
|
||||
).process_selectors(flags=FLG_PSEUDO)
|
||||
|
@ -554,7 +579,14 @@ class CSSParser(object):
|
|||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
|
||||
def parse_pseudo_class(
|
||||
self,
|
||||
sel: _Selector,
|
||||
m: Match[str],
|
||||
has_selector: bool,
|
||||
iselector: Iterator[tuple[str, Match[str]]],
|
||||
is_html: bool
|
||||
) -> tuple[bool, bool]:
|
||||
"""Parse pseudo class."""
|
||||
|
||||
complex_pseudo = False
|
||||
|
@ -642,7 +674,13 @@ class CSSParser(object):
|
|||
|
||||
return has_selector, is_html
|
||||
|
||||
def parse_pseudo_nth(self, sel, m, has_selector, iselector):
|
||||
def parse_pseudo_nth(
|
||||
self,
|
||||
sel: _Selector,
|
||||
m: Match[str],
|
||||
has_selector: bool,
|
||||
iselector: Iterator[tuple[str, Match[str]]]
|
||||
) -> bool:
|
||||
"""Parse `nth` pseudo."""
|
||||
|
||||
mdict = m.groupdict()
|
||||
|
@ -663,29 +701,29 @@ class CSSParser(object):
|
|||
s2 = 1
|
||||
var = True
|
||||
else:
|
||||
nth_parts = RE_NTH.match(content)
|
||||
s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
|
||||
nth_parts = cast(Match[str], RE_NTH.match(content))
|
||||
_s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
|
||||
a = nth_parts.group('a')
|
||||
var = a.endswith('n')
|
||||
if a.startswith('n'):
|
||||
s1 += '1'
|
||||
_s1 += '1'
|
||||
elif var:
|
||||
s1 += a[:-1]
|
||||
_s1 += a[:-1]
|
||||
else:
|
||||
s1 += a
|
||||
s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
|
||||
_s1 += a
|
||||
_s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
|
||||
if nth_parts.group('b'):
|
||||
s2 += nth_parts.group('b')
|
||||
_s2 += nth_parts.group('b')
|
||||
else:
|
||||
s2 = '0'
|
||||
s1 = int(s1, 10)
|
||||
s2 = int(s2, 10)
|
||||
_s2 = '0'
|
||||
s1 = int(_s1, 10)
|
||||
s2 = int(_s2, 10)
|
||||
|
||||
pseudo_sel = mdict['name']
|
||||
if postfix == '_child':
|
||||
if m.group('of'):
|
||||
# Parse the rest of `of S`.
|
||||
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
|
||||
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN | FLG_FORGIVE)
|
||||
else:
|
||||
# Use default `*|*` for `of S`.
|
||||
nth_sel = CSS_NTH_OF_S_DEFAULT
|
||||
|
@ -701,20 +739,38 @@ class CSSParser(object):
|
|||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
|
||||
def parse_pseudo_open(
|
||||
self,
|
||||
sel: _Selector,
|
||||
name: str,
|
||||
has_selector: bool,
|
||||
iselector: Iterator[tuple[str, Match[str]]],
|
||||
index: int
|
||||
) -> bool:
|
||||
"""Parse pseudo with opening bracket."""
|
||||
|
||||
flags = FLG_PSEUDO | FLG_OPEN
|
||||
if name == ':not':
|
||||
flags |= FLG_NOT
|
||||
if name == ':has':
|
||||
flags |= FLG_RELATIVE
|
||||
elif name == ':has':
|
||||
flags |= FLG_RELATIVE | FLG_FORGIVE
|
||||
elif name in (':where', ':is'):
|
||||
flags |= FLG_FORGIVE
|
||||
|
||||
sel.selectors.append(self.parse_selectors(iselector, index, flags))
|
||||
has_selector = True
|
||||
|
||||
return has_selector
|
||||
|
||||
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
|
||||
def parse_has_combinator(
|
||||
self,
|
||||
sel: _Selector,
|
||||
m: Match[str],
|
||||
has_selector: bool,
|
||||
selectors: list[_Selector],
|
||||
rel_type: str,
|
||||
index: int
|
||||
) -> tuple[bool, _Selector, str]:
|
||||
"""Parse combinator tokens."""
|
||||
|
||||
combinator = m.group('relation').strip()
|
||||
|
@ -723,12 +779,9 @@ class CSSParser(object):
|
|||
if combinator == COMMA_COMBINATOR:
|
||||
if not has_selector:
|
||||
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
|
||||
# or following another comma, both of which are unexpected. Commas must split selectors.
|
||||
raise SelectorSyntaxError(
|
||||
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
|
||||
self.pattern,
|
||||
index
|
||||
)
|
||||
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
|
||||
sel.no_match = True
|
||||
|
||||
sel.rel_type = rel_type
|
||||
selectors[-1].relations.append(sel)
|
||||
rel_type = ":" + WS_COMBINATOR
|
||||
|
@ -749,26 +802,44 @@ class CSSParser(object):
|
|||
self.pattern,
|
||||
index
|
||||
)
|
||||
|
||||
# Set the leading combinator for the next selector.
|
||||
rel_type = ':' + combinator
|
||||
sel = _Selector()
|
||||
|
||||
sel = _Selector()
|
||||
has_selector = False
|
||||
return has_selector, sel, rel_type
|
||||
|
||||
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
|
||||
def parse_combinator(
|
||||
self,
|
||||
sel: _Selector,
|
||||
m: Match[str],
|
||||
has_selector: bool,
|
||||
selectors: list[_Selector],
|
||||
relations: list[_Selector],
|
||||
is_pseudo: bool,
|
||||
is_forgive: bool,
|
||||
index: int
|
||||
) -> tuple[bool, _Selector]:
|
||||
"""Parse combinator tokens."""
|
||||
|
||||
combinator = m.group('relation').strip()
|
||||
if not combinator:
|
||||
combinator = WS_COMBINATOR
|
||||
if not has_selector:
|
||||
if not is_forgive or combinator != COMMA_COMBINATOR:
|
||||
raise SelectorSyntaxError(
|
||||
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
|
||||
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
|
||||
self.pattern,
|
||||
index
|
||||
)
|
||||
|
||||
# If we are in a forgiving pseudo class, just make the selector a "no match"
|
||||
if combinator == COMMA_COMBINATOR:
|
||||
sel.no_match = True
|
||||
del relations[:]
|
||||
selectors.append(sel)
|
||||
else:
|
||||
if combinator == COMMA_COMBINATOR:
|
||||
if not sel.tag and not is_pseudo:
|
||||
# Implied `*`
|
||||
|
@ -781,12 +852,13 @@ class CSSParser(object):
|
|||
sel.rel_type = combinator
|
||||
del relations[:]
|
||||
relations.append(sel)
|
||||
sel = _Selector()
|
||||
|
||||
sel = _Selector()
|
||||
has_selector = False
|
||||
|
||||
return has_selector, sel
|
||||
|
||||
def parse_class_id(self, sel, m, has_selector):
|
||||
def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""Parse HTML classes and ids."""
|
||||
|
||||
selector = m.group(0)
|
||||
|
@ -797,10 +869,17 @@ class CSSParser(object):
|
|||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_pseudo_contains(self, sel, m, has_selector):
|
||||
def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""Parse contains."""
|
||||
|
||||
values = m.group('values')
|
||||
pseudo = util.lower(css_unescape(m.group('name')))
|
||||
if pseudo == ":contains":
|
||||
warnings.warn(
|
||||
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
|
||||
FutureWarning
|
||||
)
|
||||
contains_own = pseudo == ":-soup-contains-own"
|
||||
values = css_unescape(m.group('values'))
|
||||
patterns = []
|
||||
for token in RE_VALUES.finditer(values):
|
||||
if token.group('split'):
|
||||
|
@ -811,11 +890,11 @@ class CSSParser(object):
|
|||
else:
|
||||
value = css_unescape(value)
|
||||
patterns.append(value)
|
||||
sel.contains.append(ct.SelectorContains(tuple(patterns)))
|
||||
sel.contains.append(ct.SelectorContains(patterns, contains_own))
|
||||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_pseudo_lang(self, sel, m, has_selector):
|
||||
def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""Parse pseudo language."""
|
||||
|
||||
values = m.group('values')
|
||||
|
@ -836,7 +915,7 @@ class CSSParser(object):
|
|||
|
||||
return has_selector
|
||||
|
||||
def parse_pseudo_dir(self, sel, m, has_selector):
|
||||
def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||
"""Parse pseudo direction."""
|
||||
|
||||
value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
|
||||
|
@ -844,15 +923,23 @@ class CSSParser(object):
|
|||
has_selector = True
|
||||
return has_selector
|
||||
|
||||
def parse_selectors(self, iselector, index=0, flags=0):
|
||||
def parse_selectors(
|
||||
self,
|
||||
iselector: Iterator[tuple[str, Match[str]]],
|
||||
index: int = 0,
|
||||
flags: int = 0
|
||||
) -> ct.SelectorList:
|
||||
"""Parse selectors."""
|
||||
|
||||
# Initialize important variables
|
||||
sel = _Selector()
|
||||
selectors = []
|
||||
has_selector = False
|
||||
closed = False
|
||||
relations = []
|
||||
relations = [] # type: list[_Selector]
|
||||
rel_type = ":" + WS_COMBINATOR
|
||||
|
||||
# Setup various flags
|
||||
is_open = bool(flags & FLG_OPEN)
|
||||
is_pseudo = bool(flags & FLG_PSEUDO)
|
||||
is_relative = bool(flags & FLG_RELATIVE)
|
||||
|
@ -863,7 +950,9 @@ class CSSParser(object):
|
|||
is_in_range = bool(flags & FLG_IN_RANGE)
|
||||
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
|
||||
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
|
||||
is_forgive = bool(flags & FLG_FORGIVE)
|
||||
|
||||
# Print out useful debug stuff
|
||||
if self.debug: # pragma: no cover
|
||||
if is_pseudo:
|
||||
print(' is_pseudo: True')
|
||||
|
@ -885,7 +974,10 @@ class CSSParser(object):
|
|||
print(' is_out_of_range: True')
|
||||
if is_placeholder_shown:
|
||||
print(' is_placeholder_shown: True')
|
||||
if is_forgive:
|
||||
print(' is_forgive: True')
|
||||
|
||||
# The algorithm for relative selectors requires an initial selector in the selector list
|
||||
if is_relative:
|
||||
selectors.append(_Selector())
|
||||
|
||||
|
@ -914,17 +1006,19 @@ class CSSParser(object):
|
|||
is_html = True
|
||||
elif key == 'pseudo_close':
|
||||
if not has_selector:
|
||||
if not is_forgive:
|
||||
raise SelectorSyntaxError(
|
||||
"Expected a selector at postion {}".format(m.start(0)),
|
||||
"Expected a selector at position {}".format(m.start(0)),
|
||||
self.pattern,
|
||||
m.start(0)
|
||||
)
|
||||
sel.no_match = True
|
||||
if is_open:
|
||||
closed = True
|
||||
break
|
||||
else:
|
||||
raise SelectorSyntaxError(
|
||||
"Unmatched pseudo-class close at postion {}".format(m.start(0)),
|
||||
"Unmatched pseudo-class close at position {}".format(m.start(0)),
|
||||
self.pattern,
|
||||
m.start(0)
|
||||
)
|
||||
|
@ -935,7 +1029,7 @@ class CSSParser(object):
|
|||
)
|
||||
else:
|
||||
has_selector, sel = self.parse_combinator(
|
||||
sel, m, has_selector, selectors, relations, is_pseudo, index
|
||||
sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
|
||||
)
|
||||
elif key == 'attribute':
|
||||
has_selector = self.parse_attribute_selector(sel, m, has_selector)
|
||||
|
@ -954,6 +1048,7 @@ class CSSParser(object):
|
|||
except StopIteration:
|
||||
pass
|
||||
|
||||
# Handle selectors that are not closed
|
||||
if is_open and not closed:
|
||||
raise SelectorSyntaxError(
|
||||
"Unclosed pseudo-class at position {}".format(index),
|
||||
|
@ -961,6 +1056,7 @@ class CSSParser(object):
|
|||
index
|
||||
)
|
||||
|
||||
# Cleanup completed selector piece
|
||||
if has_selector:
|
||||
if not sel.tag and not is_pseudo:
|
||||
# Implied `*`
|
||||
|
@ -972,8 +1068,28 @@ class CSSParser(object):
|
|||
sel.relations.extend(relations)
|
||||
del relations[:]
|
||||
selectors.append(sel)
|
||||
|
||||
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
|
||||
elif is_forgive:
|
||||
if is_relative:
|
||||
# Handle relative selector pseudo-classes with empty slots like `:has()`
|
||||
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
|
||||
sel.rel_type = rel_type
|
||||
sel.no_match = True
|
||||
selectors[-1].relations.append(sel)
|
||||
has_selector = True
|
||||
else:
|
||||
# Handle normal pseudo-classes with empty slots
|
||||
if not selectors or not relations:
|
||||
# Others like `:is()` etc.
|
||||
sel.no_match = True
|
||||
del relations[:]
|
||||
selectors.append(sel)
|
||||
has_selector = True
|
||||
|
||||
if not has_selector:
|
||||
# We will always need to finish a selector when `:has()` is used as it leads with combining.
|
||||
# May apply to others as well.
|
||||
raise SelectorSyntaxError(
|
||||
'Expected a selector at position {}'.format(index),
|
||||
self.pattern,
|
||||
|
@ -994,9 +1110,10 @@ class CSSParser(object):
|
|||
if is_placeholder_shown:
|
||||
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
|
||||
|
||||
# Return selector list
|
||||
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
||||
|
||||
def selector_iter(self, pattern):
|
||||
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
|
||||
"""Iterate selector tokens."""
|
||||
|
||||
# Ignore whitespace and comments at start and end of pattern
|
||||
|
@ -1037,7 +1154,7 @@ class CSSParser(object):
|
|||
if self.debug: # pragma: no cover
|
||||
print('## END PARSING')
|
||||
|
||||
def process_selectors(self, index=0, flags=0):
|
||||
def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
|
||||
"""Process selectors."""
|
||||
|
||||
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
|
||||
|
@ -1048,7 +1165,7 @@ class CSSParser(object):
|
|||
|
||||
# CSS pattern for `:link` and `:any-link`
|
||||
CSS_LINK = CSSParser(
|
||||
'html|*:is(a, area, link)[href]'
|
||||
'html|*:is(a, area)[href]'
|
||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||
# CSS pattern for `:checked`
|
||||
CSS_CHECKED = CSSParser(
|
||||
|
@ -1079,23 +1196,23 @@ CSS_INDETERMINATE = CSSParser(
|
|||
This pattern must be at the end.
|
||||
Special logic is applied to the last selector.
|
||||
*/
|
||||
html|input[type="radio"][name][name!='']:not([checked])
|
||||
html|input[type="radio"][name]:not([name='']):not([checked])
|
||||
'''
|
||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
||||
# CSS pattern for `:disabled`
|
||||
CSS_DISABLED = CSSParser(
|
||||
'''
|
||||
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
||||
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
||||
html|optgroup[disabled] > html|option,
|
||||
html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
|
||||
html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
|
||||
html|fieldset[disabled] >
|
||||
html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
|
||||
html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
|
||||
'''
|
||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||
# CSS pattern for `:enabled`
|
||||
CSS_ENABLED = CSSParser(
|
||||
'''
|
||||
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
||||
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
||||
'''
|
||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||
# CSS pattern for `:required`
|
||||
|
@ -1119,8 +1236,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser(
|
|||
[type=email],
|
||||
[type=password],
|
||||
[type=number]
|
||||
)[placeholder][placeholder!='']:is(:not([value]), [value=""]),
|
||||
html|textarea[placeholder][placeholder!='']
|
||||
)[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
|
||||
html|textarea[placeholder]:not([placeholder=''])
|
||||
'''
|
||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
|
||||
# CSS pattern default for `:nth-child` "of S" feature
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
"""CSS selector structure items."""
|
||||
from __future__ import annotations
|
||||
import copyreg
|
||||
from collections.abc import Hashable, Mapping
|
||||
from .pretty import pretty
|
||||
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
|
||||
|
||||
__all__ = (
|
||||
'Selector',
|
||||
|
@ -29,12 +31,14 @@ SEL_DEFINED = 0x200
|
|||
SEL_PLACEHOLDER_SHOWN = 0x400
|
||||
|
||||
|
||||
class Immutable(object):
|
||||
class Immutable:
|
||||
"""Immutable."""
|
||||
|
||||
__slots__ = ('_hash',)
|
||||
__slots__: tuple[str, ...] = ('_hash',)
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
_hash: int
|
||||
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
temp = []
|
||||
|
@ -45,12 +49,12 @@ class Immutable(object):
|
|||
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
|
||||
|
||||
@classmethod
|
||||
def __base__(cls):
|
||||
def __base__(cls) -> "type[Immutable]":
|
||||
"""Get base class."""
|
||||
|
||||
return cls
|
||||
|
||||
def __eq__(self, other):
|
||||
def __eq__(self, other: Any) -> bool:
|
||||
"""Equal."""
|
||||
|
||||
return (
|
||||
|
@ -58,7 +62,7 @@ class Immutable(object):
|
|||
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
|
||||
)
|
||||
|
||||
def __ne__(self, other):
|
||||
def __ne__(self, other: Any) -> bool:
|
||||
"""Not equal."""
|
||||
|
||||
return (
|
||||
|
@ -66,63 +70,74 @@ class Immutable(object):
|
|||
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
|
||||
)
|
||||
|
||||
def __hash__(self):
|
||||
def __hash__(self) -> int:
|
||||
"""Hash."""
|
||||
|
||||
return self._hash
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
def __setattr__(self, name: str, value: Any) -> None:
|
||||
"""Prevent mutability."""
|
||||
|
||||
raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
|
||||
|
||||
def __repr__(self): # pragma: no cover
|
||||
def __repr__(self) -> str: # pragma: no cover
|
||||
"""Representation."""
|
||||
|
||||
return "{}({})".format(
|
||||
self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
|
||||
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
|
||||
)
|
||||
|
||||
__str__ = __repr__
|
||||
|
||||
def pretty(self) -> None: # pragma: no cover
|
||||
"""Pretty print."""
|
||||
|
||||
class ImmutableDict(Mapping):
|
||||
print(pretty(self))
|
||||
|
||||
|
||||
class ImmutableDict(Mapping[Any, Any]):
|
||||
"""Hashable, immutable dictionary."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(
|
||||
self,
|
||||
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
|
||||
) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
arg = args[0] if args else kwargs
|
||||
is_dict = isinstance(arg, dict)
|
||||
if (
|
||||
is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
|
||||
not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
|
||||
):
|
||||
raise TypeError('All values must be hashable')
|
||||
|
||||
self._d = dict(*args, **kwargs)
|
||||
self._validate(arg)
|
||||
self._d = dict(arg)
|
||||
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
||||
|
||||
def __iter__(self):
|
||||
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
    """
    Check that the constructor argument only holds hashable items.

    For a dictionary only the values are inspected; for any other iterable
    each item is unpacked as a `(key, value)` pair and both are inspected.
    Raises `TypeError` when an inspected item is not hashable.
    """

    # Build the complete list of results first (rather than short-circuiting)
    # so every item of a non-dictionary argument is unpacked before judging.
    if isinstance(arg, dict):
        checks = [isinstance(value, Hashable) for value in arg.values()]
    else:
        checks = [isinstance(key, Hashable) and isinstance(value, Hashable) for key, value in arg]

    if all(checks):
        return

    raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
|
||||
|
||||
def __iter__(self) -> Iterator[Any]:
|
||||
"""Iterator."""
|
||||
|
||||
return iter(self._d)
|
||||
|
||||
def __len__(self):
|
||||
def __len__(self) -> int:
|
||||
"""Length."""
|
||||
|
||||
return len(self._d)
|
||||
|
||||
def __getitem__(self, key):
|
||||
def __getitem__(self, key: Any) -> Any:
|
||||
"""Get item: `namespace['key']`."""
|
||||
|
||||
return self._d[key]
|
||||
|
||||
def __hash__(self):
|
||||
def __hash__(self) -> int:
|
||||
"""Hash."""
|
||||
|
||||
return self._hash
|
||||
|
||||
def __repr__(self): # pragma: no cover
|
||||
def __repr__(self) -> str: # pragma: no cover
|
||||
"""Representation."""
|
||||
|
||||
return "{!r}".format(self._d)
|
||||
|
@ -133,39 +148,37 @@ class ImmutableDict(Mapping):
|
|||
class Namespaces(ImmutableDict):
|
||||
"""Namespaces."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
# If there are arguments, check the first index.
|
||||
# `super` should fail if the user gave multiple arguments,
|
||||
# so don't bother checking that.
|
||||
arg = args[0] if args else kwargs
|
||||
is_dict = isinstance(arg, dict)
|
||||
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
||||
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
||||
super().__init__(arg)
|
||||
|
||||
super(Namespaces, self).__init__(*args, **kwargs)
|
||||
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
    """
    Validate arguments.

    `arg` is either a dictionary or an iterable of `(key, value)` pairs.
    Every key and every value must be a `str`, whichever form is given.

    Raises `TypeError` if any key or value is not a string.
    """

    # Treat both input forms the same way: a dictionary with a non-string key
    # is rejected exactly like a pair with a non-string key.
    pairs = arg.items() if isinstance(arg, dict) else arg
    if not all([isinstance(k, str) and isinstance(v, str) for k, v in pairs]):
        raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
|
||||
|
||||
|
||||
class CustomSelectors(ImmutableDict):
|
||||
"""Custom selectors."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
# If there are arguments, check the first index.
|
||||
# `super` should fail if the user gave multiple arguments,
|
||||
# so don't bother checking that.
|
||||
arg = args[0] if args else kwargs
|
||||
is_dict = isinstance(arg, dict)
|
||||
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
||||
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
||||
super().__init__(arg)
|
||||
|
||||
super(CustomSelectors, self).__init__(*args, **kwargs)
|
||||
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
    """
    Validate arguments.

    `arg` is either a dictionary or an iterable of `(key, value)` pairs.
    Every key and every value must be a `str`, whichever form is given.

    Raises `TypeError` if any key or value is not a string.
    """

    # Treat both input forms the same way: a dictionary with a non-string key
    # is rejected exactly like a pair with a non-string key.
    pairs = arg.items() if isinstance(arg, dict) else arg
    if not all([isinstance(k, str) and isinstance(v, str) for k, v in pairs]):
        raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
|
||||
|
||||
|
||||
class Selector(Immutable):
|
||||
|
@ -176,13 +189,35 @@ class Selector(Immutable):
|
|||
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
|
||||
)
|
||||
|
||||
tag: Optional[SelectorTag]
|
||||
ids: tuple[str, ...]
|
||||
classes: tuple[str, ...]
|
||||
attributes: tuple[SelectorAttribute, ...]
|
||||
nth: tuple[SelectorNth, ...]
|
||||
selectors: tuple[SelectorList, ...]
|
||||
relation: SelectorList
|
||||
rel_type: Optional[str]
|
||||
contains: tuple[SelectorContains, ...]
|
||||
lang: tuple[SelectorLang, ...]
|
||||
flags: int
|
||||
|
||||
def __init__(
|
||||
self, tag, ids, classes, attributes, nth, selectors,
|
||||
relation, rel_type, contains, lang, flags
|
||||
self,
|
||||
tag: Optional[SelectorTag],
|
||||
ids: tuple[str, ...],
|
||||
classes: tuple[str, ...],
|
||||
attributes: tuple[SelectorAttribute, ...],
|
||||
nth: tuple[SelectorNth, ...],
|
||||
selectors: tuple[SelectorList, ...],
|
||||
relation: SelectorList,
|
||||
rel_type: Optional[str],
|
||||
contains: tuple[SelectorContains, ...],
|
||||
lang: tuple[SelectorLang, ...],
|
||||
flags: int
|
||||
):
|
||||
"""Initialize."""
|
||||
|
||||
super(Selector, self).__init__(
|
||||
super().__init__(
|
||||
tag=tag,
|
||||
ids=ids,
|
||||
classes=classes,
|
||||
|
@ -200,10 +235,10 @@ class Selector(Immutable):
|
|||
class SelectorNull(Immutable):
|
||||
"""Null Selector."""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorNull, self).__init__()
|
||||
super().__init__()
|
||||
|
||||
|
||||
class SelectorTag(Immutable):
|
||||
|
@ -211,13 +246,13 @@ class SelectorTag(Immutable):
|
|||
|
||||
__slots__ = ("name", "prefix", "_hash")
|
||||
|
||||
def __init__(self, name, prefix):
|
||||
name: str
|
||||
prefix: Optional[str]
|
||||
|
||||
def __init__(self, name: str, prefix: Optional[str]) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorTag, self).__init__(
|
||||
name=name,
|
||||
prefix=prefix
|
||||
)
|
||||
super().__init__(name=name, prefix=prefix)
|
||||
|
||||
|
||||
class SelectorAttribute(Immutable):
|
||||
|
@ -225,10 +260,21 @@ class SelectorAttribute(Immutable):
|
|||
|
||||
__slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")
|
||||
|
||||
def __init__(self, attribute, prefix, pattern, xml_type_pattern):
|
||||
attribute: str
|
||||
prefix: str
|
||||
pattern: Optional[Pattern[str]]
|
||||
xml_type_pattern: Optional[Pattern[str]]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
attribute: str,
|
||||
prefix: str,
|
||||
pattern: Optional[Pattern[str]],
|
||||
xml_type_pattern: Optional[Pattern[str]]
|
||||
) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorAttribute, self).__init__(
|
||||
super().__init__(
|
||||
attribute=attribute,
|
||||
prefix=prefix,
|
||||
pattern=pattern,
|
||||
|
@ -239,14 +285,15 @@ class SelectorAttribute(Immutable):
|
|||
class SelectorContains(Immutable):
|
||||
"""Selector contains rule."""
|
||||
|
||||
__slots__ = ("text", "_hash")
|
||||
__slots__ = ("text", "own", "_hash")
|
||||
|
||||
def __init__(self, text):
|
||||
text: tuple[str, ...]
|
||||
own: bool
|
||||
|
||||
def __init__(self, text: Iterable[str], own: bool) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorContains, self).__init__(
|
||||
text=text
|
||||
)
|
||||
super().__init__(text=tuple(text), own=own)
|
||||
|
||||
|
||||
class SelectorNth(Immutable):
|
||||
|
@ -254,10 +301,17 @@ class SelectorNth(Immutable):
|
|||
|
||||
__slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")
|
||||
|
||||
def __init__(self, a, n, b, of_type, last, selectors):
|
||||
a: int
|
||||
n: bool
|
||||
b: int
|
||||
of_type: bool
|
||||
last: bool
|
||||
selectors: SelectorList
|
||||
|
||||
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorNth, self).__init__(
|
||||
super().__init__(
|
||||
a=a,
|
||||
n=n,
|
||||
b=b,
|
||||
|
@ -272,24 +326,24 @@ class SelectorLang(Immutable):
|
|||
|
||||
__slots__ = ("languages", "_hash",)
|
||||
|
||||
def __init__(self, languages):
|
||||
languages: tuple[str, ...]
|
||||
|
||||
def __init__(self, languages: Iterable[str]):
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorLang, self).__init__(
|
||||
languages=tuple(languages)
|
||||
)
|
||||
super().__init__(languages=tuple(languages))
|
||||
|
||||
def __iter__(self):
|
||||
def __iter__(self) -> Iterator[str]:
|
||||
"""Iterator."""
|
||||
|
||||
return iter(self.languages)
|
||||
|
||||
def __len__(self): # pragma: no cover
|
||||
def __len__(self) -> int: # pragma: no cover
|
||||
"""Length."""
|
||||
|
||||
return len(self.languages)
|
||||
|
||||
def __getitem__(self, index): # pragma: no cover
|
||||
def __getitem__(self, index: int) -> str: # pragma: no cover
|
||||
"""Get item."""
|
||||
|
||||
return self.languages[index]
|
||||
|
@ -300,36 +354,45 @@ class SelectorList(Immutable):
|
|||
|
||||
__slots__ = ("selectors", "is_not", "is_html", "_hash")
|
||||
|
||||
def __init__(self, selectors=tuple(), is_not=False, is_html=False):
|
||||
selectors: tuple[Selector | SelectorNull, ...]
|
||||
is_not: bool
|
||||
is_html: bool
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
|
||||
is_not: bool = False,
|
||||
is_html: bool = False
|
||||
) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
super(SelectorList, self).__init__(
|
||||
selectors=tuple(selectors),
|
||||
super().__init__(
|
||||
selectors=tuple(selectors) if selectors is not None else tuple(),
|
||||
is_not=is_not,
|
||||
is_html=is_html
|
||||
)
|
||||
|
||||
def __iter__(self):
|
||||
def __iter__(self) -> Iterator[Selector | SelectorNull]:
|
||||
"""Iterator."""
|
||||
|
||||
return iter(self.selectors)
|
||||
|
||||
def __len__(self):
|
||||
def __len__(self) -> int:
|
||||
"""Length."""
|
||||
|
||||
return len(self.selectors)
|
||||
|
||||
def __getitem__(self, index):
|
||||
def __getitem__(self, index: int) -> Selector | SelectorNull:
|
||||
"""Get item."""
|
||||
|
||||
return self.selectors[index]
|
||||
|
||||
|
||||
def _pickle(p):
|
||||
def _pickle(p: Any) -> Any:
|
||||
return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]])
|
||||
|
||||
|
||||
def pickle_register(obj):
|
||||
def pickle_register(obj: Any) -> None:
|
||||
"""Allow object to be pickled."""
|
||||
|
||||
copyreg.pickle(obj, _pickle)
|
||||
|
|
138
lib/soupsieve/pretty.py
Normal file
138
lib/soupsieve/pretty.py
Normal file
|
@ -0,0 +1,138 @@
|
|||
"""
|
||||
Format a pretty string of a `SoupSieve` object for easy debugging.
|
||||
|
||||
This won't necessarily support all types and such, and definitely
|
||||
won't support custom outputs.
|
||||
|
||||
It is mainly geared towards our types as the `SelectorList`
|
||||
object is a beast to look at without some indentation and newlines.
|
||||
The format and various output types are fairly well known (though it
|
||||
hasn't been tested extensively to make sure we aren't missing corners).
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
>>> import soupsieve as sv
|
||||
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
|
||||
SelectorList(
|
||||
selectors=(
|
||||
Selector(
|
||||
tag=SelectorTag(
|
||||
name='that',
|
||||
prefix=None),
|
||||
ids=(),
|
||||
classes=(
|
||||
'class',
|
||||
),
|
||||
attributes=(
|
||||
SelectorAttribute(
|
||||
attribute='name',
|
||||
prefix='',
|
||||
pattern=re.compile(
|
||||
'^value$'),
|
||||
xml_type_pattern=None),
|
||||
),
|
||||
nth=(),
|
||||
selectors=(),
|
||||
relation=SelectorList(
|
||||
selectors=(
|
||||
Selector(
|
||||
tag=SelectorTag(
|
||||
name='this',
|
||||
prefix=None),
|
||||
ids=(),
|
||||
classes=(),
|
||||
attributes=(),
|
||||
nth=(),
|
||||
selectors=(),
|
||||
relation=SelectorList(
|
||||
selectors=(),
|
||||
is_not=False,
|
||||
is_html=False),
|
||||
rel_type='>',
|
||||
contains=(),
|
||||
lang=(),
|
||||
flags=0),
|
||||
),
|
||||
is_not=False,
|
||||
is_html=False),
|
||||
rel_type=None,
|
||||
contains=(),
|
||||
lang=(),
|
||||
flags=0),
|
||||
),
|
||||
is_not=False,
|
||||
is_html=False)
|
||||
```
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
|
||||
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
|
||||
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
|
||||
RE_LSTRT = re.compile(r'\[')
|
||||
RE_DSTRT = re.compile(r'\{')
|
||||
RE_TSTRT = re.compile(r'\(')
|
||||
RE_LEND = re.compile(r'\]')
|
||||
RE_DEND = re.compile(r'\}')
|
||||
RE_TEND = re.compile(r'\)')
|
||||
RE_INT = re.compile(r'\d+')
|
||||
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
|
||||
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
|
||||
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
|
||||
RE_SEP = re.compile(r'\s*(,)\s*')
|
||||
RE_DSEP = re.compile(r'\s*(:)\s*')
|
||||
|
||||
TOKENS = {
|
||||
'class': RE_CLASS,
|
||||
'param': RE_PARAM,
|
||||
'empty': RE_EMPTY,
|
||||
'lstrt': RE_LSTRT,
|
||||
'dstrt': RE_DSTRT,
|
||||
'tstrt': RE_TSTRT,
|
||||
'lend': RE_LEND,
|
||||
'dend': RE_DEND,
|
||||
'tend': RE_TEND,
|
||||
'sqstr': RE_SQSTR,
|
||||
'sep': RE_SEP,
|
||||
'dsep': RE_DSEP,
|
||||
'int': RE_INT,
|
||||
'kword': RE_KWORD,
|
||||
'dqstr': RE_DQSTR
|
||||
}
|
||||
|
||||
|
||||
def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    `str(obj)` is scanned from left to right. At each position the patterns
    in `TOKENS` are tried in their defined order and the first one that
    matches decides what is emitted:

    - an opening token (`name(`, `[`, `{`, `(`) is followed by a newline and
      the indentation grows by four spaces;
    - a closing token (`]`, `}`, `)`) shrinks the indentation by four spaces;
    - a comma separator is followed by a newline at the current indentation;
    - a colon separator is followed by a single space;
    - parameters, integers, keywords, strings and empty brackets are copied.

    Text that no pattern recognizes is copied through unchanged.

    Returns the reformatted string.
    """

    sel = str(obj)
    index = 0
    end = len(sel) - 1
    indent = 0
    output = []  # type: list[str]

    while index <= end:
        for name, pattern in TOKENS.items():
            m = pattern.match(sel, index)
            if m is None:
                continue

            index = m.end(0)
            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                indent += 4
                output.append('{}\n{}'.format(m.group(0), " " * indent))
            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                output.append(m.group(0))
            elif name in ('lend', 'dend', 'tend'):
                indent -= 4
                output.append(m.group(0))
            elif name in ('sep',):
                output.append('{}\n{}'.format(m.group(1), " " * indent))
            elif name in ('dsep',):
                output.append('{} '.format(m.group(1)))
            break
        else:
            # Nothing in `TOKENS` matches at this position. Emit the character
            # as-is and step past it; without this the index would never move
            # and the loop would spin forever on the same character.
            output.append(sel[index])
            index += 1

    return ''.join(output)
|
|
@ -1,7 +1,9 @@
|
|||
"""Utility."""
|
||||
from __future__ import annotations
|
||||
from functools import wraps, lru_cache
|
||||
import warnings
|
||||
import re
|
||||
from typing import Callable, Any, Optional
|
||||
|
||||
DEBUG = 0x00001
|
||||
|
||||
|
@ -12,7 +14,7 @@ UC_Z = ord('Z')
|
|||
|
||||
|
||||
@lru_cache(maxsize=512)
|
||||
def lower(string):
|
||||
def lower(string: str) -> str:
|
||||
"""Lower."""
|
||||
|
||||
new_string = []
|
||||
|
@ -25,7 +27,7 @@ def lower(string):
|
|||
class SelectorSyntaxError(Exception):
|
||||
"""Syntax error in a CSS selector."""
|
||||
|
||||
def __init__(self, msg, pattern=None, index=None):
|
||||
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
|
||||
"""Initialize."""
|
||||
|
||||
self.line = None
|
||||
|
@ -37,30 +39,34 @@ class SelectorSyntaxError(Exception):
|
|||
self.context, self.line, self.col = get_pattern_context(pattern, index)
|
||||
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
|
||||
|
||||
super(SelectorSyntaxError, self).__init__(msg)
|
||||
super().__init__(msg)
|
||||
|
||||
|
||||
def deprecated(message, stacklevel=2): # pragma: no cover
|
||||
def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]: # pragma: no cover
|
||||
"""
|
||||
Raise a `DeprecationWarning` when wrapped function/method is called.
|
||||
|
||||
Borrowed from https://stackoverflow.com/a/48632082/866026
|
||||
Usage:
|
||||
|
||||
@deprecated("This method will be removed in version X; use Y instead.")
|
||||
def some_method():
|
||||
pass
|
||||
"""
|
||||
|
||||
def _decorator(func):
|
||||
def _wrapper(func: Callable[..., Any]) -> Callable[..., Any]:
|
||||
@wraps(func)
|
||||
def _func(*args, **kwargs):
|
||||
def _deprecated_func(*args: Any, **kwargs: Any) -> Any:
|
||||
warnings.warn(
|
||||
"'{}' is deprecated. {}".format(func.__name__, message),
|
||||
f"'{func.__name__}' is deprecated. {message}",
|
||||
category=DeprecationWarning,
|
||||
stacklevel=stacklevel
|
||||
)
|
||||
return func(*args, **kwargs)
|
||||
return _func
|
||||
return _decorator
|
||||
return _deprecated_func
|
||||
return _wrapper
|
||||
|
||||
|
||||
def warn_deprecated(message, stacklevel=2): # pragma: no cover
|
||||
def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no cover
|
||||
"""Warn deprecated."""
|
||||
|
||||
warnings.warn(
|
||||
|
@ -70,14 +76,15 @@ def warn_deprecated(message, stacklevel=2): # pragma: no cover
|
|||
)
|
||||
|
||||
|
||||
def get_pattern_context(pattern, index):
|
||||
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
|
||||
"""Get the pattern context."""
|
||||
|
||||
last = 0
|
||||
current_line = 1
|
||||
col = 1
|
||||
text = []
|
||||
text = [] # type: list[str]
|
||||
line = 1
|
||||
offset = None # type: Optional[int]
|
||||
|
||||
# Split pattern by newline and handle the text before the newline
|
||||
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
|
||||
|
|
Loading…
Reference in a new issue