mirror of
https://github.com/SickGear/SickGear.git
synced 2024-12-18 16:53:38 +00:00
Merge branch 'feature/UpdateSoupsieve' into dev
This commit is contained in:
commit
062ccbeacc
8 changed files with 894 additions and 399 deletions
|
@ -19,6 +19,7 @@
|
||||||
* Update UnRar x64 for Windows 6.11 to 6.20
|
* Update UnRar x64 for Windows 6.11 to 6.20
|
||||||
* Update Send2Trash 1.5.0 (66afce7) to 1.8.1b0 (0ef9b32)
|
* Update Send2Trash 1.5.0 (66afce7) to 1.8.1b0 (0ef9b32)
|
||||||
* Update SimpleJSON 3.16.1 (ce75e60) to 3.18.1 (c891b95)
|
* Update SimpleJSON 3.16.1 (ce75e60) to 3.18.1 (c891b95)
|
||||||
|
* Update soupsieve 2.0.2.dev (05086ef) to 2.3.2.post1 (792d566)
|
||||||
* Update tmdbsimple 2.6.6 (679e343) to 2.9.1 (9da400a)
|
* Update tmdbsimple 2.6.6 (679e343) to 2.9.1 (9da400a)
|
||||||
* Update torrent_parser 0.3.0 (2a4eecb) to 0.4.0 (23b9e11)
|
* Update torrent_parser 0.3.0 (2a4eecb) to 0.4.0 (23b9e11)
|
||||||
* Update unidecode module 1.1.1 (632af82) to 1.3.6 (4141992)
|
* Update unidecode module 1.1.1 (632af82) to 1.3.6 (4141992)
|
||||||
|
|
|
@ -25,11 +25,14 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
from .__meta__ import __version__, __version_info__ # noqa: F401
|
from .__meta__ import __version__, __version_info__ # noqa: F401
|
||||||
from . import css_parser as cp
|
from . import css_parser as cp
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
||||||
|
import bs4 # type: ignore[import]
|
||||||
|
from typing import Optional, Any, Iterator, Iterable
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
|
||||||
|
@ -40,15 +43,18 @@ __all__ = (
|
||||||
SoupSieve = cm.SoupSieve
|
SoupSieve = cm.SoupSieve
|
||||||
|
|
||||||
|
|
||||||
def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
|
def compile( # noqa: A001
|
||||||
|
pattern: str,
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> cm.SoupSieve:
|
||||||
"""Compile CSS pattern."""
|
"""Compile CSS pattern."""
|
||||||
|
|
||||||
if namespaces is not None:
|
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
|
||||||
namespaces = ct.Namespaces(**namespaces)
|
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]
|
||||||
|
|
||||||
custom = kwargs.get('custom')
|
|
||||||
if custom is not None:
|
|
||||||
custom = ct.CustomSelectors(**custom)
|
|
||||||
|
|
||||||
if isinstance(pattern, SoupSieve):
|
if isinstance(pattern, SoupSieve):
|
||||||
if flags:
|
if flags:
|
||||||
|
@ -59,53 +65,103 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
|
||||||
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
|
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
|
||||||
return pattern
|
return pattern
|
||||||
|
|
||||||
return cp._cached_css_compile(pattern, namespaces, custom, flags)
|
return cp._cached_css_compile(pattern, ns, cs, flags)
|
||||||
|
|
||||||
|
|
||||||
def purge():
|
def purge() -> None:
|
||||||
"""Purge cached patterns."""
|
"""Purge cached patterns."""
|
||||||
|
|
||||||
cp._purge_cache()
|
cp._purge_cache()
|
||||||
|
|
||||||
|
|
||||||
def closest(select, tag, namespaces=None, flags=0, **kwargs):
|
def closest(
|
||||||
|
select: str,
|
||||||
|
tag: 'bs4.Tag',
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> 'bs4.Tag':
|
||||||
"""Match closest ancestor."""
|
"""Match closest ancestor."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).closest(tag)
|
return compile(select, namespaces, flags, **kwargs).closest(tag)
|
||||||
|
|
||||||
|
|
||||||
def match(select, tag, namespaces=None, flags=0, **kwargs):
|
def match(
|
||||||
|
select: str,
|
||||||
|
tag: 'bs4.Tag',
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> bool:
|
||||||
"""Match node."""
|
"""Match node."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).match(tag)
|
return compile(select, namespaces, flags, **kwargs).match(tag)
|
||||||
|
|
||||||
|
|
||||||
def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
|
def filter( # noqa: A001
|
||||||
|
select: str,
|
||||||
|
iterable: Iterable['bs4.Tag'],
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> list['bs4.Tag']:
|
||||||
"""Filter list of nodes."""
|
"""Filter list of nodes."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
return compile(select, namespaces, flags, **kwargs).filter(iterable)
|
||||||
|
|
||||||
|
|
||||||
def select_one(select, tag, namespaces=None, flags=0, **kwargs):
|
def select_one(
|
||||||
|
select: str,
|
||||||
|
tag: 'bs4.Tag',
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> 'bs4.Tag':
|
||||||
"""Select a single tag."""
|
"""Select a single tag."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).select_one(tag)
|
return compile(select, namespaces, flags, **kwargs).select_one(tag)
|
||||||
|
|
||||||
|
|
||||||
def select(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
|
def select(
|
||||||
|
select: str,
|
||||||
|
tag: 'bs4.Tag',
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
limit: int = 0,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> list['bs4.Tag']:
|
||||||
"""Select the specified tags."""
|
"""Select the specified tags."""
|
||||||
|
|
||||||
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
|
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
|
||||||
|
|
||||||
|
|
||||||
def iselect(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
|
def iselect(
|
||||||
|
select: str,
|
||||||
|
tag: 'bs4.Tag',
|
||||||
|
namespaces: Optional[dict[str, str]] = None,
|
||||||
|
limit: int = 0,
|
||||||
|
flags: int = 0,
|
||||||
|
*,
|
||||||
|
custom: Optional[dict[str, str]] = None,
|
||||||
|
**kwargs: Any
|
||||||
|
) -> Iterator['bs4.Tag']:
|
||||||
"""Iterate the specified tags."""
|
"""Iterate the specified tags."""
|
||||||
|
|
||||||
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
|
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
|
||||||
yield el
|
yield el
|
||||||
|
|
||||||
|
|
||||||
def escape(ident):
|
def escape(ident: str) -> str:
|
||||||
"""Escape identifier."""
|
"""Escape identifier."""
|
||||||
|
|
||||||
return cp.escape(ident)
|
return cp.escape(ident)
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
"""Meta related things."""
|
"""Meta related things."""
|
||||||
|
from __future__ import annotations
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -79,7 +80,11 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0):
|
def __new__(
|
||||||
|
cls,
|
||||||
|
major: int, minor: int, micro: int, release: str = "final",
|
||||||
|
pre: int = 0, post: int = 0, dev: int = 0
|
||||||
|
) -> Version:
|
||||||
"""Validate version info."""
|
"""Validate version info."""
|
||||||
|
|
||||||
# Ensure all parts are positive integers.
|
# Ensure all parts are positive integers.
|
||||||
|
@ -115,27 +120,27 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
||||||
|
|
||||||
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
|
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
|
||||||
|
|
||||||
def _is_pre(self):
|
def _is_pre(self) -> bool:
|
||||||
"""Is prerelease."""
|
"""Is prerelease."""
|
||||||
|
|
||||||
return self.pre > 0
|
return bool(self.pre > 0)
|
||||||
|
|
||||||
def _is_dev(self):
|
def _is_dev(self) -> bool:
|
||||||
"""Is development."""
|
"""Is development."""
|
||||||
|
|
||||||
return bool(self.release < "alpha")
|
return bool(self.release < "alpha")
|
||||||
|
|
||||||
def _is_post(self):
|
def _is_post(self) -> bool:
|
||||||
"""Is post."""
|
"""Is post."""
|
||||||
|
|
||||||
return self.post > 0
|
return bool(self.post > 0)
|
||||||
|
|
||||||
def _get_dev_status(self): # pragma: no cover
|
def _get_dev_status(self) -> str: # pragma: no cover
|
||||||
"""Get development status string."""
|
"""Get development status string."""
|
||||||
|
|
||||||
return DEV_STATUS[self.release]
|
return DEV_STATUS[self.release]
|
||||||
|
|
||||||
def _get_canonical(self):
|
def _get_canonical(self) -> str:
|
||||||
"""Get the canonical output string."""
|
"""Get the canonical output string."""
|
||||||
|
|
||||||
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
|
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
|
||||||
|
@ -153,11 +158,14 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
|
||||||
return ver
|
return ver
|
||||||
|
|
||||||
|
|
||||||
def parse_version(ver, pre=False):
|
def parse_version(ver: str) -> Version:
|
||||||
"""Parse version into a comparable Version tuple."""
|
"""Parse version into a comparable Version tuple."""
|
||||||
|
|
||||||
m = RE_VER.match(ver)
|
m = RE_VER.match(ver)
|
||||||
|
|
||||||
|
if m is None:
|
||||||
|
raise ValueError("'{}' is not a valid version".format(ver))
|
||||||
|
|
||||||
# Handle major, minor, micro
|
# Handle major, minor, micro
|
||||||
major = int(m.group('major'))
|
major = int(m.group('major'))
|
||||||
minor = int(m.group('minor')) if m.group('minor') else 0
|
minor = int(m.group('minor')) if m.group('minor') else 0
|
||||||
|
@ -185,5 +193,5 @@ def parse_version(ver, pre=False):
|
||||||
return Version(major, minor, micro, release, pre, post, dev)
|
return Version(major, minor, micro, release, pre, post, dev)
|
||||||
|
|
||||||
|
|
||||||
__version_info__ = Version(2, 0, 2, ".dev")
|
__version_info__ = Version(2, 5, 0, "final", post=1)
|
||||||
__version__ = __version_info__._get_canonical()
|
__version__ = __version_info__._get_canonical()
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,10 +1,13 @@
|
||||||
"""CSS selector parser."""
|
"""CSS selector parser."""
|
||||||
|
from __future__ import annotations
|
||||||
import re
|
import re
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from . import util
|
from . import util
|
||||||
from . import css_match as cm
|
from . import css_match as cm
|
||||||
from . import css_types as ct
|
from . import css_types as ct
|
||||||
from .util import SelectorSyntaxError
|
from .util import SelectorSyntaxError
|
||||||
|
import warnings
|
||||||
|
from typing import Optional, Match, Any, Iterator, cast
|
||||||
|
|
||||||
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
UNICODE_REPLACEMENT_CHAR = 0xFFFD
|
||||||
|
|
||||||
|
@ -59,6 +62,8 @@ PSEUDO_SIMPLE_NO_MATCH = {
|
||||||
# Complex pseudo classes that take selector lists
|
# Complex pseudo classes that take selector lists
|
||||||
PSEUDO_COMPLEX = {
|
PSEUDO_COMPLEX = {
|
||||||
':contains',
|
':contains',
|
||||||
|
':-soup-contains',
|
||||||
|
':-soup-contains-own',
|
||||||
':has',
|
':has',
|
||||||
':is',
|
':is',
|
||||||
':matches',
|
':matches',
|
||||||
|
@ -193,32 +198,42 @@ FLG_OPEN = 0x40
|
||||||
FLG_IN_RANGE = 0x80
|
FLG_IN_RANGE = 0x80
|
||||||
FLG_OUT_OF_RANGE = 0x100
|
FLG_OUT_OF_RANGE = 0x100
|
||||||
FLG_PLACEHOLDER_SHOWN = 0x200
|
FLG_PLACEHOLDER_SHOWN = 0x200
|
||||||
|
FLG_FORGIVE = 0x400
|
||||||
|
|
||||||
# Maximum cached patterns to store
|
# Maximum cached patterns to store
|
||||||
_MAXCACHE = 500
|
_MAXCACHE = 500
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=_MAXCACHE)
|
@lru_cache(maxsize=_MAXCACHE)
|
||||||
def _cached_css_compile(pattern, namespaces, custom, flags):
|
def _cached_css_compile(
|
||||||
|
pattern: str,
|
||||||
|
namespaces: Optional[ct.Namespaces],
|
||||||
|
custom: Optional[ct.CustomSelectors],
|
||||||
|
flags: int
|
||||||
|
) -> cm.SoupSieve:
|
||||||
"""Cached CSS compile."""
|
"""Cached CSS compile."""
|
||||||
|
|
||||||
custom_selectors = process_custom(custom)
|
custom_selectors = process_custom(custom)
|
||||||
return cm.SoupSieve(
|
return cm.SoupSieve(
|
||||||
pattern,
|
pattern,
|
||||||
CSSParser(pattern, custom=custom_selectors, flags=flags).process_selectors(),
|
CSSParser(
|
||||||
|
pattern,
|
||||||
|
custom=custom_selectors,
|
||||||
|
flags=flags
|
||||||
|
).process_selectors(),
|
||||||
namespaces,
|
namespaces,
|
||||||
custom,
|
custom,
|
||||||
flags
|
flags
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _purge_cache():
|
def _purge_cache() -> None:
|
||||||
"""Purge the cache."""
|
"""Purge the cache."""
|
||||||
|
|
||||||
_cached_css_compile.cache_clear()
|
_cached_css_compile.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
def process_custom(custom):
|
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
|
||||||
"""Process custom."""
|
"""Process custom."""
|
||||||
|
|
||||||
custom_selectors = {}
|
custom_selectors = {}
|
||||||
|
@ -233,14 +248,14 @@ def process_custom(custom):
|
||||||
return custom_selectors
|
return custom_selectors
|
||||||
|
|
||||||
|
|
||||||
def css_unescape(content, string=False):
|
def css_unescape(content: str, string: bool = False) -> str:
|
||||||
"""
|
"""
|
||||||
Unescape CSS value.
|
Unescape CSS value.
|
||||||
|
|
||||||
Strings allow for spanning the value on multiple strings by escaping a new line.
|
Strings allow for spanning the value on multiple strings by escaping a new line.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def replace(m):
|
def replace(m: Match[str]) -> str:
|
||||||
"""Replace with the appropriate substitute."""
|
"""Replace with the appropriate substitute."""
|
||||||
|
|
||||||
if m.group(1):
|
if m.group(1):
|
||||||
|
@ -260,7 +275,7 @@ def css_unescape(content, string=False):
|
||||||
return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
|
return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
|
||||||
|
|
||||||
|
|
||||||
def escape(ident):
|
def escape(ident: str) -> str:
|
||||||
"""Escape identifier."""
|
"""Escape identifier."""
|
||||||
|
|
||||||
string = []
|
string = []
|
||||||
|
@ -288,21 +303,21 @@ def escape(ident):
|
||||||
return ''.join(string)
|
return ''.join(string)
|
||||||
|
|
||||||
|
|
||||||
class SelectorPattern(object):
|
class SelectorPattern:
|
||||||
"""Selector pattern."""
|
"""Selector pattern."""
|
||||||
|
|
||||||
def __init__(self, name, pattern):
|
def __init__(self, name: str, pattern: str) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.name = name
|
self.name = name
|
||||||
self.re_pattern = re.compile(pattern, re.I | re.X | re.U)
|
self.re_pattern = re.compile(pattern, re.I | re.X | re.U)
|
||||||
|
|
||||||
def get_name(self):
|
def get_name(self) -> str:
|
||||||
"""Get name."""
|
"""Get name."""
|
||||||
|
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
def match(self, selector, index, flags):
|
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
|
||||||
"""Match the selector."""
|
"""Match the selector."""
|
||||||
|
|
||||||
return self.re_pattern.match(selector, index)
|
return self.re_pattern.match(selector, index)
|
||||||
|
@ -311,7 +326,7 @@ class SelectorPattern(object):
|
||||||
class SpecialPseudoPattern(SelectorPattern):
|
class SpecialPseudoPattern(SelectorPattern):
|
||||||
"""Selector pattern."""
|
"""Selector pattern."""
|
||||||
|
|
||||||
def __init__(self, patterns):
|
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.patterns = {}
|
self.patterns = {}
|
||||||
|
@ -321,15 +336,15 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
for pseudo in p[1]:
|
for pseudo in p[1]:
|
||||||
self.patterns[pseudo] = pattern
|
self.patterns[pseudo] = pattern
|
||||||
|
|
||||||
self.matched_name = None
|
self.matched_name = None # type: Optional[SelectorPattern]
|
||||||
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
|
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
|
||||||
|
|
||||||
def get_name(self):
|
def get_name(self) -> str:
|
||||||
"""Get name."""
|
"""Get name."""
|
||||||
|
|
||||||
return self.matched_name.get_name()
|
return '' if self.matched_name is None else self.matched_name.get_name()
|
||||||
|
|
||||||
def match(self, selector, index, flags):
|
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
|
||||||
"""Match the selector."""
|
"""Match the selector."""
|
||||||
|
|
||||||
pseudo = None
|
pseudo = None
|
||||||
|
@ -345,7 +360,7 @@ class SpecialPseudoPattern(SelectorPattern):
|
||||||
return pseudo
|
return pseudo
|
||||||
|
|
||||||
|
|
||||||
class _Selector(object):
|
class _Selector:
|
||||||
"""
|
"""
|
||||||
Intermediate selector class.
|
Intermediate selector class.
|
||||||
|
|
||||||
|
@ -354,23 +369,23 @@ class _Selector(object):
|
||||||
the data in an object that can be pickled and hashed.
|
the data in an object that can be pickled and hashed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs: Any) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.tag = kwargs.get('tag', None)
|
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
|
||||||
self.ids = kwargs.get('ids', [])
|
self.ids = kwargs.get('ids', []) # type: list[str]
|
||||||
self.classes = kwargs.get('classes', [])
|
self.classes = kwargs.get('classes', []) # type: list[str]
|
||||||
self.attributes = kwargs.get('attributes', [])
|
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
|
||||||
self.nth = kwargs.get('nth', [])
|
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
|
||||||
self.selectors = kwargs.get('selectors', [])
|
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
|
||||||
self.relations = kwargs.get('relations', [])
|
self.relations = kwargs.get('relations', []) # type: list[_Selector]
|
||||||
self.rel_type = kwargs.get('rel_type', None)
|
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
|
||||||
self.contains = kwargs.get('contains', [])
|
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
|
||||||
self.lang = kwargs.get('lang', [])
|
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
|
||||||
self.flags = kwargs.get('flags', 0)
|
self.flags = kwargs.get('flags', 0) # type: int
|
||||||
self.no_match = kwargs.get('no_match', False)
|
self.no_match = kwargs.get('no_match', False) # type: bool
|
||||||
|
|
||||||
def _freeze_relations(self, relations):
|
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
|
||||||
"""Freeze relation."""
|
"""Freeze relation."""
|
||||||
|
|
||||||
if relations:
|
if relations:
|
||||||
|
@ -380,7 +395,7 @@ class _Selector(object):
|
||||||
else:
|
else:
|
||||||
return ct.SelectorList()
|
return ct.SelectorList()
|
||||||
|
|
||||||
def freeze(self):
|
def freeze(self) -> ct.Selector | ct.SelectorNull:
|
||||||
"""Freeze self."""
|
"""Freeze self."""
|
||||||
|
|
||||||
if self.no_match:
|
if self.no_match:
|
||||||
|
@ -400,7 +415,7 @@ class _Selector(object):
|
||||||
self.flags
|
self.flags
|
||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self): # pragma: no cover
|
def __str__(self) -> str: # pragma: no cover
|
||||||
"""String representation."""
|
"""String representation."""
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -414,14 +429,19 @@ class _Selector(object):
|
||||||
__repr__ = __str__
|
__repr__ = __str__
|
||||||
|
|
||||||
|
|
||||||
class CSSParser(object):
|
class CSSParser:
|
||||||
"""Parse CSS selectors."""
|
"""Parse CSS selectors."""
|
||||||
|
|
||||||
css_tokens = (
|
css_tokens = (
|
||||||
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
|
||||||
SpecialPseudoPattern(
|
SpecialPseudoPattern(
|
||||||
(
|
(
|
||||||
("pseudo_contains", (':contains',), PAT_PSEUDO_CONTAINS, SelectorPattern),
|
(
|
||||||
|
"pseudo_contains",
|
||||||
|
(':contains', ':-soup-contains', ':-soup-contains-own'),
|
||||||
|
PAT_PSEUDO_CONTAINS,
|
||||||
|
SelectorPattern
|
||||||
|
),
|
||||||
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
|
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
|
||||||
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
|
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
|
||||||
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
|
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
|
||||||
|
@ -439,7 +459,12 @@ class CSSParser(object):
|
||||||
SelectorPattern("combine", PAT_COMBINE)
|
SelectorPattern("combine", PAT_COMBINE)
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, selector, custom=None, flags=0):
|
def __init__(
|
||||||
|
self,
|
||||||
|
selector: str,
|
||||||
|
custom: Optional[dict[str, str | ct.SelectorList]] = None,
|
||||||
|
flags: int = 0
|
||||||
|
) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.pattern = selector.replace('\x00', '\ufffd')
|
self.pattern = selector.replace('\x00', '\ufffd')
|
||||||
|
@ -447,7 +472,7 @@ class CSSParser(object):
|
||||||
self.debug = self.flags & util.DEBUG
|
self.debug = self.flags & util.DEBUG
|
||||||
self.custom = {} if custom is None else custom
|
self.custom = {} if custom is None else custom
|
||||||
|
|
||||||
def parse_attribute_selector(self, sel, m, has_selector):
|
def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""Create attribute selector from the returned regex match."""
|
"""Create attribute selector from the returned regex match."""
|
||||||
|
|
||||||
inverse = False
|
inverse = False
|
||||||
|
@ -457,22 +482,22 @@ class CSSParser(object):
|
||||||
attr = css_unescape(m.group('attr_name'))
|
attr = css_unescape(m.group('attr_name'))
|
||||||
is_type = False
|
is_type = False
|
||||||
pattern2 = None
|
pattern2 = None
|
||||||
|
value = ''
|
||||||
|
|
||||||
if case:
|
if case:
|
||||||
flags = re.I if case == 'i' else 0
|
flags = (re.I if case == 'i' else 0) | re.DOTALL
|
||||||
elif util.lower(attr) == 'type':
|
elif util.lower(attr) == 'type':
|
||||||
flags = re.I
|
flags = re.I | re.DOTALL
|
||||||
is_type = True
|
is_type = True
|
||||||
else:
|
else:
|
||||||
flags = 0
|
flags = re.DOTALL
|
||||||
|
|
||||||
if op:
|
if op:
|
||||||
if m.group('value').startswith(('"', "'")):
|
if m.group('value').startswith(('"', "'")):
|
||||||
value = css_unescape(m.group('value')[1:-1], True)
|
value = css_unescape(m.group('value')[1:-1], True)
|
||||||
else:
|
else:
|
||||||
value = css_unescape(m.group('value'))
|
value = css_unescape(m.group('value'))
|
||||||
else:
|
|
||||||
value = None
|
|
||||||
if not op:
|
if not op:
|
||||||
# Attribute name
|
# Attribute name
|
||||||
pattern = None
|
pattern = None
|
||||||
|
@ -517,7 +542,7 @@ class CSSParser(object):
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_tag_pattern(self, sel, m, has_selector):
|
def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""Parse tag pattern from regex match."""
|
"""Parse tag pattern from regex match."""
|
||||||
|
|
||||||
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
|
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
|
||||||
|
@ -526,7 +551,7 @@ class CSSParser(object):
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_pseudo_class_custom(self, sel, m, has_selector):
|
def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""
|
"""
|
||||||
Parse custom pseudo class alias.
|
Parse custom pseudo class alias.
|
||||||
|
|
||||||
|
@ -538,13 +563,13 @@ class CSSParser(object):
|
||||||
selector = self.custom.get(pseudo)
|
selector = self.custom.get(pseudo)
|
||||||
if selector is None:
|
if selector is None:
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
"Undefined custom selector '{}' found at postion {}".format(pseudo, m.end(0)),
|
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
|
||||||
self.pattern,
|
self.pattern,
|
||||||
m.end(0)
|
m.end(0)
|
||||||
)
|
)
|
||||||
|
|
||||||
if not isinstance(selector, ct.SelectorList):
|
if not isinstance(selector, ct.SelectorList):
|
||||||
self.custom[pseudo] = None
|
del self.custom[pseudo]
|
||||||
selector = CSSParser(
|
selector = CSSParser(
|
||||||
selector, custom=self.custom, flags=self.flags
|
selector, custom=self.custom, flags=self.flags
|
||||||
).process_selectors(flags=FLG_PSEUDO)
|
).process_selectors(flags=FLG_PSEUDO)
|
||||||
|
@ -554,7 +579,14 @@ class CSSParser(object):
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_pseudo_class(self, sel, m, has_selector, iselector, is_html):
|
def parse_pseudo_class(
|
||||||
|
self,
|
||||||
|
sel: _Selector,
|
||||||
|
m: Match[str],
|
||||||
|
has_selector: bool,
|
||||||
|
iselector: Iterator[tuple[str, Match[str]]],
|
||||||
|
is_html: bool
|
||||||
|
) -> tuple[bool, bool]:
|
||||||
"""Parse pseudo class."""
|
"""Parse pseudo class."""
|
||||||
|
|
||||||
complex_pseudo = False
|
complex_pseudo = False
|
||||||
|
@ -642,7 +674,13 @@ class CSSParser(object):
|
||||||
|
|
||||||
return has_selector, is_html
|
return has_selector, is_html
|
||||||
|
|
||||||
def parse_pseudo_nth(self, sel, m, has_selector, iselector):
|
def parse_pseudo_nth(
|
||||||
|
self,
|
||||||
|
sel: _Selector,
|
||||||
|
m: Match[str],
|
||||||
|
has_selector: bool,
|
||||||
|
iselector: Iterator[tuple[str, Match[str]]]
|
||||||
|
) -> bool:
|
||||||
"""Parse `nth` pseudo."""
|
"""Parse `nth` pseudo."""
|
||||||
|
|
||||||
mdict = m.groupdict()
|
mdict = m.groupdict()
|
||||||
|
@ -663,29 +701,29 @@ class CSSParser(object):
|
||||||
s2 = 1
|
s2 = 1
|
||||||
var = True
|
var = True
|
||||||
else:
|
else:
|
||||||
nth_parts = RE_NTH.match(content)
|
nth_parts = cast(Match[str], RE_NTH.match(content))
|
||||||
s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
|
_s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
|
||||||
a = nth_parts.group('a')
|
a = nth_parts.group('a')
|
||||||
var = a.endswith('n')
|
var = a.endswith('n')
|
||||||
if a.startswith('n'):
|
if a.startswith('n'):
|
||||||
s1 += '1'
|
_s1 += '1'
|
||||||
elif var:
|
elif var:
|
||||||
s1 += a[:-1]
|
_s1 += a[:-1]
|
||||||
else:
|
else:
|
||||||
s1 += a
|
_s1 += a
|
||||||
s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
|
_s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
|
||||||
if nth_parts.group('b'):
|
if nth_parts.group('b'):
|
||||||
s2 += nth_parts.group('b')
|
_s2 += nth_parts.group('b')
|
||||||
else:
|
else:
|
||||||
s2 = '0'
|
_s2 = '0'
|
||||||
s1 = int(s1, 10)
|
s1 = int(_s1, 10)
|
||||||
s2 = int(s2, 10)
|
s2 = int(_s2, 10)
|
||||||
|
|
||||||
pseudo_sel = mdict['name']
|
pseudo_sel = mdict['name']
|
||||||
if postfix == '_child':
|
if postfix == '_child':
|
||||||
if m.group('of'):
|
if m.group('of'):
|
||||||
# Parse the rest of `of S`.
|
# Parse the rest of `of S`.
|
||||||
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
|
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN | FLG_FORGIVE)
|
||||||
else:
|
else:
|
||||||
# Use default `*|*` for `of S`.
|
# Use default `*|*` for `of S`.
|
||||||
nth_sel = CSS_NTH_OF_S_DEFAULT
|
nth_sel = CSS_NTH_OF_S_DEFAULT
|
||||||
|
@ -701,20 +739,38 @@ class CSSParser(object):
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
|
def parse_pseudo_open(
|
||||||
|
self,
|
||||||
|
sel: _Selector,
|
||||||
|
name: str,
|
||||||
|
has_selector: bool,
|
||||||
|
iselector: Iterator[tuple[str, Match[str]]],
|
||||||
|
index: int
|
||||||
|
) -> bool:
|
||||||
"""Parse pseudo with opening bracket."""
|
"""Parse pseudo with opening bracket."""
|
||||||
|
|
||||||
flags = FLG_PSEUDO | FLG_OPEN
|
flags = FLG_PSEUDO | FLG_OPEN
|
||||||
if name == ':not':
|
if name == ':not':
|
||||||
flags |= FLG_NOT
|
flags |= FLG_NOT
|
||||||
if name == ':has':
|
elif name == ':has':
|
||||||
flags |= FLG_RELATIVE
|
flags |= FLG_RELATIVE | FLG_FORGIVE
|
||||||
|
elif name in (':where', ':is'):
|
||||||
|
flags |= FLG_FORGIVE
|
||||||
|
|
||||||
sel.selectors.append(self.parse_selectors(iselector, index, flags))
|
sel.selectors.append(self.parse_selectors(iselector, index, flags))
|
||||||
has_selector = True
|
has_selector = True
|
||||||
|
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_has_combinator(self, sel, m, has_selector, selectors, rel_type, index):
|
def parse_has_combinator(
|
||||||
|
self,
|
||||||
|
sel: _Selector,
|
||||||
|
m: Match[str],
|
||||||
|
has_selector: bool,
|
||||||
|
selectors: list[_Selector],
|
||||||
|
rel_type: str,
|
||||||
|
index: int
|
||||||
|
) -> tuple[bool, _Selector, str]:
|
||||||
"""Parse combinator tokens."""
|
"""Parse combinator tokens."""
|
||||||
|
|
||||||
combinator = m.group('relation').strip()
|
combinator = m.group('relation').strip()
|
||||||
|
@ -723,12 +779,9 @@ class CSSParser(object):
|
||||||
if combinator == COMMA_COMBINATOR:
|
if combinator == COMMA_COMBINATOR:
|
||||||
if not has_selector:
|
if not has_selector:
|
||||||
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
|
# If we've not captured any selector parts, the comma is either at the beginning of the pattern
|
||||||
# or following another comma, both of which are unexpected. Commas must split selectors.
|
# or following another comma, both of which are unexpected. But shouldn't fail the pseudo-class.
|
||||||
raise SelectorSyntaxError(
|
sel.no_match = True
|
||||||
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
|
|
||||||
self.pattern,
|
|
||||||
index
|
|
||||||
)
|
|
||||||
sel.rel_type = rel_type
|
sel.rel_type = rel_type
|
||||||
selectors[-1].relations.append(sel)
|
selectors[-1].relations.append(sel)
|
||||||
rel_type = ":" + WS_COMBINATOR
|
rel_type = ":" + WS_COMBINATOR
|
||||||
|
@ -749,26 +802,44 @@ class CSSParser(object):
|
||||||
self.pattern,
|
self.pattern,
|
||||||
index
|
index
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set the leading combinator for the next selector.
|
# Set the leading combinator for the next selector.
|
||||||
rel_type = ':' + combinator
|
rel_type = ':' + combinator
|
||||||
sel = _Selector()
|
|
||||||
|
|
||||||
|
sel = _Selector()
|
||||||
has_selector = False
|
has_selector = False
|
||||||
return has_selector, sel, rel_type
|
return has_selector, sel, rel_type
|
||||||
|
|
||||||
def parse_combinator(self, sel, m, has_selector, selectors, relations, is_pseudo, index):
|
def parse_combinator(
|
||||||
|
self,
|
||||||
|
sel: _Selector,
|
||||||
|
m: Match[str],
|
||||||
|
has_selector: bool,
|
||||||
|
selectors: list[_Selector],
|
||||||
|
relations: list[_Selector],
|
||||||
|
is_pseudo: bool,
|
||||||
|
is_forgive: bool,
|
||||||
|
index: int
|
||||||
|
) -> tuple[bool, _Selector]:
|
||||||
"""Parse combinator tokens."""
|
"""Parse combinator tokens."""
|
||||||
|
|
||||||
combinator = m.group('relation').strip()
|
combinator = m.group('relation').strip()
|
||||||
if not combinator:
|
if not combinator:
|
||||||
combinator = WS_COMBINATOR
|
combinator = WS_COMBINATOR
|
||||||
if not has_selector:
|
if not has_selector:
|
||||||
|
if not is_forgive or combinator != COMMA_COMBINATOR:
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
"The combinator '{}' at postion {}, must have a selector before it".format(combinator, index),
|
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
|
||||||
self.pattern,
|
self.pattern,
|
||||||
index
|
index
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# If we are in a forgiving pseudo class, just make the selector a "no match"
|
||||||
|
if combinator == COMMA_COMBINATOR:
|
||||||
|
sel.no_match = True
|
||||||
|
del relations[:]
|
||||||
|
selectors.append(sel)
|
||||||
|
else:
|
||||||
if combinator == COMMA_COMBINATOR:
|
if combinator == COMMA_COMBINATOR:
|
||||||
if not sel.tag and not is_pseudo:
|
if not sel.tag and not is_pseudo:
|
||||||
# Implied `*`
|
# Implied `*`
|
||||||
|
@ -781,12 +852,13 @@ class CSSParser(object):
|
||||||
sel.rel_type = combinator
|
sel.rel_type = combinator
|
||||||
del relations[:]
|
del relations[:]
|
||||||
relations.append(sel)
|
relations.append(sel)
|
||||||
sel = _Selector()
|
|
||||||
|
|
||||||
|
sel = _Selector()
|
||||||
has_selector = False
|
has_selector = False
|
||||||
|
|
||||||
return has_selector, sel
|
return has_selector, sel
|
||||||
|
|
||||||
def parse_class_id(self, sel, m, has_selector):
|
def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""Parse HTML classes and ids."""
|
"""Parse HTML classes and ids."""
|
||||||
|
|
||||||
selector = m.group(0)
|
selector = m.group(0)
|
||||||
|
@ -797,10 +869,17 @@ class CSSParser(object):
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_pseudo_contains(self, sel, m, has_selector):
|
def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""Parse contains."""
|
"""Parse contains."""
|
||||||
|
|
||||||
values = m.group('values')
|
pseudo = util.lower(css_unescape(m.group('name')))
|
||||||
|
if pseudo == ":contains":
|
||||||
|
warnings.warn(
|
||||||
|
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
|
||||||
|
FutureWarning
|
||||||
|
)
|
||||||
|
contains_own = pseudo == ":-soup-contains-own"
|
||||||
|
values = css_unescape(m.group('values'))
|
||||||
patterns = []
|
patterns = []
|
||||||
for token in RE_VALUES.finditer(values):
|
for token in RE_VALUES.finditer(values):
|
||||||
if token.group('split'):
|
if token.group('split'):
|
||||||
|
@ -811,11 +890,11 @@ class CSSParser(object):
|
||||||
else:
|
else:
|
||||||
value = css_unescape(value)
|
value = css_unescape(value)
|
||||||
patterns.append(value)
|
patterns.append(value)
|
||||||
sel.contains.append(ct.SelectorContains(tuple(patterns)))
|
sel.contains.append(ct.SelectorContains(patterns, contains_own))
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_pseudo_lang(self, sel, m, has_selector):
|
def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""Parse pseudo language."""
|
"""Parse pseudo language."""
|
||||||
|
|
||||||
values = m.group('values')
|
values = m.group('values')
|
||||||
|
@ -836,7 +915,7 @@ class CSSParser(object):
|
||||||
|
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_pseudo_dir(self, sel, m, has_selector):
|
def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
|
||||||
"""Parse pseudo direction."""
|
"""Parse pseudo direction."""
|
||||||
|
|
||||||
value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
|
value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
|
||||||
|
@ -844,15 +923,23 @@ class CSSParser(object):
|
||||||
has_selector = True
|
has_selector = True
|
||||||
return has_selector
|
return has_selector
|
||||||
|
|
||||||
def parse_selectors(self, iselector, index=0, flags=0):
|
def parse_selectors(
|
||||||
|
self,
|
||||||
|
iselector: Iterator[tuple[str, Match[str]]],
|
||||||
|
index: int = 0,
|
||||||
|
flags: int = 0
|
||||||
|
) -> ct.SelectorList:
|
||||||
"""Parse selectors."""
|
"""Parse selectors."""
|
||||||
|
|
||||||
|
# Initialize important variables
|
||||||
sel = _Selector()
|
sel = _Selector()
|
||||||
selectors = []
|
selectors = []
|
||||||
has_selector = False
|
has_selector = False
|
||||||
closed = False
|
closed = False
|
||||||
relations = []
|
relations = [] # type: list[_Selector]
|
||||||
rel_type = ":" + WS_COMBINATOR
|
rel_type = ":" + WS_COMBINATOR
|
||||||
|
|
||||||
|
# Setup various flags
|
||||||
is_open = bool(flags & FLG_OPEN)
|
is_open = bool(flags & FLG_OPEN)
|
||||||
is_pseudo = bool(flags & FLG_PSEUDO)
|
is_pseudo = bool(flags & FLG_PSEUDO)
|
||||||
is_relative = bool(flags & FLG_RELATIVE)
|
is_relative = bool(flags & FLG_RELATIVE)
|
||||||
|
@ -863,7 +950,9 @@ class CSSParser(object):
|
||||||
is_in_range = bool(flags & FLG_IN_RANGE)
|
is_in_range = bool(flags & FLG_IN_RANGE)
|
||||||
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
|
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
|
||||||
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
|
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
|
||||||
|
is_forgive = bool(flags & FLG_FORGIVE)
|
||||||
|
|
||||||
|
# Print out useful debug stuff
|
||||||
if self.debug: # pragma: no cover
|
if self.debug: # pragma: no cover
|
||||||
if is_pseudo:
|
if is_pseudo:
|
||||||
print(' is_pseudo: True')
|
print(' is_pseudo: True')
|
||||||
|
@ -885,7 +974,10 @@ class CSSParser(object):
|
||||||
print(' is_out_of_range: True')
|
print(' is_out_of_range: True')
|
||||||
if is_placeholder_shown:
|
if is_placeholder_shown:
|
||||||
print(' is_placeholder_shown: True')
|
print(' is_placeholder_shown: True')
|
||||||
|
if is_forgive:
|
||||||
|
print(' is_forgive: True')
|
||||||
|
|
||||||
|
# The algorithm for relative selectors require an initial selector in the selector list
|
||||||
if is_relative:
|
if is_relative:
|
||||||
selectors.append(_Selector())
|
selectors.append(_Selector())
|
||||||
|
|
||||||
|
@ -914,17 +1006,19 @@ class CSSParser(object):
|
||||||
is_html = True
|
is_html = True
|
||||||
elif key == 'pseudo_close':
|
elif key == 'pseudo_close':
|
||||||
if not has_selector:
|
if not has_selector:
|
||||||
|
if not is_forgive:
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
"Expected a selector at postion {}".format(m.start(0)),
|
"Expected a selector at position {}".format(m.start(0)),
|
||||||
self.pattern,
|
self.pattern,
|
||||||
m.start(0)
|
m.start(0)
|
||||||
)
|
)
|
||||||
|
sel.no_match = True
|
||||||
if is_open:
|
if is_open:
|
||||||
closed = True
|
closed = True
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
"Unmatched pseudo-class close at postion {}".format(m.start(0)),
|
"Unmatched pseudo-class close at position {}".format(m.start(0)),
|
||||||
self.pattern,
|
self.pattern,
|
||||||
m.start(0)
|
m.start(0)
|
||||||
)
|
)
|
||||||
|
@ -935,7 +1029,7 @@ class CSSParser(object):
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
has_selector, sel = self.parse_combinator(
|
has_selector, sel = self.parse_combinator(
|
||||||
sel, m, has_selector, selectors, relations, is_pseudo, index
|
sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
|
||||||
)
|
)
|
||||||
elif key == 'attribute':
|
elif key == 'attribute':
|
||||||
has_selector = self.parse_attribute_selector(sel, m, has_selector)
|
has_selector = self.parse_attribute_selector(sel, m, has_selector)
|
||||||
|
@ -954,6 +1048,7 @@ class CSSParser(object):
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Handle selectors that are not closed
|
||||||
if is_open and not closed:
|
if is_open and not closed:
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
"Unclosed pseudo-class at position {}".format(index),
|
"Unclosed pseudo-class at position {}".format(index),
|
||||||
|
@ -961,6 +1056,7 @@ class CSSParser(object):
|
||||||
index
|
index
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Cleanup completed selector piece
|
||||||
if has_selector:
|
if has_selector:
|
||||||
if not sel.tag and not is_pseudo:
|
if not sel.tag and not is_pseudo:
|
||||||
# Implied `*`
|
# Implied `*`
|
||||||
|
@ -972,8 +1068,28 @@ class CSSParser(object):
|
||||||
sel.relations.extend(relations)
|
sel.relations.extend(relations)
|
||||||
del relations[:]
|
del relations[:]
|
||||||
selectors.append(sel)
|
selectors.append(sel)
|
||||||
|
|
||||||
|
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
|
||||||
|
elif is_forgive:
|
||||||
|
if is_relative:
|
||||||
|
# Handle relative selectors pseudo-classes with empty slots like `:has()`
|
||||||
|
if selectors and selectors[-1].rel_type is None and rel_type == ': ':
|
||||||
|
sel.rel_type = rel_type
|
||||||
|
sel.no_match = True
|
||||||
|
selectors[-1].relations.append(sel)
|
||||||
|
has_selector = True
|
||||||
else:
|
else:
|
||||||
|
# Handle normal pseudo-classes with empty slots
|
||||||
|
if not selectors or not relations:
|
||||||
|
# Others like `:is()` etc.
|
||||||
|
sel.no_match = True
|
||||||
|
del relations[:]
|
||||||
|
selectors.append(sel)
|
||||||
|
has_selector = True
|
||||||
|
|
||||||
|
if not has_selector:
|
||||||
# We will always need to finish a selector when `:has()` is used as it leads with combining.
|
# We will always need to finish a selector when `:has()` is used as it leads with combining.
|
||||||
|
# May apply to others as well.
|
||||||
raise SelectorSyntaxError(
|
raise SelectorSyntaxError(
|
||||||
'Expected a selector at position {}'.format(index),
|
'Expected a selector at position {}'.format(index),
|
||||||
self.pattern,
|
self.pattern,
|
||||||
|
@ -994,9 +1110,10 @@ class CSSParser(object):
|
||||||
if is_placeholder_shown:
|
if is_placeholder_shown:
|
||||||
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
|
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
|
||||||
|
|
||||||
|
# Return selector list
|
||||||
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
|
||||||
|
|
||||||
def selector_iter(self, pattern):
|
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
|
||||||
"""Iterate selector tokens."""
|
"""Iterate selector tokens."""
|
||||||
|
|
||||||
# Ignore whitespace and comments at start and end of pattern
|
# Ignore whitespace and comments at start and end of pattern
|
||||||
|
@ -1037,7 +1154,7 @@ class CSSParser(object):
|
||||||
if self.debug: # pragma: no cover
|
if self.debug: # pragma: no cover
|
||||||
print('## END PARSING')
|
print('## END PARSING')
|
||||||
|
|
||||||
def process_selectors(self, index=0, flags=0):
|
def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
|
||||||
"""Process selectors."""
|
"""Process selectors."""
|
||||||
|
|
||||||
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
|
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
|
||||||
|
@ -1048,7 +1165,7 @@ class CSSParser(object):
|
||||||
|
|
||||||
# CSS pattern for `:link` and `:any-link`
|
# CSS pattern for `:link` and `:any-link`
|
||||||
CSS_LINK = CSSParser(
|
CSS_LINK = CSSParser(
|
||||||
'html|*:is(a, area, link)[href]'
|
'html|*:is(a, area)[href]'
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:checked`
|
# CSS pattern for `:checked`
|
||||||
CSS_CHECKED = CSSParser(
|
CSS_CHECKED = CSSParser(
|
||||||
|
@ -1079,23 +1196,23 @@ CSS_INDETERMINATE = CSSParser(
|
||||||
This pattern must be at the end.
|
This pattern must be at the end.
|
||||||
Special logic is applied to the last selector.
|
Special logic is applied to the last selector.
|
||||||
*/
|
*/
|
||||||
html|input[type="radio"][name][name!='']:not([checked])
|
html|input[type="radio"][name]:not([name='']):not([checked])
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
|
||||||
# CSS pattern for `:disabled`
|
# CSS pattern for `:disabled`
|
||||||
CSS_DISABLED = CSSParser(
|
CSS_DISABLED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
|
||||||
html|optgroup[disabled] > html|option,
|
html|optgroup[disabled] > html|option,
|
||||||
html|fieldset[disabled] > html|*:is(input[type!=hidden], button, select, textarea, fieldset),
|
html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
|
||||||
html|fieldset[disabled] >
|
html|fieldset[disabled] >
|
||||||
html|*:not(legend:nth-of-type(1)) html|*:is(input[type!=hidden], button, select, textarea, fieldset)
|
html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:enabled`
|
# CSS pattern for `:enabled`
|
||||||
CSS_ENABLED = CSSParser(
|
CSS_ENABLED = CSSParser(
|
||||||
'''
|
'''
|
||||||
html|*:is(input[type!=hidden], button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
|
||||||
# CSS pattern for `:required`
|
# CSS pattern for `:required`
|
||||||
|
@ -1119,8 +1236,8 @@ CSS_PLACEHOLDER_SHOWN = CSSParser(
|
||||||
[type=email],
|
[type=email],
|
||||||
[type=password],
|
[type=password],
|
||||||
[type=number]
|
[type=number]
|
||||||
)[placeholder][placeholder!='']:is(:not([value]), [value=""]),
|
)[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
|
||||||
html|textarea[placeholder][placeholder!='']
|
html|textarea[placeholder]:not([placeholder=''])
|
||||||
'''
|
'''
|
||||||
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
|
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
|
||||||
# CSS pattern default for `:nth-child` "of S" feature
|
# CSS pattern default for `:nth-child` "of S" feature
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
"""CSS selector structure items."""
|
"""CSS selector structure items."""
|
||||||
|
from __future__ import annotations
|
||||||
import copyreg
|
import copyreg
|
||||||
from collections.abc import Hashable, Mapping
|
from .pretty import pretty
|
||||||
|
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
'Selector',
|
'Selector',
|
||||||
|
@ -29,12 +31,14 @@ SEL_DEFINED = 0x200
|
||||||
SEL_PLACEHOLDER_SHOWN = 0x400
|
SEL_PLACEHOLDER_SHOWN = 0x400
|
||||||
|
|
||||||
|
|
||||||
class Immutable(object):
|
class Immutable:
|
||||||
"""Immutable."""
|
"""Immutable."""
|
||||||
|
|
||||||
__slots__ = ('_hash',)
|
__slots__: tuple[str, ...] = ('_hash',)
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
_hash: int
|
||||||
|
|
||||||
|
def __init__(self, **kwargs: Any) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
temp = []
|
temp = []
|
||||||
|
@ -45,12 +49,12 @@ class Immutable(object):
|
||||||
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
|
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __base__(cls):
|
def __base__(cls) -> "type[Immutable]":
|
||||||
"""Get base class."""
|
"""Get base class."""
|
||||||
|
|
||||||
return cls
|
return cls
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other: Any) -> bool:
|
||||||
"""Equal."""
|
"""Equal."""
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -58,7 +62,7 @@ class Immutable(object):
|
||||||
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
|
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
|
||||||
)
|
)
|
||||||
|
|
||||||
def __ne__(self, other):
|
def __ne__(self, other: Any) -> bool:
|
||||||
"""Equal."""
|
"""Equal."""
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -66,63 +70,74 @@ class Immutable(object):
|
||||||
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
|
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
|
||||||
)
|
)
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self) -> int:
|
||||||
"""Hash."""
|
"""Hash."""
|
||||||
|
|
||||||
return self._hash
|
return self._hash
|
||||||
|
|
||||||
def __setattr__(self, name, value):
|
def __setattr__(self, name: str, value: Any) -> None:
|
||||||
"""Prevent mutability."""
|
"""Prevent mutability."""
|
||||||
|
|
||||||
raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
|
raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
|
||||||
|
|
||||||
def __repr__(self): # pragma: no cover
|
def __repr__(self) -> str: # pragma: no cover
|
||||||
"""Representation."""
|
"""Representation."""
|
||||||
|
|
||||||
return "{}({})".format(
|
return "{}({})".format(
|
||||||
self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
|
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
|
||||||
)
|
)
|
||||||
|
|
||||||
__str__ = __repr__
|
__str__ = __repr__
|
||||||
|
|
||||||
|
def pretty(self) -> None: # pragma: no cover
|
||||||
|
"""Pretty print."""
|
||||||
|
|
||||||
class ImmutableDict(Mapping):
|
print(pretty(self))
|
||||||
|
|
||||||
|
|
||||||
|
class ImmutableDict(Mapping[Any, Any]):
|
||||||
"""Hashable, immutable dictionary."""
|
"""Hashable, immutable dictionary."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(
|
||||||
|
self,
|
||||||
|
arg: dict[Any, Any] | Iterable[tuple[Any, Any]]
|
||||||
|
) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
arg = args[0] if args else kwargs
|
self._validate(arg)
|
||||||
is_dict = isinstance(arg, dict)
|
self._d = dict(arg)
|
||||||
if (
|
|
||||||
is_dict and not all([isinstance(v, Hashable) for v in arg.values()]) or
|
|
||||||
not is_dict and not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg])
|
|
||||||
):
|
|
||||||
raise TypeError('All values must be hashable')
|
|
||||||
|
|
||||||
self._d = dict(*args, **kwargs)
|
|
||||||
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
|
||||||
|
|
||||||
def __iter__(self):
|
def _validate(self, arg: dict[Any, Any] | Iterable[tuple[Any, Any]]) -> None:
|
||||||
|
"""Validate arguments."""
|
||||||
|
|
||||||
|
if isinstance(arg, dict):
|
||||||
|
if not all([isinstance(v, Hashable) for v in arg.values()]):
|
||||||
|
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
|
||||||
|
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]):
|
||||||
|
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
|
||||||
|
|
||||||
|
def __iter__(self) -> Iterator[Any]:
|
||||||
"""Iterator."""
|
"""Iterator."""
|
||||||
|
|
||||||
return iter(self._d)
|
return iter(self._d)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self) -> int:
|
||||||
"""Length."""
|
"""Length."""
|
||||||
|
|
||||||
return len(self._d)
|
return len(self._d)
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key: Any) -> Any:
|
||||||
"""Get item: `namespace['key']`."""
|
"""Get item: `namespace['key']`."""
|
||||||
|
|
||||||
return self._d[key]
|
return self._d[key]
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self) -> int:
|
||||||
"""Hash."""
|
"""Hash."""
|
||||||
|
|
||||||
return self._hash
|
return self._hash
|
||||||
|
|
||||||
def __repr__(self): # pragma: no cover
|
def __repr__(self) -> str: # pragma: no cover
|
||||||
"""Representation."""
|
"""Representation."""
|
||||||
|
|
||||||
return "{!r}".format(self._d)
|
return "{!r}".format(self._d)
|
||||||
|
@ -133,39 +148,37 @@ class ImmutableDict(Mapping):
|
||||||
class Namespaces(ImmutableDict):
|
class Namespaces(ImmutableDict):
|
||||||
"""Namespaces."""
|
"""Namespaces."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
# If there are arguments, check the first index.
|
super().__init__(arg)
|
||||||
# `super` should fail if the user gave multiple arguments,
|
|
||||||
# so don't bother checking that.
|
|
||||||
arg = args[0] if args else kwargs
|
|
||||||
is_dict = isinstance(arg, dict)
|
|
||||||
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
|
||||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
|
||||||
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
|
||||||
raise TypeError('Namespace keys and values must be Unicode strings')
|
|
||||||
|
|
||||||
super(Namespaces, self).__init__(*args, **kwargs)
|
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
|
"""Validate arguments."""
|
||||||
|
|
||||||
|
if isinstance(arg, dict):
|
||||||
|
if not all([isinstance(v, str) for v in arg.values()]):
|
||||||
|
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
|
||||||
|
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
||||||
|
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
|
||||||
|
|
||||||
|
|
||||||
class CustomSelectors(ImmutableDict):
|
class CustomSelectors(ImmutableDict):
|
||||||
"""Custom selectors."""
|
"""Custom selectors."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
# If there are arguments, check the first index.
|
super().__init__(arg)
|
||||||
# `super` should fail if the user gave multiple arguments,
|
|
||||||
# so don't bother checking that.
|
|
||||||
arg = args[0] if args else kwargs
|
|
||||||
is_dict = isinstance(arg, dict)
|
|
||||||
if is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg.items()]):
|
|
||||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
|
||||||
elif not is_dict and not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
|
||||||
raise TypeError('CustomSelectors keys and values must be Unicode strings')
|
|
||||||
|
|
||||||
super(CustomSelectors, self).__init__(*args, **kwargs)
|
def _validate(self, arg: dict[str, str] | Iterable[tuple[str, str]]) -> None:
|
||||||
|
"""Validate arguments."""
|
||||||
|
|
||||||
|
if isinstance(arg, dict):
|
||||||
|
if not all([isinstance(v, str) for v in arg.values()]):
|
||||||
|
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
|
||||||
|
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
|
||||||
|
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
|
||||||
|
|
||||||
|
|
||||||
class Selector(Immutable):
|
class Selector(Immutable):
|
||||||
|
@ -176,13 +189,35 @@ class Selector(Immutable):
|
||||||
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
|
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tag: Optional[SelectorTag]
|
||||||
|
ids: tuple[str, ...]
|
||||||
|
classes: tuple[str, ...]
|
||||||
|
attributes: tuple[SelectorAttribute, ...]
|
||||||
|
nth: tuple[SelectorNth, ...]
|
||||||
|
selectors: tuple[SelectorList, ...]
|
||||||
|
relation: SelectorList
|
||||||
|
rel_type: Optional[str]
|
||||||
|
contains: tuple[SelectorContains, ...]
|
||||||
|
lang: tuple[SelectorLang, ...]
|
||||||
|
flags: int
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, tag, ids, classes, attributes, nth, selectors,
|
self,
|
||||||
relation, rel_type, contains, lang, flags
|
tag: Optional[SelectorTag],
|
||||||
|
ids: tuple[str, ...],
|
||||||
|
classes: tuple[str, ...],
|
||||||
|
attributes: tuple[SelectorAttribute, ...],
|
||||||
|
nth: tuple[SelectorNth, ...],
|
||||||
|
selectors: tuple[SelectorList, ...],
|
||||||
|
relation: SelectorList,
|
||||||
|
rel_type: Optional[str],
|
||||||
|
contains: tuple[SelectorContains, ...],
|
||||||
|
lang: tuple[SelectorLang, ...],
|
||||||
|
flags: int
|
||||||
):
|
):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(Selector, self).__init__(
|
super().__init__(
|
||||||
tag=tag,
|
tag=tag,
|
||||||
ids=ids,
|
ids=ids,
|
||||||
classes=classes,
|
classes=classes,
|
||||||
|
@ -200,10 +235,10 @@ class Selector(Immutable):
|
||||||
class SelectorNull(Immutable):
|
class SelectorNull(Immutable):
|
||||||
"""Null Selector."""
|
"""Null Selector."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorNull, self).__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
|
||||||
class SelectorTag(Immutable):
|
class SelectorTag(Immutable):
|
||||||
|
@ -211,13 +246,13 @@ class SelectorTag(Immutable):
|
||||||
|
|
||||||
__slots__ = ("name", "prefix", "_hash")
|
__slots__ = ("name", "prefix", "_hash")
|
||||||
|
|
||||||
def __init__(self, name, prefix):
|
name: str
|
||||||
|
prefix: Optional[str]
|
||||||
|
|
||||||
|
def __init__(self, name: str, prefix: Optional[str]) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorTag, self).__init__(
|
super().__init__(name=name, prefix=prefix)
|
||||||
name=name,
|
|
||||||
prefix=prefix
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SelectorAttribute(Immutable):
|
class SelectorAttribute(Immutable):
|
||||||
|
@ -225,10 +260,21 @@ class SelectorAttribute(Immutable):
|
||||||
|
|
||||||
__slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")
|
__slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")
|
||||||
|
|
||||||
def __init__(self, attribute, prefix, pattern, xml_type_pattern):
|
attribute: str
|
||||||
|
prefix: str
|
||||||
|
pattern: Optional[Pattern[str]]
|
||||||
|
xml_type_pattern: Optional[Pattern[str]]
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
attribute: str,
|
||||||
|
prefix: str,
|
||||||
|
pattern: Optional[Pattern[str]],
|
||||||
|
xml_type_pattern: Optional[Pattern[str]]
|
||||||
|
) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorAttribute, self).__init__(
|
super().__init__(
|
||||||
attribute=attribute,
|
attribute=attribute,
|
||||||
prefix=prefix,
|
prefix=prefix,
|
||||||
pattern=pattern,
|
pattern=pattern,
|
||||||
|
@ -239,14 +285,15 @@ class SelectorAttribute(Immutable):
|
||||||
class SelectorContains(Immutable):
|
class SelectorContains(Immutable):
|
||||||
"""Selector contains rule."""
|
"""Selector contains rule."""
|
||||||
|
|
||||||
__slots__ = ("text", "_hash")
|
__slots__ = ("text", "own", "_hash")
|
||||||
|
|
||||||
def __init__(self, text):
|
text: tuple[str, ...]
|
||||||
|
own: bool
|
||||||
|
|
||||||
|
def __init__(self, text: Iterable[str], own: bool) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorContains, self).__init__(
|
super().__init__(text=tuple(text), own=own)
|
||||||
text=text
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SelectorNth(Immutable):
|
class SelectorNth(Immutable):
|
||||||
|
@ -254,10 +301,17 @@ class SelectorNth(Immutable):
|
||||||
|
|
||||||
__slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")
|
__slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")
|
||||||
|
|
||||||
def __init__(self, a, n, b, of_type, last, selectors):
|
a: int
|
||||||
|
n: bool
|
||||||
|
b: int
|
||||||
|
of_type: bool
|
||||||
|
last: bool
|
||||||
|
selectors: SelectorList
|
||||||
|
|
||||||
|
def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: SelectorList) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorNth, self).__init__(
|
super().__init__(
|
||||||
a=a,
|
a=a,
|
||||||
n=n,
|
n=n,
|
||||||
b=b,
|
b=b,
|
||||||
|
@ -272,24 +326,24 @@ class SelectorLang(Immutable):
|
||||||
|
|
||||||
__slots__ = ("languages", "_hash",)
|
__slots__ = ("languages", "_hash",)
|
||||||
|
|
||||||
def __init__(self, languages):
|
languages: tuple[str, ...]
|
||||||
|
|
||||||
|
def __init__(self, languages: Iterable[str]):
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorLang, self).__init__(
|
super().__init__(languages=tuple(languages))
|
||||||
languages=tuple(languages)
|
|
||||||
)
|
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self) -> Iterator[str]:
|
||||||
"""Iterator."""
|
"""Iterator."""
|
||||||
|
|
||||||
return iter(self.languages)
|
return iter(self.languages)
|
||||||
|
|
||||||
def __len__(self): # pragma: no cover
|
def __len__(self) -> int: # pragma: no cover
|
||||||
"""Length."""
|
"""Length."""
|
||||||
|
|
||||||
return len(self.languages)
|
return len(self.languages)
|
||||||
|
|
||||||
def __getitem__(self, index): # pragma: no cover
|
def __getitem__(self, index: int) -> str: # pragma: no cover
|
||||||
"""Get item."""
|
"""Get item."""
|
||||||
|
|
||||||
return self.languages[index]
|
return self.languages[index]
|
||||||
|
@ -300,36 +354,45 @@ class SelectorList(Immutable):
|
||||||
|
|
||||||
__slots__ = ("selectors", "is_not", "is_html", "_hash")
|
__slots__ = ("selectors", "is_not", "is_html", "_hash")
|
||||||
|
|
||||||
def __init__(self, selectors=tuple(), is_not=False, is_html=False):
|
selectors: tuple[Selector | SelectorNull, ...]
|
||||||
|
is_not: bool
|
||||||
|
is_html: bool
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
|
||||||
|
is_not: bool = False,
|
||||||
|
is_html: bool = False
|
||||||
|
) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
super(SelectorList, self).__init__(
|
super().__init__(
|
||||||
selectors=tuple(selectors),
|
selectors=tuple(selectors) if selectors is not None else tuple(),
|
||||||
is_not=is_not,
|
is_not=is_not,
|
||||||
is_html=is_html
|
is_html=is_html
|
||||||
)
|
)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self) -> Iterator[Selector | SelectorNull]:
|
||||||
"""Iterator."""
|
"""Iterator."""
|
||||||
|
|
||||||
return iter(self.selectors)
|
return iter(self.selectors)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self) -> int:
|
||||||
"""Length."""
|
"""Length."""
|
||||||
|
|
||||||
return len(self.selectors)
|
return len(self.selectors)
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index: int) -> Selector | SelectorNull:
|
||||||
"""Get item."""
|
"""Get item."""
|
||||||
|
|
||||||
return self.selectors[index]
|
return self.selectors[index]
|
||||||
|
|
||||||
|
|
||||||
def _pickle(p):
|
def _pickle(p: Any) -> Any:
|
||||||
return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]])
|
return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]])
|
||||||
|
|
||||||
|
|
||||||
def pickle_register(obj):
|
def pickle_register(obj: Any) -> None:
|
||||||
"""Allow object to be pickled."""
|
"""Allow object to be pickled."""
|
||||||
|
|
||||||
copyreg.pickle(obj, _pickle)
|
copyreg.pickle(obj, _pickle)
|
||||||
|
|
138
lib/soupsieve/pretty.py
Normal file
138
lib/soupsieve/pretty.py
Normal file
|
@ -0,0 +1,138 @@
|
||||||
|
"""
|
||||||
|
Format a pretty string of a `SoupSieve` object for easy debugging.
|
||||||
|
|
||||||
|
This won't necessarily support all types and such, and definitely
|
||||||
|
won't support custom outputs.
|
||||||
|
|
||||||
|
It is mainly geared towards our types as the `SelectorList`
|
||||||
|
object is a beast to look at without some indentation and newlines.
|
||||||
|
The format and various output types are fairly well known (though it
|
||||||
|
hasn't been tested extensively to make sure we aren't missing corners).
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
>>> import soupsieve as sv
|
||||||
|
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
|
||||||
|
SelectorList(
|
||||||
|
selectors=(
|
||||||
|
Selector(
|
||||||
|
tag=SelectorTag(
|
||||||
|
name='that',
|
||||||
|
prefix=None),
|
||||||
|
ids=(),
|
||||||
|
classes=(
|
||||||
|
'class',
|
||||||
|
),
|
||||||
|
attributes=(
|
||||||
|
SelectorAttribute(
|
||||||
|
attribute='name',
|
||||||
|
prefix='',
|
||||||
|
pattern=re.compile(
|
||||||
|
'^value$'),
|
||||||
|
xml_type_pattern=None),
|
||||||
|
),
|
||||||
|
nth=(),
|
||||||
|
selectors=(),
|
||||||
|
relation=SelectorList(
|
||||||
|
selectors=(
|
||||||
|
Selector(
|
||||||
|
tag=SelectorTag(
|
||||||
|
name='this',
|
||||||
|
prefix=None),
|
||||||
|
ids=(),
|
||||||
|
classes=(),
|
||||||
|
attributes=(),
|
||||||
|
nth=(),
|
||||||
|
selectors=(),
|
||||||
|
relation=SelectorList(
|
||||||
|
selectors=(),
|
||||||
|
is_not=False,
|
||||||
|
is_html=False),
|
||||||
|
rel_type='>',
|
||||||
|
contains=(),
|
||||||
|
lang=(),
|
||||||
|
flags=0),
|
||||||
|
),
|
||||||
|
is_not=False,
|
||||||
|
is_html=False),
|
||||||
|
rel_type=None,
|
||||||
|
contains=(),
|
||||||
|
lang=(),
|
||||||
|
flags=0),
|
||||||
|
),
|
||||||
|
is_not=False,
|
||||||
|
is_html=False)
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Token patterns used to scan the `str()` form of an object.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
RE_INT = re.compile(r'\d+')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

# Patterns are tried in insertion order and the first match wins, so the
# specific tokens (`class`, `param`, `empty`) must stay ahead of the generic
# ones (`tstrt`, `kword`, ...) that would otherwise match a prefix of them.
TOKENS = {
    'class': RE_CLASS,
    'param': RE_PARAM,
    'empty': RE_EMPTY,
    'lstrt': RE_LSTRT,
    'dstrt': RE_DSTRT,
    'tstrt': RE_TSTRT,
    'lend': RE_LEND,
    'dend': RE_DEND,
    'tend': RE_TEND,
    'sqstr': RE_SQSTR,
    'sep': RE_SEP,
    'dsep': RE_DSEP,
    'int': RE_INT,
    'kword': RE_KWORD,
    'dqstr': RE_DQSTR
}


def pretty(obj: Any) -> str:  # pragma: no cover
    """
    Make the object output string pretty.

    `str(obj)` is scanned left to right with the patterns in `TOKENS`.
    Opening tokens (`Name(`, `[`, `{`, `(`) start a new line indented four
    more spaces, closing tokens drop the indent by four, `,` starts a new
    line at the current indent, and `:` is followed by a single space.

    Any character that no token recognizes (for example a one-letter
    parameter name such as `a=`, or the `-` of a negative number) is copied
    to the output unchanged, so the scan always advances and terminates.

    Returns the reformatted string.
    """

    sel = str(obj)
    index = 0
    end = len(sel)
    indent = 0
    output = []  # type: list[str]

    while index < end:
        for name, pattern in TOKENS.items():
            m = pattern.match(sel, index)
            if m:
                break
        else:
            # Nothing matched at this position: pass the character through
            # verbatim instead of retrying the same index forever.
            output.append(sel[index])
            index += 1
            continue

        # Every pattern consumes at least one character, so this advances.
        index = m.end(0)
        if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
            indent += 4
            output.append('{}\n{}'.format(m.group(0), " " * indent))
        elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
            output.append(m.group(0))
        elif name in ('lend', 'dend', 'tend'):
            indent -= 4
            output.append(m.group(0))
        elif name in ('sep',):
            # Group 1 is the bare comma; surrounding whitespace is dropped.
            output.append('{}\n{}'.format(m.group(1), " " * indent))
        elif name in ('dsep',):
            output.append('{} '.format(m.group(1)))

    return ''.join(output)
|
|
@ -1,7 +1,9 @@
|
||||||
"""Utility."""
|
"""Utility."""
|
||||||
|
from __future__ import annotations
|
||||||
from functools import wraps, lru_cache
|
from functools import wraps, lru_cache
|
||||||
import warnings
|
import warnings
|
||||||
import re
|
import re
|
||||||
|
from typing import Callable, Any, Optional
|
||||||
|
|
||||||
DEBUG = 0x00001
|
DEBUG = 0x00001
|
||||||
|
|
||||||
|
@ -12,7 +14,7 @@ UC_Z = ord('Z')
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=512)
|
@lru_cache(maxsize=512)
|
||||||
def lower(string):
|
def lower(string: str) -> str:
|
||||||
"""Lower."""
|
"""Lower."""
|
||||||
|
|
||||||
new_string = []
|
new_string = []
|
||||||
|
@ -25,7 +27,7 @@ def lower(string):
|
||||||
class SelectorSyntaxError(Exception):
|
class SelectorSyntaxError(Exception):
|
||||||
"""Syntax error in a CSS selector."""
|
"""Syntax error in a CSS selector."""
|
||||||
|
|
||||||
def __init__(self, msg, pattern=None, index=None):
|
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
|
||||||
"""Initialize."""
|
"""Initialize."""
|
||||||
|
|
||||||
self.line = None
|
self.line = None
|
||||||
|
@ -37,30 +39,34 @@ class SelectorSyntaxError(Exception):
|
||||||
self.context, self.line, self.col = get_pattern_context(pattern, index)
|
self.context, self.line, self.col = get_pattern_context(pattern, index)
|
||||||
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
|
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
|
||||||
|
|
||||||
super(SelectorSyntaxError, self).__init__(msg)
|
super().__init__(msg)
|
||||||
|
|
||||||
|
|
||||||
def deprecated(message, stacklevel=2): # pragma: no cover
|
def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]: # pragma: no cover
|
||||||
"""
|
"""
|
||||||
Raise a `DeprecationWarning` when wrapped function/method is called.
|
Raise a `DeprecationWarning` when wrapped function/method is called.
|
||||||
|
|
||||||
Borrowed from https://stackoverflow.com/a/48632082/866026
|
Usage:
|
||||||
|
|
||||||
|
@deprecated("This method will be removed in version X; use Y instead.")
|
||||||
|
def some_method():
|
||||||
|
pass
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _decorator(func):
|
def _wrapper(func: Callable[..., Any]) -> Callable[..., Any]:
|
||||||
@wraps(func)
|
@wraps(func)
|
||||||
def _func(*args, **kwargs):
|
def _deprecated_func(*args: Any, **kwargs: Any) -> Any:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"'{}' is deprecated. {}".format(func.__name__, message),
|
f"'{func.__name__}' is deprecated. {message}",
|
||||||
category=DeprecationWarning,
|
category=DeprecationWarning,
|
||||||
stacklevel=stacklevel
|
stacklevel=stacklevel
|
||||||
)
|
)
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
return _func
|
return _deprecated_func
|
||||||
return _decorator
|
return _wrapper
|
||||||
|
|
||||||
|
|
||||||
def warn_deprecated(message, stacklevel=2): # pragma: no cover
|
def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no cover
|
||||||
"""Warn deprecated."""
|
"""Warn deprecated."""
|
||||||
|
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
|
@ -70,14 +76,15 @@ def warn_deprecated(message, stacklevel=2): # pragma: no cover
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_pattern_context(pattern, index):
|
def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
|
||||||
"""Get the pattern context."""
|
"""Get the pattern context."""
|
||||||
|
|
||||||
last = 0
|
last = 0
|
||||||
current_line = 1
|
current_line = 1
|
||||||
col = 1
|
col = 1
|
||||||
text = []
|
text = [] # type: list[str]
|
||||||
line = 1
|
line = 1
|
||||||
|
offset = None # type: Optional[int]
|
||||||
|
|
||||||
# Split pattern by newline and handle the text before the newline
|
# Split pattern by newline and handle the text before the newline
|
||||||
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
|
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
|
||||||
|
|
Loading…
Reference in a new issue