Merge branch 'feature/UpdateBSoup' into dev

This commit is contained in:
JackDandy 2023-10-07 21:41:48 +01:00
commit ecd70f546f
10 changed files with 143 additions and 157 deletions

View file

@ -1,4 +1,10 @@
### 3.30.1 (2023-10-02 22:50:00 UTC) ### 3.31.0 (2023-1x-xx xx:xx:00 UTC)
* Update Beautiful Soup 4.12.2 to 4.12.2 (30c58a1)
* Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495)
### 3.30.1 (2023-10-02 22:50:00 UTC)
* Change allow Python 3.12.0 and 3.11.6 * Change allow Python 3.12.0 and 3.11.6

View file

@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup parser.soup = self.soup
try: try:
parser.feed(markup) parser.feed(markup)
parser.close()
except AssertionError as e: except AssertionError as e:
# html.parser raises AssertionError in rare cases to # html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially # indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration. # when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e) raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = [] parser.already_closed_empty_element = []

View file

@ -1356,7 +1356,7 @@ class Tag(PageElement):
This is the first step in the deepcopy process. This is the first step in the deepcopy process.
""" """
clone = type(self)( clone = type(self)(
None, self.builder, self.name, self.namespace, None, None, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml, self.prefix, self.attrs, is_xml=self._is_xml,
sourceline=self.sourceline, sourcepos=self.sourcepos, sourceline=self.sourceline, sourcepos=self.sourcepos,
can_be_empty_element=self.can_be_empty_element, can_be_empty_element=self.can_be_empty_element,
@ -1845,6 +1845,11 @@ class Tag(PageElement):
return space_before + s + space_after return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening): def _format_tag(self, eventual_encoding, formatter, opening):
if self.hidden:
# A hidden tag is invisible, although its contents
# are visible.
return ''
# A tag starts with the < character (see below). # A tag starts with the < character (see below).
# Then the / character, if this is a closing tag. # Then the / character, if this is a closing tag.

View file

@ -78,13 +78,13 @@ def purge() -> None:
def closest( def closest(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> bs4.Tag:
"""Match closest ancestor.""" """Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag) return compile(select, namespaces, flags, **kwargs).closest(tag)
@ -92,7 +92,7 @@ def closest(
def match( def match(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
@ -106,13 +106,13 @@ def match(
def filter( # noqa: A001 def filter( # noqa: A001
select: str, select: str,
iterable: Iterable['bs4.Tag'], iterable: Iterable[bs4.Tag],
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> list['bs4.Tag']: ) -> list[bs4.Tag]:
"""Filter list of nodes.""" """Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable) return compile(select, namespaces, flags, **kwargs).filter(iterable)
@ -120,13 +120,13 @@ def filter( # noqa: A001
def select_one( def select_one(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> 'bs4.Tag': ) -> bs4.Tag:
"""Select a single tag.""" """Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag) return compile(select, namespaces, flags, **kwargs).select_one(tag)
@ -134,14 +134,14 @@ def select_one(
def select( def select(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> list['bs4.Tag']: ) -> list[bs4.Tag]:
"""Select the specified tags.""" """Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit) return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@ -149,18 +149,17 @@ def select(
def iselect( def iselect(
select: str, select: str,
tag: 'bs4.Tag', tag: bs4.Tag,
namespaces: dict[str, str] | None = None, namespaces: dict[str, str] | None = None,
limit: int = 0, limit: int = 0,
flags: int = 0, flags: int = 0,
*, *,
custom: dict[str, str] | None = None, custom: dict[str, str] | None = None,
**kwargs: Any **kwargs: Any
) -> Iterator['bs4.Tag']: ) -> Iterator[bs4.Tag]:
"""Iterate the specified tags.""" """Iterate the specified tags."""
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit): yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
yield el
def escape(ident: str) -> str: def escape(ident: str) -> str:

View file

@ -93,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
raise ValueError("All version parts except 'release' should be integers.") raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP: if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release)) raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases). # Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final": if ".dev-candidate" < release < "final":
@ -118,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
elif dev: elif dev:
raise ValueError("Version is not a development release.") raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev) return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool: def _is_pre(self) -> bool:
"""Is prerelease.""" """Is prerelease."""
@ -145,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.. # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
if self.micro == 0: if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor) ver = f"{self.major}.{self.minor}"
else: else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro) ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre(): if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre) ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post(): if self._is_post():
ver += ".post{}".format(self.post) ver += f".post{self.post}"
if self._is_dev(): if self._is_dev():
ver += ".dev{}".format(self.dev) ver += f".dev{self.dev}"
return ver return ver
@ -164,7 +164,7 @@ def parse_version(ver: str) -> Version:
m = RE_VER.match(ver) m = RE_VER.match(ver)
if m is None: if m is None:
raise ValueError("'{}' is not a valid version".format(ver)) raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro # Handle major, minor, micro
major = int(m.group('major')) major = int(m.group('major'))
@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev) return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 1, "final") __version_info__ = Version(2, 5, 0, "final")
__version__ = __version_info__._get_canonical() __version__ = __version_info__._get_canonical()

View file

@ -85,7 +85,7 @@ class _DocumentNav:
# Fail on unexpected types. # Fail on unexpected types.
if not cls.is_tag(tag): if not cls.is_tag(tag):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag))) raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod @staticmethod
def is_doc(obj: bs4.Tag) -> bool: def is_doc(obj: bs4.Tag) -> bool:
@ -165,8 +165,7 @@ class _DocumentNav:
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]: def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse.""" """Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el): if not no_iframe or not self.is_iframe(el):
for content in el.contents: yield from el.contents
yield content
def get_children( def get_children(
self, self,
@ -283,7 +282,7 @@ class _DocumentNav:
like we do in the case of `is_html_tag`. like we do in the case of `is_html_tag`.
""" """
ns = getattr(el, 'namespace') if el else None ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML) return bool(ns and ns == NS_XHTML)
@staticmethod @staticmethod
@ -394,7 +393,7 @@ class Inputs:
def validate_week(year: int, week: int) -> bool: def validate_week(year: int, week: int) -> bool:
"""Validate week.""" """Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1] max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1: if max_week == 1:
max_week = 53 max_week = 53
return 1 <= week <= max_week return 1 <= week <= max_week
@ -1272,11 +1271,7 @@ class CSSMatch(_DocumentNav):
# Auto handling for text inputs # Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0: if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea: if is_textarea:
temp = [] value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
temp.append(node)
value = ''.join(temp)
else: else:
value = cast(str, self.get_attribute_by_name(el, 'value', '')) value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value: if value:
@ -1571,17 +1566,14 @@ class SoupSieve(ct.Immutable):
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]: def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags.""" """Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit): yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
yield el
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format( return (
self.pattern, f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
self.namespaces, f"custom={self.custom!r}, flags={self.flags!r})"
self.custom,
self.flags
) )
__str__ = __repr__ __str__ = __repr__

View file

@ -92,94 +92,79 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE
# Sub-patterns parts # Sub-patterns parts
# Whitespace # Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])' NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE) WS = fr'(?:[ \t]|{NEWLINE})'
# Comments # Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included # Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS) WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes # CSS escapes
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS) CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE) CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier # CSS Identifier
IDENTIFIER = r''' IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--) (?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*) (?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''.format(esc=CSS_ESCAPES) '''
# `nth` content # `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC) NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier # Value: quoted string or identifier
VALUE = r''' VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard. # Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r''' ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
# Selector patterns # Selector patterns
# IDs (`#id`) # IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER) PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`) # Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`) # Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER) PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.) # Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r''' PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
# Pseudo class (`:pseudo-class`, `:pseudo-class(`) # Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER) PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`) # Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER) PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Closing pseudo group (`)`) # Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC) PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`) # Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS) PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported) # At rule (`@page`, etc.) (not supported)
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER) PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.) # Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = r''' PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{name} (?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*)) (?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH) '''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.) # Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r''' PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{name} (?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{nth}|even|odd)){ws}*\) (?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH) '''
# Pseudo class language (`:lang("*-de", en)`) # Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Pseudo class direction (`:dir(ltr)`) # Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC) PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`) # Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC) PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`) # Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
# Regular expressions # Regular expressions
# CSS escape pattern # CSS escape pattern
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I) RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile( RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
# Pattern to break up `nth` specifiers # Pattern to break up `nth` specifiers
RE_NTH = re.compile( RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
re.I
)
# Pattern to iterate multiple values. # Pattern to iterate multiple values.
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X) RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks # Whitespace checks
RE_WS = re.compile(WS) RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC)) RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile('{}*$'.format(WSC)) RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X) RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants # Constants
# List split token # List split token
@ -241,9 +226,9 @@ def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.Sele
for key, value in custom.items(): for key, value in custom.items():
name = util.lower(key) name = util.lower(key)
if RE_CUSTOM.match(name) is None: if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name)) raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors: if name in custom_selectors:
raise KeyError("The custom selector '{}' has already been registered".format(name)) raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value custom_selectors[css_unescape(name)] = value
return custom_selectors return custom_selectors
@ -283,23 +268,23 @@ def escape(ident: str) -> str:
start_dash = length > 0 and ident[0] == '-' start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash: if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters # Need to escape identifier that is a single `-` with no other characters
string.append('\\{}'.format(ident)) string.append(f'\\{ident}')
else: else:
for index, c in enumerate(ident): for index, c in enumerate(ident):
codepoint = ord(c) codepoint = ord(c)
if codepoint == 0x00: if codepoint == 0x00:
string.append('\ufffd') string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append('\\{:x} '.format(codepoint)) string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39): elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append('\\{:x} '.format(codepoint)) string.append(f'\\{codepoint:x} ')
elif ( elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A) (0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
): ):
string.append(c) string.append(c)
else: else:
string.append('\\{}'.format(c)) string.append(f'\\{c}')
return ''.join(string) return ''.join(string)
@ -419,11 +404,10 @@ class _Selector:
"""String representation.""" """String representation."""
return ( return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
).format( f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, f'no_match={self.no_match!r})'
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
) )
__repr__ = __str__ __repr__ = __str__
@ -563,7 +547,7 @@ class CSSParser:
selector = self.custom.get(pseudo) selector = self.custom.get(pseudo)
if selector is None: if selector is None:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)), f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern, self.pattern,
m.end(0) m.end(0)
) )
@ -663,13 +647,13 @@ class CSSParser:
has_selector = True has_selector = True
elif pseudo in PSEUDO_SUPPORTED: elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Invalid syntax for pseudo class '{}'".format(pseudo), f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
else: else:
raise NotImplementedError( raise NotImplementedError(
"'{}' pseudo-class is not implemented at this time".format(pseudo) f"'{pseudo}' pseudo-class is not implemented at this time"
) )
return has_selector, is_html return has_selector, is_html
@ -793,7 +777,7 @@ class CSSParser:
# multiple non-whitespace combinators. So if the current combinator is not a whitespace, # multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail. # then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError( raise SelectorSyntaxError(
'The multiple combinators at position {}'.format(index), f'The multiple combinators at position {index}',
self.pattern, self.pattern,
index index
) )
@ -824,7 +808,7 @@ class CSSParser:
if not has_selector: if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR: if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index), f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern, self.pattern,
index index
) )
@ -869,7 +853,7 @@ class CSSParser:
pseudo = util.lower(css_unescape(m.group('name'))) pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains": if pseudo == ":contains":
warnings.warn( warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning FutureWarning
) )
@ -982,13 +966,13 @@ class CSSParser:
# Handle parts # Handle parts
if key == "at_rule": if key == "at_rule":
raise NotImplementedError("At-rules found at position {}".format(m.start(0))) raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == 'pseudo_class_custom': elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector) has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class': elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element': elif key == 'pseudo_element':
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0))) raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains': elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector) has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'): elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -1003,7 +987,7 @@ class CSSParser:
if not has_selector: if not has_selector:
if not is_forgive: if not is_forgive:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Expected a selector at position {}".format(m.start(0)), f"Expected a selector at position {m.start(0)}",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@ -1013,7 +997,7 @@ class CSSParser:
break break
else: else:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Unmatched pseudo-class close at position {}".format(m.start(0)), f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@ -1031,7 +1015,7 @@ class CSSParser:
elif key == 'tag': elif key == 'tag':
if has_selector: if has_selector:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Tag name found at position {} instead of at the start".format(m.start(0)), f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern, self.pattern,
m.start(0) m.start(0)
) )
@ -1046,7 +1030,7 @@ class CSSParser:
# Handle selectors that are not closed # Handle selectors that are not closed
if is_open and not closed: if is_open and not closed:
raise SelectorSyntaxError( raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index), f"Unclosed pseudo-class at position {index}",
self.pattern, self.pattern,
index index
) )
@ -1076,7 +1060,7 @@ class CSSParser:
# We will always need to finish a selector when `:has()` is used as it leads with combining. # We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well. # May apply to others as well.
raise SelectorSyntaxError( raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index), f'Expected a selector at position {index}',
self.pattern, self.pattern,
index index
) )
@ -1108,7 +1092,7 @@ class CSSParser:
end = (m.start(0) - 1) if m else (len(pattern) - 1) end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print('## PARSING: {!r}'.format(pattern)) print(f'## PARSING: {pattern!r}')
while index <= end: while index <= end:
m = None m = None
for v in self.css_tokens: for v in self.css_tokens:
@ -1116,7 +1100,7 @@ class CSSParser:
if m: if m:
name = v.get_name() name = v.get_name()
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0))) print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0) index = m.end(0)
yield name, m yield name, m
break break
@ -1126,15 +1110,15 @@ class CSSParser:
# throw an exception mentioning that the known selector type is in error; # throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character. # otherwise, report the invalid character.
if c == '[': if c == '[':
msg = "Malformed attribute selector at position {}".format(index) msg = f"Malformed attribute selector at position {index}"
elif c == '.': elif c == '.':
msg = "Malformed class selector at position {}".format(index) msg = f"Malformed class selector at position {index}"
elif c == '#': elif c == '#':
msg = "Malformed id selector at position {}".format(index) msg = f"Malformed id selector at position {index}"
elif c == ':': elif c == ':':
msg = "Malformed pseudo-class selector at position {}".format(index) msg = f"Malformed pseudo-class selector at position {index}"
else: else:
msg = "Invalid character {!r} position {}".format(c, index) msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index) raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover if self.debug: # pragma: no cover
print('## END PARSING') print('## END PARSING')

View file

@ -45,11 +45,11 @@ class Immutable:
for k, v in kwargs.items(): for k, v in kwargs.items():
temp.append(type(v)) temp.append(type(v))
temp.append(v) temp.append(v)
super(Immutable, self).__setattr__(k, v) super().__setattr__(k, v)
super(Immutable, self).__setattr__('_hash', hash(tuple(temp))) super().__setattr__('_hash', hash(tuple(temp)))
@classmethod @classmethod
def __base__(cls) -> "type[Immutable]": def __base__(cls) -> type[Immutable]:
"""Get base class.""" """Get base class."""
return cls return cls
@ -59,7 +59,7 @@ class Immutable:
return ( return (
isinstance(other, self.__base__()) and isinstance(other, self.__base__()) and
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash']) all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
) )
def __ne__(self, other: Any) -> bool: def __ne__(self, other: Any) -> bool:
@ -67,7 +67,7 @@ class Immutable:
return ( return (
not isinstance(other, self.__base__()) or not isinstance(other, self.__base__()) or
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash']) any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
) )
def __hash__(self) -> int: def __hash__(self) -> int:
@ -78,14 +78,13 @@ class Immutable:
def __setattr__(self, name: str, value: Any) -> None: def __setattr__(self, name: str, value: Any) -> None:
"""Prevent mutability.""" """Prevent mutability."""
raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) raise AttributeError(f"'{self.__class__.__name__}' is immutable")
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "{}({})".format( r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) return f"{self.__class__.__name__}({r})"
)
__str__ = __repr__ __str__ = __repr__
@ -112,10 +111,10 @@ class ImmutableDict(Mapping[Any, Any]):
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, Hashable) for v in arg.values()]): if not all(isinstance(v, Hashable) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]): elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
def __iter__(self) -> Iterator[Any]: def __iter__(self) -> Iterator[Any]:
"""Iterator.""" """Iterator."""
@ -140,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __repr__(self) -> str: # pragma: no cover def __repr__(self) -> str: # pragma: no cover
"""Representation.""" """Representation."""
return "{!r}".format(self._d) return f"{self._d!r}"
__str__ = __repr__ __str__ = __repr__
@ -157,10 +156,10 @@ class Namespaces(ImmutableDict):
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]): if not all(isinstance(v, str) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class CustomSelectors(ImmutableDict): class CustomSelectors(ImmutableDict):
@ -175,10 +174,10 @@ class CustomSelectors(ImmutableDict):
"""Validate arguments.""" """Validate arguments."""
if isinstance(arg, dict): if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]): if not all(isinstance(v, str) for v in arg.values()):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class Selector(Immutable): class Selector(Immutable):
@ -367,7 +366,7 @@ class SelectorList(Immutable):
"""Initialize.""" """Initialize."""
super().__init__( super().__init__(
selectors=tuple(selectors) if selectors is not None else tuple(), selectors=tuple(selectors) if selectors is not None else (),
is_not=is_not, is_not=is_not,
is_html=is_html is_html=is_html
) )

View file

@ -10,7 +10,7 @@ The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners). hasn't been tested extensively to make sure we aren't missing corners).
Example: Example:
-------
``` ```
>>> import soupsieve as sv >>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty() >>> sv.compile('this > that.class[name=value]').selectors.pretty()
@ -64,6 +64,7 @@ SelectorList(
is_not=False, is_not=False,
is_html=False) is_html=False)
``` ```
""" """
from __future__ import annotations from __future__ import annotations
import re import re
@ -123,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover
index = m.end(0) index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'): if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4 indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent)) output.append(f'{m.group(0)}\n{" " * indent}')
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'): elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0)) output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'): elif name in ('lend', 'dend', 'tend'):
indent -= 4 indent -= 4
output.append(m.group(0)) output.append(m.group(0))
elif name in ('sep',): elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent)) output.append(f'{m.group(1)}\n{" " * indent}')
elif name in ('dsep',): elif name in ('dsep',):
output.append('{} '.format(m.group(1))) output.append(f'{m.group(1)} ')
break break
return ''.join(output) return ''.join(output)

View file

@ -37,7 +37,7 @@ class SelectorSyntaxError(Exception):
if pattern is not None and index is not None: if pattern is not None and index is not None:
# Format pattern to show line and column position # Format pattern to show line and column position
self.context, self.line, self.col = get_pattern_context(pattern, index) self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context) msg = f'{msg}\n line {self.line}:\n{self.context}'
super().__init__(msg) super().__init__(msg)
@ -105,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
# we will render the output with just `\n`. We will still log the column # we will render the output with just `\n`. We will still log the column
# correctly though. # correctly though.
text.append('\n') text.append('\n')
text.append('{}{}'.format(indent, linetext)) text.append(f'{indent}{linetext}')
if offset is not None: if offset is not None:
text.append('\n') text.append('\n')
text.append(' ' * (col + offset) + '^') text.append(' ' * (col + offset) + '^')