Merge branch 'feature/UpdateBSoup' into dev

This commit is contained in:
JackDandy 2023-10-07 21:41:48 +01:00
commit ecd70f546f
10 changed files with 143 additions and 157 deletions

View file

@ -1,4 +1,10 @@
### 3.30.1 (2023-10-02 22:50:00 UTC)
### 3.31.0 (2023-1x-xx xx:xx:00 UTC)
* Update Beautiful Soup 4.12.2 to a newer 4.12.2 revision (30c58a1)
* Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495)
### 3.30.1 (2023-10-02 22:50:00 UTC)
* Change allow Python 3.12.0 and 3.11.6

View file

@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
parser.close()
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = []

View file

@ -1356,7 +1356,7 @@ class Tag(PageElement):
This is the first step in the deepcopy process.
"""
clone = type(self)(
None, self.builder, self.name, self.namespace,
None, None, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml,
sourceline=self.sourceline, sourcepos=self.sourcepos,
can_be_empty_element=self.can_be_empty_element,
@ -1845,6 +1845,11 @@ class Tag(PageElement):
return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening):
if self.hidden:
# A hidden tag is invisible, although its contents
# are visible.
return ''
# A tag starts with the < character (see below).
# Then the / character, if this is a closing tag.

View file

@ -78,13 +78,13 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
) -> bs4.Tag:
"""Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag)
@ -92,7 +92,7 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
@ -106,13 +106,13 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
iterable: Iterable[bs4.Tag],
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
) -> list[bs4.Tag]:
"""Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable)
@ -120,13 +120,13 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
) -> bs4.Tag:
"""Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag)
@ -134,14 +134,14 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
) -> list[bs4.Tag]:
"""Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
@ -149,18 +149,17 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
yield el
yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
def escape(ident: str) -> str:

View file

@ -93,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP:
raise ValueError("'{}' is not a valid release type.".format(release))
raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final":
@ -118,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
elif dev:
raise ValueError("Version is not a development release.")
return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool:
"""Is prerelease."""
@ -145,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
if self.micro == 0:
ver = "{}.{}".format(self.major, self.minor)
ver = f"{self.major}.{self.minor}"
else:
ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre():
ver += '{}{}'.format(REL_MAP[self.release], self.pre)
ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post():
ver += ".post{}".format(self.post)
ver += f".post{self.post}"
if self._is_dev():
ver += ".dev{}".format(self.dev)
ver += f".dev{self.dev}"
return ver
@ -164,7 +164,7 @@ def parse_version(ver: str) -> Version:
m = RE_VER.match(ver)
if m is None:
raise ValueError("'{}' is not a valid version".format(ver))
raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro
major = int(m.group('major'))
@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 1, "final")
__version_info__ = Version(2, 5, 0, "final")
__version__ = __version_info__._get_canonical()

View file

@ -85,7 +85,7 @@ class _DocumentNav:
# Fail on unexpected types.
if not cls.is_tag(tag):
raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag)))
raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod
def is_doc(obj: bs4.Tag) -> bool:
@ -165,8 +165,7 @@ class _DocumentNav:
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el):
for content in el.contents:
yield content
yield from el.contents
def get_children(
self,
@ -283,7 +282,7 @@ class _DocumentNav:
like we do in the case of `is_html_tag`.
"""
ns = getattr(el, 'namespace') if el else None
ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML)
@staticmethod
@ -394,7 +393,7 @@ class Inputs:
def validate_week(year: int, week: int) -> bool:
"""Validate week."""
max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1]
max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1:
max_week = 53
return 1 <= week <= max_week
@ -1272,11 +1271,7 @@ class CSSMatch(_DocumentNav):
# Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea:
temp = []
for node in self.get_contents(el, no_iframe=True):
if self.is_content_string(node):
temp.append(node)
value = ''.join(temp)
value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
else:
value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value:
@ -1571,17 +1566,14 @@ class SoupSieve(ct.Immutable):
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit):
yield el
yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
self.pattern,
self.namespaces,
self.custom,
self.flags
return (
f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
f"custom={self.custom!r}, flags={self.flags!r})"
)
__str__ = __repr__

View file

@ -92,94 +92,79 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE
# Sub-patterns parts
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = r'(?:[ \t]|{})'.format(NEWLINE)
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS)
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes
CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS)
CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE)
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = r'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*)
'''.format(esc=CSS_ESCAPES)
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC)
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = r'''
(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+)
'''.format(nl=NEWLINE, ident=IDENTIFIER)
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'
# Selector patterns
# IDs (`#id`)
PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = r'(?P<tag_ns>(?:{ident}|\*)?\|)?(?P<tag_name>{ident}|\*)'.format(ident=IDENTIFIER)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = r'''
\[{ws}*(?P<attr_ns>(?:{ident}|\*)?\|)?(?P<attr_name>{ident}){attr}
'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = r'(?P<name>:{ident})(?P<open>\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = r'(?P<name>:{ident})(?P<open>\({ws}*)'.format(ws=WSC, ident=IDENTIFIER)
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER)
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = r'''
(?P<pseudo_nth_child>{name}
(?P<nth_child>{nth}|even|odd))(?:{wsc}*\)|(?P<of>{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*))
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH)
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = r'''
(?P<pseudo_nth_type>{name}
(?P<nth_type>{nth}|even|odd)){ws}*\)
'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = r'{name}(?P<dir>ltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = r'{wsc}*?(?P<relation>[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = r'{name}(?P<values>{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format(
name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE
)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Regular expressions
# CSS escape pattern
RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I)
RE_CSS_STR_ESC = re.compile(
r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I
)
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(
r'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){ws}*(?P<s2>[-+]){ws}*(?P<b>[0-9]+))?'.format(ws=WSC),
re.I
)
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(r'(?:(?P<value>{value})|(?P<split>{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X)
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile('^{}*'.format(WSC))
RE_WS_END = re.compile('{}*$'.format(WSC))
RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants
# List split token
@ -241,9 +226,9 @@ def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.Sele
for key, value in custom.items():
name = util.lower(key)
if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name))
raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors:
raise KeyError("The custom selector '{}' has already been registered".format(name))
raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value
return custom_selectors
@ -283,23 +268,23 @@ def escape(ident: str) -> str:
start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters
string.append('\\{}'.format(ident))
string.append(f'\\{ident}')
else:
for index, c in enumerate(ident):
codepoint = ord(c)
if codepoint == 0x00:
string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append('\\{:x} '.format(codepoint))
string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append('\\{:x} '.format(codepoint))
string.append(f'\\{codepoint:x} ')
elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
):
string.append(c)
else:
string.append('\\{}'.format(c))
string.append(f'\\{c}')
return ''.join(string)
@ -419,11 +404,10 @@ class _Selector:
"""String representation."""
return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
).format(
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
f'no_match={self.no_match!r})'
)
__repr__ = __str__
@ -563,7 +547,7 @@ class CSSParser:
selector = self.custom.get(pseudo)
if selector is None:
raise SelectorSyntaxError(
"Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)),
f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern,
m.end(0)
)
@ -663,13 +647,13 @@ class CSSParser:
has_selector = True
elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError(
"Invalid syntax for pseudo class '{}'".format(pseudo),
f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern,
m.start(0)
)
else:
raise NotImplementedError(
"'{}' pseudo-class is not implemented at this time".format(pseudo)
f"'{pseudo}' pseudo-class is not implemented at this time"
)
return has_selector, is_html
@ -793,7 +777,7 @@ class CSSParser:
# multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError(
'The multiple combinators at position {}'.format(index),
f'The multiple combinators at position {index}',
self.pattern,
index
)
@ -824,7 +808,7 @@ class CSSParser:
if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError(
"The combinator '{}' at position {}, must have a selector before it".format(combinator, index),
f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern,
index
)
@ -869,7 +853,7 @@ class CSSParser:
pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains":
warnings.warn(
warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning
)
@ -982,13 +966,13 @@ class CSSParser:
# Handle parts
if key == "at_rule":
raise NotImplementedError("At-rules found at position {}".format(m.start(0)))
raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element':
raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0)))
raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
@ -1003,7 +987,7 @@ class CSSParser:
if not has_selector:
if not is_forgive:
raise SelectorSyntaxError(
"Expected a selector at position {}".format(m.start(0)),
f"Expected a selector at position {m.start(0)}",
self.pattern,
m.start(0)
)
@ -1013,7 +997,7 @@ class CSSParser:
break
else:
raise SelectorSyntaxError(
"Unmatched pseudo-class close at position {}".format(m.start(0)),
f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern,
m.start(0)
)
@ -1031,7 +1015,7 @@ class CSSParser:
elif key == 'tag':
if has_selector:
raise SelectorSyntaxError(
"Tag name found at position {} instead of at the start".format(m.start(0)),
f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern,
m.start(0)
)
@ -1046,7 +1030,7 @@ class CSSParser:
# Handle selectors that are not closed
if is_open and not closed:
raise SelectorSyntaxError(
"Unclosed pseudo-class at position {}".format(index),
f"Unclosed pseudo-class at position {index}",
self.pattern,
index
)
@ -1076,7 +1060,7 @@ class CSSParser:
# We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well.
raise SelectorSyntaxError(
'Expected a selector at position {}'.format(index),
f'Expected a selector at position {index}',
self.pattern,
index
)
@ -1108,7 +1092,7 @@ class CSSParser:
end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover
print('## PARSING: {!r}'.format(pattern))
print(f'## PARSING: {pattern!r}')
while index <= end:
m = None
for v in self.css_tokens:
@ -1116,7 +1100,7 @@ class CSSParser:
if m:
name = v.get_name()
if self.debug: # pragma: no cover
print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0)))
print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0)
yield name, m
break
@ -1126,15 +1110,15 @@ class CSSParser:
# throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character.
if c == '[':
msg = "Malformed attribute selector at position {}".format(index)
msg = f"Malformed attribute selector at position {index}"
elif c == '.':
msg = "Malformed class selector at position {}".format(index)
msg = f"Malformed class selector at position {index}"
elif c == '#':
msg = "Malformed id selector at position {}".format(index)
msg = f"Malformed id selector at position {index}"
elif c == ':':
msg = "Malformed pseudo-class selector at position {}".format(index)
msg = f"Malformed pseudo-class selector at position {index}"
else:
msg = "Invalid character {!r} position {}".format(c, index)
msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover
print('## END PARSING')

View file

@ -45,11 +45,11 @@ class Immutable:
for k, v in kwargs.items():
temp.append(type(v))
temp.append(v)
super(Immutable, self).__setattr__(k, v)
super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
super().__setattr__(k, v)
super().__setattr__('_hash', hash(tuple(temp)))
@classmethod
def __base__(cls) -> "type[Immutable]":
def __base__(cls) -> type[Immutable]:
"""Get base class."""
return cls
@ -59,7 +59,7 @@ class Immutable:
return (
isinstance(other, self.__base__()) and
all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash')
)
def __ne__(self, other: Any) -> bool:
@ -67,7 +67,7 @@ class Immutable:
return (
not isinstance(other, self.__base__()) or
any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash')
)
def __hash__(self) -> int:
@ -78,14 +78,13 @@ class Immutable:
def __setattr__(self, name: str, value: Any) -> None:
"""Prevent mutability."""
raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
raise AttributeError(f"'{self.__class__.__name__}' is immutable")
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "{}({})".format(
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
)
r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]])
return f"{self.__class__.__name__}({r})"
__str__ = __repr__
@ -112,10 +111,10 @@ class ImmutableDict(Mapping[Any, Any]):
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, Hashable) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
if not all(isinstance(v, Hashable) for v in arg.values()):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
def __iter__(self) -> Iterator[Any]:
"""Iterator."""
@ -140,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]):
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return "{!r}".format(self._d)
return f"{self._d!r}"
__str__ = __repr__
@ -157,10 +156,10 @@ class Namespaces(ImmutableDict):
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
if not all(isinstance(v, str) for v in arg.values()):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class CustomSelectors(ImmutableDict):
@ -175,10 +174,10 @@ class CustomSelectors(ImmutableDict):
"""Validate arguments."""
if isinstance(arg, dict):
if not all([isinstance(v, str) for v in arg.values()]):
raise TypeError('{} values must be hashable'.format(self.__class__.__name__))
elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]):
raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__))
if not all(isinstance(v, str) for v in arg.values()):
raise TypeError(f'{self.__class__.__name__} values must be hashable')
elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg):
raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings')
class Selector(Immutable):
@ -367,7 +366,7 @@ class SelectorList(Immutable):
"""Initialize."""
super().__init__(
selectors=tuple(selectors) if selectors is not None else tuple(),
selectors=tuple(selectors) if selectors is not None else (),
is_not=is_not,
is_html=is_html
)

View file

@ -10,7 +10,7 @@ The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners).
Example:
-------
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
@ -64,6 +64,7 @@ SelectorList(
is_not=False,
is_html=False)
```
"""
from __future__ import annotations
import re
@ -123,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover
index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent))
output.append(f'{m.group(0)}\n{" " * indent}')
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'):
indent -= 4
output.append(m.group(0))
elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent))
output.append(f'{m.group(1)}\n{" " * indent}')
elif name in ('dsep',):
output.append('{} '.format(m.group(1)))
output.append(f'{m.group(1)} ')
break
return ''.join(output)

View file

@ -37,7 +37,7 @@ class SelectorSyntaxError(Exception):
if pattern is not None and index is not None:
# Format pattern to show line and column position
self.context, self.line, self.col = get_pattern_context(pattern, index)
msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context)
msg = f'{msg}\n line {self.line}:\n{self.context}'
super().__init__(msg)
@ -105,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
# we will render the output with just `\n`. We will still log the column
# correctly though.
text.append('\n')
text.append('{}{}'.format(indent, linetext))
text.append(f'{indent}{linetext}')
if offset is not None:
text.append('\n')
text.append(' ' * (col + offset) + '^')