diff --git a/CHANGES.md b/CHANGES.md index be1f0eaa..ac2be56b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,10 @@ -### 3.30.1 (2023-10-02 22:50:00 UTC) +### 3.31.0 (2023-1x-xx xx:xx:00 UTC) + +* Update Beautiful Soup 4.12.2 to 4.12.2 (30c58a1) +* Update soupsieve 2.4.1 (2e66beb) to 2.5.0 (dc71495) + + +### 3.30.1 (2023-10-02 22:50:00 UTC) * Change allow Python 3.12.0 and 3.11.6 diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py index 4c5ced93..59911043 100644 --- a/lib/bs4/builder/_htmlparser.py +++ b/lib/bs4/builder/_htmlparser.py @@ -378,10 +378,10 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): parser.soup = self.soup try: parser.feed(markup) + parser.close() except AssertionError as e: # html.parser raises AssertionError in rare cases to # indicate a fatal problem with the markup, especially # when there's an error in the doctype declaration. raise ParserRejectedMarkup(e) - parser.close() parser.already_closed_empty_element = [] diff --git a/lib/bs4/element.py b/lib/bs4/element.py index 99fc8137..38ca2dc2 100644 --- a/lib/bs4/element.py +++ b/lib/bs4/element.py @@ -1356,7 +1356,7 @@ class Tag(PageElement): This is the first step in the deepcopy process. """ clone = type(self)( - None, self.builder, self.name, self.namespace, + None, None, self.name, self.namespace, self.prefix, self.attrs, is_xml=self._is_xml, sourceline=self.sourceline, sourcepos=self.sourcepos, can_be_empty_element=self.can_be_empty_element, @@ -1845,6 +1845,11 @@ class Tag(PageElement): return space_before + s + space_after def _format_tag(self, eventual_encoding, formatter, opening): + if self.hidden: + # A hidden tag is invisible, although its contents + # are visible. + return '' + # A tag starts with the < character (see below). # Then the / character, if this is a closing tag. diff --git a/lib/soupsieve/__init__.py b/lib/soupsieve/__init__.py index 4c928fd5..549b4c5c 100644 --- a/lib/soupsieve/__init__.py +++ b/lib/soupsieve/__init__.py @@ -78,13 +78,13 @@ def purge() -> None: def closest( select: str, - tag: 'bs4.Tag', + tag: bs4.Tag, namespaces: dict[str, str] | None = None, flags: int = 0, *, custom: dict[str, str] | None = None, **kwargs: Any -) -> 'bs4.Tag': +) -> bs4.Tag: """Match closest ancestor.""" return compile(select, namespaces, flags, **kwargs).closest(tag) @@ -92,7 +92,7 @@ def closest( def match( select: str, - tag: 'bs4.Tag', + tag: bs4.Tag, namespaces: dict[str, str] | None = None, flags: int = 0, *, @@ -106,13 +106,13 @@ def match( def filter( # noqa: A001 select: str, - iterable: Iterable['bs4.Tag'], + iterable: Iterable[bs4.Tag], namespaces: dict[str, str] | None = None, flags: int = 0, *, custom: dict[str, str] | None = None, **kwargs: Any -) -> list['bs4.Tag']: +) -> list[bs4.Tag]: """Filter list of nodes.""" return compile(select, namespaces, flags, **kwargs).filter(iterable) @@ -120,13 +120,13 @@ def filter( # noqa: A001 def select_one( select: str, - tag: 'bs4.Tag', + tag: bs4.Tag, namespaces: dict[str, str] | None = None, flags: int = 0, *, custom: dict[str, str] | None = None, **kwargs: Any -) -> 'bs4.Tag': +) -> bs4.Tag: """Select a single tag.""" return compile(select, namespaces, flags, **kwargs).select_one(tag) @@ -134,14 +134,14 @@ def select_one( def select( select: str, - tag: 'bs4.Tag', + tag: bs4.Tag, namespaces: dict[str, str] | None = None, limit: int = 0, flags: int = 0, *, custom: dict[str, str] | None = None, **kwargs: Any -) -> list['bs4.Tag']: +) -> list[bs4.Tag]: """Select the specified tags.""" return compile(select, namespaces, flags, **kwargs).select(tag, limit) @@ -149,18 +149,17 @@ def select( def iselect( select: str, - tag: 'bs4.Tag', + tag: bs4.Tag, namespaces: dict[str, str] | None = None, limit: int = 0, flags: int = 0, *, custom: dict[str, str] | None = None, **kwargs: Any -) -> Iterator['bs4.Tag']: +) -> Iterator[bs4.Tag]: """Iterate the specified tags.""" - for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit): - yield el + yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit) def escape(ident: str) -> str: diff --git a/lib/soupsieve/__meta__.py b/lib/soupsieve/__meta__.py index 1a5cfe41..df341145 100644 --- a/lib/soupsieve/__meta__.py +++ b/lib/soupsieve/__meta__.py @@ -93,7 +93,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" raise ValueError("All version parts except 'release' should be integers.") if release not in REL_MAP: - raise ValueError("'{}' is not a valid release type.".format(release)) + raise ValueError(f"'{release}' is not a valid release type.") # Ensure valid pre-release (we do not allow implicit pre-releases). if ".dev-candidate" < release < "final": @@ -118,7 +118,7 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" elif dev: raise ValueError("Version is not a development release.") - return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev) + return super().__new__(cls, major, minor, micro, release, pre, post, dev) def _is_pre(self) -> bool: """Is prerelease.""" @@ -145,15 +145,15 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre" # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.. if self.micro == 0: - ver = "{}.{}".format(self.major, self.minor) + ver = f"{self.major}.{self.minor}" else: - ver = "{}.{}.{}".format(self.major, self.minor, self.micro) + ver = f"{self.major}.{self.minor}.{self.micro}" if self._is_pre(): - ver += '{}{}'.format(REL_MAP[self.release], self.pre) + ver += f'{REL_MAP[self.release]}{self.pre}' if self._is_post(): - ver += ".post{}".format(self.post) + ver += f".post{self.post}" if self._is_dev(): - ver += ".dev{}".format(self.dev) + ver += f".dev{self.dev}" return ver @@ -164,7 +164,7 @@ def parse_version(ver: str) -> Version: m = RE_VER.match(ver) if m is None: - raise ValueError("'{}' is not a valid version".format(ver)) + raise ValueError(f"'{ver}' is not a valid version") # Handle major, minor, micro major = int(m.group('major')) @@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version: return Version(major, minor, micro, release, pre, post, dev) -__version_info__ = Version(2, 4, 1, "final") +__version_info__ = Version(2, 5, 0, "final") __version__ = __version_info__._get_canonical() diff --git a/lib/soupsieve/css_match.py b/lib/soupsieve/css_match.py index 9acf500e..4504e7b3 100644 --- a/lib/soupsieve/css_match.py +++ b/lib/soupsieve/css_match.py @@ -85,7 +85,7 @@ class _DocumentNav: # Fail on unexpected types. if not cls.is_tag(tag): - raise TypeError("Expected a BeautifulSoup 'Tag', but instead received type {}".format(type(tag))) + raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}") @staticmethod def is_doc(obj: bs4.Tag) -> bool: @@ -165,8 +165,7 @@ class _DocumentNav: def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]: """Get contents or contents in reverse.""" if not no_iframe or not self.is_iframe(el): - for content in el.contents: - yield content + yield from el.contents def get_children( self, @@ -283,7 +282,7 @@ class _DocumentNav: like we do in the case of `is_html_tag`. """ - ns = getattr(el, 'namespace') if el else None + ns = getattr(el, 'namespace') if el else None # noqa: B009 return bool(ns and ns == NS_XHTML) @staticmethod @@ -394,7 +393,7 @@ class Inputs: def validate_week(year: int, week: int) -> bool: """Validate week.""" - max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1] + max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1] if max_week == 1: max_week = 53 return 1 <= week <= max_week @@ -1272,11 +1271,7 @@ class CSSMatch(_DocumentNav): # Auto handling for text inputs if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0: if is_textarea: - temp = [] - for node in self.get_contents(el, no_iframe=True): - if self.is_content_string(node): - temp.append(node) - value = ''.join(temp) + value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node)) else: value = cast(str, self.get_attribute_by_name(el, 'value', '')) if value: @@ -1571,17 +1566,14 @@ class SoupSieve(ct.Immutable): def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]: """Iterate the specified tags.""" - for el in CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit): - yield el + yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit) def __repr__(self) -> str: # pragma: no cover """Representation.""" - return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format( - self.pattern, - self.namespaces, - self.custom, - self.flags + return ( + f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, " + f"custom={self.custom!r}, flags={self.flags!r})" ) __str__ = __repr__ diff --git a/lib/soupsieve/css_parser.py b/lib/soupsieve/css_parser.py index 739ab74b..4a9ea39d 100644 --- a/lib/soupsieve/css_parser.py +++ b/lib/soupsieve/css_parser.py @@ -92,94 +92,79 @@ PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSE # Sub-patterns parts # Whitespace NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])' -WS = r'(?:[ \t]|{})'.format(NEWLINE) +WS = fr'(?:[ \t]|{NEWLINE})' # Comments COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)' # Whitespace with comments included -WSC = r'(?:{ws}|{comments})'.format(ws=WS, comments=COMMENTS) +WSC = fr'(?:{WS}|{COMMENTS})' # CSS escapes -CSS_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$))'.format(ws=WS) -CSS_STRING_ESCAPES = r'(?:\\(?:[a-f0-9]{{1,6}}{ws}?|[^\r\n\f]|$|{nl}))'.format(ws=WS, nl=NEWLINE) +CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))' +CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))' # CSS Identifier -IDENTIFIER = r''' -(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})+|--) -(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{esc})*) -'''.format(esc=CSS_ESCAPES) +IDENTIFIER = fr''' +(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--) +(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*) +''' # `nth` content -NTH = r'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:[0-9]+))?'.format(ws=WSC) +NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?' # Value: quoted string or identifier -VALUE = r''' -(?:"(?:\\(?:.|{nl})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{nl})|[^\\'\r\n\f]+)*?'|{ident}+) -'''.format(nl=NEWLINE, ident=IDENTIFIER) +VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)''' # Attribute value comparison. `!=` is handled special as it is non-standard. -ATTR = r''' -(?:{ws}*(?P[!~^|*$]?=){ws}*(?P{value})(?:{ws}*(?P[is]))?)?{ws}*\] -'''.format(ws=WSC, value=VALUE) +ATTR = fr'(?:{WSC}*(?P[!~^|*$]?=){WSC}*(?P{VALUE})(?:{WSC}*(?P[is]))?)?{WSC}*\]' # Selector patterns # IDs (`#id`) -PAT_ID = r'\#{ident}'.format(ident=IDENTIFIER) +PAT_ID = fr'\#{IDENTIFIER}' # Classes (`.class`) -PAT_CLASS = r'\.{ident}'.format(ident=IDENTIFIER) +PAT_CLASS = fr'\.{IDENTIFIER}' # Prefix:Tag (`prefix|tag`) -PAT_TAG = r'(?P(?:{ident}|\*)?\|)?(?P{ident}|\*)'.format(ident=IDENTIFIER) +PAT_TAG = fr'(?P(?:{IDENTIFIER}|\*)?\|)?(?P{IDENTIFIER}|\*)' # Attributes (`[attr]`, `[attr=value]`, etc.) -PAT_ATTR = r''' -\[{ws}*(?P(?:{ident}|\*)?\|)?(?P{ident}){attr} -'''.format(ws=WSC, ident=IDENTIFIER, attr=ATTR) +PAT_ATTR = fr'\[{WSC}*(?P(?:{IDENTIFIER}|\*)?\|)?(?P{IDENTIFIER}){ATTR}' # Pseudo class (`:pseudo-class`, `:pseudo-class(`) -PAT_PSEUDO_CLASS = r'(?P:{ident})(?P\({ws}*)?'.format(ws=WSC, ident=IDENTIFIER) +PAT_PSEUDO_CLASS = fr'(?P:{IDENTIFIER})(?P\({WSC}*)?' # Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes. -PAT_PSEUDO_CLASS_SPECIAL = r'(?P:{ident})(?P\({ws}*)'.format(ws=WSC, ident=IDENTIFIER) +PAT_PSEUDO_CLASS_SPECIAL = fr'(?P:{IDENTIFIER})(?P\({WSC}*)' # Custom pseudo class (`:--custom-pseudo`) -PAT_PSEUDO_CLASS_CUSTOM = r'(?P:(?=--){ident})'.format(ident=IDENTIFIER) +PAT_PSEUDO_CLASS_CUSTOM = fr'(?P:(?=--){IDENTIFIER})' # Closing pseudo group (`)`) -PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC) +PAT_PSEUDO_CLOSE = fr'{WSC}*\)' # Pseudo element (`::pseudo-element`) -PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS) +PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}' # At rule (`@page`, etc.) (not supported) -PAT_AT_RULE = r'@P{ident}'.format(ident=IDENTIFIER) +PAT_AT_RULE = fr'@P{IDENTIFIER}' # Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.) -PAT_PSEUDO_NTH_CHILD = r''' -(?P{name} -(?P{nth}|even|odd))(?:{wsc}*\)|(?P{comments}*{ws}{wsc}*of{comments}*{ws}{wsc}*)) -'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, wsc=WSC, comments=COMMENTS, ws=WS, nth=NTH) +PAT_PSEUDO_NTH_CHILD = fr''' +(?P{PAT_PSEUDO_CLASS_SPECIAL} +(?P{NTH}|even|odd))(?:{WSC}*\)|(?P{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*)) +''' # Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.) -PAT_PSEUDO_NTH_TYPE = r''' -(?P{name} -(?P{nth}|even|odd)){ws}*\) -'''.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, nth=NTH) +PAT_PSEUDO_NTH_TYPE = fr''' +(?P{PAT_PSEUDO_CLASS_SPECIAL} +(?P{NTH}|even|odd)){WSC}*\) +''' # Pseudo class language (`:lang("*-de", en)`) -PAT_PSEUDO_LANG = r'{name}(?P{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( - name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE -) +PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)' # Pseudo class direction (`:dir(ltr)`) -PAT_PSEUDO_DIR = r'{name}(?Pltr|rtl){ws}*\)'.format(name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC) +PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?Pltr|rtl){WSC}*\)' # Combining characters (`>`, `~`, ` `, `+`, `,`) -PAT_COMBINE = r'{wsc}*?(?P[,+>~]|{ws}(?![,+>~])){wsc}*'.format(ws=WS, wsc=WSC) +PAT_COMBINE = fr'{WSC}*?(?P[,+>~]|{WS}(?![,+>~])){WSC}*' # Extra: Contains (`:contains(text)`) -PAT_PSEUDO_CONTAINS = r'{name}(?P{value}(?:{ws}*,{ws}*{value})*){ws}*\)'.format( - name=PAT_PSEUDO_CLASS_SPECIAL, ws=WSC, value=VALUE -) +PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)' # Regular expressions # CSS escape pattern -RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$))'.format(ws=WSC), re.I) -RE_CSS_STR_ESC = re.compile( - r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\[^\r\n\f])|(\\$)|(\\{nl}))'.format(ws=WS, nl=NEWLINE), re.I -) +RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I) +RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I) # Pattern to break up `nth` specifiers -RE_NTH = re.compile( - r'(?P[-+])?(?P[0-9]+n?|n)(?:(?<=n){ws}*(?P[-+]){ws}*(?P[0-9]+))?'.format(ws=WSC), - re.I -) +RE_NTH = re.compile(fr'(?P[-+])?(?P[0-9]+n?|n)(?:(?<=n){WSC}*(?P[-+]){WSC}*(?P[0-9]+))?', re.I) # Pattern to iterate multiple values. -RE_VALUES = re.compile(r'(?:(?P{value})|(?P{ws}*,{ws}*))'.format(ws=WSC, value=VALUE), re.X) +RE_VALUES = re.compile(fr'(?:(?P{VALUE})|(?P{WSC}*,{WSC}*))', re.X) # Whitespace checks RE_WS = re.compile(WS) -RE_WS_BEGIN = re.compile('^{}*'.format(WSC)) -RE_WS_END = re.compile('{}*$'.format(WSC)) -RE_CUSTOM = re.compile(r'^{}$'.format(PAT_PSEUDO_CLASS_CUSTOM), re.X) +RE_WS_BEGIN = re.compile(fr'^{WSC}*') +RE_WS_END = re.compile(fr'{WSC}*$') +RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X) # Constants # List split token @@ -241,9 +226,9 @@ def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.Sele for key, value in custom.items(): name = util.lower(key) if RE_CUSTOM.match(name) is None: - raise SelectorSyntaxError("The name '{}' is not a valid custom pseudo-class name".format(name)) + raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name") if name in custom_selectors: - raise KeyError("The custom selector '{}' has already been registered".format(name)) + raise KeyError(f"The custom selector '{name}' has already been registered") custom_selectors[css_unescape(name)] = value return custom_selectors @@ -283,23 +268,23 @@ def escape(ident: str) -> str: start_dash = length > 0 and ident[0] == '-' if length == 1 and start_dash: # Need to escape identifier that is a single `-` with no other characters - string.append('\\{}'.format(ident)) + string.append(f'\\{ident}') else: for index, c in enumerate(ident): codepoint = ord(c) if codepoint == 0x00: string.append('\ufffd') elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F: - string.append('\\{:x} '.format(codepoint)) + string.append(f'\\{codepoint:x} ') elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39): - string.append('\\{:x} '.format(codepoint)) + string.append(f'\\{codepoint:x} ') elif ( codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or (0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A) ): string.append(c) else: - string.append('\\{}'.format(c)) + string.append(f'\\{c}') return ''.join(string) @@ -419,11 +404,10 @@ class _Selector: """String representation.""" return ( - '_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, ' - 'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})' - ).format( - self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors, - self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match + f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, ' + f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, ' + f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, ' + f'no_match={self.no_match!r})' ) __repr__ = __str__ @@ -563,7 +547,7 @@ class CSSParser: selector = self.custom.get(pseudo) if selector is None: raise SelectorSyntaxError( - "Undefined custom selector '{}' found at position {}".format(pseudo, m.end(0)), + f"Undefined custom selector '{pseudo}' found at position {m.end(0)}", self.pattern, m.end(0) ) @@ -663,13 +647,13 @@ class CSSParser: has_selector = True elif pseudo in PSEUDO_SUPPORTED: raise SelectorSyntaxError( - "Invalid syntax for pseudo class '{}'".format(pseudo), + f"Invalid syntax for pseudo class '{pseudo}'", self.pattern, m.start(0) ) else: raise NotImplementedError( - "'{}' pseudo-class is not implemented at this time".format(pseudo) + f"'{pseudo}' pseudo-class is not implemented at this time" ) return has_selector, is_html @@ -793,7 +777,7 @@ class CSSParser: # multiple non-whitespace combinators. So if the current combinator is not a whitespace, # then we've hit the multiple combinator case, so we should fail. raise SelectorSyntaxError( - 'The multiple combinators at position {}'.format(index), + f'The multiple combinators at position {index}', self.pattern, index ) @@ -824,7 +808,7 @@ class CSSParser: if not has_selector: if not is_forgive or combinator != COMMA_COMBINATOR: raise SelectorSyntaxError( - "The combinator '{}' at position {}, must have a selector before it".format(combinator, index), + f"The combinator '{combinator}' at position {index}, must have a selector before it", self.pattern, index ) @@ -869,7 +853,7 @@ class CSSParser: pseudo = util.lower(css_unescape(m.group('name'))) if pseudo == ":contains": - warnings.warn( + warnings.warn( # noqa: B028 "The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.", FutureWarning ) @@ -982,13 +966,13 @@ class CSSParser: # Handle parts if key == "at_rule": - raise NotImplementedError("At-rules found at position {}".format(m.start(0))) + raise NotImplementedError(f"At-rules found at position {m.start(0)}") elif key == 'pseudo_class_custom': has_selector = self.parse_pseudo_class_custom(sel, m, has_selector) elif key == 'pseudo_class': has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html) elif key == 'pseudo_element': - raise NotImplementedError("Pseudo-element found at position {}".format(m.start(0))) + raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}") elif key == 'pseudo_contains': has_selector = self.parse_pseudo_contains(sel, m, has_selector) elif key in ('pseudo_nth_type', 'pseudo_nth_child'): @@ -1003,7 +987,7 @@ class CSSParser: if not has_selector: if not is_forgive: raise SelectorSyntaxError( - "Expected a selector at position {}".format(m.start(0)), + f"Expected a selector at position {m.start(0)}", self.pattern, m.start(0) ) @@ -1013,7 +997,7 @@ class CSSParser: break else: raise SelectorSyntaxError( - "Unmatched pseudo-class close at position {}".format(m.start(0)), + f"Unmatched pseudo-class close at position {m.start(0)}", self.pattern, m.start(0) ) @@ -1031,7 +1015,7 @@ class CSSParser: elif key == 'tag': if has_selector: raise SelectorSyntaxError( - "Tag name found at position {} instead of at the start".format(m.start(0)), + f"Tag name found at position {m.start(0)} instead of at the start", self.pattern, m.start(0) ) @@ -1046,7 +1030,7 @@ class CSSParser: # Handle selectors that are not closed if is_open and not closed: raise SelectorSyntaxError( - "Unclosed pseudo-class at position {}".format(index), + f"Unclosed pseudo-class at position {index}", self.pattern, index ) @@ -1076,7 +1060,7 @@ class CSSParser: # We will always need to finish a selector when `:has()` is used as it leads with combining. # May apply to others as well. raise SelectorSyntaxError( - 'Expected a selector at position {}'.format(index), + f'Expected a selector at position {index}', self.pattern, index ) @@ -1108,7 +1092,7 @@ class CSSParser: end = (m.start(0) - 1) if m else (len(pattern) - 1) if self.debug: # pragma: no cover - print('## PARSING: {!r}'.format(pattern)) + print(f'## PARSING: {pattern!r}') while index <= end: m = None for v in self.css_tokens: @@ -1116,7 +1100,7 @@ class CSSParser: if m: name = v.get_name() if self.debug: # pragma: no cover - print("TOKEN: '{}' --> {!r} at position {}".format(name, m.group(0), m.start(0))) + print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}") index = m.end(0) yield name, m break @@ -1126,15 +1110,15 @@ class CSSParser: # throw an exception mentioning that the known selector type is in error; # otherwise, report the invalid character. if c == '[': - msg = "Malformed attribute selector at position {}".format(index) + msg = f"Malformed attribute selector at position {index}" elif c == '.': - msg = "Malformed class selector at position {}".format(index) + msg = f"Malformed class selector at position {index}" elif c == '#': - msg = "Malformed id selector at position {}".format(index) + msg = f"Malformed id selector at position {index}" elif c == ':': - msg = "Malformed pseudo-class selector at position {}".format(index) + msg = f"Malformed pseudo-class selector at position {index}" else: - msg = "Invalid character {!r} position {}".format(c, index) + msg = f"Invalid character {c!r} position {index}" raise SelectorSyntaxError(msg, self.pattern, index) if self.debug: # pragma: no cover print('## END PARSING') diff --git a/lib/soupsieve/css_types.py b/lib/soupsieve/css_types.py index 90fb4134..621ec7eb 100644 --- a/lib/soupsieve/css_types.py +++ b/lib/soupsieve/css_types.py @@ -45,11 +45,11 @@ class Immutable: for k, v in kwargs.items(): temp.append(type(v)) temp.append(v) - super(Immutable, self).__setattr__(k, v) - super(Immutable, self).__setattr__('_hash', hash(tuple(temp))) + super().__setattr__(k, v) + super().__setattr__('_hash', hash(tuple(temp))) @classmethod - def __base__(cls) -> "type[Immutable]": + def __base__(cls) -> type[Immutable]: """Get base class.""" return cls @@ -59,7 +59,7 @@ class Immutable: return ( isinstance(other, self.__base__()) and - all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash']) + all(getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash') ) def __ne__(self, other: Any) -> bool: @@ -67,7 +67,7 @@ class Immutable: return ( not isinstance(other, self.__base__()) or - any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash']) + any(getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash') ) def __hash__(self) -> int: @@ -78,14 +78,13 @@ class Immutable: def __setattr__(self, name: str, value: Any) -> None: """Prevent mutability.""" - raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) + raise AttributeError(f"'{self.__class__.__name__}' is immutable") def __repr__(self) -> str: # pragma: no cover """Representation.""" - return "{}({})".format( - self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) - ) + r = ', '.join([f"{k}={getattr(self, k)!r}" for k in self.__slots__[:-1]]) + return f"{self.__class__.__name__}({r})" __str__ = __repr__ @@ -112,10 +111,10 @@ class ImmutableDict(Mapping[Any, Any]): """Validate arguments.""" if isinstance(arg, dict): - if not all([isinstance(v, Hashable) for v in arg.values()]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) - elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) + if not all(isinstance(v, Hashable) for v in arg.values()): + raise TypeError(f'{self.__class__.__name__} values must be hashable') + elif not all(isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg): + raise TypeError(f'{self.__class__.__name__} values must be hashable') def __iter__(self) -> Iterator[Any]: """Iterator.""" @@ -140,7 +139,7 @@ class ImmutableDict(Mapping[Any, Any]): def __repr__(self) -> str: # pragma: no cover """Representation.""" - return "{!r}".format(self._d) + return f"{self._d!r}" __str__ = __repr__ @@ -157,10 +156,10 @@ class Namespaces(ImmutableDict): """Validate arguments.""" if isinstance(arg, dict): - if not all([isinstance(v, str) for v in arg.values()]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) - elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): - raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) + if not all(isinstance(v, str) for v in arg.values()): + raise TypeError(f'{self.__class__.__name__} values must be hashable') + elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg): + raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings') class CustomSelectors(ImmutableDict): @@ -175,10 +174,10 @@ class CustomSelectors(ImmutableDict): """Validate arguments.""" if isinstance(arg, dict): - if not all([isinstance(v, str) for v in arg.values()]): - raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) - elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): - raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) + if not all(isinstance(v, str) for v in arg.values()): + raise TypeError(f'{self.__class__.__name__} values must be hashable') + elif not all(isinstance(k, str) and isinstance(v, str) for k, v in arg): + raise TypeError(f'{self.__class__.__name__} keys and values must be Unicode strings') class Selector(Immutable): @@ -367,7 +366,7 @@ class SelectorList(Immutable): """Initialize.""" super().__init__( - selectors=tuple(selectors) if selectors is not None else tuple(), + selectors=tuple(selectors) if selectors is not None else (), is_not=is_not, is_html=is_html ) diff --git a/lib/soupsieve/pretty.py b/lib/soupsieve/pretty.py index 4c883347..810b87aa 100644 --- a/lib/soupsieve/pretty.py +++ b/lib/soupsieve/pretty.py @@ -10,7 +10,7 @@ The format and various output types is fairly known (though it hasn't been tested extensively to make sure we aren't missing corners). Example: - +------- ``` >>> import soupsieve as sv >>> sv.compile('this > that.class[name=value]').selectors.pretty() @@ -64,6 +64,7 @@ SelectorList( is_not=False, is_html=False) ``` + """ from __future__ import annotations import re @@ -123,16 +124,16 @@ def pretty(obj: Any) -> str: # pragma: no cover index = m.end(0) if name in ('class', 'lstrt', 'dstrt', 'tstrt'): indent += 4 - output.append('{}\n{}'.format(m.group(0), " " * indent)) + output.append(f'{m.group(0)}\n{" " * indent}') elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'): output.append(m.group(0)) elif name in ('lend', 'dend', 'tend'): indent -= 4 output.append(m.group(0)) elif name in ('sep',): - output.append('{}\n{}'.format(m.group(1), " " * indent)) + output.append(f'{m.group(1)}\n{" " * indent}') elif name in ('dsep',): - output.append('{} '.format(m.group(1))) + output.append(f'{m.group(1)} ') break return ''.join(output) diff --git a/lib/soupsieve/util.py b/lib/soupsieve/util.py index 84821bb4..1d0505f4 100644 --- a/lib/soupsieve/util.py +++ b/lib/soupsieve/util.py @@ -37,7 +37,7 @@ class SelectorSyntaxError(Exception): if pattern is not None and index is not None: # Format pattern to show line and column position self.context, self.line, self.col = get_pattern_context(pattern, index) - msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context) + msg = f'{msg}\n line {self.line}:\n{self.context}' super().__init__(msg) @@ -105,7 +105,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]: # we will render the output with just `\n`. We will still log the column # correctly though. text.append('\n') - text.append('{}{}'.format(indent, linetext)) + text.append(f'{indent}{linetext}') if offset is not None: text.append('\n') text.append(' ' * (col + offset) + '^')