PK ]ZZZ�vH� � soupsieve/__init__.py"""
Soup Sieve.
A CSS selector filter for BeautifulSoup4.
MIT License
Copyright (c) 2018 Isaac Muse
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
from __future__ import annotations
from .__meta__ import __version__, __version_info__ # noqa: F401
from . import css_parser as cp
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
'closest', 'compile', 'filter', 'iselect',
'match', 'select', 'select_one'
)
SoupSieve = cm.SoupSieve
def compile( # noqa: A001
pattern: str,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
if isinstance(pattern, SoupSieve):
if flags:
raise ValueError("Cannot process 'flags' argument on a compiled selector list")
elif namespaces is not None:
raise ValueError("Cannot process 'namespaces' argument on a compiled selector list")
elif custom is not None:
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern
return cp._cached_css_compile(
pattern,
ct.Namespaces(namespaces) if namespaces is not None else namespaces,
ct.CustomSelectors(custom) if custom is not None else custom,
flags
)
def purge() -> None:
"""Purge cached patterns."""
cp._purge_cache()
def closest(
select: str,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bs4.Tag:
"""Match closest ancestor."""
return compile(select, namespaces, flags, **kwargs).closest(tag)
def match(
select: str,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bool:
"""Match node."""
return compile(select, namespaces, flags, **kwargs).match(tag)
def filter( # noqa: A001
select: str,
iterable: Iterable[bs4.Tag],
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list[bs4.Tag]:
"""Filter list of nodes."""
return compile(select, namespaces, flags, **kwargs).filter(iterable)
def select_one(
select: str,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bs4.Tag:
"""Select a single tag."""
return compile(select, namespaces, flags, **kwargs).select_one(tag)
def select(
select: str,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list[bs4.Tag]:
"""Select the specified tags."""
return compile(select, namespaces, flags, **kwargs).select(tag, limit)
def iselect(
select: str,
tag: bs4.Tag,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
yield from compile(select, namespaces, flags, **kwargs).iselect(tag, limit)
def escape(ident: str) -> str:
"""Escape identifier."""
return cp.escape(ident)
PK ]ZZZ�T�n n soupsieve/__meta__.py"""Meta related things."""
from __future__ import annotations
from collections import namedtuple
import re
RE_VER = re.compile(
r'''(?x)
(?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))?
(?:(?P<type>a|b|rc)(?P<pre>\d+))?
(?:\.post(?P<post>\d+))?
(?:\.dev(?P<dev>\d+))?
'''
)
REL_MAP = {
".dev": "",
".dev-alpha": "a",
".dev-beta": "b",
".dev-candidate": "rc",
"alpha": "a",
"beta": "b",
"candidate": "rc",
"final": ""
}
DEV_STATUS = {
".dev": "2 - Pre-Alpha",
".dev-alpha": "2 - Pre-Alpha",
".dev-beta": "2 - Pre-Alpha",
".dev-candidate": "2 - Pre-Alpha",
"alpha": "3 - Alpha",
"beta": "4 - Beta",
"candidate": "4 - Beta",
"final": "5 - Production/Stable"
}
PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'}
class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
"""
Get the version (PEP 440).
A biased approach to the PEP 440 semantic version.
Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
(major, minor, micro, release type, pre-release build, post-release build, development release build)
Release types are named in is such a way they are comparable with ease.
Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
development status for setup files.
How it works (currently):
- You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
- To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
The dot is used to ensure all development specifiers are sorted before `alpha`.
You can specify a `dev` number for development builds, but do not have to as implicit development releases
are allowed.
- You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
allow implicit prereleases.
- You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
does not allow implicit post releases.
- It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.
Acceptable version releases:
```
Version(1, 0, 0, "final") 1.0
Version(1, 2, 0, "final") 1.2
Version(1, 2, 3, "final") 1.2.3
Version(1, 2, 0, ".dev-alpha", pre=4) 1.2a4
Version(1, 2, 0, ".dev-beta", pre=4) 1.2b4
Version(1, 2, 0, ".dev-candidate", pre=4) 1.2rc4
Version(1, 2, 0, "final", post=1) 1.2.post1
Version(1, 2, 3, ".dev") 1.2.3.dev0
Version(1, 2, 3, ".dev", dev=1) 1.2.3.dev1
```
"""
def __new__(
cls,
major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0
) -> Version:
"""Validate version info."""
# Ensure all parts are positive integers.
for value in (major, minor, micro, pre, post):
if not (isinstance(value, int) and value >= 0):
raise ValueError("All version parts except 'release' should be integers.")
if release not in REL_MAP:
raise ValueError(f"'{release}' is not a valid release type.")
# Ensure valid pre-release (we do not allow implicit pre-releases).
if ".dev-candidate" < release < "final":
if pre == 0:
raise ValueError("Implicit pre-releases not allowed.")
elif dev:
raise ValueError("Version is not a development release.")
elif post:
raise ValueError("Post-releases are not allowed with pre-releases.")
# Ensure valid development or development/pre release
elif release < "alpha":
if release > ".dev" and pre == 0:
raise ValueError("Implicit pre-release not allowed.")
elif post:
raise ValueError("Post-releases are not allowed with pre-releases.")
# Ensure a valid normal release
else:
if pre:
raise ValueError("Version is not a pre-release.")
elif dev:
raise ValueError("Version is not a development release.")
return super().__new__(cls, major, minor, micro, release, pre, post, dev)
def _is_pre(self) -> bool:
"""Is prerelease."""
return bool(self.pre > 0)
def _is_dev(self) -> bool:
"""Is development."""
return bool(self.release < "alpha")
def _is_post(self) -> bool:
"""Is post."""
return bool(self.post > 0)
def _get_dev_status(self) -> str: # pragma: no cover
"""Get development status string."""
return DEV_STATUS[self.release]
def _get_canonical(self) -> str:
"""Get the canonical output string."""
# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
if self.micro == 0:
ver = f"{self.major}.{self.minor}"
else:
ver = f"{self.major}.{self.minor}.{self.micro}"
if self._is_pre():
ver += f'{REL_MAP[self.release]}{self.pre}'
if self._is_post():
ver += f".post{self.post}"
if self._is_dev():
ver += f".dev{self.dev}"
return ver
def parse_version(ver: str) -> Version:
"""Parse version into a comparable Version tuple."""
m = RE_VER.match(ver)
if m is None:
raise ValueError(f"'{ver}' is not a valid version")
# Handle major, minor, micro
major = int(m.group('major'))
minor = int(m.group('minor')) if m.group('minor') else 0
micro = int(m.group('micro')) if m.group('micro') else 0
# Handle pre releases
if m.group('type'):
release = PRE_REL_MAP[m.group('type')]
pre = int(m.group('pre'))
else:
release = "final"
pre = 0
# Handle development releases
dev = m.group('dev') if m.group('dev') else 0
if m.group('dev'):
dev = int(m.group('dev'))
release = '.dev-' + release if pre else '.dev'
else:
dev = 0
# Handle post
post = int(m.group('post')) if m.group('post') else 0
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 5, 0, "final")
__version__ = __version_info__._get_canonical()
PK ]ZZZŤ�`� `� soupsieve/css_match.py"""CSS matcher."""
from __future__ import annotations
from datetime import datetime
from . import util
import re
from . import css_types as ct
import unicodedata
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')
# Relationships
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'
# Relationships for :has() (forward looking)
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE
DIR_MAP = {
'ltr': ct.SEL_DIR_LTR,
'rtl': ct.SEL_DIR_RTL,
'auto': 0
}
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')
MONTHS_30 = (4, 6, 9, 11) # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
DAYS_IN_WEEK = 7
class _FakeParent:
"""
Fake parent class.
When we have a fragment with no `BeautifulSoup` document object,
we can't evaluate `nth` selectors properly. Create a temporary
fake parent so we can traverse the root element as a child.
"""
def __init__(self, element: bs4.Tag) -> None:
"""Initialize."""
self.contents = [element]
def __len__(self) -> bs4.PageElement:
"""Length."""
return len(self.contents)
class _DocumentNav:
"""Navigate a Beautiful Soup document."""
@classmethod
def assert_valid_input(cls, tag: Any) -> None:
"""Check if valid input tag or document."""
# Fail on unexpected types.
if not cls.is_tag(tag):
raise TypeError(f"Expected a BeautifulSoup 'Tag', but instead received type {type(tag)}")
@staticmethod
def is_doc(obj: bs4.Tag) -> bool:
"""Is `BeautifulSoup` object."""
return isinstance(obj, bs4.BeautifulSoup)
@staticmethod
def is_tag(obj: bs4.PageElement) -> bool:
"""Is tag."""
return isinstance(obj, bs4.Tag)
@staticmethod
def is_declaration(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is declaration."""
return isinstance(obj, bs4.Declaration)
@staticmethod
def is_cdata(obj: bs4.PageElement) -> bool:
"""Is CDATA."""
return isinstance(obj, bs4.CData)
@staticmethod
def is_processing_instruction(obj: bs4.PageElement) -> bool: # pragma: no cover
"""Is processing instruction."""
return isinstance(obj, bs4.ProcessingInstruction)
@staticmethod
def is_navigable_string(obj: bs4.PageElement) -> bool:
"""Is navigable string."""
return isinstance(obj, bs4.NavigableString)
@staticmethod
def is_special_string(obj: bs4.PageElement) -> bool:
"""Is special string."""
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
@classmethod
def is_content_string(cls, obj: bs4.PageElement) -> bool:
"""Check if node is content string."""
return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
@staticmethod
def create_fake_parent(el: bs4.Tag) -> _FakeParent:
"""Create fake parent for a given element."""
return _FakeParent(el)
@staticmethod
def is_xml_tree(el: bs4.Tag) -> bool:
"""Check if element (or document) is from a XML tree."""
return bool(el._is_xml)
def is_iframe(self, el: bs4.Tag) -> bool:
"""Check if element is an `iframe`."""
return bool(
((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
self.is_html_tag(el) # type: ignore[attr-defined]
)
def is_root(self, el: bs4.Tag) -> bool:
"""
Return whether element is a root element.
We check that the element is the root of the tree (which we have already pre-calculated),
and we check if it is the root element under an `iframe`.
"""
root = self.root and self.root is el # type: ignore[attr-defined]
if not root:
parent = self.get_parent(el)
root = parent is not None and self.is_html and self.is_iframe(parent) # type: ignore[attr-defined]
return root
def get_contents(self, el: bs4.Tag, no_iframe: bool = False) -> Iterator[bs4.PageElement]:
"""Get contents or contents in reverse."""
if not no_iframe or not self.is_iframe(el):
yield from el.contents
def get_children(
self,
el: bs4.Tag,
start: int | None = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
) -> Iterator[bs4.PageElement]:
"""Get children."""
if not no_iframe or not self.is_iframe(el):
last = len(el.contents) - 1
if start is None:
index = last if reverse else 0
else:
index = start
end = -1 if reverse else last + 1
incr = -1 if reverse else 1
if 0 <= index <= last:
while index != end:
node = el.contents[index]
index += incr
if not tags or self.is_tag(node):
yield node
def get_descendants(
self,
el: bs4.Tag,
tags: bool = True,
no_iframe: bool = False
) -> Iterator[bs4.PageElement]:
"""Get descendants."""
if not no_iframe or not self.is_iframe(el):
next_good = None
for child in el.descendants:
if next_good is not None:
if child is not next_good:
continue
next_good = None
is_tag = self.is_tag(child)
if no_iframe and is_tag and self.is_iframe(child):
if child.next_sibling is not None:
next_good = child.next_sibling
else:
last_child = child
while self.is_tag(last_child) and last_child.contents:
last_child = last_child.contents[-1]
next_good = last_child.next_element
yield child
if next_good is None:
break
# Coverage isn't seeing this even though it's executed
continue # pragma: no cover
if not tags or is_tag:
yield child
def get_parent(self, el: bs4.Tag, no_iframe: bool = False) -> bs4.Tag:
"""Get parent."""
parent = el.parent
if no_iframe and parent is not None and self.is_iframe(parent):
parent = None
return parent
@staticmethod
def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag."""
return cast('str | None', el.name)
@staticmethod
def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix."""
return cast('str | None', el.prefix)
@staticmethod
def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`."""
return cast('str | None', el.namespace)
@classmethod
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get next sibling tag."""
sibling = el.next_sibling
while tags and not cls.is_tag(sibling) and sibling is not None:
sibling = sibling.next_sibling
return sibling
@classmethod
def get_previous(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
"""Get previous sibling tag."""
sibling = el.previous_sibling
while tags and not cls.is_tag(sibling) and sibling is not None:
sibling = sibling.previous_sibling
return sibling
@staticmethod
def has_html_ns(el: bs4.Tag) -> bool:
"""
Check if element has an HTML namespace.
This is a bit different than whether a element is treated as having an HTML namespace,
like we do in the case of `is_html_tag`.
"""
ns = getattr(el, 'namespace') if el else None # noqa: B009
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@classmethod
def normalize_value(cls, value: Any) -> str | Sequence[str]:
"""Normalize the value to be a string or list of strings."""
# Treat `None` as empty string.
if value is None:
return ''
# Pass through strings
if (isinstance(value, str)):
return value
# If it's a byte string, convert it to Unicode, treating it as UTF-8.
if isinstance(value, bytes):
return value.decode("utf8")
# BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
if isinstance(value, Sequence):
new_value = []
for v in value:
if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
# This is most certainly a user error and will crash and burn later.
# To keep things working, we'll do what we do with all objects,
# And convert them to strings.
new_value.append(str(v))
else:
# Convert the child to a string
new_value.append(cast(str, cls.normalize_value(v)))
return new_value
# Try and make anything else a string
return str(value)
@classmethod
def get_attribute_by_name(
cls,
el: bs4.Tag,
name: str,
default: str | Sequence[str] | None = None
) -> str | Sequence[str] | None:
"""Get attribute by name."""
value = default
if el._is_xml:
try:
value = cls.normalize_value(el.attrs[name])
except KeyError:
pass
else:
for k, v in el.attrs.items():
if util.lower(k) == name:
value = cls.normalize_value(v)
break
return value
@classmethod
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
yield k, cls.normalize_value(v)
@classmethod
def get_classes(cls, el: bs4.Tag) -> Sequence[str]:
"""Get classes."""
classes = cls.get_attribute_by_name(el, 'class', [])
if isinstance(classes, str):
classes = RE_NOT_WS.findall(classes)
return cast(Sequence[str], classes)
def get_text(self, el: bs4.Tag, no_iframe: bool = False) -> str:
"""Get text."""
return ''.join(
[node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
)
def get_own_text(self, el: bs4.Tag, no_iframe: bool = False) -> list[str]:
"""Get Own Text."""
return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
class Inputs:
"""Class for parsing and validating input items."""
@staticmethod
def validate_day(year: int, month: int, day: int) -> bool:
"""Validate day."""
max_days = LONG_MONTH
if month == FEB:
max_days = FEB_LEAP_MONTH if ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0) else FEB_MONTH
elif month in MONTHS_30:
max_days = SHORT_MONTH
return 1 <= day <= max_days
@staticmethod
def validate_week(year: int, week: int) -> bool:
"""Validate week."""
max_week = datetime.strptime(f"{12}-{31}-{year}", "%m-%d-%Y").isocalendar()[1]
if max_week == 1:
max_week = 53
return 1 <= week <= max_week
@staticmethod
def validate_month(month: int) -> bool:
"""Validate month."""
return 1 <= month <= 12
@staticmethod
def validate_year(year: int) -> bool:
"""Validate year."""
return 1 <= year
@staticmethod
def validate_hour(hour: int) -> bool:
"""Validate hour."""
return 0 <= hour <= 23
@staticmethod
def validate_minutes(minutes: int) -> bool:
"""Validate minutes."""
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value."""
parsed = None # type: tuple[float, ...] | None
if value is None:
return value
if itype == "date":
m = RE_DATE.match(value)
if m:
year = int(m.group('year'), 10)
month = int(m.group('month'), 10)
day = int(m.group('day'), 10)
if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
parsed = (year, month, day)
elif itype == "month":
m = RE_MONTH.match(value)
if m:
year = int(m.group('year'), 10)
month = int(m.group('month'), 10)
if cls.validate_year(year) and cls.validate_month(month):
parsed = (year, month)
elif itype == "week":
m = RE_WEEK.match(value)
if m:
year = int(m.group('year'), 10)
week = int(m.group('week'), 10)
if cls.validate_year(year) and cls.validate_week(year, week):
parsed = (year, week)
elif itype == "time":
m = RE_TIME.match(value)
if m:
hour = int(m.group('hour'), 10)
minutes = int(m.group('minutes'), 10)
if cls.validate_hour(hour) and cls.validate_minutes(minutes):
parsed = (hour, minutes)
elif itype == "datetime-local":
m = RE_DATETIME.match(value)
if m:
year = int(m.group('year'), 10)
month = int(m.group('month'), 10)
day = int(m.group('day'), 10)
hour = int(m.group('hour'), 10)
minutes = int(m.group('minutes'), 10)
if (
cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
cls.validate_hour(hour) and cls.validate_minutes(minutes)
):
parsed = (year, month, day, hour, minutes)
elif itype in ("number", "range"):
m = RE_NUM.match(value)
if m:
parsed = (float(m.group('value')),)
return parsed
class CSSMatch(_DocumentNav):
"""Perform CSS matching."""
def __init__(
self,
selectors: ct.SelectorList,
scope: bs4.Tag,
namespaces: ct.Namespaces | None,
flags: int
) -> None:
"""Initialize."""
self.assert_valid_input(scope)
self.tag = scope
self.cached_meta_lang = [] # type: list[tuple[str, str]]
self.cached_default_forms = [] # type: list[tuple[bs4.Tag, bs4.Tag]]
self.cached_indeterminate_forms = [] # type: list[tuple[bs4.Tag, str, bool]]
self.selectors = selectors
self.namespaces = {} if namespaces is None else namespaces # type: ct.Namespaces | dict[str, str]
self.flags = flags
self.iframe_restrict = False
# Find the root element for the whole tree
doc = scope
parent = self.get_parent(doc)
while parent:
doc = parent
parent = self.get_parent(doc)
root = None
if not self.is_doc(doc):
root = doc
else:
for child in self.get_children(doc):
root = child
break
self.root = root
self.scope = scope if scope is not doc else root
self.has_html_namespace = self.has_html_ns(root)
# A document can be both XML and HTML (XHTML)
self.is_xml = self.is_xml_tree(doc)
self.is_html = not self.is_xml or self.has_html_namespace
def supports_namespaces(self) -> bool:
"""Check if namespaces are supported in the HTML type."""
return self.is_xml or self.has_html_namespace
def get_tag_ns(self, el: bs4.Tag) -> str:
"""Get tag namespace."""
if self.supports_namespaces():
namespace = ''
ns = self.get_uri(el)
if ns:
namespace = ns
else:
namespace = NS_XHTML
return namespace
def is_html_tag(self, el: bs4.Tag) -> bool:
"""Check if tag is in HTML namespace."""
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
# Analyze child text nodes
if self.is_tag(node):
# Avoid analyzing certain elements specified in the specification.
direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
if (
self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
not self.is_html_tag(node) or
direction is not None
):
continue # pragma: no cover
# Check directionality of this node's text
value = self.find_bidi(node)
if value is not None:
return value
# Direction could not be determined
continue # pragma: no cover
# Skip `doctype` comments, etc.
if self.is_special_string(node):
continue
# Analyze text nodes for directionality.
for c in node:
bidi = unicodedata.bidirectional(c)
if bidi in ('AL', 'R', 'L'):
return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
return None
def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
"""Filter the language tags."""
match = True
lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
ranges = lang_range.split('-')
subtags = lang_tag.lower().split('-')
length = len(ranges)
slength = len(subtags)
rindex = 0
sindex = 0
r = ranges[rindex]
s = subtags[sindex]
# Empty specified language should match unspecified language attributes
if length == 1 and slength == 1 and not r and r == s:
return True
# Primary tag needs to match
if (r != '*' and r != s) or (r == '*' and slength == 1 and not s):
match = False
rindex += 1
sindex += 1
# Match until we run out of ranges
while match and rindex < length:
r = ranges[rindex]
try:
s = subtags[sindex]
except IndexError:
# Ran out of subtags,
# but we still have ranges
match = False
continue
# Empty range
if not r:
match = False
continue
# Matched range
elif s == r:
rindex += 1
# Implicit wildcard cannot match
# singletons
elif len(s) == 1:
match = False
continue
# Implicitly matched, so grab next subtag
sindex += 1
return match
def match_attribute_name(
self,
el: bs4.Tag,
attr: str,
prefix: str | None
) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists."""
value = None
if self.supports_namespaces():
value = None
# If we have not defined namespaces, we can't very well find them, so don't bother trying.
if prefix:
ns = self.namespaces.get(prefix)
if ns is None and prefix != '*':
return None
else:
ns = None
for k, v in self.iter_attributes(el):
# Get attribute parts
namespace, name = self.split_namespace(el, k)
# Can't match a prefix attribute as we haven't specified one to match
# Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
if ns is None:
if (self.is_xml and attr == k) or (not self.is_xml and util.lower(attr) == util.lower(k)):
value = v
break
# Coverage is not finding this even though it is executed.
# Adding a print statement before this (and erasing coverage) causes coverage to find the line.
# Ignore the false positive message.
continue # pragma: no cover
# We can't match our desired prefix attribute as the attribute doesn't have a prefix
if namespace is None or ns != namespace and prefix != '*':
continue
# The attribute doesn't match.
if (util.lower(attr) != util.lower(name)) if not self.is_xml else (attr != name):
continue
value = v
break
else:
for k, v in self.iter_attributes(el):
if util.lower(attr) != util.lower(k):
continue
value = v
break
return value
def match_namespace(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match the namespace of the element."""
match = True
namespace = self.get_tag_ns(el)
default_namespace = self.namespaces.get('')
tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix)
# We must match the default namespace if one is not provided
if tag.prefix is None and (default_namespace is not None and namespace != default_namespace):
match = False
# If we specified `|tag`, we must not have a namespace.
elif (tag.prefix is not None and tag.prefix == '' and namespace):
match = False
# Verify prefix matches
elif (
tag.prefix and
tag.prefix != '*' and (tag_ns is None or namespace != tag_ns)
):
match = False
return match
def match_attributes(self, el: bs4.Tag, attributes: tuple[ct.SelectorAttribute, ...]) -> bool:
"""Match attributes."""
match = True
if attributes:
for a in attributes:
temp = self.match_attribute_name(el, a.attribute, a.prefix)
pattern = a.xml_type_pattern if self.is_xml and a.xml_type_pattern else a.pattern
if temp is None:
match = False
break
value = temp if isinstance(temp, str) else ' '.join(temp)
if pattern is None:
continue
elif pattern.match(value) is None:
match = False
break
return match
def match_tagname(self, el: bs4.Tag, tag: ct.SelectorTag) -> bool:
"""Match tag name."""
name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
return not (
name is not None and
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag."""
match = True
if tag is not None:
# Verify namespace
if not self.match_namespace(el, tag):
match = False
if not self.match_tagname(el, tag):
match = False
return match
def match_past_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match past relationship."""
found = False
# I don't think this can ever happen, but it makes `mypy` happy
if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
return found
if relation[0].rel_type == REL_PARENT:
parent = self.get_parent(el, no_iframe=self.iframe_restrict)
while not found and parent:
found = self.match_selectors(parent, relation)
parent = self.get_parent(parent, no_iframe=self.iframe_restrict)
elif relation[0].rel_type == REL_CLOSE_PARENT:
parent = self.get_parent(el, no_iframe=self.iframe_restrict)
if parent:
found = self.match_selectors(parent, relation)
elif relation[0].rel_type == REL_SIBLING:
sibling = self.get_previous(el)
while not found and sibling:
found = self.match_selectors(sibling, relation)
sibling = self.get_previous(sibling)
elif relation[0].rel_type == REL_CLOSE_SIBLING:
sibling = self.get_previous(el)
if sibling and self.is_tag(sibling):
found = self.match_selectors(sibling, relation)
return found
def match_future_child(self, parent: bs4.Tag, relation: ct.SelectorList, recursive: bool = False) -> bool:
"""Match future child."""
match = False
if recursive:
children = self.get_descendants # type: Callable[..., Iterator[bs4.Tag]]
else:
children = self.get_children
for child in children(parent, no_iframe=self.iframe_restrict):
match = self.match_selectors(child, relation)
if match:
break
return match
def match_future_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match future relationship."""
found = False
# I don't think this can ever happen, but it makes `mypy` happy
if isinstance(relation[0], ct.SelectorNull): # pragma: no cover
return found
if relation[0].rel_type == REL_HAS_PARENT:
found = self.match_future_child(el, relation, True)
elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
found = self.match_future_child(el, relation)
elif relation[0].rel_type == REL_HAS_SIBLING:
sibling = self.get_next(el)
while not found and sibling:
found = self.match_selectors(sibling, relation)
sibling = self.get_next(sibling)
elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
sibling = self.get_next(el)
if sibling and self.is_tag(sibling):
found = self.match_selectors(sibling, relation)
return found
def match_relations(self, el: bs4.Tag, relation: ct.SelectorList) -> bool:
"""Match relationship to other elements."""
found = False
if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
return found
if relation[0].rel_type.startswith(':'):
found = self.match_future_relations(el, relation)
else:
found = self.match_past_relations(el, relation)
return found
def match_id(self, el: bs4.Tag, ids: tuple[str, ...]) -> bool:
"""Match element's ID."""
found = True
for i in ids:
if i != self.get_attribute_by_name(el, 'id', ''):
found = False
break
return found
def match_classes(self, el: bs4.Tag, classes: tuple[str, ...]) -> bool:
"""Match element's classes."""
current_classes = self.get_classes(el)
found = True
for c in classes:
if c not in current_classes:
found = False
break
return found
def match_root(self, el: bs4.Tag) -> bool:
"""Match element as root."""
is_root = self.is_root(el)
if is_root:
sibling = self.get_previous(el, tags=False)
while is_root and sibling is not None:
if (
self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
self.is_cdata(sibling)
):
is_root = False
else:
sibling = self.get_previous(sibling, tags=False)
if is_root:
sibling = self.get_next(el, tags=False)
while is_root and sibling is not None:
if (
self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
self.is_cdata(sibling)
):
is_root = False
else:
sibling = self.get_next(sibling, tags=False)
return is_root
def match_scope(self, el: bs4.Tag) -> bool:
"""Match element as scope."""
return self.scope is el
def match_nth_tag_type(self, el: bs4.Tag, child: bs4.Tag) -> bool:
"""Match tag type for `nth` matches."""
return (
(self.get_tag(child) == self.get_tag(el)) and
(self.get_tag_ns(child) == self.get_tag_ns(el))
)
def match_nth(self, el: bs4.Tag, nth: bs4.Tag) -> bool:
"""Match `nth` elements."""
matched = True
for n in nth:
matched = False
if n.selectors and not self.match_selectors(el, n.selectors):
break
parent = self.get_parent(el)
if parent is None:
parent = self.create_fake_parent(el)
last = n.last
last_index = len(parent) - 1
index = last_index if last else 0
relative_index = 0
a = n.a
b = n.b
var = n.n
count = 0
count_incr = 1
factor = -1 if last else 1
idx = last_idx = a * count + b if var else a
# We can only adjust bounds within a variable index
if var:
# Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
# Otherwise, increment to try to get in bounds.
adjust = None
while idx < 1 or idx > last_index:
if idx < 0:
diff_low = 0 - idx
if adjust is not None and adjust == 1:
break
adjust = -1
count += count_incr
idx = last_idx = a * count + b if var else a
diff = 0 - idx
if diff >= diff_low:
break
else:
diff_high = idx - last_index
if adjust is not None and adjust == -1:
break
adjust = 1
count += count_incr
idx = last_idx = a * count + b if var else a
diff = idx - last_index
if diff >= diff_high:
break
diff_high = diff
# If a < 0, our count is working backwards, so floor the index by increasing the count.
# Find the count that yields the lowest, in bound value and use that.
# Lastly reverse count increment so that we'll increase our index.
lowest = count
if a < 0:
while idx >= 1:
lowest = count
count += count_incr
idx = last_idx = a * count + b if var else a
count_incr = -1
count = lowest
idx = last_idx = a * count + b if var else a
# Evaluate elements while our calculated nth index is still in range
while 1 <= idx <= last_index + 1:
child = None
# Evaluate while our child index is still in range.
for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
index += factor
if not self.is_tag(child):
continue
# Handle `of S` in `nth-child`
if n.selectors and not self.match_selectors(child, n.selectors):
continue
# Handle `of-type`
if n.of_type and not self.match_nth_tag_type(el, child):
continue
relative_index += 1
if relative_index == idx:
if child is el:
matched = True
else:
break
if child is el:
break
if child is el:
break
last_idx = idx
count += count_incr
if count < 0:
# Count is counting down and has now ventured into invalid territory.
break
idx = a * count + b if var else a
if last_idx == idx:
break
if not matched:
break
return matched
def match_empty(self, el: bs4.Tag) -> bool:
"""Check if element is empty (if requested)."""
is_empty = True
for child in self.get_children(el, tags=False):
if self.is_tag(child):
is_empty = False
break
elif self.is_content_string(child) and RE_NOT_EMPTY.search(child):
is_empty = False
break
return is_empty
def match_subselectors(self, el: bs4.Tag, selectors: tuple[ct.SelectorList, ...]) -> bool:
"""Match selectors."""
match = True
for sel in selectors:
if not self.match_selectors(el, sel):
match = False
return match
def match_contains(self, el: bs4.Tag, contains: tuple[ct.SelectorContains, ...]) -> bool:
"""Match element if it contains text."""
match = True
content = None # type: str | Sequence[str] | None
for contain_list in contains:
if content is None:
if contain_list.own:
content = self.get_own_text(el, no_iframe=self.is_html)
else:
content = self.get_text(el, no_iframe=self.is_html)
found = False
for text in contain_list.text:
if contain_list.own:
for c in content:
if text in c:
found = True
break
if found:
break
else:
if text in content:
found = True
break
if not found:
match = False
return match
def match_default(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
# Find this input's form
form = None
parent = self.get_parent(el, no_iframe=True)
while parent and form is None:
if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
form = parent
else:
parent = self.get_parent(parent, no_iframe=True)
# Look in form cache to see if we've already located its default button
found_form = False
for f, t in self.cached_default_forms:
if f is form:
found_form = True
if t is el:
match = True
break
# We didn't have the form cached, so look for its default button
if not found_form:
for child in self.get_descendants(form, no_iframe=True):
name = self.get_tag(child)
# Can't do nested forms (haven't figured out why we never hit this)
if name == 'form': # pragma: no cover
break
if name in ('input', 'button'):
v = self.get_attribute_by_name(child, 'type', '')
if v and util.lower(v) == 'submit':
self.cached_default_forms.append((form, child))
if el is child:
match = True
break
return match
def match_indeterminate(self, el: bs4.Tag) -> bool:
"""Match default."""
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
while form is None:
if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
form = parent
break
last_parent = parent
parent = self.get_parent(parent, no_iframe=True)
if parent is None:
form = last_parent
break
return form
form = get_parent_form(el)
# Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
found_form = False
for f, n, i in self.cached_indeterminate_forms:
if f is form and n == name:
found_form = True
if i is True:
match = True
break
# We didn't have the form cached, so validate that the radio button is indeterminate
if not found_form:
checked = False
for child in self.get_descendants(form, no_iframe=True):
if child is el:
continue
tag_name = self.get_tag(child)
if tag_name == 'input':
is_radio = False
check = False
has_name = False
for k, v in self.iter_attributes(child):
if util.lower(k) == 'type' and util.lower(v) == 'radio':
is_radio = True
elif util.lower(k) == 'name' and v == name:
has_name = True
elif util.lower(k) == 'checked':
check = True
if is_radio and check and has_name and get_parent_form(child) is form:
checked = True
break
if checked:
break
if not checked:
match = True
self.cached_indeterminate_forms.append((form, name, match))
return match
def match_lang(self, el: bs4.Tag, langs: tuple[ct.SelectorLang, ...]) -> bool:
"""Match languages."""
match = False
has_ns = self.supports_namespaces()
root = self.root
has_html_namespace = self.has_html_namespace
# Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
parent = el
found_lang = None
last = None
while not found_lang:
has_html_ns = self.has_html_ns(parent)
for k, v in self.iter_attributes(parent):
attr_ns, attr = self.split_namespace(parent, k)
if (
((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
(
has_ns and not has_html_ns and attr_ns == NS_XML and
(util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
)
):
found_lang = v
break
last = parent
parent = self.get_parent(parent, no_iframe=self.is_html)
if parent is None:
root = last
has_html_namespace = self.has_html_ns(root)
parent = last
break
# Use cached meta language.
if found_lang is None and self.cached_meta_lang:
for cache in self.cached_meta_lang:
if root is cache[0]:
found_lang = cache[1]
# If we couldn't find a language, and the document is HTML, look to meta to determine language.
if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
# Find head
found = False
for tag in ('html', 'head'):
found = False
for child in self.get_children(parent, no_iframe=self.is_html):
if self.get_tag(child) == tag and self.is_html_tag(child):
found = True
parent = child
break
if not found: # pragma: no cover
break
# Search meta tags
if found:
for child in parent:
if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(parent):
c_lang = False
content = None
for k, v in self.iter_attributes(child):
if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
c_lang = True
if util.lower(k) == 'content':
content = v
if c_lang and content:
found_lang = content
self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
break
if found_lang is not None:
break
if found_lang is None:
self.cached_meta_lang.append((cast(str, root), ''))
# If we determined a language, compare.
if found_lang is not None:
for patterns in langs:
match = False
for pattern in patterns:
if self.extended_language_filter(pattern, cast(str, found_lang)):
match = True
if not match:
break
return match
def match_dir(self, el: bs4.Tag, directionality: int) -> bool:
"""Check directionality."""
# If we have to match both left and right, we can't match either.
if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
return False
if el is None or not self.is_html_tag(el):
return False
# Element has defined direction of left to right or right to left
direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
if direction not in (None, 0):
return direction == directionality
# Element is the document element (the root) and no direction assigned, assume left to right.
is_root = self.is_root(el)
if is_root and direction is None:
return ct.SEL_DIR_LTR == directionality
# If `input[type=telephone]` and no direction is assigned, assume left to right.
name = self.get_tag(el)
is_input = name == 'input'
is_textarea = name == 'textarea'
is_bdi = name == 'bdi'
itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
if is_input and itype == 'tel' and direction is None:
return ct.SEL_DIR_LTR == directionality
# Auto handling for text inputs
if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
if is_textarea:
value = ''.join(node for node in self.get_contents(el, no_iframe=True) if self.is_content_string(node))
else:
value = cast(str, self.get_attribute_by_name(el, 'value', ''))
if value:
for c in value:
bidi = unicodedata.bidirectional(c)
if bidi in ('AL', 'R', 'L'):
direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
return direction == directionality
# Assume left to right
return ct.SEL_DIR_LTR == directionality
elif is_root:
return ct.SEL_DIR_LTR == directionality
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
# Auto handling for `bdi` and other non text inputs.
if (is_bdi and direction is None) or direction == 0:
direction = self.find_bidi(el)
if direction is not None:
return direction == directionality
elif is_root:
return ct.SEL_DIR_LTR == directionality
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
# Match parents direction
return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
def match_range(self, el: bs4.Tag, condition: int) -> bool:
"""
Match range.
Behavior is modeled after what we see in browsers. Browsers seem to evaluate
if the value is out of range, and if not, it is in range. So a missing value
will not evaluate out of range; therefore, value is in range. Personally, I
feel like this should evaluate as neither in or out of range.
"""
out_of_range = False
itype = util.lower(self.get_attribute_by_name(el, 'type'))
mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))
# There is no valid min or max, so we cannot evaluate a range
if mn is None and mx is None:
return False
value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
if value is not None:
if itype in ("date", "datetime-local", "month", "week", "number", "range"):
if mn is not None and value < mn:
out_of_range = True
if not out_of_range and mx is not None and value > mx:
out_of_range = True
elif itype == "time":
if mn is not None and mx is not None and mn > mx:
# Time is periodic, so this is a reversed/discontinuous range
if value < mn and value > mx:
out_of_range = True
else:
if mn is not None and value < mn:
out_of_range = True
if not out_of_range and mx is not None and value > mx:
out_of_range = True
return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
def match_defined(self, el: bs4.Tag) -> bool:
"""
Match defined.
`:defined` is related to custom elements in a browser.
- If the document is XML (not XHTML), all tags will match.
- Tags that are not custom (don't have a hyphen) are marked defined.
- If the tag has a prefix (without or without a namespace), it will not match.
This is of course requires the parser to provide us with the proper prefix and namespace info,
if it doesn't, there is nothing we can do.
"""
name = self.get_tag(el)
return (
name is not None and (
name.find('-') == -1 or
name.find(':') != -1 or
self.get_prefix(el) is not None
)
)
def match_placeholder_shown(self, el: bs4.Tag) -> bool:
"""
Match placeholder shown according to HTML spec.
- text area should be checked if they have content. A single newline does not count as content.
"""
match = False
content = self.get_text(el)
if content in ('', '\n'):
match = True
return match
def match_selectors(self, el: bs4.Tag, selectors: ct.SelectorList) -> bool:
"""Check if element matches one of the selectors."""
match = False
is_not = selectors.is_not
is_html = selectors.is_html
# Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
if is_html:
namespaces = self.namespaces
iframe_restrict = self.iframe_restrict
self.namespaces = {'html': NS_XHTML}
self.iframe_restrict = True
if not is_html or self.is_html:
for selector in selectors:
match = is_not
# We have a un-matchable situation (like `:focus` as you can focus an element in this environment)
if isinstance(selector, ct.SelectorNull):
continue
# Verify tag matches
if not self.match_tag(el, selector.tag):
continue
# Verify tag is defined
if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
continue
# Verify element is root
if selector.flags & ct.SEL_ROOT and not self.match_root(el):
continue
# Verify element is scope
if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
continue
# Verify element has placeholder shown
if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
continue
# Verify `nth` matches
if not self.match_nth(el, selector.nth):
continue
if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
continue
# Verify id matches
if selector.ids and not self.match_id(el, selector.ids):
continue
# Verify classes match
if selector.classes and not self.match_classes(el, selector.classes):
continue
# Verify attribute(s) match
if not self.match_attributes(el, selector.attributes):
continue
# Verify ranges
if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
continue
# Verify language patterns
if selector.lang and not self.match_lang(el, selector.lang):
continue
# Verify pseudo selector patterns
if selector.selectors and not self.match_subselectors(el, selector.selectors):
continue
# Verify relationship selectors
if selector.relation and not self.match_relations(el, selector.relation):
continue
# Validate that the current default selector match corresponds to the first submit button in the form
if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
continue
# Validate that the unset radio button is among radio buttons with the same name in a form that are
# also not set.
if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
continue
# Validate element directionality
if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
continue
# Validate that the tag contains the specified text.
if selector.contains and not self.match_contains(el, selector.contains):
continue
match = not is_not
break
# Restore actual namespaces being used for external selector lists
if is_html:
self.namespaces = namespaces
self.iframe_restrict = iframe_restrict
return match
def select(self, limit: int = 0) -> Iterator[bs4.Tag]:
"""Match all tags under the targeted tag."""
lim = None if limit < 1 else limit
for child in self.get_descendants(self.tag):
if self.match(child):
yield child
if lim is not None:
lim -= 1
if lim < 1:
break
def closest(self) -> bs4.Tag | None:
"""Match closest ancestor."""
current = self.tag
closest = None
while closest is None and current is not None:
if self.match(current):
closest = current
else:
current = self.get_parent(current)
return closest
def filter(self) -> list[bs4.Tag]: # noqa A001
"""Filter tag's children."""
return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
def match(self, el: bs4.Tag) -> bool:
"""Match."""
return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
class SoupSieve(ct.Immutable):
"""Compiled Soup Sieve selector matching object."""
pattern: str
selectors: ct.SelectorList
namespaces: ct.Namespaces | None
custom: dict[str, str]
flags: int
__slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")
def __init__(
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
):
"""Initialize."""
super().__init__(
pattern=pattern,
selectors=selectors,
namespaces=namespaces,
custom=custom,
flags=flags
)
def match(self, tag: bs4.Tag) -> bool:
"""Match."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)
def closest(self, tag: bs4.Tag) -> bs4.Tag:
"""Match closest ancestor."""
return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()
def filter(self, iterable: Iterable[bs4.Tag]) -> list[bs4.Tag]: # noqa A001
"""
Filter.
`CSSMatch` can cache certain searches for tags of the same document,
so if we are given a tag, all tags are from the same document,
and we can take advantage of the optimization.
Any other kind of iterable could have tags from different documents or detached tags,
so for those, we use a new `CSSMatch` for each item in the iterable.
"""
if CSSMatch.is_tag(iterable):
return CSSMatch(self.selectors, iterable, self.namespaces, self.flags).filter()
else:
return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]
def select_one(self, tag: bs4.Tag) -> bs4.Tag:
"""Select a single tag."""
tags = self.select(tag, limit=1)
return tags[0] if tags else None
def select(self, tag: bs4.Tag, limit: int = 0) -> list[bs4.Tag]:
"""Select the specified tags."""
return list(self.iselect(tag, limit))
def iselect(self, tag: bs4.Tag, limit: int = 0) -> Iterator[bs4.Tag]:
"""Iterate the specified tags."""
yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)
def __repr__(self) -> str: # pragma: no cover
"""Representation."""
return (
f"SoupSieve(pattern={self.pattern!r}, namespaces={self.namespaces!r}, "
f"custom={self.custom!r}, flags={self.flags!r})"
)
__str__ = __repr__
ct.pickle_register(SoupSieve)
PK ]ZZZ*&�Q� Q� soupsieve/css_parser.py"""CSS selector parser."""
from __future__ import annotations
import re
from functools import lru_cache
from . import util
from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
# Simple pseudo classes that take no parameters
PSEUDO_SIMPLE = {
":any-link",
":empty",
":first-child",
":first-of-type",
":in-range",
":out-of-range",
":last-child",
":last-of-type",
":link",
":only-child",
":only-of-type",
":root",
':checked',
':default',
':disabled',
':enabled',
':indeterminate',
':optional',
':placeholder-shown',
':read-only',
':read-write',
':required',
':scope',
':defined'
}
# Supported, simple pseudo classes that match nothing in the Soup Sieve environment
PSEUDO_SIMPLE_NO_MATCH = {
':active',
':current',
':focus',
':focus-visible',
':focus-within',
':future',
':host',
':hover',
':local-link',
':past',
':paused',
':playing',
':target',
':target-within',
':user-invalid',
':visited'
}
# Complex pseudo classes that take selector lists
PSEUDO_COMPLEX = {
':contains',
':-soup-contains',
':-soup-contains-own',
':has',
':is',
':matches',
':not',
':where'
}
PSEUDO_COMPLEX_NO_MATCH = {
':current',
':host',
':host-context'
}
# Complex pseudo classes that take very specific parameters and are handled special
PSEUDO_SPECIAL = {
':dir',
':lang',
':nth-child',
':nth-last-child',
':nth-last-of-type',
':nth-of-type'
}
PSEUDO_SUPPORTED = PSEUDO_SIMPLE | PSEUDO_SIMPLE_NO_MATCH | PSEUDO_COMPLEX | PSEUDO_COMPLEX_NO_MATCH | PSEUDO_SPECIAL
# Sub-patterns parts
# Whitespace
NEWLINE = r'(?:\r\n|(?!\r\n)[\n\f\r])'
WS = fr'(?:[ \t]|{NEWLINE})'
# Comments
COMMENTS = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
# Whitespace with comments included
WSC = fr'(?:{WS}|{COMMENTS})'
# CSS escapes
CSS_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$))'
CSS_STRING_ESCAPES = fr'(?:\\(?:[a-f0-9]{{1,6}}{WS}?|[^\r\n\f]|$|{NEWLINE}))'
# CSS Identifier
IDENTIFIER = fr'''
(?:(?:-?(?:[^\x00-\x2f\x30-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})+|--)
(?:[^\x00-\x2c\x2e\x2f\x3A-\x40\x5B-\x5E\x60\x7B-\x9f]|{CSS_ESCAPES})*)
'''
# `nth` content
NTH = fr'(?:[-+])?(?:[0-9]+n?|n)(?:(?<=n){WSC}*(?:[-+]){WSC}*(?:[0-9]+))?'
# Value: quoted string or identifier
VALUE = fr'''(?:"(?:\\(?:.|{NEWLINE})|[^\\"\r\n\f]+)*?"|'(?:\\(?:.|{NEWLINE})|[^\\'\r\n\f]+)*?'|{IDENTIFIER}+)'''
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = fr'(?:{WSC}*(?P<cmp>[!~^|*$]?=){WSC}*(?P<value>{VALUE})(?:{WSC}*(?P<case>[is]))?)?{WSC}*\]'
# Selector patterns
# IDs (`#id`)
PAT_ID = fr'\#{IDENTIFIER}'
# Classes (`.class`)
PAT_CLASS = fr'\.{IDENTIFIER}'
# Prefix:Tag (`prefix|tag`)
PAT_TAG = fr'(?P<tag_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<tag_name>{IDENTIFIER}|\*)'
# Attributes (`[attr]`, `[attr=value]`, etc.)
PAT_ATTR = fr'\[{WSC}*(?P<attr_ns>(?:{IDENTIFIER}|\*)?\|)?(?P<attr_name>{IDENTIFIER}){ATTR}'
# Pseudo class (`:pseudo-class`, `:pseudo-class(`)
PAT_PSEUDO_CLASS = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)?'
# Pseudo class special patterns. Matches `:pseudo-class(` for special case pseudo classes.
PAT_PSEUDO_CLASS_SPECIAL = fr'(?P<name>:{IDENTIFIER})(?P<open>\({WSC}*)'
# Custom pseudo class (`:--custom-pseudo`)
PAT_PSEUDO_CLASS_CUSTOM = fr'(?P<name>:(?=--){IDENTIFIER})'
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = fr'{WSC}*\)'
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = fr':{PAT_PSEUDO_CLASS}'
# At rule (`@page`, etc.) (not supported)
PAT_AT_RULE = fr'@P{IDENTIFIER}'
# Pseudo class `nth-child` (`:nth-child(an+b [of S]?)`, `:first-child`, etc.)
PAT_PSEUDO_NTH_CHILD = fr'''
(?P<pseudo_nth_child>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_child>{NTH}|even|odd))(?:{WSC}*\)|(?P<of>{COMMENTS}*{WS}{WSC}*of{COMMENTS}*{WS}{WSC}*))
'''
# Pseudo class `nth-of-type` (`:nth-of-type(an+b)`, `:first-of-type`, etc.)
PAT_PSEUDO_NTH_TYPE = fr'''
(?P<pseudo_nth_type>{PAT_PSEUDO_CLASS_SPECIAL}
(?P<nth_type>{NTH}|even|odd)){WSC}*\)
'''
# Pseudo class language (`:lang("*-de", en)`)
PAT_PSEUDO_LANG = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Pseudo class direction (`:dir(ltr)`)
PAT_PSEUDO_DIR = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<dir>ltr|rtl){WSC}*\)'
# Combining characters (`>`, `~`, ` `, `+`, `,`)
PAT_COMBINE = fr'{WSC}*?(?P<relation>[,+>~]|{WS}(?![,+>~])){WSC}*'
# Extra: Contains (`:contains(text)`)
PAT_PSEUDO_CONTAINS = fr'{PAT_PSEUDO_CLASS_SPECIAL}(?P<values>{VALUE}(?:{WSC}*,{WSC}*{VALUE})*){WSC}*\)'
# Regular expressions
# CSS escape pattern
RE_CSS_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WSC}?)|(\\[^\r\n\f])|(\\$))', re.I)
RE_CSS_STR_ESC = re.compile(fr'(?:(\\[a-f0-9]{{1,6}}{WS}?)|(\\[^\r\n\f])|(\\$)|(\\{NEWLINE}))', re.I)
# Pattern to break up `nth` specifiers
RE_NTH = re.compile(fr'(?P<s1>[-+])?(?P<a>[0-9]+n?|n)(?:(?<=n){WSC}*(?P<s2>[-+]){WSC}*(?P<b>[0-9]+))?', re.I)
# Pattern to iterate multiple values.
RE_VALUES = re.compile(fr'(?:(?P<value>{VALUE})|(?P<split>{WSC}*,{WSC}*))', re.X)
# Whitespace checks
RE_WS = re.compile(WS)
RE_WS_BEGIN = re.compile(fr'^{WSC}*')
RE_WS_END = re.compile(fr'{WSC}*$')
RE_CUSTOM = re.compile(fr'^{PAT_PSEUDO_CLASS_CUSTOM}$', re.X)
# Constants
# List split token
COMMA_COMBINATOR = ','
# Relation token for descendant
WS_COMBINATOR = " "
# Parse flags
FLG_PSEUDO = 0x01
FLG_NOT = 0x02
FLG_RELATIVE = 0x04
FLG_DEFAULT = 0x08
FLG_HTML = 0x10
FLG_INDETERMINATE = 0x20
FLG_OPEN = 0x40
FLG_IN_RANGE = 0x80
FLG_OUT_OF_RANGE = 0x100
FLG_PLACEHOLDER_SHOWN = 0x200
FLG_FORGIVE = 0x400
# Maximum cached patterns to store
_MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
pattern: str,
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
custom_selectors = process_custom(custom)
return cm.SoupSieve(
pattern,
CSSParser(
pattern,
custom=custom_selectors,
flags=flags
).process_selectors(),
namespaces,
custom,
flags
)
def _purge_cache() -> None:
"""Purge the cache."""
_cached_css_compile.cache_clear()
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
if custom is not None:
for key, value in custom.items():
name = util.lower(key)
if RE_CUSTOM.match(name) is None:
raise SelectorSyntaxError(f"The name '{name}' is not a valid custom pseudo-class name")
if name in custom_selectors:
raise KeyError(f"The custom selector '{name}' has already been registered")
custom_selectors[css_unescape(name)] = value
return custom_selectors
def css_unescape(content: str, string: bool = False) -> str:
"""
Unescape CSS value.
Strings allow for spanning the value on multiple strings by escaping a new line.
"""
def replace(m: Match[str]) -> str:
"""Replace with the appropriate substitute."""
if m.group(1):
codepoint = int(m.group(1)[1:], 16)
if codepoint == 0:
codepoint = UNICODE_REPLACEMENT_CHAR
value = chr(codepoint)
elif m.group(2):
value = m.group(2)[1:]
elif m.group(3):
value = '\ufffd'
else:
value = ''
return value
return (RE_CSS_ESC if not string else RE_CSS_STR_ESC).sub(replace, content)
def escape(ident: str) -> str:
"""Escape identifier."""
string = []
length = len(ident)
start_dash = length > 0 and ident[0] == '-'
if length == 1 and start_dash:
# Need to escape identifier that is a single `-` with no other characters
string.append(f'\\{ident}')
else:
for index, c in enumerate(ident):
codepoint = ord(c)
if codepoint == 0x00:
string.append('\ufffd')
elif (0x01 <= codepoint <= 0x1F) or codepoint == 0x7F:
string.append(f'\\{codepoint:x} ')
elif (index == 0 or (start_dash and index == 1)) and (0x30 <= codepoint <= 0x39):
string.append(f'\\{codepoint:x} ')
elif (
codepoint in (0x2D, 0x5F) or codepoint >= 0x80 or (0x30 <= codepoint <= 0x39) or
(0x30 <= codepoint <= 0x39) or (0x41 <= codepoint <= 0x5A) or (0x61 <= codepoint <= 0x7A)
):
string.append(c)
else:
string.append(f'\\{c}')
return ''.join(string)
class SelectorPattern:
"""Selector pattern."""
def __init__(self, name: str, pattern: str) -> None:
"""Initialize."""
self.name = name
self.re_pattern = re.compile(pattern, re.I | re.X | re.U)
def get_name(self) -> str:
"""Get name."""
return self.name
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
return self.re_pattern.match(selector, index)
class SpecialPseudoPattern(SelectorPattern):
"""Selector pattern."""
def __init__(self, patterns: tuple[tuple[str, tuple[str, ...], str, type[SelectorPattern]], ...]) -> None:
"""Initialize."""
self.patterns = {}
for p in patterns:
name = p[0]
pattern = p[3](name, p[2])
for pseudo in p[1]:
self.patterns[pseudo] = pattern
self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str:
"""Get name."""
return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
pseudo = None
m = self.re_pseudo_name.match(selector, index)
if m:
name = util.lower(css_unescape(m.group('name')))
pattern = self.patterns.get(name)
if pattern:
pseudo = pattern.match(selector, index, flags)
if pseudo:
self.matched_name = pattern
return pseudo
class _Selector:
"""
Intermediate selector class.
This stores selector data for a compound selector as we are acquiring them.
Once we are done collecting the data for a compound selector, we freeze
the data in an object that can be pickled and hashed.
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""
self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: str | None
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
self.no_match = kwargs.get('no_match', False) # type: bool
def _freeze_relations(self, relations: list[_Selector]) -> ct.SelectorList:
"""Freeze relation."""
if relations:
sel = relations[0]
sel.relations.extend(relations[1:])
return ct.SelectorList([sel.freeze()])
else:
return ct.SelectorList()
def freeze(self) -> ct.Selector | ct.SelectorNull:
"""Freeze self."""
if self.no_match:
return ct.SelectorNull()
else:
return ct.Selector(
self.tag,
tuple(self.ids),
tuple(self.classes),
tuple(self.attributes),
tuple(self.nth),
tuple(self.selectors),
self._freeze_relations(self.relations),
self.rel_type,
tuple(self.contains),
tuple(self.lang),
self.flags
)
def __str__(self) -> str: # pragma: no cover
"""String representation."""
return (
f'_Selector(tag={self.tag!r}, ids={self.ids!r}, classes={self.classes!r}, attributes={self.attributes!r}, '
f'nth={self.nth!r}, selectors={self.selectors!r}, relations={self.relations!r}, '
f'rel_type={self.rel_type!r}, contains={self.contains!r}, lang={self.lang!r}, flags={self.flags!r}, '
f'no_match={self.no_match!r})'
)
__repr__ = __str__
class CSSParser:
"""Parse CSS selectors."""
css_tokens = (
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
SpecialPseudoPattern(
(
(
"pseudo_contains",
(':contains', ':-soup-contains', ':-soup-contains-own'),
PAT_PSEUDO_CONTAINS,
SelectorPattern
),
("pseudo_nth_child", (':nth-child', ':nth-last-child'), PAT_PSEUDO_NTH_CHILD, SelectorPattern),
("pseudo_nth_type", (':nth-of-type', ':nth-last-of-type'), PAT_PSEUDO_NTH_TYPE, SelectorPattern),
("pseudo_lang", (':lang',), PAT_PSEUDO_LANG, SelectorPattern),
("pseudo_dir", (':dir',), PAT_PSEUDO_DIR, SelectorPattern)
)
),
SelectorPattern("pseudo_class_custom", PAT_PSEUDO_CLASS_CUSTOM),
SelectorPattern("pseudo_class", PAT_PSEUDO_CLASS),
SelectorPattern("pseudo_element", PAT_PSEUDO_ELEMENT),
SelectorPattern("at_rule", PAT_AT_RULE),
SelectorPattern("id", PAT_ID),
SelectorPattern("class", PAT_CLASS),
SelectorPattern("tag", PAT_TAG),
SelectorPattern("attribute", PAT_ATTR),
SelectorPattern("combine", PAT_COMBINE)
)
def __init__(
self,
selector: str,
custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0
) -> None:
"""Initialize."""
self.pattern = selector.replace('\x00', '\ufffd')
self.flags = flags
self.debug = self.flags & util.DEBUG
self.custom = {} if custom is None else custom
def parse_attribute_selector(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Create attribute selector from the returned regex match."""
inverse = False
op = m.group('cmp')
case = util.lower(m.group('case')) if m.group('case') else None
ns = css_unescape(m.group('attr_ns')[:-1]) if m.group('attr_ns') else ''
attr = css_unescape(m.group('attr_name'))
is_type = False
pattern2 = None
value = ''
if case:
flags = (re.I if case == 'i' else 0) | re.DOTALL
elif util.lower(attr) == 'type':
flags = re.I | re.DOTALL
is_type = True
else:
flags = re.DOTALL
if op:
if m.group('value').startswith(('"', "'")):
value = css_unescape(m.group('value')[1:-1], True)
else:
value = css_unescape(m.group('value'))
if not op:
# Attribute name
pattern = None
elif op.startswith('^'):
# Value start with
pattern = re.compile(r'^%s.*' % re.escape(value), flags)
elif op.startswith('$'):
# Value ends with
pattern = re.compile(r'.*?%s$' % re.escape(value), flags)
elif op.startswith('*'):
# Value contains
pattern = re.compile(r'.*?%s.*' % re.escape(value), flags)
elif op.startswith('~'):
# Value contains word within space separated list
# `~=` should match nothing if it is empty or contains whitespace,
# so if either of these cases is present, use `[^\s\S]` which cannot be matched.
value = r'[^\s\S]' if not value or RE_WS.search(value) else re.escape(value)
pattern = re.compile(r'.*?(?:(?<=^)|(?<=[ \t\r\n\f]))%s(?=(?:[ \t\r\n\f]|$)).*' % value, flags)
elif op.startswith('|'):
# Value starts with word in dash separated list
pattern = re.compile(r'^%s(?:-.*)?$' % re.escape(value), flags)
else:
# Value matches
pattern = re.compile(r'^%s$' % re.escape(value), flags)
if op.startswith('!'):
# Equivalent to `:not([attr=value])`
inverse = True
if is_type and pattern:
pattern2 = re.compile(pattern.pattern)
# Append the attribute selector
sel_attr = ct.SelectorAttribute(attr, ns, pattern, pattern2)
if inverse:
# If we are using `!=`, we need to nest the pattern under a `:not()`.
sub_sel = _Selector()
sub_sel.attributes.append(sel_attr)
not_list = ct.SelectorList([sub_sel.freeze()], True, False)
sel.selectors.append(not_list)
else:
sel.attributes.append(sel_attr)
has_selector = True
return has_selector
def parse_tag_pattern(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse tag pattern from regex match."""
prefix = css_unescape(m.group('tag_ns')[:-1]) if m.group('tag_ns') else None
tag = css_unescape(m.group('tag_name'))
sel.tag = ct.SelectorTag(tag, prefix)
has_selector = True
return has_selector
def parse_pseudo_class_custom(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""
Parse custom pseudo class alias.
Compile custom selectors as we need them. When compiling a custom selector,
set it to `None` in the dictionary so we can avoid an infinite loop.
"""
pseudo = util.lower(css_unescape(m.group('name')))
selector = self.custom.get(pseudo)
if selector is None:
raise SelectorSyntaxError(
f"Undefined custom selector '{pseudo}' found at position {m.end(0)}",
self.pattern,
m.end(0)
)
if not isinstance(selector, ct.SelectorList):
del self.custom[pseudo]
selector = CSSParser(
selector, custom=self.custom, flags=self.flags
).process_selectors(flags=FLG_PSEUDO)
self.custom[pseudo] = selector
sel.selectors.append(selector)
has_selector = True
return has_selector
def parse_pseudo_class(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[tuple[str, Match[str]]],
is_html: bool
) -> tuple[bool, bool]:
"""Parse pseudo class."""
complex_pseudo = False
pseudo = util.lower(css_unescape(m.group('name')))
if m.group('open'):
complex_pseudo = True
if complex_pseudo and pseudo in PSEUDO_COMPLEX:
has_selector = self.parse_pseudo_open(sel, pseudo, has_selector, iselector, m.end(0))
elif not complex_pseudo and pseudo in PSEUDO_SIMPLE:
if pseudo == ':root':
sel.flags |= ct.SEL_ROOT
elif pseudo == ':defined':
sel.flags |= ct.SEL_DEFINED
is_html = True
elif pseudo == ':scope':
sel.flags |= ct.SEL_SCOPE
elif pseudo == ':empty':
sel.flags |= ct.SEL_EMPTY
elif pseudo in (':link', ':any-link'):
sel.selectors.append(CSS_LINK)
elif pseudo == ':checked':
sel.selectors.append(CSS_CHECKED)
elif pseudo == ':default':
sel.selectors.append(CSS_DEFAULT)
elif pseudo == ':indeterminate':
sel.selectors.append(CSS_INDETERMINATE)
elif pseudo == ":disabled":
sel.selectors.append(CSS_DISABLED)
elif pseudo == ":enabled":
sel.selectors.append(CSS_ENABLED)
elif pseudo == ":required":
sel.selectors.append(CSS_REQUIRED)
elif pseudo == ":optional":
sel.selectors.append(CSS_OPTIONAL)
elif pseudo == ":read-only":
sel.selectors.append(CSS_READ_ONLY)
elif pseudo == ":read-write":
sel.selectors.append(CSS_READ_WRITE)
elif pseudo == ":in-range":
sel.selectors.append(CSS_IN_RANGE)
elif pseudo == ":out-of-range":
sel.selectors.append(CSS_OUT_OF_RANGE)
elif pseudo == ":placeholder-shown":
sel.selectors.append(CSS_PLACEHOLDER_SHOWN)
elif pseudo == ':first-child':
sel.nth.append(ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()))
elif pseudo == ':last-child':
sel.nth.append(ct.SelectorNth(1, False, 0, False, True, ct.SelectorList()))
elif pseudo == ':first-of-type':
sel.nth.append(ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()))
elif pseudo == ':last-of-type':
sel.nth.append(ct.SelectorNth(1, False, 0, True, True, ct.SelectorList()))
elif pseudo == ':only-child':
sel.nth.extend(
[
ct.SelectorNth(1, False, 0, False, False, ct.SelectorList()),
ct.SelectorNth(1, False, 0, False, True, ct.SelectorList())
]
)
elif pseudo == ':only-of-type':
sel.nth.extend(
[
ct.SelectorNth(1, False, 0, True, False, ct.SelectorList()),
ct.SelectorNth(1, False, 0, True, True, ct.SelectorList())
]
)
has_selector = True
elif complex_pseudo and pseudo in PSEUDO_COMPLEX_NO_MATCH:
self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
sel.no_match = True
has_selector = True
elif not complex_pseudo and pseudo in PSEUDO_SIMPLE_NO_MATCH:
sel.no_match = True
has_selector = True
elif pseudo in PSEUDO_SUPPORTED:
raise SelectorSyntaxError(
f"Invalid syntax for pseudo class '{pseudo}'",
self.pattern,
m.start(0)
)
else:
raise NotImplementedError(
f"'{pseudo}' pseudo-class is not implemented at this time"
)
return has_selector, is_html
def parse_pseudo_nth(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
iselector: Iterator[tuple[str, Match[str]]]
) -> bool:
"""Parse `nth` pseudo."""
mdict = m.groupdict()
if mdict.get('pseudo_nth_child'):
postfix = '_child'
else:
postfix = '_type'
mdict['name'] = util.lower(css_unescape(mdict['name']))
content = util.lower(mdict.get('nth' + postfix))
if content == 'even':
# 2n
s1 = 2
s2 = 0
var = True
elif content == 'odd':
# 2n+1
s1 = 2
s2 = 1
var = True
else:
nth_parts = cast(Match[str], RE_NTH.match(content))
_s1 = '-' if nth_parts.group('s1') and nth_parts.group('s1') == '-' else ''
a = nth_parts.group('a')
var = a.endswith('n')
if a.startswith('n'):
_s1 += '1'
elif var:
_s1 += a[:-1]
else:
_s1 += a
_s2 = '-' if nth_parts.group('s2') and nth_parts.group('s2') == '-' else ''
if nth_parts.group('b'):
_s2 += nth_parts.group('b')
else:
_s2 = '0'
s1 = int(_s1, 10)
s2 = int(_s2, 10)
pseudo_sel = mdict['name']
if postfix == '_child':
if m.group('of'):
# Parse the rest of `of S`.
nth_sel = self.parse_selectors(iselector, m.end(0), FLG_PSEUDO | FLG_OPEN)
else:
# Use default `*|*` for `of S`.
nth_sel = CSS_NTH_OF_S_DEFAULT
if pseudo_sel == ':nth-child':
sel.nth.append(ct.SelectorNth(s1, var, s2, False, False, nth_sel))
elif pseudo_sel == ':nth-last-child':
sel.nth.append(ct.SelectorNth(s1, var, s2, False, True, nth_sel))
else:
if pseudo_sel == ':nth-of-type':
sel.nth.append(ct.SelectorNth(s1, var, s2, True, False, ct.SelectorList()))
elif pseudo_sel == ':nth-last-of-type':
sel.nth.append(ct.SelectorNth(s1, var, s2, True, True, ct.SelectorList()))
has_selector = True
return has_selector
def parse_pseudo_open(
self,
sel: _Selector,
name: str,
has_selector: bool,
iselector: Iterator[tuple[str, Match[str]]],
index: int
) -> bool:
"""Parse pseudo with opening bracket."""
flags = FLG_PSEUDO | FLG_OPEN
if name == ':not':
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE
sel.selectors.append(self.parse_selectors(iselector, index, flags))
has_selector = True
return has_selector
def parse_has_combinator(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: list[_Selector],
rel_type: str,
index: int
) -> tuple[bool, _Selector, str]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
if not combinator:
combinator = WS_COMBINATOR
if combinator == COMMA_COMBINATOR:
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
rel_type = ":" + WS_COMBINATOR
selectors.append(_Selector())
else:
if has_selector:
# End the current selector and associate the leading combinator with this selector.
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
elif rel_type[1:] != WS_COMBINATOR:
# It's impossible to have two whitespace combinators after each other as the patterns
# will gobble up trailing whitespace. It is also impossible to have a whitespace
# combinator after any other kind for the same reason. But we could have
# multiple non-whitespace combinators. So if the current combinator is not a whitespace,
# then we've hit the multiple combinator case, so we should fail.
raise SelectorSyntaxError(
f'The multiple combinators at position {index}',
self.pattern,
index
)
# Set the leading combinator for the next selector.
rel_type = ':' + combinator
sel = _Selector()
has_selector = False
return has_selector, sel, rel_type
def parse_combinator(
self,
sel: _Selector,
m: Match[str],
has_selector: bool,
selectors: list[_Selector],
relations: list[_Selector],
is_pseudo: bool,
is_forgive: bool,
index: int
) -> tuple[bool, _Selector]:
"""Parse combinator tokens."""
combinator = m.group('relation').strip()
if not combinator:
combinator = WS_COMBINATOR
if not has_selector:
if not is_forgive or combinator != COMMA_COMBINATOR:
raise SelectorSyntaxError(
f"The combinator '{combinator}' at position {index}, must have a selector before it",
self.pattern,
index
)
# If we are in a forgiving pseudo class, just make the selector a "no match"
if combinator == COMMA_COMBINATOR:
sel.no_match = True
del relations[:]
selectors.append(sel)
else:
if combinator == COMMA_COMBINATOR:
if not sel.tag and not is_pseudo:
# Implied `*`
sel.tag = ct.SelectorTag('*', None)
sel.relations.extend(relations)
selectors.append(sel)
del relations[:]
else:
sel.relations.extend(relations)
sel.rel_type = combinator
del relations[:]
relations.append(sel)
sel = _Selector()
has_selector = False
return has_selector, sel
def parse_class_id(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse HTML classes and ids."""
selector = m.group(0)
if selector.startswith('.'):
sel.classes.append(css_unescape(selector[1:]))
else:
sel.ids.append(css_unescape(selector[1:]))
has_selector = True
return has_selector
def parse_pseudo_contains(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse contains."""
pseudo = util.lower(css_unescape(m.group('name')))
if pseudo == ":contains":
warnings.warn( # noqa: B028
"The pseudo class ':contains' is deprecated, ':-soup-contains' should be used moving forward.",
FutureWarning
)
contains_own = pseudo == ":-soup-contains-own"
values = css_unescape(m.group('values'))
patterns = []
for token in RE_VALUES.finditer(values):
if token.group('split'):
continue
value = token.group('value')
if value.startswith(("'", '"')):
value = css_unescape(value[1:-1], True)
else:
value = css_unescape(value)
patterns.append(value)
sel.contains.append(ct.SelectorContains(patterns, contains_own))
has_selector = True
return has_selector
def parse_pseudo_lang(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse pseudo language."""
values = m.group('values')
patterns = []
for token in RE_VALUES.finditer(values):
if token.group('split'):
continue
value = token.group('value')
if value.startswith(('"', "'")):
value = css_unescape(value[1:-1], True)
else:
value = css_unescape(value)
patterns.append(value)
sel.lang.append(ct.SelectorLang(patterns))
has_selector = True
return has_selector
def parse_pseudo_dir(self, sel: _Selector, m: Match[str], has_selector: bool) -> bool:
"""Parse pseudo direction."""
value = ct.SEL_DIR_LTR if util.lower(m.group('dir')) == 'ltr' else ct.SEL_DIR_RTL
sel.flags |= value
has_selector = True
return has_selector
def parse_selectors(
self,
iselector: Iterator[tuple[str, Match[str]]],
index: int = 0,
flags: int = 0
) -> ct.SelectorList:
"""Parse selectors."""
# Initialize important variables
sel = _Selector()
selectors = []
has_selector = False
closed = False
relations = [] # type: list[_Selector]
rel_type = ":" + WS_COMBINATOR
# Setup various flags
is_open = bool(flags & FLG_OPEN)
is_pseudo = bool(flags & FLG_PSEUDO)
is_relative = bool(flags & FLG_RELATIVE)
is_not = bool(flags & FLG_NOT)
is_html = bool(flags & FLG_HTML)
is_default = bool(flags & FLG_DEFAULT)
is_indeterminate = bool(flags & FLG_INDETERMINATE)
is_in_range = bool(flags & FLG_IN_RANGE)
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
is_forgive = bool(flags & FLG_FORGIVE)
# Print out useful debug stuff
if self.debug: # pragma: no cover
if is_pseudo:
print(' is_pseudo: True')
if is_open:
print(' is_open: True')
if is_relative:
print(' is_relative: True')
if is_not:
print(' is_not: True')
if is_html:
print(' is_html: True')
if is_default:
print(' is_default: True')
if is_indeterminate:
print(' is_indeterminate: True')
if is_in_range:
print(' is_in_range: True')
if is_out_of_range:
print(' is_out_of_range: True')
if is_placeholder_shown:
print(' is_placeholder_shown: True')
if is_forgive:
print(' is_forgive: True')
# The algorithm for relative selectors require an initial selector in the selector list
if is_relative:
selectors.append(_Selector())
try:
while True:
key, m = next(iselector)
# Handle parts
if key == "at_rule":
raise NotImplementedError(f"At-rules found at position {m.start(0)}")
elif key == 'pseudo_class_custom':
has_selector = self.parse_pseudo_class_custom(sel, m, has_selector)
elif key == 'pseudo_class':
has_selector, is_html = self.parse_pseudo_class(sel, m, has_selector, iselector, is_html)
elif key == 'pseudo_element':
raise NotImplementedError(f"Pseudo-element found at position {m.start(0)}")
elif key == 'pseudo_contains':
has_selector = self.parse_pseudo_contains(sel, m, has_selector)
elif key in ('pseudo_nth_type', 'pseudo_nth_child'):
has_selector = self.parse_pseudo_nth(sel, m, has_selector, iselector)
elif key == 'pseudo_lang':
has_selector = self.parse_pseudo_lang(sel, m, has_selector)
elif key == 'pseudo_dir':
has_selector = self.parse_pseudo_dir(sel, m, has_selector)
# Currently only supports HTML
is_html = True
elif key == 'pseudo_close':
if not has_selector:
if not is_forgive:
raise SelectorSyntaxError(
f"Expected a selector at position {m.start(0)}",
self.pattern,
m.start(0)
)
sel.no_match = True
if is_open:
closed = True
break
else:
raise SelectorSyntaxError(
f"Unmatched pseudo-class close at position {m.start(0)}",
self.pattern,
m.start(0)
)
elif key == 'combine':
if is_relative:
has_selector, sel, rel_type = self.parse_has_combinator(
sel, m, has_selector, selectors, rel_type, index
)
else:
has_selector, sel = self.parse_combinator(
sel, m, has_selector, selectors, relations, is_pseudo, is_forgive, index
)
elif key == 'attribute':
has_selector = self.parse_attribute_selector(sel, m, has_selector)
elif key == 'tag':
if has_selector:
raise SelectorSyntaxError(
f"Tag name found at position {m.start(0)} instead of at the start",
self.pattern,
m.start(0)
)
has_selector = self.parse_tag_pattern(sel, m, has_selector)
elif key in ('class', 'id'):
has_selector = self.parse_class_id(sel, m, has_selector)
index = m.end(0)
except StopIteration:
pass
# Handle selectors that are not closed
if is_open and not closed:
raise SelectorSyntaxError(
f"Unclosed pseudo-class at position {index}",
self.pattern,
index
)
# Cleanup completed selector piece
if has_selector:
if not sel.tag and not is_pseudo:
# Implied `*`
sel.tag = ct.SelectorTag('*', None)
if is_relative:
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
else:
sel.relations.extend(relations)
del relations[:]
selectors.append(sel)
# Forgive empty slots in pseudo-classes that have lists (and are forgiving)
elif is_forgive and (not selectors or not relations):
# Handle normal pseudo-classes with empty slots like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True
if not has_selector:
# We will always need to finish a selector when `:has()` is used as it leads with combining.
# May apply to others as well.
raise SelectorSyntaxError(
f'Expected a selector at position {index}',
self.pattern,
index
)
# Some patterns require additional logic, such as default. We try to make these the
# last pattern, and append the appropriate flag to that selector which communicates
# to the matcher what additional logic is required.
if is_default:
selectors[-1].flags = ct.SEL_DEFAULT
if is_indeterminate:
selectors[-1].flags = ct.SEL_INDETERMINATE
if is_in_range:
selectors[-1].flags = ct.SEL_IN_RANGE
if is_out_of_range:
selectors[-1].flags = ct.SEL_OUT_OF_RANGE
if is_placeholder_shown:
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN
# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)
def selector_iter(self, pattern: str) -> Iterator[tuple[str, Match[str]]]:
"""Iterate selector tokens."""
# Ignore whitespace and comments at start and end of pattern
m = RE_WS_BEGIN.search(pattern)
index = m.end(0) if m else 0
m = RE_WS_END.search(pattern)
end = (m.start(0) - 1) if m else (len(pattern) - 1)
if self.debug: # pragma: no cover
print(f'## PARSING: {pattern!r}')
while index <= end:
m = None
for v in self.css_tokens:
m = v.match(pattern, index, self.flags)
if m:
name = v.get_name()
if self.debug: # pragma: no cover
print(f"TOKEN: '{name}' --> {m.group(0)!r} at position {m.start(0)}")
index = m.end(0)
yield name, m
break
if m is None:
c = pattern[index]
# If the character represents the start of one of the known selector types,
# throw an exception mentioning that the known selector type is in error;
# otherwise, report the invalid character.
if c == '[':
msg = f"Malformed attribute selector at position {index}"
elif c == '.':
msg = f"Malformed class selector at position {index}"
elif c == '#':
msg = f"Malformed id selector at position {index}"
elif c == ':':
msg = f"Malformed pseudo-class selector at position {index}"
else:
msg = f"Invalid character {c!r} position {index}"
raise SelectorSyntaxError(msg, self.pattern, index)
if self.debug: # pragma: no cover
print('## END PARSING')
def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList:
"""Process selectors."""
return self.parse_selectors(self.selector_iter(self.pattern), index, flags)
# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern)
# A few patterns are order dependent as they use patterns previous compiled.
# CSS pattern for `:link` and `:any-link`
CSS_LINK = CSSParser(
'html|*:is(a, area)[href]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:checked`
CSS_CHECKED = CSSParser(
'''
html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected]
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:default` (must compile CSS_CHECKED first)
CSS_DEFAULT = CSSParser(
'''
:checked,
/*
This pattern must be at the end.
Special logic is applied to the last selector.
*/
html|form html|*:is(button, input)[type="submit"]
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT)
# CSS pattern for `:indeterminate`
CSS_INDETERMINATE = CSSParser(
'''
html|input[type="checkbox"][indeterminate],
html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]),
html|progress:not([value]),
/*
This pattern must be at the end.
Special logic is applied to the last selector.
*/
html|input[type="radio"][name]:not([name='']):not([checked])
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE)
# CSS pattern for `:disabled`
CSS_DISABLED = CSSParser(
'''
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled],
html|optgroup[disabled] > html|option,
html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset),
html|fieldset[disabled] >
html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:enabled`
CSS_ENABLED = CSSParser(
'''
html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:required`
CSS_REQUIRED = CSSParser(
'html|*:is(input, textarea, select)[required]'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:optional`
CSS_OPTIONAL = CSSParser(
'html|*:is(input, textarea, select):not([required])'
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:placeholder-shown`
CSS_PLACEHOLDER_SHOWN = CSSParser(
'''
html|input:is(
:not([type]),
[type=""],
[type=text],
[type=search],
[type=url],
[type=tel],
[type=email],
[type=password],
[type=number]
)[placeholder]:not([placeholder='']):is(:not([value]), [value=""]),
html|textarea[placeholder]:not([placeholder=''])
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN)
# CSS pattern default for `:nth-child` "of S" feature
CSS_NTH_OF_S_DEFAULT = CSSParser(
'*|*'
).process_selectors(flags=FLG_PSEUDO)
# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first)
CSS_READ_WRITE = CSSParser(
'''
html|*:is(
textarea,
input:is(
:not([type]),
[type=""],
[type=text],
[type=search],
[type=url],
[type=tel],
[type=email],
[type=number],
[type=password],
[type=date],
[type=datetime-local],
[type=month],
[type=time],
[type=week]
)
):not([readonly], :disabled),
html|*:is([contenteditable=""], [contenteditable="true" i])
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:read-only`
CSS_READ_ONLY = CSSParser(
'''
html|*:not(:read-write)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_HTML)
# CSS pattern for `:in-range`
CSS_IN_RANGE = CSSParser(
'''
html|input:is(
[type="date"],
[type="month"],
[type="week"],
[type="time"],
[type="datetime-local"],
[type="number"],
[type="range"]
):is(
[min],
[max]
)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML)
# CSS pattern for `:out-of-range`
CSS_OUT_OF_RANGE = CSSParser(
'''
html|input:is(
[type="date"],
[type="month"],
[type="week"],
[type="time"],
[type="datetime-local"],
[type="number"],
[type="range"]
):is(
[min],
[max]
)
'''
).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML)
PK ]ZZZ�w�I�'