from __future__ import annotations
import re
from html import unescape
from typing import Any
from urllib.parse import unquote
from .exceptions import DuckDuckGoSearchException
# Prefer orjson (fast C JSON library) when installed; otherwise fall back
# to the stdlib json module. HAS_ORJSON records which one is active.
try:
    import orjson

    HAS_ORJSON = True
except ImportError:
    import json

    HAS_ORJSON = False

# Matches any single HTML tag (non-greedy), used to strip markup from snippets.
REGEX_STRIP_TAGS = re.compile("<.*?>")
def json_dumps(obj: Any) -> str:
    """Serialize *obj* to a 2-space-indented JSON string.

    Uses orjson when available, otherwise the stdlib json module
    (with ``ensure_ascii=False`` so non-ASCII text is kept as-is).

    Raises:
        DuckDuckGoSearchException: if serialization fails for any reason.
    """
    try:
        if HAS_ORJSON:
            return orjson.dumps(obj, option=orjson.OPT_INDENT_2).decode()
        return json.dumps(obj, ensure_ascii=False, indent=2)
    except Exception as ex:
        raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex
def json_loads(obj: str | bytes) -> Any:
    """Deserialize a JSON string or bytes into a Python object.

    Uses orjson when available, otherwise the stdlib json module.

    Raises:
        DuckDuckGoSearchException: if parsing fails for any reason.
    """
    try:
        if HAS_ORJSON:
            return orjson.loads(obj)
        return json.loads(obj)
    except Exception as ex:
        raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex
def _extract_vqd(html_bytes: bytes, keywords: str) -> str:
"""Extract vqd from html bytes."""
for c1, c1_len, c2 in (
(b'vqd="', 5, b'"'),
(b"vqd=", 4, b"&"),
(b"vqd='", 5, b"'"),
):
try:
start = html_bytes.index(c1) + c1_len
end = html_bytes.index(c2, start)
return html_bytes[start:end].decode()
except ValueError:
pass
raise DuckDuckGoSearchException(f"_extract_vqd() {keywords=} Could not extract vqd.")
def _normalize(raw_html: str) -> str:
"""Strip HTML tags from the raw_html string."""
return unescape(REGEX_STRIP_TAGS.sub("", raw_html)) if raw_html else ""
def _normalize_url(url: str) -> str:
"""Unquote URL and replace spaces with '+'."""
return unquote(url).replace(" ", "+") if url else ""
def _expand_proxy_tb_alias(proxy: str | None) -> str | None:
"""Expand "tb" to a full proxy URL if applicable."""
return "socks5://127.0.0.1:9150" if proxy == "tb" else proxy