from __future__ import annotations import base64 import collections import datetime import decimal import ipaddress import json import logging import pathlib import re import uuid from typing import Any from langsmith._internal import _orjson try: from zoneinfo import ZoneInfo # type: ignore[import-not-found] except ImportError: class ZoneInfo: # type: ignore[no-redef] """Introduced in python 3.9.""" logger = logging.getLogger(__name__) def _simple_default(obj): try: # Only need to handle types that orjson doesn't serialize by default # https://github.com/ijl/orjson#serialize if isinstance(obj, datetime.datetime): return obj.isoformat() elif isinstance(obj, uuid.UUID): return str(obj) elif isinstance(obj, BaseException): return {"error": type(obj).__name__, "message": str(obj)} elif isinstance(obj, (set, frozenset, collections.deque)): return list(obj) elif isinstance(obj, (datetime.timezone, ZoneInfo)): return obj.tzname(None) elif isinstance(obj, datetime.timedelta): return obj.total_seconds() elif isinstance(obj, decimal.Decimal): if obj.as_tuple().exponent >= 0: return int(obj) else: return float(obj) elif isinstance( obj, ( ipaddress.IPv4Address, ipaddress.IPv4Interface, ipaddress.IPv4Network, ipaddress.IPv6Address, ipaddress.IPv6Interface, ipaddress.IPv6Network, pathlib.Path, ), ): return str(obj) elif isinstance(obj, re.Pattern): return obj.pattern elif isinstance(obj, (bytes, bytearray)): return base64.b64encode(obj).decode() return str(obj) except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") return str(obj) _serialization_methods = [ ( "model_dump", {"exclude_none": True, "mode": "json"}, ), # Pydantic V2 with non-serializable fields ("dict", {}), # Pydantic V1 with non-serializable field ("to_dict", {}), # dataclasses-json ] # IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization, # in order to handle serializing these tricky Python types *from Rust*. # Do not cause this function to become inaccessible (e.g. by deleting # or renaming it) without also fixing the corresponding Rust code found in: # rust/crates/langsmith-pyo3/src/serialization/mod.rs def _serialize_json(obj: Any) -> Any: try: if isinstance(obj, (set, tuple)): if hasattr(obj, "_asdict") and callable(obj._asdict): # NamedTuple return obj._asdict() return list(obj) for attr, kwargs in _serialization_methods: if ( hasattr(obj, attr) and callable(getattr(obj, attr)) and not isinstance(obj, type) ): try: method = getattr(obj, attr) response = method(**kwargs) if not isinstance(response, dict): return str(response) return response except Exception as e: logger.debug( f"Failed to use {attr} to serialize {type(obj)} to" f" JSON: {repr(e)}" ) pass return _simple_default(obj) except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") return str(obj) def _elide_surrogates(s: bytes) -> bytes: pattern = re.compile(rb"\\ud[89a-f][0-9a-f]{2}", re.IGNORECASE) result = pattern.sub(b"", s) return result def dumps_json(obj: Any) -> bytes: """Serialize an object to a JSON formatted string. Parameters ---------- obj : Any The object to serialize. default : Callable[[Any], Any] or None, default=None The default function to use for serialization. Returns: ------- str The JSON formatted string. """ try: return _orjson.dumps( obj, default=_serialize_json, option=_orjson.OPT_SERIALIZE_NUMPY | _orjson.OPT_SERIALIZE_DATACLASS | _orjson.OPT_SERIALIZE_UUID | _orjson.OPT_NON_STR_KEYS, ) except TypeError as e: # Usually caused by UTF surrogate characters logger.debug(f"Orjson serialization failed: {repr(e)}. Falling back to json.") result = json.dumps( obj, default=_serialize_json, ensure_ascii=True, ).encode("utf-8") try: result = _orjson.dumps( _orjson.loads(result.decode("utf-8", errors="surrogateescape")) ) except _orjson.JSONDecodeError: result = _elide_surrogates(result) return result
Memory