"""This module contains the StringEvaluator class."""
import uuid
from typing import Callable, Dict, Optional
from pydantic import BaseModel
from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run
class StringEvaluator(RunEvaluator, BaseModel):
"""Grades the run's string input, output, and optional answer."""
evaluation_name: Optional[str] = None
"""The name evaluation, such as 'Accuracy' or 'Salience'."""
input_key: str = "input"
"""The key in the run inputs to extract the input string."""
prediction_key: str = "output"
"""The key in the run outputs to extra the prediction string."""
answer_key: Optional[str] = "output"
"""The key in the example outputs the answer string."""
grading_function: Callable[[str, str, Optional[str]], Dict]
"""Function that grades the run output against the example output."""
def evaluate_run(
self,
run: Run,
example: Optional[Example] = None,
evaluator_run_id: Optional[uuid.UUID] = None,
) -> EvaluationResult:
"""Evaluate a single run."""
if run.outputs is None:
raise ValueError("Run outputs cannot be None.")
if not example or example.outputs is None or self.answer_key is None:
answer = None
else:
answer = example.outputs.get(self.answer_key)
run_input = run.inputs[self.input_key]
run_output = run.outputs[self.prediction_key]
grading_results = self.grading_function(run_input, run_output, answer)
return EvaluationResult(**{"key": self.evaluation_name, **grading_results})