in src/dfcx_scrapi/tools/metrics.py [0:0]
def run(self, inputs: pd.DataFrame) -> pd.DataFrame:
    """Compute every enabled metric for each row of `inputs`.

    Statement extraction is performed up front, and only when a metric
    that consumes those statements is enabled; each enabled metric is
    then run on the enriched frame and its result columns are
    concatenated into the output.

    Args:
        inputs: Frame holding one evaluation sample per row. Columns
            read (depending on which metrics are enabled): `query`,
            `expected_answer` (context recall / answer correctness),
            and `query_result`, whose items expose `.answer_text`
            (faithfulness / answer-correctness precision).

    Returns:
        A DataFrame indexed like `inputs` whose columns are the
        concatenated results of every enabled metric (empty frame when
        no metric is enabled).
    """
    # Reference statements come from the human-provided expected answer;
    # only needed by context recall and answer correctness.
    reference_statements = pd.DataFrame(
        columns=["reference_statements"], index=inputs.index
    )
    if self._context_recall or self._answer_correctness:
        reference_statements["reference_statements"] = (
            self._extract_statements_parallel(
                inputs["query"].tolist(),
                inputs["expected_answer"].tolist(),
                desc="Extracting statements: `expected_answer`",
            )
        )

    # Prediction statements come from the agent's generated answer;
    # only needed by faithfulness and answer-correctness precision.
    prediction_statements = pd.DataFrame(
        columns=["prediction_statements"], index=inputs.index
    )
    if self._faithfulness or (
        self._answer_correctness
        and self._answer_correctness.compute_precision
    ):
        prediction_statements["prediction_statements"] = (
            self._extract_statements_parallel(
                inputs["query"].tolist(),
                [
                    response.answer_text
                    for response in inputs["query_result"].tolist()
                ],
                desc="Extracting statements: `answer_text`",
            )
        )

    # Run each enabled metric on the inputs enriched with whichever
    # statement columns it consumes, accumulating result columns.
    output = pd.DataFrame(index=inputs.index)
    if self._answer_correctness:
        answer_correctness_results = self._answer_correctness.run(
            inputs=pd.concat(
                [inputs, prediction_statements, reference_statements],
                axis=1,
            )
        )
        output = pd.concat([output, answer_correctness_results], axis=1)
    if self._context_recall:
        context_recall_results = self._context_recall.run(
            inputs=pd.concat([inputs, reference_statements], axis=1)
        )
        output = pd.concat([output, context_recall_results], axis=1)
    if self._faithfulness:
        faithfulness_results = self._faithfulness.run(
            inputs=pd.concat([inputs, prediction_statements], axis=1)
        )
        output = pd.concat([output, faithfulness_results], axis=1)
    return output

def _extract_statements_parallel(self, queries, answers, desc):
    """Extract statements for (query, answer) pairs on a thread pool.

    Shared by the reference- and prediction-statement branches of
    `run`; returns one statement list per input row, in order.
    """
    return concurrent.thread_map(
        self._statement_extractor.extract_statements,
        queries,
        answers,
        max_workers=4,
        desc=desc,
    )