in src/lighteval/metrics/dynamic_metrics.py [0:0]
def sample_level_fn(doc: Doc, model_response: ModelResponse) -> float:
    """Score one sample by matching extracted predictions against extracted golds.

    Targets are extracted from every gold returned by ``doc.get_golds()`` and
    from every entry of ``model_response.text``. Each prediction is scored
    1.0 when ``compare_gold_target`` is truthy for at least one gold and 0.0
    otherwise; the list of per-prediction scores is passed to
    ``aggregation_function`` and its result is returned.

    Fallbacks:
        * If extraction produced nothing for any gold, a warning is logged and
          every raw gold is used instead, wrapped in a one-element list.
        * If extraction produced nothing for all predictions, a warning is
          logged and scoring proceeds with the empty extractions.

    NOTE(review): the extraction targets, ``language``, ``fallback_mode``,
    ``extraction_mode``, ``timeout_seconds``, ``precision`` and
    ``aggregation_function`` are not defined in this block; they are
    presumably captured from an enclosing scope -- confirm against the
    surrounding definition.
    """
    golds = doc.get_golds()
    predictions = model_response.text

    gold_regexes = get_extraction_regexes(doc, gold_extraction_target, language)
    pred_regexes = get_extraction_regexes(doc, pred_extraction_target, language)

    # Predictions are extracted before golds, each with its own regex set.
    extracted_predictions = []
    for prediction in predictions:
        extracted_predictions.append(
            extract_target_from_pred(prediction, pred_regexes, fallback_mode, extraction_mode, timeout_seconds)
        )

    extracted_golds = []
    for gold in golds:
        extracted_golds.append(
            extract_target_from_pred(gold, gold_regexes, fallback_mode, extraction_mode, timeout_seconds)
        )

    # A single gold with no extraction makes us fall back to the raw golds
    # for all of them (warn, do not fail).
    gold_extraction_failed = any(len(extracted) == 0 for extracted in extracted_golds)
    if gold_extraction_failed:
        logger.warning(f"We did not manage to extract a gold in the correct format. Gold: {golds}")
        extracted_golds = [[gold] for gold in golds]

    # Only warn about predictions when none of them yielded anything.
    no_prediction_extracted = all(len(extracted) == 0 for extracted in extracted_predictions)
    if no_prediction_extracted:
        logger.warning(
            f"We did not manage to extract a prediction in the correct format. Gold: {golds}, Pred: {predictions}"
        )

    # Recording the extractions goes through a timeout-guarded helper because
    # the sympy-to-str conversion can be very slow. This step is best-effort:
    # any exception is logged and scoring continues.
    try:
        add_to_specifics_with_timeout(doc, extracted_predictions, extracted_golds)
    except Exception:
        logger.warning("Timeout when adding extracted predictions and golds to specific")

    # One score per prediction: 1.0 as soon as any gold matches, else 0.0.
    scores = []
    for pred in extracted_predictions:
        matches_any_gold = any(
            compare_gold_target(gold, pred, precision, timeout_seconds=timeout_seconds)
            for gold in extracted_golds
        )
        scores.append(1.0 if matches_any_gold else 0.0)

    return aggregation_function(scores)