in yourbench/pipeline/citation_score_filtering.py
def run(config: dict[str, Any]) -> None:
    """Entry point for the citation score filtering stage."""
    stage_cfg = _get_stage_config(config)
    if not stage_cfg.run:
        logger.info("citation_score_filtering stage is disabled. Skipping.")
        return
logger.info(f"Loading '{stage_cfg.subset}' subset for citation score filtering...")
try:
lighteval_ds = custom_load_dataset(config=config, subset=stage_cfg.subset)
except Exception as e:
logger.exception(f"Could not load subset '{stage_cfg.subset}': {e}")
return
if len(lighteval_ds) == 0:
logger.warning("Dataset is empty; nothing to process.")
return
logger.debug(f"Computing citation scores for {len(lighteval_ds)} rows")
scorer = CitationScoreCalculator(stage_cfg.alpha, stage_cfg.beta)
all_answer_citation_scores = []
all_chunk_citation_scores = []
all_final_scores = []
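    # Compute per-row scores from each row's citations, source chunks, and ground-truth answer.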
    for row in lighteval_ds:
        ans_score, chunk_score, final_score = scorer.compute(
            citations=row.get("citations", []),
            chunks=row.get("chunks", []),
            answer=row.get("ground_truth_answer", ""),
        )
        all_answer_citation_scores.append(ans_score)
        all_chunk_citation_scores.append(chunk_score)
        all_final_scores.append(final_score)
    # Replace the score columns via the helper. Original column metadata is not
    # preserved, but that is acceptable here: the computed scores are plain floats,
    # so type inference will identify the new columns as numeric.
    columns_data = {
        "answer_citation_score": all_answer_citation_scores,
        "chunk_citation_score": all_chunk_citation_scores,
        "citation_score": all_final_scores,
    }
    lighteval_ds = replace_dataset_columns(lighteval_ds, columns_data)
logger.info("Saving updated dataset with new citation score columns...")
custom_save_dataset(dataset=lighteval_ds, config=config, subset=stage_cfg.subset)
logger.success("citation_score_filtering stage completed successfully.")