in yourbench/main.py [0:0]
def configure_pipeline_stages() -> dict:
    """Interactively build the configuration for every pipeline stage.

    Works in two passes so that all enable/disable questions are asked
    before any per-stage configuration starts:

    1. Each stage is offered through a confirmation prompt that defaults
       to enabled.
    2. Each stage's configure callback is invoked with the answer from the
       first pass. The two question-generation stages share a callback,
       which additionally receives the stage name as its first argument.

    Returns:
        A dict mapping each stage name to the value returned by that
        stage's configure callback.
    """

    def run_flag_only(is_enabled):
        # Used by stages whose whole configuration is the "run" flag.
        return {"run": is_enabled}

    # Stages whose callback is called as callback(stage_name, enabled).
    name_aware_stages = (
        "single_shot_question_generation",
        "multi_hop_question_generation",
    )

    console.print("\n[bold cyan]Pipeline Configuration[/bold cyan]")

    # stage name -> (description shown in the prompt, configure callback).
    # Insertion order is the order in which the user is prompted.
    stage_table = {
        "ingestion": ("Convert documents to markdown", configure_ingestion),
        "upload_ingest_to_hub": ("Upload to Hugging Face Hub", run_flag_only),
        "summarization": ("Generate document summaries", configure_summarization),
        "chunking": ("Split documents into chunks", configure_chunking),
        "single_shot_question_generation": (
            "Generate single-hop questions",
            configure_question_generation,
        ),
        "multi_hop_question_generation": (
            "Generate multi-hop questions",
            configure_question_generation,
        ),
        "lighteval": ("Create evaluation dataset", run_flag_only),
        "citation_score_filtering": ("Add citation scores", run_flag_only),
    }

    # Pass 1: collect an on/off answer for every stage.
    console.print("Select pipeline stages to enable:")
    selections = {
        name: Confirm.ask(f" {name} - {blurb}", default=True)
        for name, (blurb, _) in stage_table.items()
    }

    # Pass 2: hand each answer to the stage's configure callback.
    result = {}
    for name, (_, configure) in stage_table.items():
        wanted = selections[name]
        result[name] = (
            configure(name, wanted)
            if name in name_aware_stages
            else configure(wanted)
        )
    return result