def configure_pipeline_stages()

in yourbench/main.py [0:0]


def configure_pipeline_stages() -> dict:
    """Interactively build the configuration for every pipeline stage.

    The function works in two passes over the same ordered stage table:

    1. Ask one yes/no question per stage (answer defaults to ``True``).
    2. Call each stage's configure function with that answer. This happens
       for every stage, whether or not it was enabled; the flag is simply
       forwarded. The two question-generation stages share one configure
       function, so they additionally receive their own stage name.

    Returns:
        A dict keyed by stage name whose values are whatever the stage's
        configure function returned.
    """
    console.print("\n[bold cyan]Pipeline Configuration[/bold cyan]")

    def _run_only(flag):
        # Stages with no further options are described by their run flag alone.
        return {"run": flag}

    # stage name -> (prompt description, configure callable); order is the
    # order in which the user is prompted and the stages are configured.
    stage_table = {
        "ingestion": ("Convert documents to markdown", configure_ingestion),
        "upload_ingest_to_hub": ("Upload to Hugging Face Hub", _run_only),
        "summarization": ("Generate document summaries", configure_summarization),
        "chunking": ("Split documents into chunks", configure_chunking),
        "single_shot_question_generation": ("Generate single-hop questions", configure_question_generation),
        "multi_hop_question_generation": ("Generate multi-hop questions", configure_question_generation),
        "lighteval": ("Create evaluation dataset", _run_only),
        "citation_score_filtering": ("Add citation scores", _run_only),
    }

    # Stages whose configure function expects the stage name as first argument.
    takes_stage_name = ("single_shot_question_generation", "multi_hop_question_generation")

    # Pass 1: collect every enable/disable answer before configuring anything.
    console.print("Select pipeline stages to enable:")
    wanted = {
        name: Confirm.ask(f"  {name} - {description}", default=True)
        for name, (description, _) in stage_table.items()
    }

    # Pass 2: hand each answer to the matching configure function.
    result = {}
    for name, (_, configure) in stage_table.items():
        call_args = (name, wanted[name]) if name in takes_stage_name else (wanted[name],)
        result[name] = configure(*call_args)

    return result