in yourbench/pipeline/question_generation.py [0:0]
def run_multi_hop(config: dict[str, Any]) -> None:
    """
    Drive multi-hop question generation and, optionally, its cross-document variant.

    Settings are read from ``config["pipeline"][MULTI_HOP_KEY]``:

    - ``run``: when falsy or missing, a message is logged and the function
      returns without doing any work.
    - ``question_mode``: ``"open-ended"`` (the default) or ``"multi-choice"``.
      Any other value triggers a warning and falls back to ``"open-ended"``.
      The mode selects which system prompt is sent with every inference call.
    - ``cross_document.enable``: when truthy, a second pass is run over the
      dataset produced by ``_create_cross_document_dataset``.

    The cross-document pass is configured inside the multi-hop stage, so it
    only runs when ``run`` is enabled as well.

    Parsed questions are saved under the ``"multi_hop_questions"`` subset and,
    for the cross-document pass, the ``"cross_document_questions"`` subset.
    """
    hop_settings = config.get("pipeline", {}).get(MULTI_HOP_KEY, {})
    cross_settings = hop_settings.get("cross_document", {})
    multi_hop_enabled = hop_settings.get("run", False)
    cross_doc_enabled = cross_settings.get("enable", False)

    if not multi_hop_enabled:
        logger.info("Multi-hop question generation is disabled.")
        return

    # Validate the requested mode; anything unrecognised falls back to open-ended.
    question_mode = hop_settings.get("question_mode", "open-ended")
    if question_mode not in {"open-ended", "multi-choice"}:
        logger.warning(f"Invalid question_mode '{question_mode}', defaulting to 'open-ended'")
        question_mode = "open-ended"

    if question_mode == "multi-choice":
        prompt_text = MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT_MULTI
    else:
        prompt_text = MULTI_HOP_QUESTION_GENERATION_SYSTEM_PROMPT
    system_message = {"role": "system", "content": prompt_text}

    chunked_ds = custom_load_dataset(config=config, subset="chunked")
    logger.info(f"Loaded {len(chunked_ds)} documents for multi-hop processing.")

    def _generate_and_store(source_ds, label: str):
        """Build inference calls for ``source_ds``, run them, and save parsed rows under ``label``."""
        has_rows = bool(source_ds) and len(source_ds) != 0
        if not has_rows:
            logger.warning(f"No valid {label} dataset found. Skipping.")
            return

        calls, index_map = build_multi_hop_inference_calls(source_ds, system_message, hop_settings)
        if not calls:
            logger.warning(f"No valid inference calls for {label}.")
            return

        model_outputs = run_inference(config=config, step_name=MULTI_HOP_KEY, inference_calls=calls)
        final_rows = parse_multi_hop_responses(model_outputs, index_map, hop_settings)
        if not final_rows:
            logger.info(f"No valid {label} questions parsed.")
            return

        logger.info(f"Saving {len(final_rows)} {label} questions.")
        custom_save_dataset(Dataset.from_list(final_rows), config=config, subset=label)

    # Standard multi-hop pass (the stage is known to be enabled at this point).
    _generate_and_store(chunked_ds, "multi_hop_questions")

    # Optional cross-document pass.
    if not cross_doc_enabled:
        return

    logger.info("Starting cross-document question generation.")
    cross_ds = _create_cross_document_dataset(chunked_ds, cross_settings)
    logger.info(f"Generated {len(cross_ds)} cross-document combinations.")
    _generate_and_store(cross_ds, "cross_document_questions")