def build_multi_hop_inference_calls()

in yourbench/utils/inference/inference_builders.py [0:0]


def build_multi_hop_inference_calls(dataset, system_msg, stage_cfg):
    """Build one multi-hop question-generation call per sampled chunk group.

    Every row's ``"multihop_chunks"`` value (or an empty list when missing or
    falsy) is passed to ``sample_multihop_groups`` together with the
    ``"chunk_sampling"`` entry of ``stage_cfg``. Each returned group that is a
    dict with non-empty ``"chunks_text"`` yields one ``InferenceCall`` whose
    user prompt wraps each text in ``<text_chunk_i>`` tags.

    Args:
        dataset: Iterable of rows exposing ``.get`` (dict-like). The keys read
            are ``"multihop_chunks"``, ``"document_filename"``,
            ``"document_summary"`` and ``"document_id"``.
        system_msg: Message placed first in every call's ``messages`` list.
        stage_cfg: Mapping read for ``"chunk_sampling"`` and
            ``"additional_instructions"``.

    Returns:
        A ``(calls, index_map)`` tuple of two equally long lists. Entry ``k``
        of ``index_map`` is ``(row_index, document_id, chunk_ids)`` for call
        ``k``; ``document_id`` falls back to ``"doc_<row_index>"`` when the
        row has no ``"document_id"`` key.
    """
    inference_calls = []
    call_origins = []

    for row_pos, record in enumerate(dataset):
        candidate_chunks = record.get("multihop_chunks") or []
        sampling_cfg = stage_cfg.get("chunk_sampling", {})

        for chunk_group in sample_multihop_groups(candidate_chunks, sampling_cfg):
            # TODO: find out how a non-dict group can reach this point.
            if not isinstance(chunk_group, dict):
                logger.warning("Multihop groups are not a dict, skipping")
                continue

            group_chunk_ids = chunk_group.get("chunk_ids", [])
            chunk_texts = chunk_group.get("chunks_text", [])
            if not chunk_texts:
                logger.warning("Chunks texts are empty, skipping")
                continue

            # Wrap each chunk in an indexed tag pair, one chunk per line.
            tagged_chunks = "".join(
                f"<text_chunk_{pos}>{text}</text_chunk_{pos}>\n"
                for pos, text in enumerate(chunk_texts)
            )
            prompt = MULTI_HOP_QUESTION_GENERATION_USER_PROMPT.format(
                title=record.get("document_filename", ""),
                document_summary=record.get("document_summary", ""),
                chunks=tagged_chunks,
                additional_instructions=stage_cfg.get("additional_instructions", ""),
            )
            user_msg = {"role": "user", "content": prompt}

            inference_calls.append(
                InferenceCall(messages=[system_msg, user_msg], tags=["multi_hop_qa"])
            )
            # Appended in lockstep with the call so both lists share indices.
            document_id = record.get("document_id", f"doc_{row_pos}")
            call_origins.append((row_pos, document_id, group_chunk_ids))

    return inference_calls, call_origins