in yourbench/utils/inference/inference_builders.py [0:0]
def build_multi_hop_inference_calls(dataset, system_msg, stage_cfg):
    """Build one inference call per usable multi-hop chunk group.

    For every row in ``dataset`` the row's ``"multihop_chunks"`` value (or an
    empty list when it is missing or falsy) is passed to
    ``sample_multihop_groups`` together with ``stage_cfg["chunk_sampling"]``
    (``{}`` when absent). Each returned group that is a dict with a non-empty
    ``"chunks_text"`` value yields one ``InferenceCall`` tagged
    ``"multi_hop_qa"``; other groups are skipped with a warning.

    Args:
        dataset: Iterable of rows exposing ``.get``. The keys read are
            ``"multihop_chunks"``, ``"document_filename"``,
            ``"document_summary"`` and ``"document_id"``.
        system_msg: Message placed first in every call's ``messages`` list.
        stage_cfg: Mapping read for ``"chunk_sampling"`` and
            ``"additional_instructions"``.

    Returns:
        A ``(calls, index_map)`` tuple of two parallel lists. Entry ``i`` of
        ``index_map`` is ``(row_index, document_id, chunk_ids)`` for
        ``calls[i]``, where ``document_id`` falls back to ``"doc_<row_index>"``
        when the row has no ``"document_id"`` key.
    """
    inference_calls = []
    call_origins = []

    for row_idx, record in enumerate(dataset):
        sampled_groups = sample_multihop_groups(
            record.get("multihop_chunks") or [],
            stage_cfg.get("chunk_sampling", {}),
        )

        for candidate in sampled_groups:
            # TODO: work out how a non-dict group can end up here; until then
            # such entries are skipped defensively.
            if not isinstance(candidate, dict):
                logger.warning("Multihop groups are not a dict, skipping")
                continue

            group_chunk_ids = candidate.get("chunk_ids", [])
            chunk_texts = candidate.get("chunks_text", [])
            if not chunk_texts:
                logger.warning("Chunks texts are empty, skipping")
                continue

            # Wrap each chunk in its own numbered tag so the prompt can refer
            # to chunks individually.
            tagged_chunks = "".join(
                f"<text_chunk_{pos}>{text}</text_chunk_{pos}>\n"
                for pos, text in enumerate(chunk_texts)
            )
            prompt = MULTI_HOP_QUESTION_GENERATION_USER_PROMPT.format(
                title=record.get("document_filename", ""),
                document_summary=record.get("document_summary", ""),
                chunks=tagged_chunks,
                additional_instructions=stage_cfg.get("additional_instructions", ""),
            )
            user_turn = {"role": "user", "content": prompt}

            inference_calls.append(
                InferenceCall(messages=[system_msg, user_turn], tags=["multi_hop_qa"])
            )
            # Kept in lockstep with inference_calls so each response can be
            # traced back to its source row and chunk group.
            origin = (row_idx, record.get("document_id", f"doc_{row_idx}"), group_chunk_ids)
            call_origins.append(origin)

    return inference_calls, call_origins