in yourbench/utils/parsing_engine.py [0:0]
def parse_multi_hop_responses(responses, index_map, stage_cfg):
    """Convert raw multi-hop model replies into serialized question rows.

    Each reply is parsed into question/answer pairs. Every pair is passed
    through ``shuffle_mcq``, normalized for the configured question mode, and
    wrapped in a ``QuestionRow`` that is serialized with
    ``to_dict(format="multi-hop")``.

    Parsing is best-effort: a pair that fails to process is logged and
    skipped, and a reply with no usable ``index_map`` entry is logged and
    skipped as a whole, so one bad item never aborts the rest.

    Args:
        responses: Mapping of model name to that model's sequence of raw
            replies.
        index_map: Container indexed by reply position. Item ``[1]`` of each
            entry is used as the document id and item ``[2]`` as the source
            chunk ids.
        stage_cfg: Mapping-like config read via ``.get``. ``"question_mode"``
            (default ``"open-ended"``) selects the normalizer and
            ``"additional_instructions"`` (default ``""``) is copied into
            every row.

    Returns:
        A list of serialized rows. The list is empty when the question mode
        is neither ``"open-ended"`` nor ``"multi-choice"``.
    """
    rows = []
    question_mode = str(stage_cfg.get("question_mode", "open-ended")).strip().lower()

    # The mode is fixed for the whole call, so reject an unsupported value
    # once instead of logging the same warning for every parsed pair.
    if question_mode not in ("open-ended", "multi-choice"):
        logger.warning(f"Unsupported question_mode: {question_mode}")
        return rows

    is_multi_choice = question_mode == "multi-choice"
    additional_instructions = stage_cfg.get("additional_instructions", "")

    for model, replies in responses.items():
        for i, raw in enumerate(replies):
            # Resolve the index entry up front. Looking it up inside the
            # error handler (as the log message needs the document id) would
            # raise a second, uncaught exception whenever the entry is
            # missing or too short.
            try:
                entry = index_map[i]
                document_id = entry[1]
                source_chunk_ids = entry[2]
            except (IndexError, KeyError, TypeError):
                logger.warning(
                    f"No usable index_map entry for multi-hop reply {i} from model {model}; skipping"
                )
                continue

            for pair in parse_qa_pairs_from_response(raw):
                try:
                    pair = shuffle_mcq(pair)
                    pair["question_mode"] = question_mode

                    if is_multi_choice:
                        pair = normalize_multi_choice(pair)
                    else:
                        pair = normalize_open_ended(pair)
                    # A normalizer returning None means the pair is rejected.
                    if pair is None:
                        continue

                    choices = pair["choices"] if is_multi_choice else []
                    citations = validate_list(pair.get("citations", []))

                    rows.append(
                        QuestionRow(
                            chunk_id=None,
                            source_chunk_ids=source_chunk_ids,
                            document_id=document_id,
                            additional_instructions=additional_instructions,
                            question=str(pair.get("question", "")).strip(),
                            self_answer=str(pair.get("answer", "")).strip(),
                            choices=choices,
                            estimated_difficulty=force_int_in_range(pair.get("estimated_difficulty", 5), 1, 10),
                            self_assessed_question_type=str(pair.get("question_type", "")).strip(),
                            question_mode=pair["question_mode"],
                            generating_model=model,
                            thought_process=str(pair.get("thought_process", "")),
                            raw_response=raw,
                            citations=citations,
                        ).to_dict(format="multi-hop")
                    )
                except Exception as e:
                    # Deliberate best-effort boundary: the helpers are opaque,
                    # so any failure drops just this pair.
                    logger.warning(f"Parse error in multi-hop QA for doc {document_id}: {e}")
                    continue
    return rows