def parse_multi_hop_responses()

in yourbench/utils/parsing_engine.py [0:0]


def parse_multi_hop_responses(responses, index_map, stage_cfg):
    """Convert raw multi-hop model replies into serialized question rows.

    Args:
        responses: Mapping of model name to that model's list of raw replies.
            Reply ``i`` is paired with ``index_map[i]``.
        index_map: Indexable collection whose ``i``-th entry holds the
            document id at position 1 and the source chunk ids at position 2.
        stage_cfg: Stage configuration mapping. ``question_mode`` (default
            ``"open-ended"``) and ``additional_instructions`` (default ``""``)
            are read from it.

    Returns:
        A list with one ``QuestionRow(...).to_dict(format="multi-hop")`` result
        per QA pair that survived normalization. The list is empty when the
        configured question mode is unsupported.
    """
    rows = []
    question_mode = str(stage_cfg.get("question_mode", "open-ended")).strip().lower()

    # The mode is fixed for the whole call, so reject an unsupported value once
    # instead of parsing every reply and warning once per QA pair.
    if question_mode not in ("open-ended", "multi-choice"):
        logger.warning(f"Unsupported question_mode: {question_mode}")
        return rows

    additional_instructions = stage_cfg.get("additional_instructions", "")

    for model, replies in responses.items():
        for i, raw in enumerate(replies):
            # Resolve the index entry up front. Doing this lookup inside the
            # per-pair error handler would raise a second, uncaught error
            # whenever a reply has no matching (or a malformed) entry.
            try:
                entry = index_map[i]
                document_id = entry[1]
                source_chunk_ids = entry[2]
            except (IndexError, KeyError, TypeError) as e:
                logger.warning(
                    f"Skipping multi-hop reply {i} from model {model}: no usable index_map entry ({e!r})"
                )
                continue

            parsed = parse_qa_pairs_from_response(raw)
            for pair in parsed:
                try:
                    pair = shuffle_mcq(pair)
                    pair["question_mode"] = question_mode

                    if question_mode == "open-ended":
                        pair = normalize_open_ended(pair)
                        if pair is None:
                            continue
                        choices = []
                    else:  # "multi-choice": the only other mode that passes the guard above
                        pair = normalize_multi_choice(pair)
                        if pair is None:
                            continue
                        choices = pair["choices"]

                    citations = validate_list(pair.get("citations", []))

                    rows.append(
                        QuestionRow(
                            chunk_id=None,
                            source_chunk_ids=source_chunk_ids,
                            document_id=document_id,
                            additional_instructions=additional_instructions,
                            question=str(pair.get("question", "")).strip(),
                            self_answer=str(pair.get("answer", "")).strip(),
                            choices=choices,
                            estimated_difficulty=force_int_in_range(pair.get("estimated_difficulty", 5), 1, 10),
                            self_assessed_question_type=str(pair.get("question_type", "")).strip(),
                            question_mode=pair["question_mode"],
                            generating_model=model,
                            thought_process=str(pair.get("thought_process", "")),
                            raw_response=raw,
                            citations=citations,
                        ).to_dict(format="multi-hop")
                    )
                except Exception as e:
                    # Best-effort parsing: one malformed pair must not discard
                    # the remaining pairs. Only the already-resolved document
                    # id is used here, so this handler cannot itself raise.
                    logger.warning(f"Parse error in multi-hop QA for doc {document_id}: {e}")
                    continue

    return rows