dpr_scale/run_retrieval_fb.py [62:111]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    )
    return parser


def merge_results(
    passages: Dict,
    questions: List,
    top_doc_ids: List,
    scores_list: List,
):
    # Join each question with the metadata and scores of its retrieved passages.
    merged_data = []
    assert len(top_doc_ids) == len(questions) == len(scores_list)
    for i, (question, doc_ids, scores) in enumerate(
        zip(questions, top_doc_ids, scores_list)
    ):
        ctxs = [
            {
                "id": passages[doc_id]["id"],
                "title": passages[doc_id]["title"],
                "text": passages[doc_id]["text"],
                "score": float(score),
            }
            for doc_id, score in zip(doc_ids, scores)
        ]

        merged_data.append(
            {
                "question": question["question"],
                "answers": question["answers"] if "answers" in question else [],
                "ctxs": ctxs,
                "id": question.get("id", i),
            }
        )
    return merged_data
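
For reference, a minimal sketch of driving merge_results; the passage ids,
question, and scores below are made-up stand-ins, not real retrieval output:

passages = {
    "p1": {"id": "p1", "title": "FAISS", "text": "Faiss is a similarity search library."},
    "p2": {"id": "p2", "title": "DPR", "text": "DPR retrieves passages with a bi-encoder."},
}
questions = [{"question": "What is faiss?", "answers": ["a similarity search library"]}]
top_doc_ids = [["p1", "p2"]]  # ranked passage ids per question
scores_list = [[0.92, 0.35]]  # matching inner-product scores

merged = merge_results(passages, questions, top_doc_ids, scores_list)
print(merged[0]["ctxs"][0]["title"])  # -> FAISS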


def build_index(paths):
    index = None
    for fname in paths:
        with PathManager.open(fname, 'rb') as f:
            # Each shard is a pickled 2-D torch tensor of passage embeddings.
            vector = pickle.load(f)  # noqa
            if index is None:
                index = faiss.IndexFlatIP(vector.size(1))
            print(f"Adding {vector.size(0)} vectors from {fname}")
            index.add(vector.numpy())
    return index
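
Once built, the flat inner-product index is queried with faiss's search();
a sketch, assuming 768-dim embeddings and random stand-in question vectors:

import faiss
import numpy as np

dim = 768
index = faiss.IndexFlatIP(dim)
index.add(np.random.rand(1000, dim).astype("float32"))

question_vectors = np.random.rand(4, dim).astype("float32")
scores, top_doc_ids = index.search(question_vectors, 10)  # top-10 per question
# scores: (4, 10) inner products; top_doc_ids: (4, 10) row ids into the index.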


def main(args, logger):
    # Temp patch for datamodule refactoring
    logger.info(args.__dict__)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



dpr_scale/run_retrieval_multiset.py [60:109]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    )
    return parser


def merge_results(
    passages: Dict,
    questions: List,
    top_doc_ids: List,
    scores_list: List,
):
    # Join each question with the metadata and scores of its retrieved passages.
    merged_data = []
    assert len(top_doc_ids) == len(questions) == len(scores_list)
    for i, (question, doc_ids, scores) in enumerate(
        zip(questions, top_doc_ids, scores_list)
    ):
        ctxs = [
            {
                "id": passages[doc_id]["id"],
                "title": passages[doc_id]["title"],
                "text": passages[doc_id]["text"],
                "score": float(score),
            }
            for doc_id, score in zip(doc_ids, scores)
        ]

        merged_data.append(
            {
                "question": question["question"],
                "answers": question["answers"] if "answers" in question else [],
                "ctxs": ctxs,
                "id": question.get("id", i),
            }
        )
    return merged_data
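
Since the merged records are plain dicts and lists, persisting them is a
single json.dump; the output filename here is an arbitrary choice for the sketch:

import json

with open("retrieval_results.json", "w") as f:
    json.dump(merged_data, f, indent=2)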


def build_index(paths):
    index = None
    for fname in paths:
        with PathManager.open(fname, 'rb') as f:
            # Each shard is a pickled 2-D torch tensor of passage embeddings.
            vector = pickle.load(f)  # noqa
            if index is None:
                index = faiss.IndexFlatIP(vector.size(1))
            print(f"Adding {vector.size(0)} vectors from {fname}")
            index.add(vector.numpy())
    return index
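
A sketch of the shard format build_index assumes: each pickle holds a single
2-D float tensor of passage embeddings. Plain open() stands in for PathManager
(which resolves local paths the same way) to keep the sketch dependency-light:

import pickle
import torch

for shard in ("shard0.pkl", "shard1.pkl"):  # hypothetical local shard files
    with open(shard, "wb") as f:
        pickle.dump(torch.rand(8, 768), f)  # 8 fake passages, 768-dim each

index = build_index(["shard0.pkl", "shard1.pkl"])
print(index.ntotal)  # -> 16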


def main(args, logger):
    # Temp patch for datamodule refactoring
    logger.info(args.__dict__)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



