dpr_scale/utils/prep_wiki.py [101:134]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            out_json = ujson.dumps(
                {
                    "question": question,
                    "question_pos": question_pos,
                    "answers": [],
                    "positive_ctxs": [
                        {
                            "text": passage,
                            "title": title,
                            "score": 1000,
                            "title_score": 1,
                            "passage_id": passage_id,
                        }
                    ],
                    "hard_negative_ctxs": [],
                }
            )
            outfile = train_file if random.random() > dev_pct else dev_file
            outfile.write(f"{out_json}\n")

            num_samples += 1
            if debug and num_samples == 20:
                break

    return num_samples


def main(args, logger):
    """
    No hard-negative sampling is performed.
    Only positive samples are prepared for each passage.
    """

    logger.info(args.__dict__)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



dpr_scale/utils/prep_wiki_exp.py [172:203]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                out_json = ujson.dumps(
                    {
                        "question": question,
                        "question_pos": question_pos,
                        "answers": [],
                        "positive_ctxs": [
                            {
                                "text": passage,
                                "title": title,
                                "score": 1000,
                                "title_score": 1,
                                "passage_id": passage_id,
                            }
                        ],
                        "hard_negative_ctxs": [],
                    }
                )
                outfile = train_file if random.random() > dev_pct else dev_file
                outfile.write(f"{out_json}\n")

                num_samples += 1
                if debug and num_samples == 20:
                    break

    return num_samples


def main(args, logger):
    # Example invocations:
    # PYTHONPATH=. python dpr_scale/utils/prep_wiki_exp.py --workers 16 --doc_path /private/home/vladk/data/wikipedia/wiki_passages/psgs_w100.tsv
    # PYTHONPATH=. python dpr_scale/utils/prep_wiki_exp.py --workers 16 --doc_dict_path /checkpoint/kushall/data/wikipedia/psgs_w100_doc_dict.npy

    logger.info(args.__dict__)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



