dpr_scale/utils/ccnews_stats.py [82:98]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def main(args, logger):
    """
    No hard negative sampling done. Only positive samples given a passage are prepared.
    TODO: One way would be to get other passages from the same article as negatives.

    The steps visible here log the run configuration, collect the path of
    every file found under ``args.doc_dir`` and choose a worker count.

    Args:
        args: Object exposing ``doc_dir``, ``debug`` and ``workers``
            attributes (presumably an ``argparse.Namespace`` -- TODO confirm
            against the caller).
        logger: Object providing an ``info`` method (presumably a
            ``logging.Logger`` -- TODO confirm against the caller).
    """

    # Dump every attribute of ``args`` into the log.
    logger.info(args.__dict__)
    # Recursively gather the full path of each file below args.doc_dir;
    # os.walk yields one (dir_path, dir_names, file_names) triple per directory.
    files = [
        os.path.join(dir_path, file_name)
        for (dir_path, dir_names, file_names) in os.walk(args.doc_dir)
        for file_name in file_names
    ]
    if args.debug:
        # Debug mode: keep only the first two collected paths. Which two
        # depends on the order in which os.walk reports directory entries.
        files = files[:2]

    # Never use more workers than there are files.
    # NOTE(review): this evaluates to 0 when no files were found -- confirm
    # that whatever consumes ``workers`` tolerates a zero count.
    workers = min(args.workers, len(files))
    logger.info(f"Number of workers = {workers}")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



dpr_scale/utils/prep_ccnews.py [194:210]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def main(args, logger):
    """
    No hard negative sampling done.
    Only positive samples given a passage are prepared.

    The steps visible here log the run configuration, collect the path of
    every file found under ``args.doc_dir`` and choose a worker count.

    Args:
        args: Object exposing ``doc_dir``, ``debug`` and ``workers``
            attributes (presumably an ``argparse.Namespace`` -- TODO confirm
            against the caller).
        logger: Object providing an ``info`` method (presumably a
            ``logging.Logger`` -- TODO confirm against the caller).
    """

    # Dump every attribute of ``args`` into the log.
    logger.info(args.__dict__)
    # Recursively gather the full path of each file below args.doc_dir;
    # os.walk yields one (dir_path, dir_names, file_names) triple per directory.
    files = [
        os.path.join(dir_path, file_name)
        for (dir_path, dir_names, file_names) in os.walk(args.doc_dir)
        for file_name in file_names
    ]
    if args.debug:
        # Debug mode: keep only the first two collected paths. Which two
        # depends on the order in which os.walk reports directory entries.
        files = files[:2]

    # Never use more workers than there are files.
    # NOTE(review): this evaluates to 0 when no files were found -- confirm
    # that whatever consumes ``workers`` tolerates a zero count.
    workers = min(args.workers, len(files))
    logger.info(f"Number of workers = {workers}")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



