# in obelics/callers/download_warc.py [0:0]
def get_args(argv=None):
    """Parse the options of the warc download script.

    Three options are declared:

    * ``--path_metadata_dataset`` (str): path of the dataset containing the
      metadata used to retrieve the warc files.
    * ``--path_save_dir_warc_dataset`` (str): directory in which the warc
      dataset is saved.
    * ``--num_proc`` (int): number of processes to use for the
      multiprocessing; defaults to ``cpu_count()``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line
            (``sys.argv[1:]``), which is what a bare ``get_args()`` call
            has always done.

    Returns:
        The ``argparse.Namespace`` produced by the parser, exposing
        ``path_metadata_dataset``, ``path_save_dir_warc_dataset`` and
        ``num_proc``.
    """
    parser = argparse.ArgumentParser(description="Download warc files from Common Crawl pointers.")
    parser.add_argument(
        "--path_metadata_dataset",
        type=str,
        default="./large_files/metadata_dataset_10000",
        help="Path of the dataset containing the metadata to retrieve the warc files.",
    )
    parser.add_argument(
        "--path_save_dir_warc_dataset",
        type=str,
        default="./large_files/warc_dataset_10000",
        help="The directory to save the warc dataset.",
    )
    parser.add_argument(
        "--num_proc",
        type=int,
        default=cpu_count(),
        help="Number of processes to use for the multiprocessing.",
    )
    # Passing None makes argparse fall back to sys.argv[1:], so callers that
    # do not supply argv keep the original command-line behaviour.
    return parser.parse_args(argv)