run_pipeline.py
import argparse


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--n_samples", type=int, default=100_000)
    parser.add_argument("--start", type=int, default=-1)
    parser.add_argument("--end", type=int, default=100_000)
    parser.add_argument("--device", type=str, default="cuda")
    parser.add_argument("--save_load_path", type=str, default="./cc_100k")
    parser.add_argument(
        "--input_dataset",
        type=str,
        default="HuggingFaceFW/FW-12-12-2023-CC-2023-06",
        help="Dataset with the samples to use for clustering.",
    )
    parser.add_argument(
        "--data_subset",
        type=str,
        default=None,
        help="Dataset subset.",
    )
    parser.add_argument("--input_content", type=str, default="content")
    parser.add_argument(
        "--topic_mode",
        type=str,
        choices=["single_topic", "multiple_topics"],
        default="multiple_topics",
        help="Specify 'single_topic' to generate one topic per cluster and score its educational value, or 'multiple_topics' to generate the 3 most relevant topics in each cluster.",
    )
    parser.add_argument(
        "--dbscan_eps",
        type=float,
        default=0.08,
        help="The maximum distance between two samples for them to be considered as in the neighborhood of each other.",
    )
    parser.add_argument(
        "--dbscan_min_samples",
        type=int,
        default=50,
        help="The number of samples in a neighborhood for a point to be considered as a core point.",
    )
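    # Note: the two DBSCAN flags above mirror scikit-learn's
    # DBSCAN(eps=..., min_samples=...) parameters (the help texts match its
    # documentation), so they are presumably passed through to that estimator.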
    parser.add_argument(
        "--mode",
        choices=["run", "load", "infer"],
        default="run",
        help="'run' executes the pipeline from scratch, 'load' loads an existing model to build the HF datasets, and 'infer' runs inference on new texts.",
    )
    parser.add_argument(
        "--inference_repo_name",
        type=str,
        default="infer_fw_on_ultrachat",
        help="HF repo name for the clusters dataset in inference mode.",
    )
    parser.add_argument(
        "--build_hf_ds",
        action="store_true",
        help="Build the HF datasets used for the Space visualization and push them to the Hub.",
    )
    parser.add_argument("--username", type=str, default="loubnabnl")
    return parser.parse_args()
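

# A minimal usage sketch (an assumption for illustration, not the repository's
# confirmed entry point): it shows how the parsed arguments could drive dataset
# loading. The load_dataset/select calls follow the Hugging Face `datasets`
# API; the slicing and sampling logic is inferred from the flag names.
from datasets import load_dataset

if __name__ == "__main__":
    args = get_args()
    ds = load_dataset(args.input_dataset, args.data_subset, split="train")
    # Restrict to the requested [start, end) range when --start is set.
    if args.start >= 0:
        ds = ds.select(range(args.start, min(args.end, len(ds))))
    # Take at most --n_samples texts from the configured content column.
    texts = ds[args.input_content][: args.n_samples]
    print(f"Loaded {len(texts)} texts for mode={args.mode!r}")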