def main()

in cc_net/mine.py [0:0]


def main(config: str = "base", **config_as_dict: Any) -> None:
    """Resolve a configuration, run ``mine`` on it and post-process the outputs.

    ``config`` is first looked up in ``PREDEF_CONFIGS``; if it is not a key
    there, it is treated as a filesystem path and loaded through
    ``Config.from_json``. Every entry of ``config_as_dict`` whose value is not
    ``None`` then overrides the corresponding field via ``_replace``.

    Raises:
        ValueError: if ``config`` is neither a key of ``PREDEF_CONFIGS`` nor
            an existing path.
    """
    # Predefined names take precedence over file paths.
    if config in PREDEF_CONFIGS:
        conf = PREDEF_CONFIGS[config]
    else:
        json_path = Path(config)
        if not json_path.exists():
            raise ValueError(
                f"Invalid value {config} for --config. "
                f"Choose from ({', '.join(PREDEF_CONFIGS)}) or give an existing .json file."
            )
        conf = Config.from_json(json_path)

    # A value of None means "not provided": keep the value from the base config.
    overrides = {}
    for field_name, field_value in config_as_dict.items():
        if field_value is not None:
            overrides[field_name] = field_value
    conf = conf._replace(**overrides)

    print("Will run cc_net.mine.main with the following config:", conf)

    outputs = mine(conf)
    if conf.will_split:
        # In split mode the outputs are expected to be a non-empty
        # collection of directories.
        assert outputs
        assert all(out.is_dir() for out in outputs)
        if "split_by_lang" in conf.pipeline:
            # Only try regrouping if we split the shards.
            regroup(conf, outputs)
        elif "split_by_segment" in conf.pipeline:
            # If we split by segment then regrouping is trivial,
            # since segments appear in only one shard.
            move_segments(conf, outputs)

    # Validation only applies to the config named "test".
    if conf.config_name != "test":
        return
    _validate_test(conf, conf.get_mined_dir(regroup=True))