def get_compatible_libraries_for_csv()

in services/worker/src/worker/job_runners/dataset/compatible_libraries.py


def get_compatible_libraries_for_csv(dataset: str, hf_token: Optional[str], login_required: bool) -> CompatibleLibrary:
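    """Generate pandas or dask loading-code snippets for a CSV/TSV dataset hosted on the Hugging Face Hub."""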
    library: DatasetLibrary
    builder_configs = get_builder_configs_with_simplified_data_files(dataset, module_name="csv", hf_token=hf_token)
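    # Every split must have been simplified to exactly one file or glob pattern;
    # otherwise a simple loading snippet cannot be generated.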
    for config in builder_configs:
        if any(len(data_files) != 1 for data_files in config.data_files.values()):
            raise DatasetWithTooComplexDataFilesPatternsError(
                f"Failed to simplify csv data files pattern: {config.data_files}"
            )
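    # One loading-code entry per builder config, mapping each split to its single file or pattern;
    # the "code" field is filled in below.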
    loading_codes: list[LoadingCode] = [
        {
            "config_name": config.name,
            "arguments": {"splits": {str(split): data_files[0] for split, data_files in config.data_files.items()}},
            "code": "",
        }
        for config in builder_configs
    ]
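    # Recommend pandas when every split points to a single concrete file;
    # fall back to dask when any split uses a glob pattern or character range.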
    is_single_file = all(
        "*" not in data_file and "[" not in data_file
        for loading_code in loading_codes
        for data_file in loading_code["arguments"]["splits"].values()
    )
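    # Add a login note to the generated snippets when the dataset requires authentication.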
    comment = LOGIN_COMMENT if login_required else ""
    if is_single_file:
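        # pandas path: one pd.read_csv call per split, with sep="\t" when the file is a TSV.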
        library = "pandas"
        function = "pd.read_csv"
        for loading_code in loading_codes:
            first_file = next(iter(loading_code["arguments"]["splits"].values()))
            if ".tsv" in first_file:
                args = ', sep="\\t"'
            else:
                args = ""
            if len(loading_code["arguments"]["splits"]) == 1:
                data_file = next(iter(loading_code["arguments"]["splits"].values()))
                loading_code["code"] = PANDAS_CODE.format(
                    function=function, dataset=dataset, data_file=data_file, args=args, comment=comment
                )
            else:
                loading_code["code"] = PANDAS_CODE_SPLITS.format(
                    function=function,
                    dataset=dataset,
                    splits=loading_code["arguments"]["splits"],
                    first_split=next(iter(loading_code["arguments"]["splits"])),
                    args=args,
                    comment=comment,
                )
    else:
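        # dask path: dd.read_csv accepts glob patterns, so the pattern is passed through as-is.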
        library = "dask"
        function = "dd.read_csv"
        for loading_code in loading_codes:
            if len(loading_code["arguments"]["splits"]) == 1:
                pattern = next(iter(loading_code["arguments"]["splits"].values()))
                loading_code["code"] = DASK_CODE.format(
                    function=function, dataset=dataset, pattern=pattern, comment=comment
                )
            else:
                loading_code["code"] = DASK_CODE_SPLITS.format(
                    function=function,
                    dataset=dataset,
                    splits=loading_code["arguments"]["splits"],
                    first_split=next(iter(loading_code["arguments"]["splits"])),
                    comment=comment,
                )
    return {"language": "python", "library": library, "function": function, "loading_codes": loading_codes}