in services/worker/src/worker/job_runner_factory.py [0:0]
def _create_job_runner(self, job_info: JobInfo) -> JobRunner:
    """Build the job runner whose job type equals ``job_info["type"]``.

    Candidate runner classes are tried in a fixed order; the first one whose
    ``get_job_type()`` equals the requested type is instantiated. Every runner
    is given ``job_info`` and this factory's ``app_config``. Some runners also
    receive further attributes of this factory, each passed as a keyword
    argument with the same name as the attribute.

    Args:
        job_info: The job description; its ``"type"`` entry selects the runner.

    Returns:
        A newly constructed runner for the requested job type.

    Raises:
        KeyError: If no candidate runner declares the requested job type.
    """
    job_type = job_info["type"]

    # Each entry pairs a runner class with the names of the factory attributes
    # it needs on top of ``job_info`` and ``app_config``. Entry order is the
    # lookup order, and the order of names inside an entry is the order in
    # which the attributes are read and passed.
    candidates = (
        (DatasetConfigNamesJobRunner, ("hf_datasets_cache",)),
        (DatasetFiletypesJobRunner, ("hf_datasets_cache",)),
        (ConfigSplitNamesJobRunner, ("hf_datasets_cache",)),
        (
            SplitFirstRowsJobRunner,
            ("hf_datasets_cache", "parquet_metadata_directory", "storage_client"),
        ),
        (ConfigParquetAndInfoJobRunner, ("hf_datasets_cache",)),
        (ConfigParquetJobRunner, ()),
        (ConfigParquetMetadataJobRunner, ("parquet_metadata_directory",)),
        (DatasetParquetJobRunner, ()),
        (DatasetInfoJobRunner, ()),
        (ConfigInfoJobRunner, ()),
        (DatasetSizeJobRunner, ()),
        (ConfigSizeJobRunner, ()),
        (DatasetSplitNamesJobRunner, ()),
        (SplitIsValidJobRunner, ()),
        (ConfigIsValidJobRunner, ()),
        (DatasetIsValidJobRunner, ()),
        (SplitImageUrlColumnsJobRunner, ()),
        (SplitOptInOutUrlsScanJobRunner, ("hf_datasets_cache",)),
        (ConfigOptInOutUrlsCountJobRunner, ()),
        (DatasetOptInOutUrlsCountJobRunner, ()),
        (SplitOptInOutUrlsCountJobRunner, ()),
        (SplitPresidioEntitiesScanJobRunner, ("hf_datasets_cache",)),
        (DatasetPresidioEntitiesCountJobRunner, ()),
        (
            SplitDescriptiveStatisticsJobRunner,
            ("statistics_cache_directory", "parquet_metadata_directory"),
        ),
        (DatasetHubCacheJobRunner, ()),
        (DatasetCompatibleLibrariesJobRunner, ("hf_datasets_cache",)),
        (DatasetModalitiesJobRunner, ()),
        (DatasetCroissantCrumbsJobRunner, ()),
    )

    for runner_cls, extra_names in candidates:
        if job_type == runner_cls.get_job_type():
            # The extra attributes are looked up only for the selected runner,
            # after ``app_config``, so nothing is read for runners that do not
            # match.
            return runner_cls(
                job_info=job_info,
                app_config=self.app_config,
                **{name: getattr(self, name) for name in extra_names},
            )

    raise KeyError(f"Unsupported job type: '{job_type}'.")