in lmms_eval/api/task.py [0:0]
def download(self, dataset_kwargs=None) -> None:
    """Load the task dataset and build a copy of it without image columns.

    Populates two attributes:

    * ``self.dataset`` -- the result of ``datasets.load_dataset`` for
      ``self.DATASET_PATH`` / ``self.DATASET_NAME``.
    * ``self.dataset_no_image`` -- a second load of the same dataset in
      which every column whose feature is an ``Image`` (or a ``Sequence``
      of ``Image``) has been removed from each entry.

    Args:
        dataset_kwargs: Optional mapping of extra keyword arguments
            forwarded to ``datasets.load_dataset``. Two keys also tune the
            download configuration: ``max_retries`` (default 3) and
            ``num_proc`` (default 8). The mapping is not modified.
    """
    # Work on a copy so the caller's mapping is never mutated.
    load_kwargs = dict(dataset_kwargs) if dataset_kwargs is not None else {}

    download_config = DownloadConfig()
    # ``max_retries`` is a DownloadConfig setting rather than a
    # ``load_dataset`` argument, so it is consumed here instead of being
    # forwarded (where it would be treated as a builder-config key).
    download_config.max_retries = load_kwargs.pop("max_retries", 3)
    # ``num_proc`` is also accepted by ``load_dataset`` itself, so it is
    # read here but still forwarded unchanged.
    download_config.num_proc = load_kwargs.get("num_proc", 8)
    # Actually hand the config to ``load_dataset``; an explicit
    # ``download_config`` supplied by the caller takes precedence.
    load_kwargs.setdefault("download_config", download_config)

    self.dataset = datasets.load_dataset(
        path=self.DATASET_PATH,
        name=self.DATASET_NAME,
        download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
        **load_kwargs,
    )
    self.dataset_no_image = datasets.load_dataset(
        path=self.DATASET_PATH,
        name=self.DATASET_NAME,
        download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
        **load_kwargs,
    )

    for doc_name in self.dataset_no_image:
        features = self.dataset_no_image[doc_name].features
        # A column is dropped if it is an Image or a Sequence of Image.
        image_cols = [
            col
            for col, feature in features.items()
            if isinstance(feature, Image)
            or (isinstance(feature, Sequence) and isinstance(feature.feature, Image))
        ]
        if image_cols:
            # Drop all image columns in a single call.
            self.dataset_no_image[doc_name] = self.dataset_no_image[doc_name].remove_columns(image_cols)