in src/evaluate/module.py [0:0]
def _create_cache_file(self, timeout=1) -> Tuple[str, FileLock]:
    """Create a new cache file and acquire the lock that guards it.

    The default file name is built from ``experiment_id``, ``num_process`` and
    ``process_id``. If the lock for that file cannot be acquired and this is a
    single-process setup (``num_process == 1``), a random UUID is inserted into
    the file name and the acquisition is retried, for at most
    ``max_concurrent_cache_files`` attempts in total.

    Args:
        timeout: Value forwarded to ``FileLock.acquire(timeout=...)`` on every attempt.

    Returns:
        A ``(file_path, filelock)`` tuple: the path of the ``.arrow`` cache file and
        the acquired lock on ``file_path + ".lock"``. Note that ``filelock`` is ``None``
        when ``range(self.max_concurrent_cache_files)`` is empty, because no attempt
        is made in that case.

    Raises:
        ValueError: If the lock cannot be acquired and ``num_process != 1``, or if the
            last allowed attempt also times out.
    """
    file_path = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{self.process_id}.arrow")
    filelock = None
    for i in range(self.max_concurrent_cache_files):
        filelock = FileLock(file_path + ".lock")
        try:
            filelock.acquire(timeout=timeout)
        except Timeout:
            # If we have reached the max number of attempts, or we are not allowed to pick
            # a free name (distributed setup), we raise an error.
            if self.num_process != 1:
                # The file name is derived from fixed values here, so it cannot be
                # re-sampled: a collision is fatal.
                raise ValueError(
                    f"Error in _create_cache_file: another evaluation module instance is already using the local cache file at {file_path}. "
                    f"Please specify an experiment_id (currently: {self.experiment_id}) to avoid collision "
                    f"between distributed evaluation module instances."
                ) from None
            if i == self.max_concurrent_cache_files - 1:
                raise ValueError(
                    "Cannot acquire lock, too many evaluation module instance are operating concurrently on this file system. "
                    "You should set a larger value of max_concurrent_cache_files when creating the evaluation module "
                    f"(current value is {self.max_concurrent_cache_files})."
                ) from None
            # Otherwise (single process and attempts remaining) sample a new unique file name
            # and retry on the next iteration.
            file_uuid = str(uuid.uuid4())
            file_path = os.path.join(
                self.data_dir, f"{self.experiment_id}-{file_uuid}-{self.num_process}-{self.process_id}.arrow"
            )
        else:
            # Lock acquired: keep this path/lock pair.
            break
    return file_path, filelock