in src/datasets/fingerprint.py [0:0]
def _fingerprint(func):
    """Wrap ``func`` so that each call derives an updated dataset fingerprint.

    ``inplace``, ``fingerprint_names``, ``randomized_function``, ``version``,
    ``use_kwargs`` and ``ignore_kwargs`` are read from the enclosing scope.

    Args:
        func: The transform to decorate. The wrapper treats its first
            parameter as the dataset, whether passed positionally or by keyword.

    Returns:
        The wrapped function, tagged with ``_decorator_name_ = "fingerprint"``.

    Raises:
        ValueError: If ``func`` does not declare the parameters required by the
            configured mode: every name in ``fingerprint_names`` for a
            non in-place transform, and ``seed`` and ``generator`` for a
            randomized function.
    """
    # `co_varnames` holds the parameter names first, followed by the names of the
    # function's local variables. Only keep the named parameters (positional and
    # keyword-only) so that a mere local variable called e.g. "seed" cannot
    # satisfy the signature checks below.
    code = func.__code__
    param_names = code.co_varnames[: code.co_argcount + code.co_kwonlyargcount]

    if not inplace and not all(name in param_names for name in fingerprint_names):
        raise ValueError(f"function {func} is missing parameters {fingerprint_names} in signature")

    if randomized_function:  # randomized functions have seed and generator parameters
        if "seed" not in param_names:
            raise ValueError(f"'seed' must be in {func}'s signature")
        if "generator" not in param_names:
            raise ValueError(f"'generator' must be in {func}'s signature")

    # this call has to be outside the wrapper since __qualname__ changes in multiprocessing
    transform = format_transform_for_fingerprint(func, version=version)

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Built from the untouched args/kwargs, i.e. before the dataset is split off below.
        kwargs_for_fingerprint = format_kwargs_for_fingerprint(
            func,
            args,
            kwargs,
            use_kwargs=use_kwargs,
            ignore_kwargs=ignore_kwargs,
            randomized_function=randomized_function,
        )

        # The dataset is the first argument of `func`; when it was passed by keyword,
        # look up the name of the first parameter to pop it from kwargs.
        if args:
            dataset: Dataset = args[0]
            args = args[1:]
        else:
            dataset: Dataset = kwargs.pop(next(iter(inspect.signature(func).parameters)))

        # compute new_fingerprint and add it to the kwargs of not in-place transforms
        if inplace:
            new_fingerprint = update_fingerprint(dataset._fingerprint, transform, kwargs_for_fingerprint)
        else:
            for fingerprint_name in fingerprint_names:  # transforms like `train_test_split` have several hashes
                if kwargs.get(fingerprint_name) is None:
                    # Include the name in the hashed kwargs so each fingerprint gets its own input.
                    kwargs_for_fingerprint["fingerprint_name"] = fingerprint_name
                    kwargs[fingerprint_name] = update_fingerprint(
                        dataset._fingerprint, transform, kwargs_for_fingerprint
                    )
                else:
                    # A fingerprint supplied by the caller is kept, but validated.
                    validate_fingerprint(kwargs[fingerprint_name])

        # Call actual function
        out = func(dataset, *args, **kwargs)

        # Update fingerprint of in-place transforms + update in-place history of transforms
        if inplace:  # update after calling func so that the fingerprint doesn't change if the function fails
            dataset._fingerprint = new_fingerprint

        return out

    wrapper._decorator_name_ = "fingerprint"
    return wrapper