in src/mlmax/preprocessing.py [0:0]
def transform(df, args, preprocess=None):
    """Apply a fitted preprocessing model to ``df`` and return the features.

    When ``preprocess`` is not supplied, the model is restored from disk:
    ``proc_model.tar.gz`` under ``<args.data_dir>/model`` is unpacked into
    that same directory and ``model.joblib`` is then loaded from it.

    Args:
        df: Input data handed unchanged to ``preprocess.transform``
            (presumably a pandas DataFrame -- confirm against callers).
        args: Object exposing a ``data_dir`` attribute. It is only read
            when ``preprocess`` is ``None``.
        preprocess: Optional, already-loaded object with a ``transform``
            method. ``None`` (the default) triggers loading from disk.

    Returns:
        The value returned by ``preprocess.transform(df)``. It must expose
        a ``shape`` attribute, which is printed before returning.

    Raises:
        Nothing is caught here: errors raised while opening or extracting
        the archive, loading the model, or transforming ``df`` propagate
        to the caller.
    """
    if preprocess is None:
        model_directory = os.path.join(args.data_dir, "model")
        archive_path = os.path.join(model_directory, "proc_model.tar.gz")
        print(f"Reading model from {model_directory}")
        with tarfile.open(archive_path, mode="r:gz") as archive:
            print(f"Exctracting tarfile to {model_directory}")
            # Ask for the "data" extraction filter whenever the running
            # interpreter supports it: it rejects members that would land
            # outside ``model_directory`` (absolute paths, ``..``, escaping
            # links) and avoids the DeprecationWarning that Python 3.12/3.13
            # emit for an unfiltered extractall(). Feature-detect instead of
            # version-checking because filters were backported to security
            # releases of older Python versions.
            if hasattr(tarfile, "data_filter"):
                archive.extractall(path=model_directory, filter="data")
            else:
                archive.extractall(path=model_directory)
        # NOTE(security): joblib.load unpickles the file and can therefore
        # execute arbitrary code; only point this at trusted model artifacts.
        preprocess = joblib.load(os.path.join(model_directory, "model.joblib"))

    features = preprocess.transform(df)
    print(f"Data shape after preprocessing: {features.shape}")
    return features