in scripts/interpret.py [0:0]
def prep_ablation(df):
    """Turn a DataFrame into a feature matrix and labels for prediction.

    The frame is serialized to JSON and parsed back into a list of per-row
    dicts. Numerical, categorical and textual features are pulled out of
    those records using the feature-name lists stored in the training
    assets, and each group is run through the matching transformer from
    the same assets. The three transformed groups are then joined
    column-wise into a single array.

    Args:
        df: DataFrame holding the rows to prepare. It is also passed to
            ``preprocess.get_feature_names`` to determine the label name.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is the result of
        ``np.concatenate(..., axis=1)`` over the transformed numerical,
        categorical and textual groups; ``labels`` is whatever
        ``preprocess.extract_labels`` returns for the label column.
    """
    # Round-trip through JSON so every row becomes a plain dict.
    records = json.loads(df.to_json(orient="records"))

    # Model assets (feature-name lists and transformers) from the training job.
    assets = train.get_train_assets()

    raw_numerical, raw_categorical, raw_textual = preprocess.extract_features(
        records,
        assets['numerical_feature_names'],
        assets['categorical_feature_names'],
        assets['textual_feature_names'],
    )

    # Only the fourth returned value, the label name, is used here.
    _, _, _, label_name = preprocess.get_feature_names(df)
    labels = preprocess.extract_labels(records, label_name)

    # Apply each transformer to its own feature group.
    numerical = assets['numerical_transformer'].transform(raw_numerical)
    categorical = assets['categorical_transformer'].transform(raw_categorical)
    textual = assets['textual_transformer'].transform(raw_textual)

    # NOTE(review): .toarray() presumes the categorical transformer returns
    # a sparse matrix -- confirm against the training code.
    categorical_dense = categorical.toarray()

    # Collapse every trailing dimension so the textual block has one row
    # per sample and can be concatenated along axis 1.
    textual_array = np.array(textual)
    textual_flat = textual_array.reshape(textual_array.shape[0], -1)

    features = np.concatenate(
        [numerical, categorical_dense, textual_flat],
        axis=1,
    )
    return features, labels