in ludwig/data/preprocessing.py [0:0]
def build_metadata(dataset_df, features, global_preprocessing_parameters):
    """Compute the training-set metadata entry for every feature.

    For each feature the effective preprocessing parameters are resolved,
    missing values are handled through ``handle_missing_values``, and the
    feature type's ``get_feature_meta`` is invoked on the feature's column
    cast to ``str``.

    :param dataset_df: dataset indexed by feature name; each selected
        column must support ``.astype(str)`` (presumably a pandas
        DataFrame -- confirm against callers).
    :param features: iterable of feature definitions. Each one is indexed
        with ``TYPE`` and ``'name'`` and may additionally carry
        ``'preprocessing'`` and ``'encoder'`` entries.
    :param global_preprocessing_parameters: mapping from feature type to
        the default preprocessing parameters for that type.
    :return: dict mapping each feature's name to the value returned by
        its type's ``get_feature_meta``.
    """
    metadata = {}
    for feature in features:
        params = _feature_preprocessing_parameters(
            feature,
            global_preprocessing_parameters
        )

        # Missing values are handled before the metadata is computed, so
        # get_feature_meta sees the column after that step.
        handle_missing_values(dataset_df, feature, params)

        compute_meta = get_from_registry(
            feature[TYPE],
            base_type_registry
        ).get_feature_meta
        column_as_str = dataset_df[feature['name']].astype(str)
        metadata[feature['name']] = compute_meta(column_as_str, params)
    return metadata


def _feature_preprocessing_parameters(feature,
                                      global_preprocessing_parameters):
    """Resolve the preprocessing parameters that apply to one feature."""
    # Start from the defaults registered for the feature's type and layer
    # the feature's own 'preprocessing' section on top when it has one.
    params = global_preprocessing_parameters[feature[TYPE]]
    if 'preprocessing' in feature:
        params = merge_dict(params, feature['preprocessing'])

    if 'encoder' not in feature:
        return params

    # deal with encoders that have fixed preprocessing
    encoder_class = get_from_registry(
        feature[TYPE],
        input_type_registry
    ).encoder_registry[feature['encoder']]
    if not hasattr(encoder_class, 'fixed_preprocessing_parameters'):
        return params

    fixed_params = encoder_class.fixed_preprocessing_parameters
    # The encoder's fixed parameters are merged last, after
    # resolve_pointers has processed them against this feature with the
    # 'feature.' prefix.
    return merge_dict(
        params,
        resolve_pointers(fixed_params, feature, 'feature.')
    )