def build_metadata()

in ludwig/data/preprocessing.py [0:0]


def build_metadata(dataset_df, features, global_preprocessing_parameters):
    train_set_metadata = {}
    for feature in features:
        if 'preprocessing' in feature:
            preprocessing_parameters = merge_dict(
                global_preprocessing_parameters[feature[TYPE]],
                feature['preprocessing']
            )
        else:
            preprocessing_parameters = global_preprocessing_parameters[
                feature[TYPE]
            ]

        # deal with encoders that have fixed preprocessing
        if 'encoder' in feature:
            encoders_registry = get_from_registry(
                feature[TYPE],
                input_type_registry
            ).encoder_registry
            encoder_class = encoders_registry[feature['encoder']]
            if hasattr(encoder_class, 'fixed_preprocessing_parameters'):
                encoder_fpp = encoder_class.fixed_preprocessing_parameters

                preprocessing_parameters = merge_dict(
                    preprocessing_parameters,
                    resolve_pointers(encoder_fpp, feature, 'feature.')
                )

        handle_missing_values(
            dataset_df,
            feature,
            preprocessing_parameters
        )

        get_feature_meta = get_from_registry(
            feature[TYPE],
            base_type_registry
        ).get_feature_meta
        train_set_metadata[feature['name']] = get_feature_meta(
            dataset_df[feature['name']].astype(str),
            preprocessing_parameters
        )

    return train_set_metadata