def handle_missing_values()

in ludwig/data/preprocessing.py [0:0]


def handle_missing_values(dataset_df, feature, preprocessing_parameters):
    missing_value_strategy = preprocessing_parameters['missing_value_strategy']

    if missing_value_strategy == FILL_WITH_CONST:
        dataset_df[feature['name']] = dataset_df[feature['name']].fillna(
            preprocessing_parameters['fill_value'],
        )
    elif missing_value_strategy == FILL_WITH_MODE:
        dataset_df[feature['name']] = dataset_df[feature['name']].fillna(
            dataset_df[feature['name']].value_counts().index[0],
        )
    elif missing_value_strategy == FILL_WITH_MEAN:
        if feature[TYPE] != NUMERICAL:
            raise ValueError(
                'Filling missing values with mean is supported '
                'only for numerical types',
            )
        dataset_df[feature['name']] = dataset_df[feature['name']].fillna(
            dataset_df[feature['name']].mean(),
        )
    elif missing_value_strategy in ['backfill', 'bfill', 'pad', 'ffill']:
        dataset_df[feature['name']] = dataset_df[feature['name']].fillna(
            method=missing_value_strategy,
        )
    elif missing_value_strategy == DROP_ROW:
        dataset_df.dropna(subset=[feature['name']], inplace=True)
    else:
        raise ValueError('Invalid missing value strategy')