def clean_up_data()

in blogs/wind-turbine-engie/utils.py [0:0]


def clean_up_data(df):
    """Return a cleaned, regularly sampled copy of a raw turbine DataFrame.

    The steps are, in order:

    1. Parse the ``Date_time`` column into timezone-aware (UTC) timestamps.
    2. Keep only the first row for each distinct timestamp.
    3. Index the frame by timestamp and drop the ``Wind_turbine_name``,
       ``Date_time`` and helper ``Timestamp`` columns.
    4. Drop columns, then rows, that contain only missing values.
    5. Sort chronologically and resample on a 10-minute grid,
       forward-filling at most one consecutive empty slot.
    6. Drop the columns reported by
       ``locate_features_with_too_many_missing_values(frame, 0.1)``.
    7. Replace the index with ``"%Y-%m-%dT%H:%M:%S.%f"`` formatted strings.

    The input ``df`` is not modified.

    Args:
        df: DataFrame that must contain ``Date_time`` and
            ``Wind_turbine_name`` columns.

    Returns:
        A new DataFrame indexed by timestamp strings.

    Raises:
        KeyError: If ``Date_time`` or ``Wind_turbine_name`` is missing.
    """
    # `infer_datetime_format` is deprecated since pandas 2.0 (strict format
    # inference is the default there), so it is no longer passed.
    timestamps = pd.to_datetime(df['Date_time'], utc=True)

    # assign() builds a new frame, so the caller's DataFrame never receives
    # the helper column and never loses rows as a side effect.
    cleaned = (
        df.assign(Timestamp=timestamps)
        .drop_duplicates(subset=['Timestamp'], keep='first')
        .set_index('Timestamp')
        .drop(columns=['Wind_turbine_name', 'Date_time'])
    )

    # Remove entirely empty columns first, then entirely empty rows.
    cleaned = cleaned.dropna(axis=1, how='all').dropna(axis=0, how='all')
    cleaned = cleaned.sort_index()

    # '10min' is the non-deprecated spelling of the former '10T' alias.
    resampled = cleaned.resample('10min').ffill(limit=1)

    # NOTE(review): 0.1 is presumably the tolerated fraction of missing
    # values per column -- confirm against the helper's definition.
    bad_cols = locate_features_with_too_many_missing_values(resampled, 0.1)
    resampled = resampled.drop(columns=bad_cols)

    # The resampled index is already a DatetimeIndex; format it directly.
    resampled.index = resampled.index.strftime("%Y-%m-%dT%H:%M:%S.%f")
    return resampled