in blogs/wind-turbine-engie/utils.py [0:0]
def clean_up_data(df):
    """Clean a raw turbine DataFrame and resample it onto a 10-minute grid.

    Steps, in order:

    1. Parse ``Date_time`` into UTC timestamps and keep only the first row
       for each distinct timestamp.
    2. Index the frame by those timestamps and drop the
       ``Wind_turbine_name``, ``Date_time`` and helper ``Timestamp`` columns.
    3. Drop columns and rows that are entirely NaN, then sort by time.
    4. Resample to 10-minute bins, forward-filling at most one consecutive
       missing bin.
    5. Drop the columns reported by
       ``locate_features_with_too_many_missing_values(resampled, 0.1)``.
    6. Replace the index with ``"%Y-%m-%dT%H:%M:%S.%f"`` formatted strings.

    The input frame is not modified; a new DataFrame is returned.

    Args:
        df: DataFrame containing at least the ``Date_time`` and
            ``Wind_turbine_name`` columns.

    Returns:
        The cleaned, resampled DataFrame with a string-formatted index.

    Raises:
        KeyError: If ``Date_time`` or ``Wind_turbine_name`` is missing.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame does not
    # silently gain a ``Timestamp`` column or lose its duplicate rows.
    # ``infer_datetime_format`` is omitted: it is deprecated in pandas 2.x.
    frame = df.assign(Timestamp=pd.to_datetime(df['Date_time'], utc=True))
    frame = frame.drop_duplicates(subset=['Timestamp'], keep='first')

    frame = frame.set_index(pd.DatetimeIndex(frame['Timestamp']))
    frame = frame.drop(columns=['Wind_turbine_name', 'Date_time', 'Timestamp'])

    # Remove fully-empty columns first, then fully-empty rows.
    frame = frame.dropna(axis=1, how='all')
    frame = frame.dropna(axis=0, how='all')
    frame = frame.sort_index()

    # '10min' is the non-deprecated spelling of the '10T' offset alias.
    # limit=1 bridges a single missing bin; longer gaps stay NaN.
    resampled = frame.resample('10min').ffill(limit=1)

    # NOTE(review): 0.1 is presumably the tolerated fraction of missing
    # values per column -- confirm against the helper's definition.
    bad_cols = locate_features_with_too_many_missing_values(resampled, 0.1)
    resampled = resampled.drop(columns=bad_cols)

    # The resampled index is already a DatetimeIndex, so format it directly.
    resampled.index = resampled.index.strftime("%Y-%m-%dT%H:%M:%S.%f")
    return resampled