def preprocess_df()

in contentselection/oracle.py [0:0]


def preprocess_df(df):
    """Fill missing engagement metrics and min-max scale the count columns.

    The DataFrame is modified in place and the same object is returned.

    Args:
        df: pandas DataFrame containing the columns ``comment_count``,
            ``view_count``, ``like_count``, ``channel_follower_count`` and
            ``duration_seconds``.

    Returns:
        The same ``df``. NaNs in the five columns above are replaced by 0,
        and ``comment_count``, ``view_count`` and ``like_count`` are rescaled
        with a default ``MinMaxScaler`` (each column mapped onto [0, 1]).
        ``channel_follower_count`` and ``duration_seconds`` are filled but
        kept on their original scale.

    Raises:
        KeyError: If any of the expected columns is missing.
    """
    fill_columns = (
        'comment_count',
        'view_count',
        'like_count',
        'channel_follower_count',
        'duration_seconds',
    )
    scaled_columns = ['comment_count', 'view_count', 'like_count']

    # Treat a missing metric as "no activity" rather than dropping the row.
    for column in fill_columns:
        df[column] = df[column].fillna(0)

    # MinMaxScaler refuses to fit on zero samples (ValueError); an empty frame
    # has nothing to normalize, so hand it back as-is instead of crashing.
    if df.empty:
        return df

    # Normalize the count columns so no single metric dominates the weighting.
    scaler = MinMaxScaler()
    df[scaled_columns] = scaler.fit_transform(df[scaled_columns])

    return df