# in contentselection/oracle.py [0:0]
def preprocess_df(df):
    """Fill missing metric values with 0 and min-max scale the count columns.

    NaNs in ``comment_count``, ``view_count``, ``like_count``,
    ``channel_follower_count`` and ``duration_seconds`` are replaced with 0.
    ``comment_count``, ``view_count`` and ``like_count`` are then rescaled
    with a default-constructed ``MinMaxScaler`` fitted on this frame alone;
    the follower-count and duration columns are filled but left unscaled.

    Args:
        df: DataFrame containing the five columns listed above. It is
            modified in place.

    Returns:
        The same ``df`` object. A frame with no rows is returned after the
        NaN fill without scaling, since there is nothing to fit a scaler on.

    Raises:
        KeyError: If any of the expected columns is missing.
    """
    filled_columns = (
        'comment_count',
        'view_count',
        'like_count',
        'channel_follower_count',
        'duration_seconds',
    )
    scaled_columns = ['comment_count', 'view_count', 'like_count']

    # Missing metrics are treated as zero.
    for column in filled_columns:
        df[column] = df[column].fillna(0)

    # MinMaxScaler refuses to fit on zero samples, so an empty frame is
    # returned as-is instead of raising.
    if df.empty:
        return df

    # Normalize the count columns so they can be weighted fairly.
    scaler = MinMaxScaler()
    df[scaled_columns] = scaler.fit_transform(df[scaled_columns])
    return df