in contentselection/oracle.py [0:0]
def main_algorithm(df, taxonomy_file, target_hours=4500):
    """Run the content-selection pipeline and return the selected videos.

    The pipeline runs these steps in order: ``preprocess_df``,
    ``compute_user_activity``, loading the taxonomy JSON,
    ``map_to_parent_categories`` and finally ``select_videos``.

    Args:
        df: Input table of videos, handed to ``preprocess_df``.
            NOTE(review): presumably a pandas DataFrame -- confirm against
            the caller.
        taxonomy_file: Path of the JSON file holding the category taxonomy.
        target_hours: Forwarded unchanged to ``select_videos`` as its
            ``target_hours`` keyword argument.

    Returns:
        Whatever ``select_videos`` returns. It must support ``len()`` and
        expose a ``'duration_seconds'`` column with ``.sum()``, because both
        are used for the summary printed below.

    Raises:
        OSError: If ``taxonomy_file`` cannot be opened.
        json.JSONDecodeError: If the taxonomy file is not valid JSON.
    """
    df = preprocess_df(df)
    df = compute_user_activity(df)

    # Load taxonomy from JSON file. The encoding is pinned to UTF-8 (the
    # standard JSON encoding) so the result does not depend on the platform's
    # locale default.
    with open(taxonomy_file, 'r', encoding='utf-8') as taxonomy_fp:
        taxonomy = json.load(taxonomy_fp)

    # Map inferred categories to their parent categories
    df = map_to_parent_categories(df, taxonomy)

    # Select videos based on updated criteria
    selected_videos = select_videos(df, target_hours=target_hours)

    print(f"Total selected videos: {len(selected_videos)}")
    print(f"Total duration (seconds): {selected_videos['duration_seconds'].sum()}")
    return selected_videos