def main_algorithm()

in contentselection/oracle.py [0:0]


def main_algorithm(df, taxonomy_file, target_hours=4500):
    """Run the full video-selection pipeline and return the selected videos.

    The steps, in order: ``preprocess_df``, ``compute_user_activity``,
    loading the taxonomy JSON, ``map_to_parent_categories`` and finally
    ``select_videos``. A short summary (count and total duration) is printed
    to stdout before returning.

    Args:
        df: Input table handed to ``preprocess_df``.
            NOTE(review): presumably a pandas DataFrame -- confirm with caller.
        taxonomy_file: Path to a JSON file containing the category taxonomy.
        target_hours: Forwarded unchanged to ``select_videos`` as its
            ``target_hours`` keyword argument.

    Returns:
        Whatever ``select_videos`` returns; it must support ``len()`` and
        expose a ``'duration_seconds'`` column with a ``.sum()`` method.

    Raises:
        OSError: If ``taxonomy_file`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    df = preprocess_df(df)
    df = compute_user_activity(df)

    # Load taxonomy from JSON file. The encoding is pinned to UTF-8 (the
    # standard JSON interchange encoding) so that non-ASCII category names
    # decode identically on every platform instead of depending on the locale.
    with open(taxonomy_file, encoding="utf-8") as file:
        taxonomy = json.load(file)

    # Map inferred categories to their parent categories
    df = map_to_parent_categories(df, taxonomy)

    # Select videos based on updated criteria
    selected_videos = select_videos(df, target_hours=target_hours)

    print(f"Total selected videos: {len(selected_videos)}")
    print(f"Total duration (seconds): {selected_videos['duration_seconds'].sum()}")

    return selected_videos