def process_video()

in rawdataset/ytdlps3/download_and_upload.py [0:0]


def process_video(video_id, s3_bucket, s3_path):
    """Download one YouTube video and upload its artifacts to S3.

    The video, its metadata and (when present) its subtitle file are
    downloaded into a private scratch directory, uploaded under
    ``s3_path`` in ``s3_bucket``, and the scratch directory is removed
    afterwards whether or not the run succeeded.

    Args:
        video_id: Identifier passed to ``download_youtube_video`` and used
            as the base name of every uploaded object.
        s3_bucket: Destination bucket, forwarded to ``upload_to_s3``.
        s3_path: Prefix under which the objects are stored.

    Returns:
        None. Any ``Exception`` raised while downloading or uploading is
        reported through ``log_failure`` instead of being re-raised.
    """
    # Function-scope imports: the file's import block is not visible here.
    import posixpath
    import shutil
    import tempfile

    scratch_dir = None
    try:
        # Keep the historical base location, but give every call its own
        # sub-directory so parallel runs cannot delete each other's files.
        base_dir = '/tmp/youtube_downloads'
        os.makedirs(base_dir, exist_ok=True)
        scratch_dir = tempfile.mkdtemp(dir=base_dir)

        # Download the video, subtitles (if available), and metadata
        _info_dict, subtitle_file_path = download_youtube_video(video_id, scratch_dir)

        # Local files the downloader is expected to have produced
        # NOTE(review): file names assume the downloader's output template
        # is '<video_id>.<ext>' -- confirm against download_youtube_video.
        video_file = os.path.join(scratch_dir, f'{video_id}.mp4')
        metadata_file = os.path.join(scratch_dir, f'{video_id}.info.json')

        # S3 keys always use '/', so build them with posixpath rather than
        # the platform-dependent os.path.
        if os.path.exists(video_file):
            upload_to_s3(video_file, s3_bucket, posixpath.join(s3_path, f'{video_id}.mp4'))
        if os.path.exists(metadata_file):
            upload_to_s3(metadata_file, s3_bucket, posixpath.join(s3_path, f'{video_id}.json'))
        if subtitle_file_path and os.path.exists(subtitle_file_path):
            upload_to_s3(subtitle_file_path, s3_bucket, posixpath.join(s3_path, f'{video_id}.en.vtt'))

    except Exception as e:
        error_message = str(e)
        log_failure(video_id, error_message, s3_bucket, s3_path)
    finally:
        # Always reclaim disk space, including after a failed download or
        # upload; a cleanup hiccup must not turn a successful run into a
        # logged failure, hence ignore_errors.
        if scratch_dir is not None:
            shutil.rmtree(scratch_dir, ignore_errors=True)