in rawdataset/ytdlps3/download_and_upload.py [0:0]
def process_video(video_id, s3_bucket, s3_path):
    """Download one YouTube video and upload its artifacts to S3.

    The video is downloaded into a fixed scratch directory, then the video
    file, the metadata file and the subtitle file are each uploaded under
    ``s3_path`` if they exist on disk. The scratch directory is emptied
    afterwards whether or not the download/uploads succeeded.

    Any exception raised along the way is not propagated; it is reported
    through ``log_failure`` instead.

    Args:
        video_id: Identifier passed to ``download_youtube_video`` and used
            to build the local file names and the S3 object keys.
        s3_bucket: Bucket passed to ``upload_to_s3`` and ``log_failure``.
        s3_path: Key prefix under which the artifacts are uploaded.
    """
    download_path = '/tmp/youtube_downloads'
    try:
        os.makedirs(download_path, exist_ok=True)
        try:
            # Download the video, subtitles (if available), and metadata.
            # Only the subtitle path from the returned pair is needed here.
            _, subtitle_file_path = download_youtube_video(video_id, download_path)

            # Local artifact paths. NOTE(review): assumes the downloader
            # writes files with exactly these names -- confirm against
            # download_youtube_video.
            video_file = os.path.join(download_path, f'{video_id}.mp4')
            metadata_file = os.path.join(download_path, f'{video_id}.info.json')

            # Upload each file to the specified S3 path if it exists.
            if os.path.exists(video_file):
                upload_to_s3(video_file, s3_bucket, os.path.join(s3_path, f'{video_id}.mp4'))
            if os.path.exists(metadata_file):
                upload_to_s3(metadata_file, s3_bucket, os.path.join(s3_path, f'{video_id}.json'))
            if subtitle_file_path and os.path.exists(subtitle_file_path):
                upload_to_s3(subtitle_file_path, s3_bucket, os.path.join(s3_path, f'{video_id}.en.vtt'))
        finally:
            # Cleanup runs even when the download or an upload failed, so
            # partial files from a failed video do not pile up on disk.
            # This removes every entry in the scratch directory, not only
            # the files belonging to this video_id.
            for file_name in os.listdir(download_path):
                os.remove(os.path.join(download_path, file_name))
    except Exception as e:
        error_message = str(e)
        log_failure(video_id, error_message, s3_bucket, s3_path)