video_processing/extract_frames.py (51 lines of code) (raw):

"""Save up to three key frames (first, middle, last) for every video in a parquet index.

The input parquet must contain a ``file`` column holding video paths relative
to ``--path``. For each video the selected key frames are written as JPEG
files, and the list of written file names is stored in a new ``frames`` column
of the parquet written to ``--parquet-out-path``.
"""
import pathlib
from argparse import ArgumentParser

import pandas as pd
from tqdm import tqdm

from modules import get_key_frames


def _pick_key_frames(key_frames):
    """Select the first, middle and last key frames.

    Args:
        key_frames: Sized, indexable sequence of frames as returned by
            ``get_key_frames``.

    Returns:
        A list of ``(slot, frame)`` pairs. Slots are fixed (0 = first,
        1 = middle, 2 = last), so a video with exactly two key frames yields
        slots 0 and 2, and a video with none yields an empty list.
    """
    count = len(key_frames)
    if count == 0:
        # Nothing to pick; the caller records an empty list for this video.
        return []
    picked = [(0, key_frames[0])]
    if count > 2:
        picked.append((1, key_frames[count // 2]))
    if count >= 2:
        picked.append((2, key_frames[-1]))
    return picked


def main():
    """Parse CLI arguments, extract frames for every row and write the output parquet."""
    parser = ArgumentParser(
        description="Extract first/middle/last key frames for each video in a parquet index."
    )
    parser.add_argument("--path", type=str, required=True)
    # NOTE(review): --frames-path is accepted (and required) but not used below;
    # frames are written to a "frames" directory derived from each video's
    # location. Confirm which behaviour is intended before changing either.
    parser.add_argument("--frames-path", type=str, required=True)
    parser.add_argument("--parquet-path", type=str, required=True)
    parser.add_argument("--parquet-out-path", type=str, required=True)
    args = parser.parse_args()

    videos_root = pathlib.Path(args.path)
    parquet_path = pathlib.Path(args.parquet_path)
    parquet_out_path = pathlib.Path(args.parquet_out_path)

    df = pd.read_parquet(parquet_path)

    if "frames" in df.columns:
        print("`frames` already found.")
        return

    frame_names = []
    # Give tqdm a total and advance it per video so the bar reflects progress.
    with tqdm(total=len(df)) as pbar:
        for file_name in df["file"]:
            video = videos_root.joinpath(file_name)
            # Sibling of the video's parent directory, named "frames".
            frames_dir = video.parent.with_name("frames")
            frames_dir.mkdir(parents=True, exist_ok=True)

            pbar.set_description(video.name)
            key_frames = get_key_frames(video)
            pbar.set_postfix_str(f"{len(key_frames)} key frames")

            saved = []
            for slot, frame in _pick_key_frames(key_frames):
                frame_path = frames_dir.joinpath(f"{video.stem}_{slot}.jpg")
                # Skip frames already on disk so re-runs do not rewrite them.
                if not frame_path.exists():
                    frame.save(frame_path)
                saved.append(frame_path.name)

            # Always append, even when empty, so rows stay aligned with df.
            frame_names.append(saved)
            pbar.update(1)

    frames_df = pd.DataFrame({"frames": frame_names}, index=df.index)
    print(frames_df)

    # Assign by position rather than joining on index labels: the input index
    # is not guaranteed to be a unique 0..n-1 range.
    df["frames"] = frames_df["frames"].to_numpy()
    print(df)

    df.to_parquet(parquet_out_path)


if __name__ == "__main__":
    main()