video_processing/extract_frames.py (51 lines of code) (raw):
import pandas as pd
import pathlib
from argparse import ArgumentParser
from tqdm import tqdm
from modules import get_key_frames
def parse_args():
    """Parse the command-line options of the frame-extraction script."""
    parser = ArgumentParser()
    parser.add_argument(
        "--path",
        type=str,
        required=True,
        help="Root directory that the `file` column entries are joined onto.",
    )
    # NOTE(review): --frames-path is required but never read; frames are
    # written to a `frames` directory derived from each video's location
    # (see main()). Kept so existing command lines keep working -- confirm
    # whether it was meant to override the output directory.
    parser.add_argument(
        "--frames-path",
        type=str,
        required=True,
        help="Currently unused; kept for command-line compatibility.",
    )
    parser.add_argument(
        "--parquet-path",
        type=str,
        required=True,
        help="Input parquet file; must contain a `file` column.",
    )
    parser.add_argument(
        "--parquet-out-path",
        type=str,
        required=True,
        help="Where the parquet with the added `frames` column is written.",
    )
    return parser.parse_args()


def pick_frames(key_frames):
    """Select the first, middle and last key frame of a video.

    Args:
        key_frames: Sequence of key frames (anything supporting ``len`` and
            indexing).

    Returns:
        A list of ``(slot, frame)`` pairs where slot 0 is the first frame,
        slot 1 the middle frame and slot 2 the last frame. One frame yields
        only slot 0, two frames yield slots 0 and 2, and an empty sequence
        yields an empty list instead of raising ``IndexError``. ``None``
        entries are skipped.
    """
    count = len(key_frames)
    if count == 0:
        return []
    slots = [key_frames[0], None, None]
    if count > 2:
        slots[1] = key_frames[count // 2]
    if count >= 2:
        slots[2] = key_frames[-1]
    # Slot numbers stay fixed (0/1/2) even when a slot is empty so that file
    # names keep the same meaning regardless of how many key frames exist.
    return [(slot, frame) for slot, frame in enumerate(slots) if frame is not None]


def build_frames_df(frames_column, index):
    """Build a one-column ``frames`` DataFrame labelled with ``index``.

    Args:
        frames_column: One list of frame file names per input row, in row
            order.
        index: The index of the DataFrame the column belongs to.

    Returns:
        A DataFrame with a single object column ``frames`` whose rows carry
        the labels of ``index``.

    Raises:
        ValueError: If ``frames_column`` and ``index`` differ in length.
    """
    # dtype=object keeps each per-row list as a single cell, and the column
    # exists even when there are no rows at all.
    column = pd.Series(list(frames_column), index=index, dtype=object)
    return pd.DataFrame({"frames": column})


def main():
    """Extract up to three key frames per video and store their file names.

    Reads the input parquet, saves the first/middle/last key frame of every
    video listed in its `file` column as JPEG files, and writes a copy of the
    table with an added `frames` column to the output parquet.
    """
    args = parse_args()
    path = pathlib.Path(args.path)
    parquet_path = pathlib.Path(args.parquet_path)
    parquet_out_path = pathlib.Path(args.parquet_out_path)

    df = pd.read_parquet(parquet_path)
    if "frames" in df.columns:
        # Nothing to do: the table was already processed.
        print("`frames` already found.")
        return

    frames_column = []
    with tqdm(total=len(df)) as pbar:
        for file_name in df["file"]:
            video = path.joinpath(file_name)
            # Frames go to a `frames` directory that is a sibling of the
            # directory containing the video.
            frames_dir = video.parent.with_name("frames")
            frames_dir.mkdir(parents=True, exist_ok=True)
            pbar.set_description(video.name)
            key_frames = get_key_frames(video)
            pbar.set_postfix_str(f"{len(key_frames)} key frames")
            frames = []
            for slot, frame in pick_frames(key_frames):
                frame_path = frames_dir.joinpath(f"{video.stem}_{slot}.jpg")
                # Do not rewrite frames left over from an earlier run.
                if not frame_path.exists():
                    frame.save(frame_path)
                frames.append(frame_path.name)
            # Exactly one entry per row (possibly empty) keeps the column
            # aligned with the input table.
            frames_column.append(frames)
            pbar.update(1)

    frames_df = build_frames_df(frames_column, df.index)
    print(frames_df)
    # Attach positionally: the values are already in row order, so this is
    # correct even when the parquet index is not 0..n-1 or has duplicates.
    df = df.assign(frames=frames_df["frames"].to_numpy())
    print(df)
    df.to_parquet(parquet_out_path)


if __name__ == "__main__":
    main()