in slowfast/visualization/ava_demo_precomputed_boxes.py
def draw_video(self):
"""
Draw predicted and ground-truth (if provided) results on the video/folder of images.
Write the visualized result to a video output file.
"""
all_boxes = merge_pred_gt_boxes(self.pred_boxes, self.gt_boxes)
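# `all_boxes` maps each keyframe index to a list of
# [is_ground_truth, boxes, labels] entries for that keyframe.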
common_classes = (
self.cfg.DEMO.COMMON_CLASS_NAMES
if len(self.cfg.DEMO.LABEL_FILE_PATH) != 0
else None
)
video_vis = VideoVisualizer(
num_classes=self.cfg.MODEL.NUM_CLASSES,
class_names_path=self.cfg.DEMO.LABEL_FILE_PATH,
top_k=self.cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
thres=self.cfg.DEMO.COMMON_CLASS_THRES,
lower_thres=self.cfg.DEMO.UNCOMMON_CLASS_THRES,
common_class_names=common_classes,
colormap=self.cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
mode=self.cfg.DEMO.VIS_MODE,
)
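# An assumption about the visualizer's thresholding, hedged: boxes for
# common classes are kept above `COMMON_CLASS_THRES`, while all other
# classes use the lower `UNCOMMON_CLASS_THRES`.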
all_keys = sorted(all_boxes.keys())
# Draw boxes on the frames within 1/10 of the sequence length on either
# side of the keyframe (2/10 of the sequence in total). This window is
# chosen heuristically.
draw_range = [
self.seq_length // 2 - self.seq_length // 10,
self.seq_length // 2 + self.seq_length // 10,
]
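# e.g. with seq_length = 64: [32 - 6, 32 + 6] = [26, 38], i.e. boxes
# are drawn on the 13 frames centered on the keyframe.
# `draw_range_repeat` below extends this range to cover the frames that
# are duplicated `no_frames_repeat` times by the first draw pass.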
draw_range_repeat = [
draw_range[0],
(draw_range[1] - draw_range[0]) * self.no_frames_repeat
+ draw_range[0],
]
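# e.g. with seq_length = 64 and no_frames_repeat = 4:
# [26, (38 - 26) * 4 + 26] = [26, 74].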
prev_buffer = []
prev_end_idx = 0
logger.info("Start Visualization...")
for keyframe_idx in tqdm.tqdm(all_keys):
pred_gt_boxes = all_boxes[keyframe_idx]
# Find the starting index of the clip. If it would fall before the
# beginning of the video, clamp it to index 0.
start_idx = max(0, keyframe_idx - self.seq_length // 2)
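# e.g. with seq_length = 64: keyframe_idx = 100 -> start_idx = 68;
# keyframe_idx = 10 -> start_idx = max(0, -22) = 0.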
# Number of frames between the end of the previous clip and the
# start of the current clip.
dist = start_idx - prev_end_idx
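# e.g. prev_end_idx = 80, start_idx = 100 -> dist = 20 unannotated
# frames lie between the clips; start_idx = 70 -> dist = -10, i.e.
# the two clips share 10 frames.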
# If the clips do not overlap, there may be unwritten frames in between.
if dist >= 0:
# Get the frames in between previous clip and current clip.
frames = self._get_frame_range(prev_end_idx, dist)
# The buffer holds fully drawn frames (in RGB) from the previous
# clip; flush them to the output first, converting back to BGR
# for OpenCV.
for frame in prev_buffer:
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
self.display(frame)
# Write the in-between frames without any visualization, since
# they have no corresponding keyframe. They are read in BGR, so
# no color conversion is needed.
for frame in frames:
self.display(frame)
prev_buffer = []
num_new_frames = self.seq_length
# Otherwise, the current clip overlaps the previous one by
# `-dist` frames.
else:
# Flush the buffered frames that precede the overlap; the last
# `-dist` frames stay in the buffer and become part of the
# current clip.
for frame in prev_buffer[:dist]:
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
self.display(frame)
prev_buffer = prev_buffer[dist:]
num_new_frames = self.seq_length + dist
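# e.g. seq_length = 64, dist = -10: the 10 overlapping frames are
# already buffered, so only 54 new frames are fetched.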
# Fetch the new frames for the current clip from the input video,
# then convert them from OpenCV's BGR to RGB for the visualizer.
new_frames = self._get_frame_range(
max(start_idx, prev_end_idx), num_new_frames
)
new_frames = [
cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in new_frames
]
clip = prev_buffer + new_frames
# Calculate the end of this clip. This will be `prev_end_idx` for the
# next iteration.
prev_end_idx = max(start_idx, prev_end_idx) + len(new_frames)
# Draw each set of predicted or ground-truth boxes on the clip.
# The first set repeats the drawn frames `no_frames_repeat` times;
# later sets draw on the already-extended clip, so they use the
# extended range with no further repetition.
for i, boxes in enumerate(pred_gt_boxes):
if i == 0:
repeat = self.no_frames_repeat
current_draw_range = draw_range
else:
repeat = 1
current_draw_range = draw_range_repeat
# Clamp the draw range so it does not run past the end of the
# clip. Work on a copy so the shared range lists are not
# mutated across iterations.
current_draw_range = [
current_draw_range[0],
min(current_draw_range[1], len(clip) - 1),
]
# Each entry is [is_ground_truth, boxes, labels].
ground_truth, bboxes, label = boxes
# Draw the boxes over the chosen frame range.
clip = video_vis.draw_clip_range(
clip,
label,
bboxes=torch.Tensor(bboxes),
ground_truth=ground_truth,
draw_range=current_draw_range,
repeat_frame=repeat,
)
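# Note: with repeat_frame > 1, `draw_clip_range` is expected to
# return a longer clip (the drawn range duplicated), which is why
# later box sets use the extended `draw_range_repeat`.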
# Keep the drawn clip as the buffer; the next clip may overlap it.
prev_buffer = clip
# Write the remaining buffered frames to the output file.
for frame in prev_buffer:
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
self.display(frame)
# If we still have some remaining frames in the input file,
# write those to the output file as well.
if prev_end_idx < self.total_frames:
dist = self.total_frames - prev_end_idx
remaining_clip = self._get_frame_range(prev_end_idx, dist)
for frame in remaining_clip:
self.display(frame)
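
# Usage sketch (an assumption for illustration: in upstream PySlowFast
# this method lives on `AVAVisualizerWithPrecomputedBox` and is driven
# by the demo entry point; `cfg` is the loaded SlowFast config):
#
#   demo = AVAVisualizerWithPrecomputedBox(cfg)
#   demo.draw_video()  # read the input, write the visualized output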