in inference/infer_video_d2.py [0:0]
def _to_detectron1_format(instances):
    """Convert one frame's predictor output to Detectron1-style lists.

    Args:
        instances: The ``'instances'`` entry of the predictor output for a
            single frame, already moved to the CPU.

    Returns:
        A ``(cls_boxes, cls_keyps)`` pair. Each is a two-element list whose
        first entry is always an empty list (presumably the unused
        background-class slot of the Detectron1 layout -- confirm against
        the consumer) and whose second entry holds the detections:

        * boxes: the predicted box tensor with the score appended as an
          extra last column;
        * keypoints: an array indexed as (instance, channel, keypoint) with
          channels ``x, y, logit, prob``; the logit channel is zero-filled
          because it is not read from the predictor output here.

        Both second entries are empty lists when the frame has no boxes.
    """
    bbox_tensor = []
    kps = []

    if instances.has('pred_boxes'):
        raw_boxes = instances.pred_boxes.tensor.numpy()
        if len(raw_boxes) > 0:
            scores = instances.scores.numpy()[:, None]
            bbox_tensor = np.concatenate((raw_boxes, scores), axis=1)

            raw_kps = instances.pred_keypoints.numpy()
            kps_xy = raw_kps[:, :, :2]
            kps_prob = raw_kps[:, :, 2:3]
            kps_logit = np.zeros_like(kps_prob)  # Dummy
            kps = np.concatenate((kps_xy, kps_logit, kps_prob), axis=2)
            # Move the channel axis in front of the keypoint axis.
            kps = kps.transpose(0, 2, 1)

    # Mimic Detectron1 format
    return [[], bbox_tensor], [[], kps]


def main(args):
    """Run the keypoint predictor on each input video and save the results.

    For every video, one compressed ``.npz`` archive is written to
    ``args.output_dir`` (named after the video file) containing the arrays
    ``boxes``, ``segments``, ``keypoints`` and ``metadata``. Videos from
    which no frame can be read are reported and skipped.

    Args:
        args: Parsed options. The attributes read are ``cfg`` (model-zoo
            config name), ``im_or_folder`` (a video file, or a directory
            searched for ``*.<image_ext>``), ``image_ext`` and
            ``output_dir``.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(args.cfg))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(args.cfg)
    predictor = DefaultPredictor(cfg)

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(
            os.path.join(args.im_or_folder, '*.' + args.image_ext)
        )
    else:
        im_list = [args.im_or_folder]

    for video_name in im_list:
        out_name = os.path.join(
            args.output_dir, os.path.basename(video_name)
        )
        print('Processing {}'.format(video_name))

        boxes = []
        segments = []
        keypoints = []
        # Tracked per video so that an empty video can neither crash on an
        # unbound frame variable nor inherit the previous video's size.
        frame_shape = None

        for frame_i, im in enumerate(read_video(video_name)):
            t = time.time()
            outputs = predictor(im)['instances'].to('cpu')
            print('Frame {} processed in {:.3f}s'.format(
                frame_i, time.time() - t))

            frame_shape = im.shape
            cls_boxes, cls_keyps = _to_detectron1_format(outputs)

            boxes.append(cls_boxes)
            segments.append(None)
            keypoints.append(cls_keyps)

        if frame_shape is None:
            print('No frames could be read from {}; skipping'.format(
                video_name))
            continue

        # Video resolution
        metadata = {
            'w': frame_shape[1],
            'h': frame_shape[0],
        }

        # The per-frame entries are ragged ([[], ndarray] pairs with a
        # varying number of detections). NumPy >= 1.24 refuses to build
        # such arrays implicitly, so request object arrays explicitly.
        np.savez_compressed(
            out_name,
            boxes=np.array(boxes, dtype=object),
            segments=np.array(segments, dtype=object),
            keypoints=np.array(keypoints, dtype=object),
            metadata=metadata,
        )