in slowfast/datasets/ava_helper.py
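# Note: the function below relies on names defined at module level in ava_helper.py,
# e.g. `os`, `PathManager`, `AVA_VALID_FRAMES`, and `logger`.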
def load_boxes_and_labels(cfg, mode):
"""
Load boxes and labels from csv files.
Args:
cfg (CfgNode): config.
mode (str): 'train', 'val', or 'test' mode.
Returns:
all_boxes (dict): a dict which maps from `video_name` and
`frame_sec` to a list of `box`. Each `box` is a
[`box_coord`, `box_labels`] pair, where `box_coord` holds the
coordinates of the box and `box_labels` are the corresponding
labels for the box.
"""
gt_lists = cfg.AVA.TRAIN_GT_BOX_LISTS if mode == "train" else []
pred_lists = (
cfg.AVA.TRAIN_PREDICT_BOX_LISTS
if mode == "train"
else cfg.AVA.TEST_PREDICT_BOX_LISTS
)
ann_filenames = [
os.path.join(cfg.AVA.ANNOTATION_DIR, filename)
for filename in gt_lists + pred_lists
]
ann_is_gt_box = [True] * len(gt_lists) + [False] * len(pred_lists)
detect_thresh = cfg.AVA.DETECTION_SCORE_THRESH
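# While parsing, all_boxes maps video_name -> frame_sec -> box_key -> [box_coord, box_labels];
# the box_key level is flattened into a plain list after all files are read.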
all_boxes = {}
count = 0
unique_box_count = 0
for filename, is_gt_box in zip(ann_filenames, ann_is_gt_box):
with PathManager.open(filename, "r") as f:
for line in f:
row = line.strip().split(",")
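# Each row follows the AVA csv layout (the last column is a person id in the
# ground-truth lists and a detection score in the predicted-box lists):
# video_id, frame_timestamp, x1, y1, x2, y2, action_label, person_id / score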
# When predicted boxes are used for training/evaluation, skip
# boxes whose detection scores fall below the threshold.
if not is_gt_box:
score = float(row[7])
if score < detect_thresh:
continue
video_name, frame_sec = row[0], int(row[1])
# For validation, only keep frames with frame_sec % 4 == 0
# unless AVA.FULL_TEST_ON_VAL is set.
if (
mode == "val"
and not cfg.AVA.FULL_TEST_ON_VAL
and frame_sec % 4 != 0
):
continue
# Box in [x1, y1, x2, y2] format, with float coordinates normalized to [0, 1].
box_key = ",".join(row[2:6])
box = list(map(float, row[2:6]))
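# An empty action label marks a person box with no annotated action; -1 is used as a placeholder.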
label = -1 if row[6] == "" else int(row[6])
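# Pre-populate every valid AVA frame second for a new video so the frame look-up below never misses.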
if video_name not in all_boxes:
all_boxes[video_name] = {}
for sec in AVA_VALID_FRAMES:
all_boxes[video_name][sec] = {}
if box_key not in all_boxes[video_name][frame_sec]:
all_boxes[video_name][frame_sec][box_key] = [box, []]
unique_box_count += 1
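# The same box can appear on multiple csv rows, one per action label; accumulate all labels on one entry.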
all_boxes[video_name][frame_sec][box_key][1].append(label)
if label != -1:
count += 1
for video_name in all_boxes.keys():
for frame_sec in all_boxes[video_name].keys():
# Flatten each frame's boxes into a list of [box_coord, box_labels] pairs.
all_boxes[video_name][frame_sec] = list(
all_boxes[video_name][frame_sec].values()
)
logger.info(
"Finished loading annotations from: %s" % ", ".join(ann_filenames)
)
logger.info("Detection threshold: {}".format(detect_thresh))
logger.info("Number of unique boxes: %d" % unique_box_count)
logger.info("Number of annotations: %d" % count)
return all_boxes
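# Example usage (a minimal sketch; assumes `cfg` is a CfgNode already populated with
# the AVA options referenced above):
#
#   boxes = load_boxes_and_labels(cfg, mode="train")
#   for video_name, frames in boxes.items():
#       for frame_sec, entries in frames.items():
#           for box_coord, box_labels in entries:
#               # box_coord: [x1, y1, x2, y2] floats in [0, 1];
#               # box_labels: list of int action ids (-1 when no action is annotated).
#               pass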