scripts/generate_maskrcnn.py [596:718]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        yield root_dir, num_actions, panorama_images, camera_infos, is_finished


def run(args):
    assert args.image_width == args.image_height, f"Square video frames only (w={args.image_width} != h={args.image_height})"

    region_detector = MaskRCNNDetector(
        box_score_thresh=args.box_score_thresh,
        box_nms_thresh=args.box_nms_thresh,
        max_boxes_per_image=args.max_boxes_per_image,
        checkpoint_path=args.model_checkpoint
    )
    device = torch.device(f"cuda:{args.cuda_device}") if args.cuda_device != -1 else torch.device("cpu")

    region_detector.to(device)

    dataset = ALFREDImageDataset(args)

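    # shuffle stays off so frames arrive in trajectory order and the
    # is_finished flag of a trajectory's last frame triggers the "done" marker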
    loader = DataLoader(dataset, shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)

    start_time = datetime.now()

    for batch_idx, batch in enumerate(
            tqdm.tqdm(loader, desc=f"Generating MaskRCNN features for ALFRED {args.split_id}")):
        dirs, step_ids, pano_ids, images, sizes, camera_infos, is_finished = batch
        with torch.no_grad():
            # Mask R-CNN feature extraction for the current batch of frames
            images = images.to(device)
            detector_results = region_detector(images)

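            # regroup the per-image detector outputs with their output locations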
            paths_to_tensors = [
                (path, step_id, pano_ids[i], is_finished[i],
                 detector_results[i]["features"],
                 detector_results[i]["boxes"],
                 detector_results[i]["masks"],
                 detector_results[i]["scores"],
                 detector_results[i]["labels"]) for i, (path, step_id) in enumerate(zip(dirs, step_ids))
            ]

            for i, data in enumerate(paths_to_tensors):
                path, step_id, pano_id, done, box_features, boxes, masks, class_probs, class_labels = data
                features_path = os.path.join(path, args.features_folder)
                os.makedirs(features_path, exist_ok=True)
                output_file = os.path.join(features_path, f"{step_id.item()}-{pano_id.item()}.npz")

                # per-view cap on the number of detections kept for this panorama view
                num_boxes = args.panoramic_boxes[pano_id.item()]

                boxes = boxes.cpu().numpy()

                if boxes.shape[0] > 0:
                    # box centres in pixel coordinates: ((x1 + x2) / 2, (y1 + y2) / 2)
                    center_x = (boxes[:, 0] + boxes[:, 2]) // 2
                    center_y = (boxes[:, 1] + boxes[:, 3]) // 2

                    # map each box centre to its horizontal/vertical viewing angle
                    # within the panorama
                    h_angle, v_angle = calculate_angles(
                        center_x,
                        center_y,
                        camera_infos["h_view_angle"][i].item(),
                        camera_infos["v_view_angle"][i].item()
                    )

                    boxes_angles = np.stack([h_angle, v_angle], 1)
                else:
                    boxes_angles = np.zeros((0, 2))

                # keep at most num_boxes detections per view
                box_features = box_features[:num_boxes]
                boxes_angles = boxes_angles[:num_boxes]
                boxes = boxes[:num_boxes]
                masks = masks[:num_boxes]
                class_probs = class_probs[:num_boxes]
                class_labels = class_labels[:num_boxes]

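                # persist the region features and metadata for this (step, view) pair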
                np.savez_compressed(
                    output_file,
                    box_features=box_features.cpu().numpy(),
                    roi_angles=boxes_angles,
                    boxes=boxes,
                    masks=(masks > 0.5).cpu().numpy(),
                    class_probs=class_probs.cpu().numpy(),
                    class_labels=class_labels.cpu().numpy(),
                    num_objects=box_features.shape[0],
                    pano_id=pano_id
                )

                done = done.item()

                if done:
                    # marker file: all features for this trajectory have been generated
                    with open(os.path.join(features_path, "done"), mode="w") as out_file:
                        out_file.write(str(done))

    if len(dataset.failed) > 0:
        print(f"Trajectory execution failed for {len(dataset.failed)} trajectories:")
        for traj in dataset.failed:
            print(traj["path"])

    end_time = datetime.now()

    print(f"Total feature extraction time: {end_time - start_time}")


if __name__ == "__main__":
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--split_id', type=str, default="train",
                        help="Identifier of the split to extract features for")
    parser.add_argument('--features_folder', type=str, default="torch_maskrcnn")
    parser.add_argument('--model_checkpoint', type=str)
    parser.add_argument('--data_path', type=str, default="storage/data/alfred/json_feat_2.1.0")
    parser.add_argument('--splits', type=str, default="storage/data/alfred/splits/oct21.json")
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--reward_config', type=str, default='configs/rewards.json')
    parser.add_argument('--cuda_device', type=int, default=-1)
    parser.add_argument('--image_width', type=int, default=300)
    parser.add_argument('--image_height', type=int, default=300)

    ## Mask R-CNN detector parameters
    parser.add_argument('--box_score_thresh', type=float, default=0.05)
    parser.add_argument('--box_nms_thresh', type=float, default=0.5)
    parser.add_argument('--max_boxes_per_image', type=int, default=36)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



scripts/generate_maskrcnn_horizon0.py [594:716]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        yield root_dir, num_actions, panorama_images, camera_infos, is_finished


def run(args):
    assert args.image_width == args.image_height, f"Square video frames only (w={args.image_width} != h={args.image_height})"

    region_detector = MaskRCNNDetector(
        box_score_thresh=args.box_score_thresh,
        box_nms_thresh=args.box_nms_thresh,
        max_boxes_per_image=args.max_boxes_per_image,
        checkpoint_path=args.model_checkpoint
    )
    device = torch.device(f"cuda:{args.cuda_device}") if args.cuda_device != -1 else torch.device("cpu")

    region_detector.to(device)

    dataset = ALFREDImageDataset(args)

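    # shuffle stays off so frames arrive in trajectory order and the
    # is_finished flag of a trajectory's last frame triggers the "done" marker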
    loader = DataLoader(dataset, shuffle=False, batch_size=args.batch_size, num_workers=args.num_workers)

    start_time = datetime.now()

    for batch_idx, batch in enumerate(
            tqdm.tqdm(loader, desc=f"Generating MaskRCNN features for ALFRED {args.split_id}")):
        dirs, step_ids, pano_ids, images, sizes, camera_infos, is_finished = batch
        with torch.no_grad():
            # Mask R-CNN feature extraction for the current batch of frames
            images = images.to(device)
            detector_results = region_detector(images)

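            # regroup the per-image detector outputs with their output locations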
            paths_to_tensors = [
                (path, step_id, pano_ids[i], is_finished[i],
                 detector_results[i]["features"],
                 detector_results[i]["boxes"],
                 detector_results[i]["masks"],
                 detector_results[i]["scores"],
                 detector_results[i]["labels"]) for i, (path, step_id) in enumerate(zip(dirs, step_ids))
            ]

            for i, data in enumerate(paths_to_tensors):
                path, step_id, pano_id, done, box_features, boxes, masks, class_probs, class_labels = data
                features_path = os.path.join(path, args.features_folder)
                os.makedirs(features_path, exist_ok=True)
                output_file = os.path.join(features_path, f"{step_id.item()}-{pano_id.item()}.npz")

                # per-view cap on the number of detections kept for this panorama view
                num_boxes = args.panoramic_boxes[pano_id.item()]

                boxes = boxes.cpu().numpy()

                if boxes.shape[0] > 0:
                    # box centres in pixel coordinates: ((x1 + x2) / 2, (y1 + y2) / 2)
                    center_x = (boxes[:, 0] + boxes[:, 2]) // 2
                    center_y = (boxes[:, 1] + boxes[:, 3]) // 2

                    # map each box centre to its horizontal/vertical viewing angle
                    # within the panorama
                    h_angle, v_angle = calculate_angles(
                        center_x,
                        center_y,
                        camera_infos["h_view_angle"][i].item(),
                        camera_infos["v_view_angle"][i].item()
                    )

                    boxes_angles = np.stack([h_angle, v_angle], 1)
                else:
                    boxes_angles = np.zeros((0, 2))

                # keep at most num_boxes detections per view
                box_features = box_features[:num_boxes]
                boxes_angles = boxes_angles[:num_boxes]
                boxes = boxes[:num_boxes]
                masks = masks[:num_boxes]
                class_probs = class_probs[:num_boxes]
                class_labels = class_labels[:num_boxes]

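                # persist the region features and metadata for this (step, view) pair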
                np.savez_compressed(
                    output_file,
                    box_features=box_features.cpu().numpy(),
                    roi_angles=boxes_angles,
                    boxes=boxes,
                    masks=(masks > 0.5).cpu().numpy(),
                    class_probs=class_probs.cpu().numpy(),
                    class_labels=class_labels.cpu().numpy(),
                    num_objects=box_features.shape[0],
                    pano_id=pano_id
                )

                done = done.item()

                if done:
                    # marker file: all features for this trajectory have been generated
                    with open(os.path.join(features_path, "done"), mode="w") as out_file:
                        out_file.write(str(done))

    if len(dataset.failed) > 0:
        print(f"Trajectory execution failed for {len(dataset.failed)} trajectories:")
        for traj in dataset.failed:
            print(traj["path"])

    end_time = datetime.now()

    print(f"Total feature extraction time: {end_time - start_time}")


if __name__ == "__main__":
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--split_id', type=str, default="train",
                        help="Identifier of the split to extract features for")
    parser.add_argument('--features_folder', type=str, default="torch_maskrcnn")
    parser.add_argument('--model_checkpoint', type=str)
    parser.add_argument('--data_path', type=str, default="storage/data/alfred/json_feat_2.1.0")
    parser.add_argument('--splits', type=str, default="storage/data/alfred/splits/oct21.json")
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--reward_config', type=str, default='configs/rewards.json')
    parser.add_argument('--cuda_device', type=int, default=-1)
    parser.add_argument('--image_width', type=int, default=300)
    parser.add_argument('--image_height', type=int, default=300)

    ## Mask R-CNN detector parameters
    parser.add_argument('--box_score_thresh', type=float, default=0.05)
    parser.add_argument('--box_nms_thresh', type=float, default=0.5)
    parser.add_argument('--max_boxes_per_image', type=int, default=36)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
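
The .npz files written by both scripts can be read back with NumPy alone. A
minimal sketch, assuming the default --features_folder and an illustrative
step/view pair (0, 0); the keys match those passed to np.savez_compressed
above:

    import numpy as np

    data = np.load("torch_maskrcnn/0-0.npz")

    num_objects = int(data["num_objects"])  # number of regions kept for this view
    box_features = data["box_features"]     # (num_objects, feature_dim) region features
    boxes = data["boxes"]                   # (num_objects, 4) boxes as (x1, y1, x2, y2)
    roi_angles = data["roi_angles"]         # (num_objects, 2) horizontal/vertical view angles
    masks = data["masks"]                   # boolean instance masks, thresholded at 0.5
    class_probs = data["class_probs"]       # detector confidence per region
    class_labels = data["class_labels"]     # predicted class index per region
    pano_id = int(data["pano_id"])          # panorama view identifier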



