libraries/python/coco/process_single_image_output.py (185 lines of code) (raw):
#!/usr/bin/env python
##############################################################################
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
##############################################################################
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
import pickle
import cv2
import numpy as np
import pycocotools.mask as mask_util
# Command-line interface. The four string options are all required;
# --rle-encode is an optional boolean flag (defaults to False).
parser = argparse.ArgumentParser(description="Process the single image output")
for _flag, _help in (
    ("--blob-names", "Comma separated blob names. "),
    (
        "--blob-files",
        "Comma separated blob files. The order is expected to be "
        "the same as the blob names.",
    ),
    (
        "--im-info",
        "The file for image info. Used to get the height,width of the image",
    ),
    ("--output-file", "The output file of the processed predictions."),
):
    parser.add_argument(_flag, type=str, required=True, help=_help)
parser.add_argument(
    "--rle-encode", action="store_true", help="Whether to use rle encode."
)
class ProcessSingleImageOutput(object):
    """Convert per-image blob text dumps (class/score/box/mask outputs of a
    detection network) into COCO-style results and pickle them to disk.

    Expected blobs per image: "class_nms", "score_nms", "bbox_nms",
    "mask_fcn_probs" (see process()).
    """

    def __init__(self, args):
        # args: parsed argparse.Namespace with attributes blob_names,
        # blob_files, im_info, output_file and rle_encode.
        self.args = args

    def getData(self, filename):
        """Parse one blob dump file into a list of numpy arrays.

        The file is a sequence of two-line records:
          line 1: comma separated integer dims (the target shape)
          line 2: comma separated float entries (may be empty, yielding an
                  empty array reshaped to the given dims)

        Returns a list with one numpy array per record.
        """
        result = []
        with open(filename, "r") as f:
            line = f.readline()
            while line != "":
                content_list = []
                dims_list = [int(dim.strip()) for dim in line.strip().split(",")]
                line = f.readline().strip()
                if len(line) > 0:
                    content_list = [float(entry.strip()) for entry in line.split(",")]
                line = f.readline()
                dims = np.asarray(dims_list)
                content = np.asarray(content_list)
                data = np.reshape(content, dims)
                result.append(data)
        return result

    def getBlobs(self):
        """Load every blob file and regroup the arrays per image.

        Returns a list (one dict per image) mapping blob name -> numpy
        array. All blob files must contain the same number of records.
        """
        blob_names = self.args.blob_names.split(",")
        blob_files = self.args.blob_files.split(",")
        blobs = {}
        assert len(blob_names) == len(blob_files)
        for i in range(len(blob_names)):
            blobs[blob_names[i]] = self.getData(blob_files[i])
        # Restructure from {name: [per-image arrays]} into a per-image list
        # of {name: array} dicts. All blobs must have the same entry count.
        num = len(blobs[blob_names[0]])
        blob_array = []
        for i in range(num):
            one_entry = {}
            for name in blob_names:
                assert (
                    len(blobs[name]) == num
                ), "Different entries have different numbers"
                one_entry[name] = blobs[name][i]
            blob_array.append(one_entry)
        return blob_array

    def expand_boxes(self, boxes, scale):
        """Expand an array of boxes by a given scale.

        boxes: (N, 4) [x1, y1, x2, y2] boxes, which are scaled about their
            centers, or (N, 5) boxes whose columns 2:4 are scaled in place
            (presumably [ctr_x, ctr_y, w, h, angle] rotated boxes -- TODO
            confirm against the producer of these blobs).
        scale: multiplicative expansion factor.

        Returns a new array; the input is never modified.
        Raises Exception for any other box width.
        """
        box_dim = boxes.shape[1]
        if box_dim == 4:
            w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
            h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
            x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
            y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
            w_half *= scale
            h_half *= scale
            boxes_exp = np.zeros(boxes.shape)
            boxes_exp[:, 0] = x_c - w_half
            boxes_exp[:, 2] = x_c + w_half
            boxes_exp[:, 1] = y_c - h_half
            boxes_exp[:, 3] = y_c + h_half
        elif box_dim == 5:
            boxes_exp = boxes.copy()
            boxes_exp[:, 2:4] *= scale
        else:
            raise Exception("Unsupported box dimension: {}".format(box_dim))
        return boxes_exp

    def compute_segm_results(
        self,
        masks,
        ref_boxes,
        classids,
        im_h,
        im_w,
        thresh_binarize=0.5,
        rle_encode=True,
    ):
        """Paste per-box class masks into full-image binary masks.

        masks: (#boxes, #classes, mask_dim, mask_dim) mask probabilities
        ref_boxes: (#boxes, 4), where each row is [x1, y1, x2, y2]
        classids: (#boxes, ) class index used to select each box's mask
        im_h, im_w: output image height/width in pixels
        thresh_binarize: probability threshold for binarizing the mask
        rle_encode: when True, return COCO RLE dicts instead of raw masks

        ret: list of im_masks, [im_mask, ...] or [im_mask_rle, ...]
        """
        assert len(masks.shape) == 4
        assert masks.shape[2] == masks.shape[3]
        assert masks.shape[0] == ref_boxes.shape[0]
        assert ref_boxes.shape[1] == 4
        assert len(classids) == masks.shape[0]
        all_segms = []
        # To work around an issue with cv2.resize (it seems to automatically pad
        # with repeated border values), we manually zero-pad the masks by 1 pixel
        # prior to resizing back to the original image resolution. This prevents
        # "top hat" artifacts. We therefore need to expand the reference boxes by an
        # appropriate factor.
        M = masks.shape[2]
        scale = (M + 2.0) / M
        ref_boxes = self.expand_boxes(ref_boxes, scale)
        ref_boxes = ref_boxes.astype(np.int32)
        # Reused buffer; only the interior is rewritten each iteration, so
        # the 1-pixel zero border is preserved across boxes.
        padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)
        for mask_ind in range(masks.shape[0]):
            cur_cls = int(classids[mask_ind])
            padded_mask[1:-1, 1:-1] = masks[mask_ind, cur_cls, :, :]
            ref_box = ref_boxes[mask_ind, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)
            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > thresh_binarize, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
            # Clip the pasted region to the image bounds.
            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)
            im_mask[y_0:y_1, x_0:x_1] = mask[
                (y_0 - ref_box[1]) : (y_1 - ref_box[1]),
                (x_0 - ref_box[0]) : (x_1 - ref_box[0]),
            ]
            ret = im_mask
            if rle_encode:
                # Get RLE encoding used by the COCO evaluation API
                rle = mask_util.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[
                    0
                ]
                ret = rle
            all_segms.append(ret)
        return all_segms

    def process(self):
        """Run the full pipeline: load image infos and blobs, build per-image
        results (classids, scored boxes, raw masks, pasted image masks) and
        pickle the result list to args.output_file.
        """
        im_infos = []
        # im_info file: one JSON object per line with at least "height"/"width".
        with open(self.args.im_info, "r") as f:
            lines = f.readlines()
            for line in lines:
                im_info = json.loads(line)
                im_infos.append(im_info)
        blobs = self.getBlobs()
        assert len(im_infos) == len(
            blobs
        ), "The number for im_infos and blobs do not match"
        results = []
        for i in range(len(blobs)):
            one_blob = blobs[i]
            im_info = im_infos[i]
            classids = one_blob["class_nms"]
            scores = one_blob["score_nms"]  # bbox scores, (R, )
            boxes = one_blob["bbox_nms"]  # i.e., boxes, (R, 4*1)
            masks = one_blob["mask_fcn_probs"]  # (R, cls, mask_dim, mask_dim)
            R = boxes.shape[0]
            im_masks = []
            if R > 0:
                im_masks = self.compute_segm_results(
                    masks,
                    boxes,
                    classids,
                    im_info["height"],
                    im_info["width"],
                    rle_encode=self.args.rle_encode,
                )
            boxes = np.column_stack((boxes, scores))
            ret = {
                "classids": classids,
                "boxes": boxes,
                "masks": masks,
                "im_masks": im_masks,
            }
            results.append(ret)
        # pickle requires a binary stream; opening in text mode ("w") raises
        # TypeError on Python 3, so open with "wb".
        with open(self.args.output_file, "wb") as f:
            pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)
if __name__ == "__main__":
    # CLI entry point: parse the flags and run the full pipeline.
    ProcessSingleImageOutput(parser.parse_args()).process()