in engine/eval_detection.py [0:0]
def predict_and_save(opts,
                     input_tensor: Tensor,
                     model: nn.Module,
                     input_arr: Optional[np.ndarray] = None,
                     device: Optional = torch.device("cpu"),
                     mixed_precision_training: Optional[bool] = False,
                     is_validation: Optional[bool] = False,
                     file_name: Optional[str] = None,
                     output_stride: Optional[int] = 32,  # Default is 32 because ImageNet models have 5 downsampling stages (2^5 = 32)
                     orig_h: Optional[int] = None,
                     orig_w: Optional[int] = None
                     ):
    """Run the detector on one input and either return or visualize the result.

    The input is resized (if needed) so that its spatial size is a multiple of
    ``output_stride``, passed through ``model.predict`` and the predicted boxes
    are rescaled to the original image size and clipped to its bounds.

    Args:
        opts: Options object; only ``common.exp_loc`` is read (via ``getattr``)
            to build the output directory when ``file_name`` is given.
        input_tensor: 4-D tensor whose last two dimensions are height and width.
        model: Detector exposing ``predict(input_tensor, is_scaling=False)``
            that returns an object with ``boxes``, ``labels`` and ``scores``
            tensors.
        input_arr: Image on which detections are drawn in place. When omitted
            outside of validation it is derived from ``input_tensor`` with
            ``to_numpy``. Its first two dimensions are taken as
            (height, width) when ``orig_h``/``orig_w`` are not supplied.
        device: Device the input tensor is moved to before prediction.
        mixed_precision_training: Enables ``autocast`` around the prediction.
        is_validation: When true, return the predictions instead of drawing.
        file_name: Source file name. If given, the annotated image is written
            to ``<common.exp_loc>/detection_results/<stem>.jpg``.
        output_stride: The input height/width are rounded down to a multiple
            of this value before the forward pass.
        orig_h: Original image height. Must be passed together with ``orig_w``.
        orig_w: Original image width. Must be passed together with ``orig_h``.

    Returns:
        ``(boxes, labels, scores)`` as NumPy arrays when ``is_validation`` is
        true, otherwise ``None``.

    Raises:
        AssertionError: If only one of ``orig_h``/``orig_w`` is supplied.
        AttributeError: If neither is supplied and ``input_arr`` is ``None``
            (only possible in validation mode).
    """
    if input_arr is None and not is_validation:
        # Build a drawable image from the tensor and drop the batch dimension.
        input_arr = to_numpy(input_tensor).squeeze(0)

    curr_height, curr_width = input_tensor.shape[2:]

    # The spatial size must be a multiple of output_stride, otherwise the
    # forward pass hits dimension mismatches; round down and resize if needed.
    new_h = (curr_height // output_stride) * output_stride
    new_w = (curr_width // output_stride) * output_stride
    if new_h != curr_height or new_w != curr_width:
        input_tensor = F.interpolate(
            input=input_tensor,
            size=(new_h, new_w),
            mode="bilinear",
            align_corners=False,
        )

    # move data to device
    input_tensor = input_tensor.to(device)

    with autocast(enabled=mixed_precision_training):
        prediction: DetectionPredTuple = model.predict(input_tensor, is_scaling=False)

    # convert tensors to numpy arrays
    boxes = prediction.boxes.cpu().numpy()
    labels = prediction.labels.cpu().numpy()
    scores = prediction.scores.cpu().numpy()

    if orig_h is None or orig_w is None:
        # The original size has to be given completely or not at all; a
        # half-specified size is a caller error.
        assert orig_h is None and orig_w is None, \
            "orig_h and orig_w must be specified together"
        orig_h, orig_w = input_arr.shape[:2]

    # Even columns are scaled by the width and odd columns by the height
    # (presumably normalized x1, y1, x2, y2 coordinates), then clipped to the
    # image bounds.
    boxes[..., 0::2] = boxes[..., 0::2] * orig_w
    boxes[..., 1::2] = boxes[..., 1::2] * orig_h
    boxes[..., 0::2] = np.clip(a_min=0, a_max=orig_w, a=boxes[..., 0::2])
    boxes[..., 1::2] = np.clip(a_min=0, a_max=orig_h, a=boxes[..., 1::2])

    if is_validation:
        return boxes, labels, scores

    # `np.int` was removed in NumPy 1.24; the builtin `int` is what it aliased.
    boxes = boxes.astype(int)

    for label, score, coords in zip(labels, scores, boxes):
        r, g, b = COLOR_MAP[label]
        # Plain Python ints: some OpenCV builds may reject NumPy integer
        # scalars as point coordinates.
        c1 = (int(coords[0]), int(coords[1]))
        c2 = (int(coords[2]), int(coords[3]))

        # bounding box
        cv2.rectangle(input_arr, c1, c2, (r, g, b), thickness=RECT_BORDER_THICKNESS)

        # filled background for the label text, then the text itself
        label_text = '{label}: {score:.2f}'.format(label=object_names[label], score=score)
        t_size = cv2.getTextSize(label_text, FONT_SIZE, 1, TEXT_THICKNESS)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(input_arr, c1, c2, (r, g, b), -1)
        cv2.putText(input_arr, label_text, (c1[0], c1[1] + t_size[1] + 4), FONT_SIZE, 1, LABEL_COLOR, TEXT_THICKNESS)

    if file_name is not None:
        # Strip only the final extension so that names containing dots
        # (e.g. "frame.001.png") do not collapse onto the same output file.
        stem = os.path.splitext(os.path.basename(file_name))[0]
        file_name = stem + ".jpg"
        res_dir = "{}/detection_results".format(getattr(opts, "common.exp_loc", None))
        os.makedirs(res_dir, exist_ok=True)
        res_fname = "{}/{}".format(res_dir, file_name)
        cv2.imwrite(res_fname, input_arr)
        logger.log("Detection results stored at: {}".format(res_fname))