def predict_and_save()

in engine/eval_detection.py


import os
from typing import Optional

import cv2
import numpy as np
import torch
from torch import Tensor, nn
from torch.cuda.amp import autocast
from torch.nn import functional as F

# to_numpy, DetectionPredTuple, logger, COLOR_MAP, RECT_BORDER_THICKNESS,
# FONT_SIZE, TEXT_THICKNESS, LABEL_COLOR, and object_names are assumed to be
# imported from elsewhere in the repository


def predict_and_save(opts,
                     input_tensor: Tensor,
                     model: nn.Module,
                     input_arr: Optional[np.ndarray] = None,
                     device: Optional[torch.device] = torch.device("cpu"),
                     mixed_precision_training: Optional[bool] = False,
                     is_validation: Optional[bool] = False,
                     file_name: Optional[str] = None,
                     output_stride: Optional[int] = 32,  # default is 32 because ImageNet models have 5 downsampling stages (2^5 = 32)
                     orig_h: Optional[int] = None,
                     orig_w: Optional[int] = None
                     ):
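    """Run a detection model on a preprocessed image tensor and either return
    the predictions (validation mode) or draw them on input_arr and save the
    visualization to disk.

    Boxes from model.predict are normalized (x1, y1, x2, y2) corner
    coordinates; they are rescaled to the original image size below.
    """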

    if input_arr is None and not is_validation:
        input_arr = (
            to_numpy(input_tensor) # convert to numpy
            .squeeze(0) # remove batch dimension
        )
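
    # input_arr serves as the drawing canvas below and, when orig_h/orig_w are
    # not given, as the source of the original spatial dimensions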

    curr_height, curr_width = input_tensor.shape[2:]

    # the spatial dimensions must be multiples of output_stride; otherwise the
    # forward pass can fail with dimension mismatch errors, so round them down
    # to the nearest multiples
    new_h = (curr_height // output_stride) * output_stride
    new_w = (curr_width // output_stride) * output_stride
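    # e.g., with output_stride=32, a 1080x1920 input rounds down to 1056x1920
    # (1080 // 32 = 33 and 33 * 32 = 1056; 1920 is already a multiple of 32)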

    if new_h != curr_height or new_w != curr_width:
        # resize the input image, so that we do not get dimension mismatch errors in the forward pass
        input_tensor = F.interpolate(input=input_tensor, size=(new_h, new_w), mode="bilinear", align_corners=False)

    # move data to device
    input_tensor = input_tensor.to(device)

    with autocast(enabled=mixed_precision_training):
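        # under autocast, eligible ops run in reduced precision when
        # mixed_precision_training is True, saving memory and time on GPUs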
        # prediction
        prediction: DetectionPredTuple = model.predict(input_tensor, is_scaling=False)

    # move the predicted tensors to the CPU and convert them to numpy arrays
    boxes = prediction.boxes.cpu().numpy()
    labels = prediction.labels.cpu().numpy()
    scores = prediction.scores.cpu().numpy()

    if orig_h is None or orig_w is None:
        # the original dimensions must be supplied together; otherwise fall
        # back to the spatial dimensions of the input array
        assert orig_h is None and orig_w is None, \
            "supply both orig_h and orig_w, or neither"
        orig_h, orig_w = input_arr.shape[:2]

    assert orig_h is not None and orig_w is not None
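    # boxes hold normalized (x1, y1, x2, y2) corners: even indices are
    # x-coordinates (scaled by width), odd indices are y-coordinates (scaled by height)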
    boxes[..., 0::2] = boxes[..., 0::2] * orig_w
    boxes[..., 1::2] = boxes[..., 1::2] * orig_h
    boxes[..., 0::2] = np.clip(boxes[..., 0::2], a_min=0, a_max=orig_w)
    boxes[..., 1::2] = np.clip(boxes[..., 1::2], a_min=0, a_max=orig_h)

    if is_validation:
        # in validation mode, return the raw detections for metric computation
        # instead of drawing and saving a visualization
        return boxes, labels, scores

    # np.int was removed in NumPy 1.24; use an explicit integer dtype
    boxes = boxes.astype(np.int32)

    for label, score, coords in zip(labels, scores, boxes):
        r, g, b = COLOR_MAP[label]
        # top-left and bottom-right corners of the detection box
        c1 = (coords[0], coords[1])
        c2 = (coords[2], coords[3])

        cv2.rectangle(input_arr, c1, c2, (r, g, b), thickness=RECT_BORDER_THICKNESS)

        # draw a filled rectangle behind the "<class>: <score>" text so the label stays legible
        label_text = '{label}: {score:.2f}'.format(label=object_names[label], score=score)
        t_size = cv2.getTextSize(label_text, FONT_SIZE, 1, TEXT_THICKNESS)[0]
        c2 = (c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4)
        cv2.rectangle(input_arr, c1, c2, (r, g, b), -1)
        cv2.putText(input_arr, label_text, (c1[0], c1[1] + t_size[1] + 4), FONT_SIZE, 1, LABEL_COLOR, TEXT_THICKNESS)

    if file_name is not None:
        # save the visualization as <exp_loc>/detection_results/<stem>.jpg
        file_name = file_name.split(os.sep)[-1].split(".")[0] + ".jpg"
        res_dir = "{}/detection_results".format(getattr(opts, "common.exp_loc", None))
        os.makedirs(res_dir, exist_ok=True)  # exist_ok=True makes a separate isdir check unnecessary
        res_fname = "{}/{}".format(res_dir, file_name)
        cv2.imwrite(res_fname, input_arr)
        logger.log("Detection results stored at: {}".format(res_fname))
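
Usage sketch (a hypothetical call site; the option object, model, and
preprocessing shown here are assumptions, not part of this module):

    import cv2
    import torch

    # img is an RGB uint8 array of shape (H, W, 3); the normalization below is
    # illustrative and must match whatever the model was trained with
    img = cv2.cvtColor(cv2.imread("sample.png"), cv2.COLOR_BGR2RGB)
    input_tensor = torch.from_numpy(img).permute(2, 0, 1).float().div(255.0).unsqueeze(0)

    predict_and_save(
        opts=opts,                      # parsed experiment options (assumed)
        input_tensor=input_tensor,
        model=model,                    # detection model exposing .predict() (assumed)
        input_arr=img,                  # drawing canvas
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        file_name="sample.png",
        orig_h=img.shape[0],
        orig_w=img.shape[1],
    )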