virtual void Forward()

in src/operator/contrib/proposal.cc [262:411]


  /*!
   * \brief Forward pass: turn RPN class scores and box deltas into ROI proposals.
   *
   * Steps, as implemented below:
   *  1. enumerate every base anchor shifted over the feature map and attach its score
   *     (taken from the second half of the kClsProb channels);
   *  2. decode the anchors with the predicted deltas (utils::IoUTransformInv or
   *     utils::BBoxTransformInv, selected by param_.iou_loss) and run utils::FilterBox;
   *  3. order by score, keep the best rpn_pre_nms_top_n, and run non-maximum suppression;
   *  4. write the kept boxes and scores to the outputs. If fewer boxes are kept than
   *     there are output rows, the kept boxes are repeated cyclically to fill the rest.
   *
   * Only a batch size of 1 is supported (enforced by a CHECK).
   *
   * \param ctx operator context; supplies the stream and the requested temp workspace
   * \param in_data inputs, indexed by proposal::kClsProb, kBBoxPred and kImInfo
   * \param req write requests; req[proposal::kOut] must be kWriteTo
   * \param out_data outputs, indexed by proposal::kOut (column 0 is the batch index,
   *        columns 1..4 the box) and proposal::kScore
   * \param aux_states not used by this method
   */
  virtual void Forward(const OpContext &ctx,
                       const std::vector<TBlob> &in_data,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &out_data,
                       const std::vector<TBlob> &aux_states) {
    using namespace mshadow;
    using namespace mshadow::expr;
    CHECK_EQ(in_data.size(), 3);
    CHECK_EQ(out_data.size(), 2);
    CHECK_GT(req.size(), 1);
    CHECK_EQ(req[proposal::kOut], kWriteTo);
    CHECK_EQ(in_data[proposal::kClsProb].shape_[0], 1)
      << "Sorry, multiple images each device is not implemented.";

    Stream<xpu> *s = ctx.get_stream<xpu>();

    // kClsProb has 2 * num_anchors channels; only the second half is read here, so the
    // score view starts one half-tensor past the beginning of the data.
    Shape<4> scores_shape = Shape4(in_data[proposal::kClsProb].shape_[0],
                                   in_data[proposal::kClsProb].shape_[1] / 2,
                                   in_data[proposal::kClsProb].shape_[2],
                                   in_data[proposal::kClsProb].shape_[3]);
    real_t* foreground_score_ptr = in_data[proposal::kClsProb].dptr<real_t>()
                                    + scores_shape.Size();
    Tensor<cpu, 4> scores = Tensor<cpu, 4>(foreground_score_ptr, scores_shape);
    Tensor<cpu, 4> bbox_deltas = in_data[proposal::kBBoxPred].get<cpu, 4, real_t>(s);
    Tensor<cpu, 2> im_info = in_data[proposal::kImInfo].get<cpu, 2, real_t>(s);

    Tensor<cpu, 2> out = out_data[proposal::kOut].get<cpu, 2, real_t>(s);
    Tensor<cpu, 2> out_score = out_data[proposal::kScore].get<cpu, 2, real_t>(s);

    int num_anchors = in_data[proposal::kClsProb].shape_[1] / 2;
    int height = scores.size(2);
    int width = scores.size(3);
    int count = num_anchors * height * width;
    // A non-positive rpn_pre_nms_top_n means "keep everything"; both limits are clamped
    // so that post-NMS <= pre-NMS <= total number of anchors.
    int rpn_pre_nms_top_n = (param_.rpn_pre_nms_top_n > 0) ? param_.rpn_pre_nms_top_n : count;
    rpn_pre_nms_top_n = std::min(rpn_pre_nms_top_n, count);
    int rpn_post_nms_top_n = std::min(param_.rpn_post_nms_top_n, rpn_pre_nms_top_n);

    // One temp allocation carved into four consecutive regions:
    //   proposals (count x 5) | pre-NMS score/order (2 x count) |
    //   ordered proposals (pre_nms x 5) | NMS area/suppressed/keep (3 x pre_nms)
    int workspace_size = count * 5 + 2 * count + rpn_pre_nms_top_n * 5 + 3 * rpn_pre_nms_top_n;
    Tensor<cpu, 1> workspace = ctx.requested[proposal::kTempResource].get_space<cpu>(
      Shape1(workspace_size), s);
    int start = 0;
    Tensor<cpu, 2> workspace_proposals(workspace.dptr_ + start, Shape2(count, 5));
    start += count * 5;
    Tensor<cpu, 2> workspace_pre_nms(workspace.dptr_ + start, Shape2(2, count));
    start += 2 * count;
    Tensor<cpu, 2> workspace_ordered_proposals(workspace.dptr_ + start,
                                               Shape2(rpn_pre_nms_top_n, 5));
    start += rpn_pre_nms_top_n * 5;
    Tensor<cpu, 2> workspace_nms(workspace.dptr_ + start, Shape2(3, rpn_pre_nms_top_n));
    start += 3 * rpn_pre_nms_top_n;
    CHECK_EQ(workspace_size, start) << workspace_size << " " << start << std::endl;

    // Generate anchors
    std::vector<float> base_anchor(4);
    base_anchor[0] = 0.0;
    base_anchor[1] = 0.0;
    base_anchor[2] = param_.feature_stride - 1.0;
    base_anchor[3] = param_.feature_stride - 1.0;
    CHECK_EQ(num_anchors, param_.ratios.info.size() * param_.scales.info.size());
    std::vector<float> anchors;
    utils::GenerateAnchors(base_anchor,
                           param_.ratios.info,
                           param_.scales.info,
                           &anchors);
    // NOTE(review): this copy assumes GenerateAnchors emits rows laid out like
    // workspace_proposals (5 floats per anchor) and that real_t is float -- confirm.
    std::memcpy(workspace_proposals.dptr_, &anchors[0], sizeof(float) * anchors.size());

    // Enumerate all shifted anchors. The base anchors sit in rows [0, num_anchors); for
    // j == 0 and k == 0 the destination row equals the source row and the shift is zero,
    // so the base rows are left intact while they are still being read.
    for (index_t i = 0; i < num_anchors; ++i) {
      for (index_t j = 0; j < height; ++j) {
        for (index_t k = 0; k < width; ++k) {
          index_t index = j * (width * num_anchors) + k * (num_anchors) + i;
          workspace_proposals[index][0] = workspace_proposals[i][0] + k * param_.feature_stride;
          workspace_proposals[index][1] = workspace_proposals[i][1] + j * param_.feature_stride;
          workspace_proposals[index][2] = workspace_proposals[i][2] + k * param_.feature_stride;
          workspace_proposals[index][3] = workspace_proposals[i][3] + j * param_.feature_stride;
          workspace_proposals[index][4] = scores[0][i][j][k];
        }
      }
    }

    // prevent padded predictions
    // NOTE(review): im_info[0] is read as (height, width, scale) -- confirm with caller.
    int real_height = static_cast<int>(im_info[0][0] / param_.feature_stride);
    int real_width = static_cast<int>(im_info[0][1] / param_.feature_stride);
    CHECK_GE(height, real_height) << height << " " << real_height << std::endl;
    CHECK_GE(width, real_width) << width << " " << real_width << std::endl;

    // Decode in place: the proposals tensor is both the input and the output.
    if (param_.iou_loss) {
      utils::IoUTransformInv(workspace_proposals, bbox_deltas, im_info[0][0], im_info[0][1],
                             real_height, real_width, &(workspace_proposals));
    } else {
      utils::BBoxTransformInv(workspace_proposals, bbox_deltas, im_info[0][0], im_info[0][1],
                              real_height, real_width, &(workspace_proposals));
    }
    utils::FilterBox(&workspace_proposals, param_.rpn_min_size * im_info[0][2]);

    Tensor<cpu, 1> score = workspace_pre_nms[0];
    Tensor<cpu, 1> order = workspace_pre_nms[1];

    utils::CopyScore(workspace_proposals,
                     &score,
                     &order);
    utils::ReverseArgsort(score,
                          &order);
    utils::ReorderProposals(workspace_proposals,
                            order,
                            rpn_pre_nms_top_n,
                            &workspace_ordered_proposals);

    index_t out_size = 0;
    Tensor<cpu, 1> area = workspace_nms[0];
    Tensor<cpu, 1> suppressed = workspace_nms[1];
    Tensor<cpu, 1> keep = workspace_nms[2];
    // This function never initialises the temp workspace, so the suppression flags must
    // be cleared explicitly before NMS reads them.
    suppressed = 0;

    utils::NonMaximumSuppression(workspace_ordered_proposals,
                                 param_.threshold,
                                 rpn_post_nms_top_n,
                                 &area,
                                 &suppressed,
                                 &keep,
                                 &out_size);

    // Padding below indexes keep[i % out_size]; with out_size == 0 that is a remainder by
    // zero (undefined behaviour). Fail with a clear message instead, but only when there
    // are output rows that actually need to be filled.
    CHECK(out_size > 0 || (out.size(0) == 0 && out_score.size(0) == 0))
      << "Proposal: non-maximum suppression kept no boxes, cannot fill "
      << out.size(0) << " output rois";

    // fill in output rois
    for (index_t i = 0; i < out.size(0); ++i) {
      // batch index 0
      out[i][0] = 0;
      // Rows beyond the kept boxes are padded by cycling through the kept ones.
      const index_t slot = (i < out_size) ? i : i % out_size;
      const index_t index = keep[slot];
      for (index_t j = 0; j < 4; ++j) {
        out[i][j + 1] = workspace_ordered_proposals[index][j];
      }
    }

    // fill in output score
    for (index_t i = 0; i < out_score.size(0); i++) {
      // Same cyclic padding as for the rois above.
      const index_t slot = (i < out_size) ? i : i % out_size;
      const index_t index = keep[slot];
      out_score[i][0] = workspace_ordered_proposals[index][4];
    }
  }