void RepeatDataToFrom()

in src/core/tensor/tensor.cc [646:718]


void RepeatDataToFrom(bool broadcast_flag, const vector<size_t> &repeats,
                      int axis, Tensor *dst, const Tensor &src,
                      const size_t num) {
  if (repeats.size() == 1) {
    broadcast_flag = true;
  } else if (repeats.size() > 1) {
    if (axis == Noaxis) {
      LOG(FATAL) << "When repeats parameter is sequence, axis cannot be None";
    }
  }
  for (size_t i = 0; i < repeats.size(); i++) {
    CHECK_GE(repeats[i], 0);
  }
  auto width = SizeOf(src.data_type());
  CHECK_EQ(width, SizeOf(dst->data_type()));
  // size_t nBytes = num * width;
  int chunk = width;
  int axis_shape = 1;
  int shape_outer = 1;
  if (axis == Noaxis) {
    axis_shape = 1;
    shape_outer = Product(src.shape());
  } else {
    for (int i = 0; i < axis; i++) {
      shape_outer *= src.shape()[i];
    }
    axis_shape = src.shape()[axis];
    for (int i = axis + 1; i < static_cast<int>(src.nDim()); i++) {
      chunk *= src.shape()[i];
    }
  }

  Device *dev = nullptr;
  CopyDirection direct;
  std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
  if (dst_dev->lang() != src_dev->lang()) {
    // let the none cpp device conduct copy op
    if (dst_dev->lang() == kCpp) {
      dev = src_dev.get();
      direct = kDeviceToHost;
    } else if (src_dev->lang() == kCpp) {
      dev = dst_dev.get();
      direct = kHostToDevice;
    } else {
      LOG(FATAL)
          << "Not support mem repeat copy between Cuda and OpenCL device";
    }
  } else {
    dev = src_dev.get();
    direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
  }

  int dst_offset = 0;
  int src_offset = 0;
  Tensor &dstRef = *dst;
  for (int i = 0; i < shape_outer; i++) {
    for (int j = 0; j < axis_shape; j++) {
      int temp = broadcast_flag ? repeats[0] : repeats[j];
      for (int k = 0; k < temp; k++) {
        dev->Exec(
            [dev, dstRef, src, chunk, direct, dst_offset,
             src_offset](Context *ctx) mutable {
              Block *from = src.block(), *to = dstRef.block();
              dev->CopyDataToFrom(to, from, chunk, direct, dst_offset,
                                  src_offset, ctx);
            },
            {src.block()}, {dst->block()}, "CopyDataToFrom");
        dst_offset += chunk;
      }
      src_offset += chunk;
    }
  }
}