Tensor NestedTensor_add_Tensor()

in nestedtensor/csrc/BinaryOps.cpp [12:115]


// Computes `self_ + alpha * other_` for the case where `self_` and/or `other_`
// may be a NestedTensor.
//
// The function tries a sequence of fast paths that operate directly on the
// packed buffers and falls back to an element-wise map over the nested
// structure when none applies:
//   1. Both operands are nested and their efficient nested sizes match: the
//      two buffers are flattened and added in a single at::add call.
//   2. (WITH_CUDA only) `self_` is a 4-dim nested Half tensor and `other_` is a
//      dense Half tensor of shape (1, C, 1, 1): a dedicated CUDA kernel is
//      launched. The kernel takes no alpha, so this path requires alpha == 1.
//   3. `self_` is nested with a regular last dimension and `other_` is a 1-dim
//      dense tensor of that length: the buffer is reshaped to (-1, len) and
//      the add is broadcast over rows.
//   4. Otherwise: `_expand_other_as` followed by `map_nested_tensor`.
//
// @param self_  left-hand operand.
// @param other_ right-hand operand.
// @param alpha  multiplier applied to `other_`; honoured on every path.
// @return the sum; fast paths return a NestedTensor wrapping the result buffer
//         with the nested size/stride of `self`.
Tensor NestedTensor_add_Tensor(
    const Tensor& self_,
    const Tensor& other_,
    const Scalar& alpha) {
  Tensor self = self_;
  Tensor other = other_;
  if (is_nested_tensor_impl(self) && is_nested_tensor_impl(other)) {
    EfficientSizeNode self_efficient_nested_size =
        get_efficient_nested_size(self);
    EfficientSizeNode other_efficient_nested_size =
        get_efficient_nested_size(other);
    if (efficient_size_matches(
            self_efficient_nested_size, other_efficient_nested_size)) {
      // Both operands already share the ChannelsLast layout, so their buffers
      // can be added as flat views without any re-layout.
      if (get_is_contiguous(self, c10::MemoryFormat::ChannelsLast) &&
          get_is_contiguous(other, c10::MemoryFormat::ChannelsLast)) {
        return wrap_buffer(
            at::add(
                get_buffer(self).view({-1}),
                get_buffer(other).view({-1}),
                alpha),
            self_efficient_nested_size,
            get_efficient_nested_stride(self));
      }
      // Otherwise bring both operands to the default contiguous layout first.
      if (!get_is_contiguous(self)) {
        self = NestedTensor_contiguous(self);
      }
      if (!get_is_contiguous(other)) {
        other = NestedTensor_contiguous(other);
      }
      return wrap_buffer(
          at::add(
              get_buffer(self).reshape({-1}),
              get_buffer(other).reshape({-1}),
              alpha),
          self_efficient_nested_size,
          get_efficient_nested_stride(self));
    }
  }
  if (is_nested_tensor_impl(self) && !is_nested_tensor_impl(other)) {
    self = NestedTensor_contiguous(self);
    int64_t self_dim = get_dim(self);
    auto self_opt_sizes = get_opt_sizes(self);
#ifdef WITH_CUDA
    // The fused kernel receives no alpha, so it is only valid for alpha == 1;
    // any other alpha is handled by the paths below.
    const bool alpha_is_one = !alpha.isComplex() && alpha.toDouble() == 1.0;
    // Raw data pointers are handed to a CUDA kernel launcher below, so both
    // buffers must live on a CUDA device in addition to the shape/dtype checks.
    if (alpha_is_one &&
        self_dim == 4 && other.dim() == 4 &&
        self_opt_sizes[0] &&
        self_opt_sizes[1] &&
        (*self_opt_sizes[1]) == other.size(1) &&
        other.size(0) == 1 &&
        other.size(2) == 1 &&
        other.size(3) == 1 &&
        self.dtype() == c10::ScalarType::Half &&
        other.dtype() == c10::ScalarType::Half &&
        get_buffer(self).is_cuda() &&
        other.is_cuda()) {
      other = other.contiguous();
      at::Tensor self_buffer = get_buffer(self);
      Tensor nt_sizes_ =
          get_efficient_nested_size(self).sizes().to(torch::kInt32);
      // Validate the metadata rank before narrowing along dimension 1.
      TORCH_CHECK(
          nt_sizes_.dim() == 2,
          "NestedTensor metadata of unexpected dimension.");
      // Product of size columns 1 and 2 for every row of the size metadata
      // (presumably height * width of each entry -- TODO confirm).
      Tensor nt_sizes_1 = at::native::narrow(nt_sizes_, 1, 1, 1);
      Tensor nt_sizes_2 = at::native::narrow(nt_sizes_, 1, 2, 1);
      Tensor nt_sizes_all = nt_sizes_1 * nt_sizes_2;
      const int64_t num_entries = nt_sizes_all.size(0);
      const int64_t num_channels = *self_opt_sizes[1];
      // Repeat each per-entry product once per channel; the cumulative sum of
      // this list (with a leading 0) is what the kernel receives as offsets.
      std::vector<int> numbers;
      numbers.reserve(static_cast<size_t>(num_entries * num_channels));
      for (int64_t i = 0; i < num_entries; i++) {
        // item() is comparatively expensive: read it once per entry instead of
        // once per channel.
        const int entry_numel = nt_sizes_all[i].item<int>();
        numbers.insert(
            numbers.end(), static_cast<size_t>(num_channels), entry_numel);
      }
      at::Tensor numbers_t = torch::tensor(numbers).to(torch::kInt32);
      Tensor nt_sizes_cumsum =
          at::cumsum(numbers_t, 0).to(torch::kInt32).reshape({-1});
      Tensor nt_sizes =
          at::cat({torch::tensor({0}, torch::kInt32), nt_sizes_cumsum});
      nt_sizes = nt_sizes.to(torch::kCUDA);
      at::cuda::CUDAStream defaultStream = at::cuda::getDefaultCUDAStream();
      at::Tensor result_buffer = self_buffer.clone();

      c10::Half* self_ptr = self_buffer.data_ptr<c10::Half>();
      c10::Half* other_ptr = other.data_ptr<c10::Half>();
      c10::Half* result_ptr = result_buffer.data_ptr<c10::Half>();
      nested_tensor::cuda::add_scalar_kernelLauncher(
          self_ptr,
          other_ptr,
          result_ptr,
          static_cast<int>(*self_opt_sizes[0] * *self_opt_sizes[1]),
          static_cast<int>(*self_opt_sizes[0]),
          nt_sizes.data_ptr<int>(),
          defaultStream);
      return wrap_buffer(
          std::move(result_buffer),
          get_efficient_nested_size(self),
          get_efficient_nested_stride(self));
    }
#endif
    // Regular last dimension matching a 1-dim `other`: view the contiguous
    // buffer as rows of that length and let at::add broadcast over the rows.
    if (self_opt_sizes[self_dim - 1] && other.dim() == 1 &&
        (*(self_opt_sizes[self_dim - 1])) == other.size(0)) {
      Tensor self_buffer = get_buffer(self);
      Tensor result_buffer =
          at::add(self_buffer.reshape({-1, other.size(0)}), other, alpha)
              .reshape({-1});
      return wrap_buffer(
          std::move(result_buffer),
          get_efficient_nested_size(self),
          get_efficient_nested_stride(self));
    }
  }
  // Generic fallback: start again from the original arguments (discarding any
  // contiguous copies made above) and add the constituents pairwise.
  std::tie(self, other) = _expand_other_as(self_, other_);
  return map_nested_tensor(
      [&alpha](Tensor s, Tensor o) {
      return at::add(s, o, alpha); },
      self,
      other);
}