velox::FlatVectorPtr flatVectorFromPyList()

in csrc/velox/lib.cpp [36:93]


velox::FlatVectorPtr<T> flatVectorFromPyList(const py::list& data) {
  // Converts a Python list into a velox::FlatVector<T>.
  //
  // Every non-None element is converted with pybind11's cast<>() and written
  // to the values buffer; every None element is flagged in the nulls buffer
  // and counted, so the null count can be set explicitly on the result.
  //
  // NOTE(review): T is not declared inside this definition; presumably it is
  // the template parameter introduced immediately above it -- confirm.

  // TODO
  // Consider using the pattern used in arrayVectorFromPyList for creating the
  // underlying FlatVector, which creates an empty vector using
  // BaseVector::create() and then calls set() and setNullCount() to let the
  // library handle internal buffer allocation, call the appropriate API for
  // specific types, and keep internal data in sync with each other

  // Read the memory pool and the list length exactly once. Converting an
  // element can execute arbitrary Python code (e.g. __bool__ / __index__),
  // which could resize the list; a single snapshot keeps the buffer sizes, the
  // loop bound and the resulting vector length in agreement. If the list
  // shrinks in the meantime, data[i] raises through pybind11 rather than the
  // loop silently stopping early or writing past the buffers.
  auto* const pool = TorchArrowGlobalStatic::rootMemoryPool();
  const py::size_t numRows = data.size();

  velox::BufferPtr dataBuffer =
      velox::AlignedBuffer::allocate<T>(numRows, pool);
  velox::BufferPtr nullBuffer =
      velox::AlignedBuffer::allocate<bool>(numRows, pool);

  T* rawData = dataBuffer->asMutable<T>();
  uint64_t* rawNulls = nullBuffer->asMutable<uint64_t>();
  // For non-string types, stringArena is merely a lightweight proxy for
  // creating an empty std::vector<BufferPtr> to be passed to construct the
  // FlatVector
  velox::StringViewBufferHolder stringArena(pool);
  velox::vector_size_t nullCount = 0;

  for (py::size_t i = 0; i < numRows; ++i) {
    // Fetch the element once; it serves both the None check and the
    // conversion below.
    const py::object item = data[i];

    if (item.is_none()) {
      // Prevent null StringViews to point to garbage.
      if constexpr (std::is_same_v<T, velox::StringView>) {
        rawData[i] = T();
      }
      velox::bits::setNull(rawNulls, i, true);
      ++nullCount;
      continue;
    }

    if constexpr (std::is_same_v<T, bool>) {
      // Using bitUtils for bool vectors.
      velox::bits::setBit(rawData, i, item.cast<bool>());
    } else if constexpr (std::is_same_v<T, velox::StringView>) {
      // Two memcpy's happen here: pybind11::object casting to std::string and
      // StringViewBufferHolder copying data from the buffer in the
      // std::string onto the buffers it manages. We can teach
      // StringViewBufferHolder how to copy data from
      // pybind11::str/pybind11::object to skip one copy
      rawData[i] = stringArena.getOwnedValue(item.cast<std::string>());
    } else {
      rawData[i] = item.cast<T>();
    }
    velox::bits::setNull(rawNulls, i, false);
  }

  auto flatVector = std::make_shared<velox::FlatVector<T>>(
      pool,
      std::move(nullBuffer),
      numRows,
      std::move(dataBuffer),
      stringArena.moveBuffers());
  // The null count was tallied while filling the buffers, so set it directly.
  flatVector->setNullCount(nullCount);

  return flatVector;
}