velox::ArrayVectorPtr arrayVectorFromPyList()

in csrc/velox/lib.cpp [492:568]


// Builds a velox::ArrayVector from a Python list.
//
// Each item of `data` is either None, which becomes a null row, or a value
// that is cast to py::list and becomes one row of the array vector. Inside a
// row, None elements become nulls of the underlying flat vector; every other
// element is converted with `cast<T>()` (through std::string when T is
// velox::StringView).
//
// Throws py::value_error when the number of rows or the total number of
// elements does not fit into velox::vector_size_t; without that check the
// sizes and offsets written below would silently wrap around.
velox::ArrayVectorPtr arrayVectorFromPyList(const py::list& data) {
  using velox::vector_size_t;

  // Checked narrowing from a Python size to vector_size_t. The round trip
  // through the narrower signed type detects every value that is out of range.
  const auto toVectorSize = [](py::size_t value) -> vector_size_t {
    const auto narrowed = static_cast<vector_size_t>(value);
    if (narrowed < 0 || static_cast<py::size_t>(narrowed) != value) {
      throw py::value_error(
          "Input list is too large to convert to a Velox ArrayVector");
    }
    return narrowed;
  };

  // Reject inputs with more rows than a Velox vector can index before
  // allocating anything.
  const py::size_t numRows = data.size();
  toVectorSize(numRows);

  // Prepare the arguments for creating ArrayVector
  velox::BufferPtr offsets = velox::AlignedBuffer::allocate<vector_size_t>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());
  velox::BufferPtr sizes = velox::AlignedBuffer::allocate<vector_size_t>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());
  velox::BufferPtr nulls = velox::AlignedBuffer::allocate<bool>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());

  vector_size_t* rawOffsets = offsets->asMutable<vector_size_t>();
  vector_size_t* rawSizes = sizes->asMutable<vector_size_t>();
  uint64_t* rawNulls = nulls->asMutable<uint64_t>();

  // First pass: mark null rows and count the elements of all non-null rows so
  // the flat vector can be created with its final size.
  vector_size_t numElements = 0;
  vector_size_t nullCount = 0;
  for (py::size_t i = 0; i < numRows; i++) {
    if (!data[i].is_none()) {
      const py::size_t rowSize = data[i].cast<py::list>().size();
      // Validate the running total on every step so it can never wrap.
      numElements =
          toVectorSize(static_cast<py::size_t>(numElements) + rowSize);
      velox::bits::setNull(rawNulls, i, false);
    } else {
      ++nullCount;
      velox::bits::setNull(rawNulls, i, true);
    }
  }

  // Create the underlying flat vector
  std::shared_ptr<velox::FlatVector<T>> flatVector =
      std::dynamic_pointer_cast<velox::FlatVector<T>>(velox::BaseVector::create(
          velox::CppToType<T>::create(),
          numElements,
          TorchArrowGlobalStatic::rootMemoryPool()));
  uint64_t* elementRawNulls = flatVector->mutableRawNulls();

  // Second pass: write each row's size and offset and copy its elements into
  // the flat vector.
  vector_size_t currentIdx = 0;
  vector_size_t elementNullCount = 0;
  for (const auto& d : data) {
    if (d.is_none()) {
      // A null row owns no elements: record an empty range at offset 0.
      *rawSizes++ = 0;
      *rawOffsets++ = 0;
      continue;
    }

    py::list elementArray = d.cast<py::list>();
    // Every row size was range-checked in the first pass (it is bounded by the
    // validated element total), so this narrowing is explicit but lossless.
    *rawSizes++ = static_cast<vector_size_t>(elementArray.size());
    *rawOffsets++ = currentIdx;

    for (auto element : elementArray) {
      if (!element.is_none()) {
        if constexpr (std::is_same<T, velox::StringView>::value) {
          flatVector->set(
              currentIdx, velox::StringView(element.cast<std::string>()));
        } else {
          flatVector->set(currentIdx, element.cast<T>());
        }
        // `set()` will set nulls[i] = false for us
      } else {
        velox::bits::setNull(elementRawNulls, currentIdx, true);
        ++elementNullCount;
      }
      ++currentIdx;
    }
  }
  flatVector->setNullCount(elementNullCount);

  return std::make_shared<velox::ArrayVector>(
      TorchArrowGlobalStatic::rootMemoryPool(),
      velox::ARRAY(velox::CppToType<T>::create()),
      nulls,
      data.size(),
      offsets,
      sizes,
      flatVector,
      nullCount);
}