in csrc/velox/lib.cpp [492:568]
velox::ArrayVectorPtr arrayVectorFromPyList(const py::list& data) {
  // Builds a velox::ArrayVector from a Python list whose entries are either
  // None or a Python list of values convertible to T. (T is declared outside
  // this block - presumably a template parameter of this function.)
  //
  // A None entry becomes a null row with size 0 and offset 0. A None value
  // inside a nested list becomes a null slot in the flat child vector.
  using velox::vector_size_t;

  const auto numRows = data.size();

  // Per-row buffers: one offset, one size and one null bit for each entry.
  velox::BufferPtr offsetBuffer =
      velox::AlignedBuffer::allocate<vector_size_t>(
          numRows, TorchArrowGlobalStatic::rootMemoryPool());
  velox::BufferPtr sizeBuffer = velox::AlignedBuffer::allocate<vector_size_t>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());
  velox::BufferPtr rowNullBuffer = velox::AlignedBuffer::allocate<bool>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());
  vector_size_t* rowOffsets = offsetBuffer->asMutable<vector_size_t>();
  vector_size_t* rowSizes = sizeBuffer->asMutable<vector_size_t>();
  uint64_t* rowNulls = rowNullBuffer->asMutable<uint64_t>();

  // Pass 1: record which rows are null and count the child elements needed.
  vector_size_t totalElements = 0;
  vector_size_t nullRowCount = 0;
  {
    py::size_t row = 0;
    for (const auto& entry : data) {
      if (entry.is_none()) {
        ++nullRowCount;
        velox::bits::setNull(rowNulls, row, true);
      } else {
        totalElements += entry.cast<py::list>().size();
        velox::bits::setNull(rowNulls, row, false);
      }
      ++row;
    }
  }

  // Flat child vector that stores the elements of all rows back to back.
  auto elements =
      std::dynamic_pointer_cast<velox::FlatVector<T>>(velox::BaseVector::create(
          velox::CppToType<T>::create(),
          totalElements,
          TorchArrowGlobalStatic::rootMemoryPool()));
  uint64_t* elementNulls = elements->mutableRawNulls();

  // Pass 2: fill sizes/offsets per row and copy the values into the child.
  vector_size_t writePos = 0;
  vector_size_t nullElementCount = 0;
  {
    py::size_t row = 0;
    for (const auto& entry : data) {
      if (entry.is_none()) {
        // Null rows own no child elements.
        rowSizes[row] = 0;
        rowOffsets[row] = 0;
      } else {
        py::list items = entry.cast<py::list>();
        rowSizes[row] = items.size();
        rowOffsets[row] = writePos;
        for (const auto& item : items) {
          if (item.is_none()) {
            velox::bits::setNull(elementNulls, writePos, true);
            ++nullElementCount;
          } else if constexpr (std::is_same_v<T, velox::StringView>) {
            // Strings are read as std::string and handed over as a StringView.
            elements->set(
                writePos, velox::StringView(item.cast<std::string>()));
          } else {
            // `set()` also marks the slot as non-null.
            elements->set(writePos, item.cast<T>());
          }
          ++writePos;
        }
      }
      ++row;
    }
  }
  elements->setNullCount(nullElementCount);

  return std::make_shared<velox::ArrayVector>(
      TorchArrowGlobalStatic::rootMemoryPool(),
      velox::ARRAY(velox::CppToType<T>::create()),
      rowNullBuffer,
      numRows,
      offsetBuffer,
      sizeBuffer,
      elements,
      nullRowCount);
}