in csrc/velox/lib.cpp [36:93]
velox::FlatVectorPtr<T> flatVectorFromPyList(const py::list& data) {
  // Builds a FlatVector<T> out of a Python list. A None element becomes a null
  // entry; any other element is cast to T and stored at the same position.
  //
  // TODO
  // Consider using the pattern used in arrayVectorFromPyList for creating the
  // underlying FlatVector, which creates an empty vector using
  // BaseVector::create() and then calls set() and setNullCount() to let the
  // library handle internal buffer allocation, call the appropriate API for
  // specific types, and keep internal data in sync with each other
  constexpr bool kIsBool = std::is_same_v<T, bool>;
  constexpr bool kIsString = std::is_same_v<T, velox::StringView>;

  const py::size_t numRows = data.size();

  // Values buffer first, then the nulls buffer; both are sized from the list
  // length and filled in by the loop below.
  velox::BufferPtr valuesBuffer = velox::AlignedBuffer::allocate<T>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());
  velox::BufferPtr nullsBuffer = velox::AlignedBuffer::allocate<bool>(
      numRows, TorchArrowGlobalStatic::rootMemoryPool());
  T* rawValues = valuesBuffer->asMutable<T>();
  uint64_t* rawNulls = nullsBuffer->asMutable<uint64_t>();

  // Only string vectors actually store data in the holder. For every other T
  // it simply yields the empty std::vector<BufferPtr> that the FlatVector
  // constructor expects.
  velox::StringViewBufferHolder stringBuffers(
      TorchArrowGlobalStatic::rootMemoryPool());

  velox::vector_size_t numNulls = 0;
  for (py::size_t row = 0; row < numRows; ++row) {
    // Fetch the element accessor once and reuse it for the None check and the
    // cast.
    const auto element = data[row];

    if (element.is_none()) {
      // Give null StringViews a default-constructed value so they never point
      // to garbage.
      if constexpr (kIsString) {
        rawValues[row] = T();
      }
      velox::bits::setNull(rawNulls, row, true);
      ++numNulls;
      continue;
    }

    if constexpr (kIsBool) {
      // bool vectors are written through the bit utilities rather than by
      // plain indexing.
      velox::bits::setBit(rawValues, row, element.template cast<bool>());
    } else if constexpr (kIsString) {
      // Two memcpy's happen here: pybind11 casting the object to std::string,
      // and StringViewBufferHolder copying that std::string's bytes onto the
      // buffers it manages. Teaching StringViewBufferHolder to copy straight
      // from pybind11::str/pybind11::object would skip one copy.
      rawValues[row] =
          stringBuffers.getOwnedValue(element.template cast<std::string>());
    } else {
      rawValues[row] = element.template cast<T>();
    }
    velox::bits::setNull(rawNulls, row, false);
  }

  auto result = std::make_shared<velox::FlatVector<T>>(
      TorchArrowGlobalStatic::rootMemoryPool(),
      std::move(nullsBuffer),
      numRows,
      std::move(valuesBuffer),
      stringBuffers.moveBuffers());
  result->setNullCount(numNulls);
  return result;
}