in tfx_bsl/cc/arrow/array_util.cc [413:473]
// Builds a LargeListArray of `num_parents` lists by grouping the elements of
// `values` according to `parent_indices`: the k-th value is placed into the
// list at position parent_indices[k].
//
// Args:
//   num_parents: number of lists in the resulting array.
//   parent_indices: an int64 array with no nulls whose entries are sorted in
//     increasing order and lie in [0, num_parents).
//   values: the child values; must have the same length as `parent_indices`.
//   empty_list_as_null: if true, a parent that receives no value becomes a
//     null entry; otherwise it becomes an empty list.
//   out: receives the resulting LargeListArray on success.
//
// Returns InvalidArgumentError if `parent_indices` is not int64, contains
// nulls, differs in length from `values`, or holds an index that is negative,
// out of order, or not smaller than `num_parents`. Errors from the Arrow
// buffer builders are propagated.
absl::Status MakeListArrayFromParentIndicesAndValues(
    const size_t num_parents,
    const std::shared_ptr<arrow::Array>& parent_indices,
    const std::shared_ptr<Array>& values, const bool empty_list_as_null,
    std::shared_ptr<Array>* out) {
  if (parent_indices->type()->id() != arrow::Type::INT64) {
    return absl::InvalidArgumentError("Parent indices array must be int64.");
  }
  const int64_t length = parent_indices->length();
  if (values->length() != length) {
    return absl::InvalidArgumentError(absl::StrCat(
        "values array and parent indices array must be of the same length: ",
        values->length(), " v.s. ", parent_indices->length()));
  }
  // The content of a null slot is unspecified, so it cannot be used as an
  // index.
  if (parent_indices->null_count() != 0) {
    return absl::InvalidArgumentError(
        "Parent indices array must not contain nulls.");
  }
  const auto& parent_indices_int64 =
      *static_cast<const arrow::Int64Array*>(parent_indices.get());
  if (length != 0) {
    // Compare without adding 1 to the index (which could overflow) and
    // without relying on an implicit signed-to-unsigned conversion.
    const int64_t last_parent_index = parent_indices_int64.Value(length - 1);
    if (last_parent_index < 0 ||
        static_cast<uint64_t>(last_parent_index) >= num_parents) {
      return absl::InvalidArgumentError(absl::StrCat(
          "Found a parent index ", last_parent_index,
          " while num_parents was ", num_parents));
    }
  }

  arrow::TypedBufferBuilder<bool> null_bitmap_builder;
  if (empty_list_as_null) {
    TFX_BSL_RETURN_IF_ERROR(
        FromArrowStatus(null_bitmap_builder.Reserve(num_parents)));
  }
  arrow::TypedBufferBuilder<int64_t> offsets_builder;
  TFX_BSL_RETURN_IF_ERROR(
      FromArrowStatus(offsets_builder.Reserve(num_parents + 1)));
  offsets_builder.UnsafeAppend(0);

  // Single pass over the parent indices: for each parent, consume the run of
  // entries equal to that parent and record where the run ends.
  const int64_t num_parents_signed = static_cast<int64_t>(num_parents);
  int64_t current_pi = 0;
  for (int64_t parent = 0; parent < num_parents_signed; ++parent) {
    const int64_t run_begin = current_pi;
    while (current_pi < length &&
           parent_indices_int64.Value(current_pi) == parent) {
      ++current_pi;
    }
    if (empty_list_as_null) {
      // A parent is valid (non-null) only if at least one value was consumed.
      null_bitmap_builder.UnsafeAppend(current_pi != run_begin);
    }
    offsets_builder.UnsafeAppend(current_pi);
  }
  // The scan consumes every entry exactly when the indices are sorted and
  // within [0, num_parents). Anything left over means some values would be
  // silently dropped from the result.
  if (current_pi != length) {
    return absl::InvalidArgumentError(absl::StrCat(
        "Parent indices must be non-negative and sorted in increasing order; "
        "found parent index ",
        parent_indices_int64.Value(current_pi), " at position ", current_pi));
  }

  // When empty_list_as_null is false nothing was appended, so the count is 0
  // and the bitmap buffer stays null.
  const int64_t null_count = null_bitmap_builder.false_count();
  std::shared_ptr<arrow::Buffer> null_bitmap_buffer;
  if (empty_list_as_null) {
    TFX_BSL_RETURN_IF_ERROR(
        FromArrowStatus(null_bitmap_builder.Finish(&null_bitmap_buffer)));
  }
  std::shared_ptr<arrow::Buffer> offsets_buffer;
  TFX_BSL_RETURN_IF_ERROR(
      FromArrowStatus(offsets_builder.Finish(&offsets_buffer)));
  *out = std::make_shared<LargeListArray>(
      arrow::large_list(values->type()), num_parents, offsets_buffer, values,
      null_bitmap_buffer, null_count, /*offset=*/0);
  return absl::OkStatus();
}