absl::Status MakeListArrayFromParentIndicesAndValues()

in tfx_bsl/cc/arrow/array_util.cc [413:473]


// Builds a LargeListArray of `num_parents` lists by grouping `values`
// according to `parent_indices`: values[k] becomes a member of the list at
// position parent_indices[k].
//
// Example: num_parents = 5, parent_indices = [0, 1, 1, 3, 3] and
// values = [a, b, c, d, e] produce [[a], [b, c], <empty>, [d, e], <empty>],
// where <empty> is a null entry if `empty_list_as_null` is true and an empty
// list otherwise.
//
// Args:
//   num_parents: number of lists in the result.
//   parent_indices: int64 array without nulls, of the same length as
//     `values`, sorted in non-decreasing order, with every element in
//     [0, num_parents).
//   values: the flattened list elements; used as-is as the child array.
//   empty_list_as_null: whether parents without any value become nulls
//     (true) or empty lists (false).
//   out: receives the resulting LargeListArray on success.
//
// Returns InvalidArgument if any of the requirements above is violated.
// Failures reported by the Arrow buffer builders are forwarded through
// FromArrowStatus.
absl::Status MakeListArrayFromParentIndicesAndValues(
    const size_t num_parents,
    const std::shared_ptr<arrow::Array>& parent_indices,
    const std::shared_ptr<Array>& values, const bool empty_list_as_null,
    std::shared_ptr<Array>* out) {
  if (parent_indices->type()->id() != arrow::Type::INT64) {
    return absl::InvalidArgumentError("Parent indices array must be int64.");
  }
  const int64_t length = parent_indices->length();
  if (values->length() != length) {
    return absl::InvalidArgumentError(absl::StrCat(
        "values array and parent indices array must be of the same length: ",
        values->length(), " v.s. ", parent_indices->length()));
  }
  // Value() is read for every slot below, which is only meaningful when no
  // slot is null.
  if (parent_indices->null_count() != 0) {
    return absl::InvalidArgumentError(
        "Parent indices array must not contain nulls.");
  }
  // Arrow lengths are int64_t. Rejecting larger values here keeps the signed
  // arithmetic below (including num_lists + 1) free of wrap-around.
  if (static_cast<uint64_t>(num_parents) > static_cast<uint64_t>(INT64_MAX)) {
    return absl::InvalidArgumentError(
        absl::StrCat("num_parents is too large: ", num_parents));
  }
  const int64_t num_lists = static_cast<int64_t>(num_parents);

  const auto& parent_indices_int64 =
      *static_cast<const arrow::Int64Array*>(parent_indices.get());
  if (length != 0) {
    // The indices are expected to be sorted, so the last one is the largest.
    // Comparing as signed integers (without adding 1) avoids both the
    // signed/unsigned mix-up and overflow for extreme index values.
    const int64_t last_parent_index = parent_indices_int64.Value(length - 1);
    if (last_parent_index < 0 || last_parent_index >= num_lists) {
      return absl::InvalidArgumentError(absl::StrCat(
          "Found a parent index ", last_parent_index,
          " while num_parents was ", num_parents));
    }
  }

  arrow::TypedBufferBuilder<bool> null_bitmap_builder;
  if (empty_list_as_null) {
    TFX_BSL_RETURN_IF_ERROR(
        FromArrowStatus(null_bitmap_builder.Reserve(num_lists)));
  }
  arrow::TypedBufferBuilder<int64_t> offsets_builder;
  TFX_BSL_RETURN_IF_ERROR(
      FromArrowStatus(offsets_builder.Reserve(num_lists + 1)));

  // Exactly num_lists + 1 offsets (and at most num_lists validity bits) are
  // appended below, so the unchecked appends stay within the reservation
  // regardless of the contents of `parent_indices`.
  offsets_builder.UnsafeAppend(0);
  // Position of the first value that has not been assigned to a list yet.
  int64_t next_value = 0;
  for (int64_t parent = 0; parent < num_lists; ++parent) {
    const int64_t list_begin = next_value;
    // Consume the run of values that belong to `parent`.
    while (next_value < length &&
           parent_indices_int64.Value(next_value) == parent) {
      ++next_value;
    }
    if (empty_list_as_null) {
      // A parent is valid (non-null) iff it received at least one value.
      null_bitmap_builder.UnsafeAppend(next_value != list_begin);
    }
    offsets_builder.UnsafeAppend(next_value);
  }

  // The single forward pass above consumes every value only if the indices
  // are non-negative and non-decreasing. Anything left over would otherwise
  // be silently dropped from the resulting lists.
  if (next_value != length) {
    return absl::InvalidArgumentError(absl::StrCat(
        "Parent indices must be non-negative and sorted in non-decreasing "
        "order; found parent index ",
        parent_indices_int64.Value(next_value), " at position ", next_value));
  }

  // When `empty_list_as_null` is false nothing was appended to the bitmap
  // builder, so the null count is 0 and the bitmap buffer stays nullptr.
  const int64_t null_count = null_bitmap_builder.false_count();
  std::shared_ptr<arrow::Buffer> null_bitmap_buffer;
  if (empty_list_as_null) {
    TFX_BSL_RETURN_IF_ERROR(
        FromArrowStatus(null_bitmap_builder.Finish(&null_bitmap_buffer)));
  }
  std::shared_ptr<arrow::Buffer> offsets_buffer;
  TFX_BSL_RETURN_IF_ERROR(
      FromArrowStatus(offsets_builder.Finish(&offsets_buffer)));

  *out = std::make_shared<LargeListArray>(
      arrow::large_list(values->type()), num_lists, offsets_buffer, values,
      null_bitmap_buffer, null_count, /*offset=*/0);
  return absl::OkStatus();
}