void ArraySpan::FillFromScalar()

in cpp/src/arrow/array/data.cc [413:603]


void ArraySpan::FillFromScalar(const Scalar& value) {
  static uint8_t kTrueBit = 0x01;
  static uint8_t kFalseBit = 0x00;

  this->type = value.type.get();
  this->length = 1;

  Type::type type_id = value.type->id();

  if (type_id == Type::NA) {
    this->null_count = 1;
  } else if (!internal::may_have_validity_bitmap(type_id)) {
    this->null_count = 0;
  } else {
    // Populate null count and validity bitmap
    this->null_count = value.is_valid ? 0 : 1;
    this->buffers[0].data = value.is_valid ? &kTrueBit : &kFalseBit;
    this->buffers[0].size = 1;
  }

  if (type_id == Type::BOOL) {
    const auto& scalar = checked_cast<const BooleanScalar&>(value);
    this->buffers[1].data = scalar.value ? &kTrueBit : &kFalseBit;
    this->buffers[1].size = 1;
  } else if (is_primitive(type_id) || is_decimal(type_id) ||
             type_id == Type::DICTIONARY) {
    const auto& scalar = checked_cast<const internal::PrimitiveScalarBase&>(value);
    const uint8_t* scalar_data = reinterpret_cast<const uint8_t*>(scalar.view().data());
    this->buffers[1].data = const_cast<uint8_t*>(scalar_data);
    this->buffers[1].size = scalar.type->byte_width();
    if (type_id == Type::DICTIONARY) {
      // Populate dictionary data
      const auto& dict_scalar = checked_cast<const DictionaryScalar&>(value);
      this->child_data.resize(1);
      this->child_data[0].SetMembers(*dict_scalar.value.dictionary->data());
    }
  } else if (is_base_binary_like(type_id)) {
    const auto& scalar = checked_cast<const BaseBinaryScalar&>(value);

    const uint8_t* data_buffer = nullptr;
    int64_t data_size = 0;
    if (scalar.is_valid) {
      data_buffer = scalar.value->data();
      data_size = scalar.value->size();
    }
    if (is_binary_like(type_id)) {
      const auto& binary_scalar = checked_cast<const BinaryScalar&>(value);
      this->buffers[1] = OffsetsForScalar(binary_scalar.scratch_space_, sizeof(int32_t));
    } else {
      // is_large_binary_like
      const auto& large_binary_scalar = checked_cast<const LargeBinaryScalar&>(value);
      this->buffers[1] =
          OffsetsForScalar(large_binary_scalar.scratch_space_, sizeof(int64_t));
    }
    this->buffers[2].data = const_cast<uint8_t*>(data_buffer);
    this->buffers[2].size = data_size;
  } else if (type_id == Type::BINARY_VIEW || type_id == Type::STRING_VIEW) {
    const auto& scalar = checked_cast<const BinaryViewScalar&>(value);

    this->buffers[1].size = BinaryViewType::kSize;
    this->buffers[1].data = scalar.scratch_space_;
    if (scalar.is_valid) {
      this->buffers[2] = internal::PackVariadicBuffers({&scalar.value, 1});
    }
  } else if (type_id == Type::FIXED_SIZE_BINARY) {
    const auto& scalar = checked_cast<const BaseBinaryScalar&>(value);
    this->buffers[1].data = const_cast<uint8_t*>(scalar.value->data());
    this->buffers[1].size = scalar.value->size();
  } else if (is_var_length_list_like(type_id) || type_id == Type::FIXED_SIZE_LIST) {
    const auto& scalar = checked_cast<const BaseListScalar&>(value);

    this->child_data.resize(1);
    if (scalar.value != nullptr) {
      // When the scalar is null, scalar.value can also be null
      this->child_data[0].SetMembers(*scalar.value->data());
    } else {
      // Even when the value is null, we still must populate the
      // child_data to yield a valid array. Tedious
      internal::FillZeroLengthArray(this->type->field(0)->type().get(),
                                    &this->child_data[0]);
    }

    if (type_id == Type::LIST) {
      const auto& list_scalar = checked_cast<const ListScalar&>(value);
      this->buffers[1] = OffsetsForScalar(list_scalar.scratch_space_, sizeof(int32_t));
    } else if (type_id == Type::MAP) {
      const auto& map_scalar = checked_cast<const MapScalar&>(value);
      this->buffers[1] = OffsetsForScalar(map_scalar.scratch_space_, sizeof(int32_t));
    } else if (type_id == Type::LARGE_LIST) {
      const auto& large_list_scalar = checked_cast<const LargeListScalar&>(value);
      this->buffers[1] =
          OffsetsForScalar(large_list_scalar.scratch_space_, sizeof(int64_t));
    } else if (type_id == Type::LIST_VIEW) {
      const auto& list_view_scalar = checked_cast<const ListViewScalar&>(value);
      std::tie(this->buffers[1], this->buffers[2]) =
          OffsetsAndSizesForScalar(list_view_scalar.scratch_space_, sizeof(int32_t));
    } else if (type_id == Type::LARGE_LIST_VIEW) {
      const auto& large_list_view_scalar =
          checked_cast<const LargeListViewScalar&>(value);
      std::tie(this->buffers[1], this->buffers[2]) = OffsetsAndSizesForScalar(
          large_list_view_scalar.scratch_space_, sizeof(int64_t));
    } else {
      DCHECK_EQ(type_id, Type::FIXED_SIZE_LIST);
      // FIXED_SIZE_LIST: does not have a second buffer
      this->buffers[1] = {};
    }
  } else if (type_id == Type::STRUCT) {
    const auto& scalar = checked_cast<const StructScalar&>(value);
    this->child_data.resize(this->type->num_fields());
    DCHECK_EQ(this->type->num_fields(), static_cast<int>(scalar.value.size()));
    for (size_t i = 0; i < scalar.value.size(); ++i) {
      this->child_data[i].FillFromScalar(*scalar.value[i]);
    }
  } else if (is_union(type_id)) {
    // First buffer is kept null since unions have no validity vector
    this->buffers[0] = {};

    this->child_data.resize(this->type->num_fields());
    if (type_id == Type::DENSE_UNION) {
      const auto& scalar = checked_cast<const DenseUnionScalar&>(value);
      auto* union_scratch_space =
          reinterpret_cast<UnionScalar::UnionScratchSpace*>(&scalar.scratch_space_);

      this->buffers[1].data = reinterpret_cast<uint8_t*>(&union_scratch_space->type_code);
      this->buffers[1].size = 1;

      this->buffers[2] = OffsetsForScalar(union_scratch_space->offsets, sizeof(int32_t));
      // We can't "see" the other arrays in the union, but we put the "active"
      // union array in the right place and fill zero-length arrays for the
      // others
      const auto& child_ids = checked_cast<const UnionType*>(this->type)->child_ids();
      DCHECK_GE(scalar.type_code, 0);
      DCHECK_LT(scalar.type_code, static_cast<int>(child_ids.size()));
      for (int i = 0; i < static_cast<int>(this->child_data.size()); ++i) {
        if (i == child_ids[scalar.type_code]) {
          this->child_data[i].FillFromScalar(*scalar.value);
        } else {
          internal::FillZeroLengthArray(this->type->field(i)->type().get(),
                                        &this->child_data[i]);
        }
      }
    } else {
      const auto& scalar = checked_cast<const SparseUnionScalar&>(value);
      auto* union_scratch_space =
          reinterpret_cast<UnionScalar::UnionScratchSpace*>(&scalar.scratch_space_);

      this->buffers[1].data = reinterpret_cast<uint8_t*>(&union_scratch_space->type_code);
      this->buffers[1].size = 1;

      // Sparse union scalars have a full complement of child values even
      // though only one of them is relevant, so we just fill them in here
      for (int i = 0; i < static_cast<int>(this->child_data.size()); ++i) {
        this->child_data[i].FillFromScalar(*scalar.value[i]);
      }
    }
  } else if (type_id == Type::EXTENSION) {
    // Pass through storage
    const auto& scalar = checked_cast<const ExtensionScalar&>(value);
    FillFromScalar(*scalar.value);

    // Restore the extension type
    this->type = value.type.get();
  } else if (type_id == Type::RUN_END_ENCODED) {
    const auto& scalar = checked_cast<const RunEndEncodedScalar&>(value);
    this->child_data.resize(2);

    auto set_run_end = [&](auto run_end) {
      auto& e = this->child_data[0];
      e.type = scalar.run_end_type().get();
      e.length = 1;
      e.null_count = 0;
      e.buffers[1].data = scalar.scratch_space_;
      e.buffers[1].size = sizeof(run_end);
    };

    switch (scalar.run_end_type()->id()) {
      case Type::INT16:
        set_run_end(static_cast<int16_t>(1));
        break;
      case Type::INT32:
        set_run_end(static_cast<int32_t>(1));
        break;
      default:
        DCHECK_EQ(scalar.run_end_type()->id(), Type::INT64);
        set_run_end(static_cast<int64_t>(1));
    }
    this->child_data[1].FillFromScalar(*scalar.value);
  } else {
    DCHECK_EQ(Type::NA, type_id) << "should be unreachable: " << *value.type;
  }
}