in cpp/src/arrow/array/data.cc [413:603]
void ArraySpan::FillFromScalar(const Scalar& value) {
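// Shared single-byte "bitmaps" used for the validity buffer and for a
// boolean value. Non-const because BufferSpan::data is a mutable uint8_t*,
// but nothing may write through these pointers.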
static uint8_t kTrueBit = 0x01;
static uint8_t kFalseBit = 0x00;
this->type = value.type.get();
this->length = 1;
Type::type type_id = value.type->id();
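// Null handling falls into three cases: NA arrays carry no buffers and are
// all-null; types without a validity bitmap (e.g. unions and run-end-encoded)
// are never null at the top level; everything else gets a one-byte bitmap
// pointing at the static bytes above.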
if (type_id == Type::NA) {
this->null_count = 1;
} else if (!internal::may_have_validity_bitmap(type_id)) {
this->null_count = 0;
} else {
// Populate null count and validity bitmap
this->null_count = value.is_valid ? 0 : 1;
this->buffers[0].data = value.is_valid ? &kTrueBit : &kFalseBit;
this->buffers[0].size = 1;
}
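// Populate the value buffer(s) and child data according to the type. For
// BOOL, the single value bit reuses the static one-byte bitmaps above.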
if (type_id == Type::BOOL) {
const auto& scalar = checked_cast<const BooleanScalar&>(value);
this->buffers[1].data = scalar.value ? &kTrueBit : &kFalseBit;
this->buffers[1].size = 1;
} else if (is_primitive(type_id) || is_decimal(type_id) ||
type_id == Type::DICTIONARY) {
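// Fixed-width scalars (including dictionary indices) expose their storage
// through view(); buffers[1] aliases that memory, so the span borrows from
// the scalar rather than copying.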
const auto& scalar = checked_cast<const internal::PrimitiveScalarBase&>(value);
const uint8_t* scalar_data = reinterpret_cast<const uint8_t*>(scalar.view().data());
this->buffers[1].data = const_cast<uint8_t*>(scalar_data);
this->buffers[1].size = scalar.type->byte_width();
if (type_id == Type::DICTIONARY) {
// Populate dictionary data
const auto& dict_scalar = checked_cast<const DictionaryScalar&>(value);
this->child_data.resize(1);
this->child_data[0].SetMembers(*dict_scalar.value.dictionary->data());
}
} else if (is_base_binary_like(type_id)) {
const auto& scalar = checked_cast<const BaseBinaryScalar&>(value);
const uint8_t* data_buffer = nullptr;
int64_t data_size = 0;
if (scalar.is_valid) {
data_buffer = scalar.value->data();
data_size = scalar.value->size();
}
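// The two-slot offsets buffer {0, data_size} lives in the scalar's
// preallocated scratch space; OffsetsForScalar wraps it with the right
// offset width.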
if (is_binary_like(type_id)) {
const auto& binary_scalar = checked_cast<const BinaryScalar&>(value);
this->buffers[1] = OffsetsForScalar(binary_scalar.scratch_space_, sizeof(int32_t));
} else {
// is_large_binary_like
const auto& large_binary_scalar = checked_cast<const LargeBinaryScalar&>(value);
this->buffers[1] =
OffsetsForScalar(large_binary_scalar.scratch_space_, sizeof(int64_t));
}
this->buffers[2].data = const_cast<uint8_t*>(data_buffer);
this->buffers[2].size = data_size;
} else if (type_id == Type::BINARY_VIEW || type_id == Type::STRING_VIEW) {
const auto& scalar = checked_cast<const BinaryViewScalar&>(value);
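// buffers[1] holds the single 16-byte view struct kept in the scalar's
// scratch space; for a valid scalar, the character data is packed into the
// variadic data buffer at buffers[2].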
this->buffers[1].size = BinaryViewType::kSize;
this->buffers[1].data = scalar.scratch_space_;
if (scalar.is_valid) {
this->buffers[2] = internal::PackVariadicBuffers({&scalar.value, 1});
}
} else if (type_id == Type::FIXED_SIZE_BINARY) {
const auto& scalar = checked_cast<const BaseBinaryScalar&>(value);
this->buffers[1].data = const_cast<uint8_t*>(scalar.value->data());
this->buffers[1].size = scalar.value->size();
} else if (is_var_length_list_like(type_id) || type_id == Type::FIXED_SIZE_LIST) {
const auto& scalar = checked_cast<const BaseListScalar&>(value);
this->child_data.resize(1);
// When the scalar is null, scalar.value can also be null
if (scalar.value != nullptr) {
this->child_data[0].SetMembers(*scalar.value->data());
} else {
// Even when the value is null, we still must populate child_data[0]
// with a zero-length array to yield a valid array. Tedious
internal::FillZeroLengthArray(this->type->field(0)->type().get(),
&this->child_data[0]);
}
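// Offsets (and sizes, for the list-view types) also come from the scalar's
// scratch space; only the offset width differs per type.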
if (type_id == Type::LIST) {
const auto& list_scalar = checked_cast<const ListScalar&>(value);
this->buffers[1] = OffsetsForScalar(list_scalar.scratch_space_, sizeof(int32_t));
} else if (type_id == Type::MAP) {
const auto& map_scalar = checked_cast<const MapScalar&>(value);
this->buffers[1] = OffsetsForScalar(map_scalar.scratch_space_, sizeof(int32_t));
} else if (type_id == Type::LARGE_LIST) {
const auto& large_list_scalar = checked_cast<const LargeListScalar&>(value);
this->buffers[1] =
OffsetsForScalar(large_list_scalar.scratch_space_, sizeof(int64_t));
} else if (type_id == Type::LIST_VIEW) {
const auto& list_view_scalar = checked_cast<const ListViewScalar&>(value);
std::tie(this->buffers[1], this->buffers[2]) =
OffsetsAndSizesForScalar(list_view_scalar.scratch_space_, sizeof(int32_t));
} else if (type_id == Type::LARGE_LIST_VIEW) {
const auto& large_list_view_scalar =
checked_cast<const LargeListViewScalar&>(value);
std::tie(this->buffers[1], this->buffers[2]) = OffsetsAndSizesForScalar(
large_list_view_scalar.scratch_space_, sizeof(int64_t));
} else {
DCHECK_EQ(type_id, Type::FIXED_SIZE_LIST);
// FIXED_SIZE_LIST has no offsets buffer
this->buffers[1] = {};
}
} else if (type_id == Type::STRUCT) {
const auto& scalar = checked_cast<const StructScalar&>(value);
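// A struct scalar holds one scalar per field; each becomes a length-1
// child span.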
this->child_data.resize(this->type->num_fields());
DCHECK_EQ(this->type->num_fields(), static_cast<int>(scalar.value.size()));
for (size_t i = 0; i < scalar.value.size(); ++i) {
this->child_data[i].FillFromScalar(*scalar.value[i]);
}
} else if (is_union(type_id)) {
// First buffer is kept null since unions have no validity bitmap
this->buffers[0] = {};
this->child_data.resize(this->type->num_fields());
if (type_id == Type::DENSE_UNION) {
const auto& scalar = checked_cast<const DenseUnionScalar&>(value);
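// The union scratch space holds both the one-entry type codes buffer and
// the two-entry offsets buffer.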
auto* union_scratch_space =
reinterpret_cast<UnionScalar::UnionScratchSpace*>(&scalar.scratch_space_);
this->buffers[1].data = reinterpret_cast<uint8_t*>(&union_scratch_space->type_code);
this->buffers[1].size = 1;
this->buffers[2] = OffsetsForScalar(union_scratch_space->offsets, sizeof(int32_t));
// We can't "see" the other arrays in the union, but we put the "active"
// union array in the right place and fill zero-length arrays for the
// others
const auto& child_ids = checked_cast<const UnionType*>(this->type)->child_ids();
DCHECK_GE(scalar.type_code, 0);
DCHECK_LT(scalar.type_code, static_cast<int>(child_ids.size()));
for (int i = 0; i < static_cast<int>(this->child_data.size()); ++i) {
if (i == child_ids[scalar.type_code]) {
this->child_data[i].FillFromScalar(*scalar.value);
} else {
internal::FillZeroLengthArray(this->type->field(i)->type().get(),
&this->child_data[i]);
}
}
} else {
const auto& scalar = checked_cast<const SparseUnionScalar&>(value);
auto* union_scratch_space =
reinterpret_cast<UnionScalar::UnionScratchSpace*>(&scalar.scratch_space_);
this->buffers[1].data = reinterpret_cast<uint8_t*>(&union_scratch_space->type_code);
this->buffers[1].size = 1;
// Sparse union scalars have a full complement of child values even
// though only one of them is relevant, so we just fill them in here
for (int i = 0; i < static_cast<int>(this->child_data.size()); ++i) {
this->child_data[i].FillFromScalar(*scalar.value[i]);
}
}
} else if (type_id == Type::EXTENSION) {
// Pass through storage
const auto& scalar = checked_cast<const ExtensionScalar&>(value);
FillFromScalar(*scalar.value);
// Restore the extension type
this->type = value.type.get();
} else if (type_id == Type::RUN_END_ENCODED) {
const auto& scalar = checked_cast<const RunEndEncodedScalar&>(value);
this->child_data.resize(2);
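// child_data[0] becomes a length-1 run-ends array whose single run end (1)
// lives in the scalar's scratch space; child_data[1] holds the value.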
auto set_run_end = [&](auto run_end) {
auto& e = this->child_data[0];
e.type = scalar.run_end_type().get();
e.length = 1;
e.null_count = 0;
e.buffers[1].data = scalar.scratch_space_;
e.buffers[1].size = sizeof(run_end);
};
switch (scalar.run_end_type()->id()) {
case Type::INT16:
set_run_end(static_cast<int16_t>(1));
break;
case Type::INT32:
set_run_end(static_cast<int32_t>(1));
break;
default:
DCHECK_EQ(scalar.run_end_type()->id(), Type::INT64);
set_run_end(static_cast<int64_t>(1));
}
this->child_data[1].FillFromScalar(*scalar.value);
} else {
DCHECK_EQ(Type::NA, type_id) << "should be unreachable: " << *value.type;
}
}
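// Usage sketch (not part of data.cc, and assuming the usual Arrow headers):
// FillFromScalar lets kernels treat a scalar as a length-1 array without
// allocating. The span borrows the scalar's memory, so the scalar must
// outlive the span.
//
//   arrow::Int32Scalar scalar(42);
//   arrow::ArraySpan span;
//   span.FillFromScalar(scalar);
//   // span.length == 1; span.buffers[1] aliases the scalar's storage.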