arrow-array/src/array/struct_array.rs (676 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::array::print_long_array;
use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
use std::sync::Arc;
use std::{any::Any, ops::Index};
/// An array of [structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
///
/// Each child (called *field*) is represented by a separate array.
///
/// # Comparison with [RecordBatch]
///
/// Both [`RecordBatch`] and [`StructArray`] represent a collection of columns / arrays with the
/// same length.
///
/// However, there are a couple of key differences:
///
/// * [`StructArray`] can be nested within other [`Array`], including itself
/// * [`RecordBatch`] can contain top-level metadata on its associated [`Schema`][arrow_schema::Schema]
/// * [`StructArray`] can contain top-level nulls, i.e. `null`
/// * [`RecordBatch`] can only represent nulls in its child columns, i.e. `{"field": null}`
///
/// [`StructArray`] is therefore a more general data container than [`RecordBatch`], and as such
/// code that needs to handle both will typically share an implementation in terms of
/// [`StructArray`] and convert to/from [`RecordBatch`] as necessary.
///
/// [`From`] implementations are provided to facilitate this conversion, however, converting
/// from a [`StructArray`] containing top-level nulls to a [`RecordBatch`] will panic, as there
/// is no way to preserve them.
///
/// # Example: Create an array from a vector of fields
///
/// ```
/// use std::sync::Arc;
/// use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, StructArray};
/// use arrow_schema::{DataType, Field};
///
/// let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
/// let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
///
/// let struct_array = StructArray::from(vec![
/// (
/// Arc::new(Field::new("b", DataType::Boolean, false)),
/// boolean.clone() as ArrayRef,
/// ),
/// (
/// Arc::new(Field::new("c", DataType::Int32, false)),
/// int.clone() as ArrayRef,
/// ),
/// ]);
/// assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
/// assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
/// assert_eq!(4, struct_array.len());
/// assert_eq!(0, struct_array.null_count());
/// assert_eq!(0, struct_array.offset());
/// ```
#[derive(Clone)]
pub struct StructArray {
/// Number of rows in the array; reported by `Array::len`
len: usize,
/// Expected to always be `DataType::Struct`; accessors in this file treat any other
/// variant as unreachable
data_type: DataType,
/// Optional top-level validity mask, one entry per row
nulls: Option<NullBuffer>,
/// Child arrays, one per struct field, in field order
fields: Vec<ArrayRef>,
}
impl StructArray {
/// Build a [`StructArray`] from its parts, panicking if they are inconsistent
///
/// # Panics
///
/// Panics whenever [`Self::try_new`] would return an error
pub fn new(fields: Fields, arrays: Vec<ArrayRef>, nulls: Option<NullBuffer>) -> Self {
    let built = Self::try_new(fields, arrays, nulls);
    built.unwrap()
}
/// Create a new [`StructArray`] from the provided parts, returning an error on failure
///
/// The length is inferred from the length of the first child array, so at least one
/// child array is required. Use [`Self::try_new_with_length`] or
/// [`Self::new_empty_fields`] when there are no children.
///
/// # Errors
///
/// Errors if
///
/// * `arrays` is empty, as the length cannot then be inferred
/// * Any reason that [`Self::try_new_with_length`] would error
pub fn try_new(
    fields: Fields,
    arrays: Vec<ArrayRef>,
    nulls: Option<NullBuffer>,
) -> Result<Self, ArrowError> {
    let len = match arrays.first() {
        Some(first) => first.len(),
        None => {
            // The message names the constructors that actually exist for this case
            return Err(ArrowError::InvalidArgumentError(
                "use StructArray::try_new_with_length or StructArray::new_empty_fields to create a struct array with no fields so that the length can be set correctly".to_string(),
            ));
        }
    };
    Self::try_new_with_length(fields, arrays, nulls, len)
}
/// Create a new [`StructArray`] of length `len` from the provided parts, returning an
/// error on failure
///
/// A `nulls` buffer that contains no nulls is discarded, so the resulting array
/// reports no validity mask in that case.
///
/// # Errors
///
/// Errors if
///
/// * `fields.len() != arrays.len()`
/// * `nulls.len() != len`
/// * `fields[i].data_type() != arrays[i].data_type()`
/// * `arrays[i].len() != len`
/// * `fields[i]` is not nullable and `arrays[i]` contains logical nulls that are not
///   masked by `nulls`
pub fn try_new_with_length(
    fields: Fields,
    arrays: Vec<ArrayRef>,
    nulls: Option<NullBuffer>,
    len: usize,
) -> Result<Self, ArrowError> {
    if fields.len() != arrays.len() {
        return Err(ArrowError::InvalidArgumentError(format!(
            "Incorrect number of arrays for StructArray fields, expected {} got {}",
            fields.len(),
            arrays.len()
        )));
    }

    if let Some(n) = nulls.as_ref() {
        if n.len() != len {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Incorrect number of nulls for StructArray, expected {len} got {}",
                n.len(),
            )));
        }
    }

    for (f, a) in fields.iter().zip(&arrays) {
        if f.data_type() != a.data_type() {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Incorrect datatype for StructArray field {:?}, expected {} got {}",
                f.name(),
                f.data_type(),
                a.data_type()
            )));
        }

        if a.len() != len {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Incorrect array length for StructArray field {:?}, expected {} got {}",
                f.name(),
                len,
                a.len()
            )));
        }

        if !f.is_nullable() {
            if let Some(child_nulls) = a.logical_nulls() {
                // A child may carry a validity buffer without any actual nulls (for
                // example after slicing). That must not be reported as a violation,
                // including when the parent has no null buffer to compare against.
                let has_nulls = child_nulls.null_count() > 0;
                let masked = nulls
                    .as_ref()
                    .map(|n| n.contains(&child_nulls))
                    .unwrap_or_default();
                if has_nulls && !masked {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "Found unmasked nulls for non-nullable StructArray field {:?}",
                        f.name()
                    )));
                }
            }
        }
    }

    Ok(Self {
        len,
        data_type: DataType::Struct(fields),
        // Drop validity masks that do not mark anything as null
        nulls: nulls.filter(|n| n.null_count() > 0),
        fields: arrays,
    })
}
/// Build a [`StructArray`] with `len` rows, every one of which is null
///
/// Each child is created with [`new_null_array`] for the matching field type.
pub fn new_null(fields: Fields, len: usize) -> Self {
    let mut children = Vec::with_capacity(fields.len());
    for field in fields.iter() {
        children.push(new_null_array(field.data_type(), len));
    }

    Self {
        len,
        fields: children,
        nulls: Some(NullBuffer::new_null(len)),
        data_type: DataType::Struct(fields),
    }
}
/// Assemble a [`StructArray`] from the provided parts, skipping validation
///
/// The length is taken from the first child array, so this panics when `arrays` is
/// empty. Prefer [`Self::new_unchecked_with_length`] when the length is already known.
///
/// When the `force_validate` feature is enabled this defers to [`Self::new`].
///
/// # Safety
///
/// Safe if [`Self::new`] would not panic with the given arguments
pub unsafe fn new_unchecked(
    fields: Fields,
    arrays: Vec<ArrayRef>,
    nulls: Option<NullBuffer>,
) -> Self {
    if !cfg!(feature = "force_validate") {
        let first = arrays.first().expect(
            "cannot use StructArray::new_unchecked if there are no fields, length is unknown",
        );
        let len = first.len();
        Self {
            len,
            data_type: DataType::Struct(fields),
            nulls,
            fields: arrays,
        }
    } else {
        Self::new(fields, arrays, nulls)
    }
}
/// Assemble a [`StructArray`] of length `len` from the provided parts, skipping validation
///
/// When the `force_validate` feature is enabled the parts are validated through
/// [`Self::try_new_with_length`] and any error becomes a panic.
///
/// # Safety
///
/// Safe if [`Self::new`] would not panic with the given arguments
pub unsafe fn new_unchecked_with_length(
    fields: Fields,
    arrays: Vec<ArrayRef>,
    nulls: Option<NullBuffer>,
    len: usize,
) -> Self {
    if !cfg!(feature = "force_validate") {
        Self {
            len,
            data_type: DataType::Struct(fields),
            nulls,
            fields: arrays,
        }
    } else {
        let validated = Self::try_new_with_length(fields, arrays, nulls, len);
        validated.unwrap()
    }
}
/// Build a [`StructArray`] with `len` rows and no child fields
///
/// # Panics
///
/// If `len != nulls.len()`
pub fn new_empty_fields(len: usize, nulls: Option<NullBuffer>) -> Self {
    if let Some(validity) = nulls.as_ref() {
        assert_eq!(len, validity.len());
    }

    let data_type = DataType::Struct(Fields::empty());
    Self {
        len,
        data_type,
        fields: Vec::new(),
        nulls,
    }
}
/// Deconstruct this array into its constituent parts
pub fn into_parts(self) -> (Fields, Vec<ArrayRef>, Option<NullBuffer>) {
let f = match self.data_type {
DataType::Struct(f) => f,
_ => unreachable!(),
};
(f, self.fields, self.nulls)
}
/// Returns the field at `pos`.
pub fn column(&self, pos: usize) -> &ArrayRef {
&self.fields[pos]
}
/// Return the number of fields in this struct array
pub fn num_columns(&self) -> usize {
self.fields.len()
}
/// Returns the child arrays of this struct array, in field order
pub fn columns(&self) -> &[ArrayRef] {
    self.fields.as_slice()
}
/// Collect the name of every field of this struct array, in field order
pub fn column_names(&self) -> Vec<&str> {
    let fields = match self.data_type() {
        DataType::Struct(fields) => fields,
        _ => unreachable!("Struct array's data type is not struct!"),
    };

    let mut names = Vec::with_capacity(fields.len());
    for field in fields.iter() {
        names.push(field.name().as_str());
    }
    names
}
/// Borrow the [`Fields`] describing the children of this [`StructArray`]
pub fn fields(&self) -> &Fields {
    if let DataType::Struct(fields) = self.data_type() {
        return fields;
    }
    unreachable!()
}
/// Return the child array whose field name equals `column_name`, or `None` if no
/// field has that name
///
/// Note: A schema can currently have duplicate field names, in which case
/// the first field will always be selected.
/// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
    // Search the fields directly rather than materialising a `Vec` of every name
    // on each lookup.
    self.fields()
        .iter()
        .position(|field| field.name().as_str() == column_name)
        .map(|pos| self.column(pos))
}
/// Returns a zero-copy slice of this array with the indicated offset and length.
pub fn slice(&self, offset: usize, len: usize) -> Self {
assert!(
offset.saturating_add(len) <= self.len,
"the length + offset of the sliced StructArray cannot exceed the existing length"
);
let fields = self.fields.iter().map(|a| a.slice(offset, len)).collect();
Self {
len,
data_type: self.data_type.clone(),
nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
fields,
}
}
}
impl From<ArrayData> for StructArray {
    /// Builds a [`StructArray`] from `data`, applying the parent's offset and length to
    /// each child so that the resulting children all line up with the struct's rows.
    fn from(data: ArrayData) -> Self {
        let (offset, len) = (data.offset(), data.len());

        let mut fields = Vec::with_capacity(data.child_data().len());
        for child in data.child_data() {
            // Only slice when the child does not already cover exactly the parent's rows
            let needs_slice = offset != 0 || len != child.len();
            let child = if needs_slice {
                child.slice(offset, len)
            } else {
                child.clone()
            };
            fields.push(make_array(child));
        }

        Self {
            len,
            data_type: data.data_type().clone(),
            nulls: data.nulls().cloned(),
            fields,
        }
    }
}
impl From<StructArray> for ArrayData {
fn from(array: StructArray) -> Self {
let builder = ArrayDataBuilder::new(array.data_type)
.len(array.len)
.nulls(array.nulls)
.child_data(array.fields.iter().map(|x| x.to_data()).collect());
unsafe { builder.build_unchecked() }
}
}
impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
    type Error = ArrowError;

    /// Builds a StructArray from a vector of names and arrays.
    ///
    /// Each field takes its data type and nullability from the corresponding array.
    fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self, ArrowError> {
        let mut fields = Vec::with_capacity(values.len());
        let mut arrays = Vec::with_capacity(values.len());
        for (name, array) in values {
            let field = Field::new(name, array.data_type().clone(), array.is_nullable());
            fields.push(field);
            arrays.push(array);
        }

        StructArray::try_new(fields.into(), arrays, None)
    }
}
impl Array for StructArray {
fn as_any(&self) -> &dyn Any {
self
}
fn to_data(&self) -> ArrayData {
self.clone().into()
}
fn into_data(self) -> ArrayData {
self.into()
}
fn data_type(&self) -> &DataType {
&self.data_type
}
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
Arc::new(self.slice(offset, length))
}
fn len(&self) -> usize {
self.len
}
fn is_empty(&self) -> bool {
self.len == 0
}
fn shrink_to_fit(&mut self) {
if let Some(nulls) = &mut self.nulls {
nulls.shrink_to_fit();
}
self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
}
fn offset(&self) -> usize {
0
}
fn nulls(&self) -> Option<&NullBuffer> {
self.nulls.as_ref()
}
fn logical_null_count(&self) -> usize {
// More efficient that the default implementation
self.null_count()
}
fn get_buffer_memory_size(&self) -> usize {
let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
if let Some(n) = self.nulls.as_ref() {
size += n.buffer().capacity();
}
size
}
fn get_array_memory_size(&self) -> usize {
let mut size = self.fields.iter().map(|a| a.get_array_memory_size()).sum();
size += std::mem::size_of::<Self>();
if let Some(n) = self.nulls.as_ref() {
size += n.buffer().capacity();
}
size
}
}
impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
let (fields, arrays): (Vec<_>, _) = v.into_iter().unzip();
StructArray::new(fields.into(), arrays, None)
}
}
impl std::fmt::Debug for StructArray {
    /// Writes the validity of each row followed by the debug output of every child
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Header and validity section
        f.write_str("StructArray\n-- validity:\n[\n")?;
        print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
        f.write_str("]\n[\n")?;

        // One section per child, labelled with its position, name and data type
        let names = self.column_names();
        for (child_index, name) in names.iter().enumerate() {
            let column = self.column(child_index);
            writeln!(
                f,
                "-- child {}: \"{}\" ({:?})",
                child_index,
                name,
                column.data_type()
            )?;
            std::fmt::Debug::fmt(column, f)?;
            writeln!(f)?;
        }

        f.write_str("]")
    }
}
impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
let (fields, arrays): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
Self::new(fields.into(), arrays, Some(nulls))
}
}
impl From<RecordBatch> for StructArray {
    /// Wraps the columns of a [`RecordBatch`] in a struct array without a validity mask
    fn from(value: RecordBatch) -> Self {
        let len = value.num_rows();
        let schema = value.schema();
        let data_type = DataType::Struct(schema.fields().clone());
        let fields = value.columns().to_vec();
        Self {
            len,
            data_type,
            nulls: None,
            fields,
        }
    }
}
impl Index<&str> for StructArray {
    type Output = ArrayRef;

    /// Get a reference to a column's array by name.
    ///
    /// Note: A schema can currently have duplicate field names, in which case
    /// the first field will always be selected.
    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
    ///
    /// # Panics
    ///
    /// Panics if the name is not in the schema. The panic message names the
    /// missing column.
    fn index(&self, name: &str) -> &Self::Output {
        match self.column_by_name(name) {
            Some(column) => column,
            // Name the offending column instead of surfacing a bare `unwrap` failure
            None => panic!("StructArray has no column named {name:?}"),
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
use arrow_buffer::ToByteSlice;
#[test]
fn test_struct_array_builder() {
    // Children the struct data is assembled from
    let bools = BooleanArray::from(vec![false, false, true, true]);
    let ints = Int64Array::from(vec![42, 28, 19, 31]);

    let data_type = DataType::Struct(Fields::from(vec![
        Field::new("a", DataType::Boolean, false),
        Field::new("b", DataType::Int64, false),
    ]));
    let data = ArrayData::builder(data_type)
        .len(4)
        .add_child_data(bools.to_data())
        .add_child_data(ints.to_data())
        .build()
        .unwrap();

    // Converting from ArrayData must expose the same children
    let array = StructArray::from(data);
    assert_eq!(array.column(0).as_ref(), &bools);
    assert_eq!(array.column(1).as_ref(), &ints);
}
#[test]
fn test_struct_array_from() {
    let bools = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let ints = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

    let bool_column = (
        Arc::new(Field::new("b", DataType::Boolean, false)),
        bools.clone() as ArrayRef,
    );
    let int_column = (
        Arc::new(Field::new("c", DataType::Int32, false)),
        ints.clone() as ArrayRef,
    );
    let array = StructArray::from(vec![bool_column, int_column]);

    // Children are preserved, and the array has four rows, no nulls and no offset
    assert_eq!(array.column(0).as_ref(), bools.as_ref());
    assert_eq!(array.column(1).as_ref(), ints.as_ref());
    assert_eq!(4, array.len());
    assert_eq!(0, array.null_count());
    assert_eq!(0, array.offset());
}
// Checks that converting `ArrayData` with a parent offset and/or a parent length
// shorter than the child yields the same `StructArray` as slicing a full array.
#[test]
fn test_struct_array_from_data_with_offset_and_length() {
// Various ways to make the struct array:
//
// [{x: 2}, {x: 3}, None]
//
// from slicing larger buffers/arrays with offsets and lengths
let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
let int_field = Field::new("x", DataType::Int32, false);
let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
let int_data = int_arr.to_data();
// Case 1: Offset + length, nulls are not sliced
// (the three-entry null buffer is supplied as-is alongside offset 1)
let case1 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
.len(3)
.offset(1)
.nulls(Some(struct_nulls))
.add_child_data(int_data.clone())
.build()
.unwrap();
// Case 2: Offset + length, nulls are sliced
// (a five-entry boolean buffer is sliced to entries 1..4 before being wrapped)
let struct_nulls =
NullBuffer::new(BooleanBuffer::from(vec![true, true, true, false, true]).slice(1, 3));
let case2 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
.len(3)
.offset(1)
.nulls(Some(struct_nulls.clone()))
.add_child_data(int_data.clone())
.build()
.unwrap();
// Case 3: struct length is smaller than child length but no offset
// (the child data itself is pre-sliced to four values starting at index 1)
let offset_int_data = int_data.slice(1, 4);
let case3 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
.len(3)
.nulls(Some(struct_nulls))
.add_child_data(offset_int_data)
.build()
.unwrap();
// Reference value: the full five-row struct array sliced to rows 1..4
let expected = StructArray::new(
Fields::from(vec![int_field.clone()]),
vec![Arc::new(int_arr)],
Some(NullBuffer::new(BooleanBuffer::from(vec![
true, true, true, false, true,
]))),
)
.slice(1, 3);
// Every construction must compare equal to the reference, including its child
for case in [case1, case2, case3] {
let struct_arr_from_data = StructArray::from(case);
assert_eq!(struct_arr_from_data, expected);
assert_eq!(struct_arr_from_data.column(0), expected.column(0));
}
}
#[test]
#[should_panic(expected = "assertion failed: (offset + length) <= self.len()")]
fn test_struct_array_from_data_with_offset_and_length_error() {
    let child = Int32Array::from(vec![1, 2, 3, 4, 5]).to_data();
    let fields = Fields::from(vec![Field::new("x", DataType::Int32, false)]);
    let validity = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));

    // A parent offset of 3 with length 3 needs six child values, but only five exist
    let data = ArrayData::builder(DataType::Struct(fields))
        .len(3)
        .offset(3)
        .nulls(Some(validity))
        .add_child_data(child)
        .build()
        .unwrap();

    let _ = StructArray::from(data);
}
/// Validates that a column can be looked up by name through indexing,
/// i.e. `struct_array["column_name"]`.
#[test]
fn test_struct_array_index_access() {
    let bools = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let ints = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));

    let bool_column = (
        Arc::new(Field::new("b", DataType::Boolean, false)),
        bools.clone() as ArrayRef,
    );
    let int_column = (
        Arc::new(Field::new("c", DataType::Int32, false)),
        ints.clone() as ArrayRef,
    );
    let array = StructArray::from(vec![bool_column, int_column]);

    assert_eq!(array["b"].as_ref(), bools.as_ref());
    assert_eq!(array["c"].as_ref(), ints.as_ref());
}
/// validates that the in-memory representation follows [the spec](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
#[test]
fn test_struct_array_from_vec() {
let strings: ArrayRef = Arc::new(StringArray::from(vec![
Some("joe"),
None,
None,
Some("mark"),
]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
let arr =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap();
let struct_data = arr.into_data();
// The struct itself has four rows and no top-level nulls
assert_eq!(4, struct_data.len());
assert_eq!(0, struct_data.null_count());
// Validity 9 = 0b1001 marks rows 0 and 3 as valid; offsets delimit "joe" and "mark"
let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build()
.unwrap();
// Validity 11 = 0b1011 marks rows 0, 1 and 3 as valid
let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_bit_buffer(Some(Buffer::from(&[11_u8])))
.add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
.build()
.unwrap();
// Children must round-trip unchanged through the struct array
assert_eq!(expected_string_data, struct_data.child_data()[0]);
assert_eq!(expected_int_data, struct_data.child_data()[1]);
}
#[test]
fn test_struct_array_from_vec_error() {
    // "f1" has 3 elements, not 4, so it disagrees with "f2"
    let short: ArrayRef = Arc::new(StringArray::from(vec![Some("joe"), None, None]));
    let long: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));

    let result = StructArray::try_from(vec![("f1", short.clone()), ("f2", long.clone())]);
    let message = result.unwrap_err().to_string();

    assert_eq!(
        message,
        "Invalid argument error: Incorrect array length for StructArray field \"f2\", expected 3 got 4"
    )
}
#[test]
#[should_panic(
    expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
)]
fn test_struct_array_from_mismatched_types_single() {
    // The field declares Int16 but the array holds booleans
    let field = Arc::new(Field::new("b", DataType::Int16, false));
    let column = Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>;
    drop(StructArray::from(vec![(field, column)]));
}
#[test]
#[should_panic(
    expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
)]
fn test_struct_array_from_mismatched_types_multiple() {
    // Both fields disagree with their arrays; the first mismatch is the one reported
    let first = (
        Arc::new(Field::new("b", DataType::Int16, false)),
        Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
    );
    let second = (
        Arc::new(Field::new("c", DataType::Utf8, false)),
        Arc::new(Int32Array::from(vec![42, 28, 19, 31])) as Arc<dyn Array>,
    );
    drop(StructArray::from(vec![first, second]));
}
// Builds a five-row struct array with nullable children from raw `ArrayData`, checks
// its contents, then checks that slicing rows 2..5 slices the validity and children.
#[test]
fn test_struct_array_slice() {
// Boolean child: validity 0b00010001 marks rows 0 and 4 valid; only bit 4 of the values is set
let boolean_data = ArrayData::builder(DataType::Boolean)
.len(5)
.add_buffer(Buffer::from([0b00010000]))
.null_bit_buffer(Some(Buffer::from([0b00010001])))
.build()
.unwrap();
// Int32 child: validity 0b00000110 marks rows 1 and 2 valid
let int_data = ArrayData::builder(DataType::Int32)
.len(5)
.add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
.null_bit_buffer(Some(Buffer::from([0b00000110])))
.build()
.unwrap();
let field_types = vec![
Field::new("a", DataType::Boolean, true),
Field::new("b", DataType::Int32, true),
];
// Struct validity 0b00010111 marks every row except row 3 as valid
let struct_array_data = ArrayData::builder(DataType::Struct(field_types.into()))
.len(5)
.add_child_data(boolean_data.clone())
.add_child_data(int_data.clone())
.null_bit_buffer(Some(Buffer::from([0b00010111])))
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);
// Top-level length and validity
assert_eq!(5, struct_array.len());
assert_eq!(1, struct_array.null_count());
assert!(struct_array.is_valid(0));
assert!(struct_array.is_valid(1));
assert!(struct_array.is_valid(2));
assert!(struct_array.is_null(3));
assert!(struct_array.is_valid(4));
// Children round-trip unchanged
assert_eq!(boolean_data, struct_array.column(0).to_data());
assert_eq!(int_data, struct_array.column(1).to_data());
// Boolean child contents
let c0 = struct_array.column(0);
let c0 = c0.as_any().downcast_ref::<BooleanArray>().unwrap();
assert_eq!(5, c0.len());
assert_eq!(3, c0.null_count());
assert!(c0.is_valid(0));
assert!(!c0.value(0));
assert!(c0.is_null(1));
assert!(c0.is_null(2));
assert!(c0.is_null(3));
assert!(c0.is_valid(4));
assert!(c0.value(4));
// Int32 child contents
let c1 = struct_array.column(1);
let c1 = c1.as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(5, c1.len());
assert_eq!(3, c1.null_count());
assert!(c1.is_null(0));
assert!(c1.is_valid(1));
assert_eq!(28, c1.value(1));
assert!(c1.is_valid(2));
assert_eq!(42, c1.value(2));
assert!(c1.is_null(3));
assert!(c1.is_null(4));
// Slice rows 2..5: original rows 2, 3, 4 become rows 0, 1, 2
let sliced_array = struct_array.slice(2, 3);
let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
assert_eq!(3, sliced_array.len());
assert_eq!(1, sliced_array.null_count());
assert!(sliced_array.is_valid(0));
assert!(sliced_array.is_null(1));
assert!(sliced_array.is_valid(2));
// Sliced boolean child
let sliced_c0 = sliced_array.column(0);
let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
assert_eq!(3, sliced_c0.len());
assert!(sliced_c0.is_null(0));
assert!(sliced_c0.is_null(1));
assert!(sliced_c0.is_valid(2));
assert!(sliced_c0.value(2));
// Sliced Int32 child
let sliced_c1 = sliced_array.column(1);
let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
assert_eq!(3, sliced_c1.len());
assert!(sliced_c1.is_valid(0));
assert_eq!(42, sliced_c1.value(0));
assert!(sliced_c1.is_null(1));
assert!(sliced_c1.is_null(2));
}
#[test]
#[should_panic(
    expected = "Incorrect array length for StructArray field \\\"c\\\", expected 1 got 2"
)]
fn test_invalid_struct_child_array_lengths() {
    // "b" has one value, which fixes the length; "c" has two
    let one_value = (
        Arc::new(Field::new("b", DataType::Float32, false)),
        Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
    );
    let two_values = (
        Arc::new(Field::new("c", DataType::Float64, false)),
        Arc::new(Float64Array::from(vec![2.2, 3.3])) as Arc<dyn Array>,
    );
    drop(StructArray::from(vec![one_value, two_values]));
}
#[test]
#[should_panic(expected = "use StructArray::try_new_with_length")]
fn test_struct_array_from_empty() {
    // With no columns the number of rows is unknown. Previously 0 was inferred, but
    // that often led to bugs, so construction must fail instead.
    let no_columns: Vec<(FieldRef, ArrayRef)> = Vec::new();
    let _ = StructArray::from(no_columns);
}
#[test]
fn test_empty_struct_array() {
    // Without children the length cannot be inferred
    let inferred = StructArray::try_new(Fields::empty(), vec![], None);
    assert!(inferred.is_err());

    // No fields, no validity mask
    let plain = StructArray::new_empty_fields(10, None);
    assert_eq!(plain.len(), 10);
    assert_eq!(plain.null_count(), 0);
    assert_eq!(plain.num_columns(), 0);

    let plain_checked =
        StructArray::try_new_with_length(Fields::empty(), vec![], None, 10).unwrap();
    assert_eq!(plain_checked.len(), 10);

    // No fields, every row null
    let all_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
    assert_eq!(all_null.len(), 10);
    assert_eq!(all_null.null_count(), 10);
    assert_eq!(all_null.num_columns(), 0);

    let all_null_checked = StructArray::try_new_with_length(
        Fields::empty(),
        vec![],
        Some(NullBuffer::new_null(10)),
        10,
    )
    .unwrap();
    assert_eq!(all_null_checked.len(), 10);
}
#[test]
#[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
fn test_struct_array_from_mismatched_nullability() {
    // The field is declared non-nullable but the array contains a null
    let field = Arc::new(Field::new("c", DataType::Int32, false));
    let column = Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef;
    drop(StructArray::from(vec![(field, column)]));
}
// Pins the exact `Debug` output of a 30-row struct array whose even rows are valid and
// odd rows are null; the expected text elides the middle ten entries of each listing.
#[test]
fn test_struct_array_fmt_debug() {
let arr: StructArray = StructArray::new(
vec![Arc::new(Field::new("c", DataType::Int32, true))].into(),
vec![Arc::new(Int32Array::from((0..30).collect::<Vec<_>>())) as ArrayRef],
Some(NullBuffer::new(BooleanBuffer::from(
(0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
))),
);
assert_eq!(format!("{arr:?}"), "StructArray\n-- validity:\n[\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n ...10 elements...,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n ...10 elements...,\n 20,\n 21,\n 22,\n 23,\n 24,\n 25,\n 26,\n 27,\n 28,\n 29,\n]\n]")
}
}