in arrow-data/src/data.rs [1005:1113]
/// Checks that the child arrays attached to this array are consistent with
/// `self.data_type`.
///
/// Per data type, the following is checked:
///
/// * `List` / `Map` / `LargeList`: a single child is fetched through
///   `get_single_valid_child_data`, and the offsets are checked against the
///   child's length with `validate_offsets` (`i32` or `i64` offsets).
/// * `ListView` / `LargeListView`: as above, but through
///   `validate_offsets_and_sizes`.
/// * `FixedSizeList`: the child must hold at least `self.len * list_size`
///   values, and `list_size` must be non-negative.
/// * `Struct`: one child per field, each at least `self.len` long.
/// * `RunEndEncoded`: exactly two children (run ends and values) of equal
///   length, and the run ends child must not carry a null buffer.
/// * `Union`: one child per field; in sparse mode every child must be at
///   least `self.len + self.offset` long.
/// * `Dictionary`: a single child fetched with the dictionary value type.
/// * Any other type: there must be no child arrays at all.
///
/// # Errors
///
/// Returns `ArrowError::InvalidArgumentError` when one of the checks above
/// fails, and propagates any error returned by the helper methods it calls.
/// An overflow while computing the expected number of `FixedSizeList`
/// values is reported as an error rather than a panic.
fn validate_child_data(&self) -> Result<(), ArrowError> {
    match &self.data_type {
        DataType::List(field) | DataType::Map(field, _) => {
            let values_data = self.get_single_valid_child_data(field.data_type())?;
            self.validate_offsets::<i32>(values_data.len)?;
            Ok(())
        }
        DataType::LargeList(field) => {
            let values_data = self.get_single_valid_child_data(field.data_type())?;
            self.validate_offsets::<i64>(values_data.len)?;
            Ok(())
        }
        DataType::ListView(field) => {
            let values_data = self.get_single_valid_child_data(field.data_type())?;
            self.validate_offsets_and_sizes::<i32>(values_data.len)?;
            Ok(())
        }
        DataType::LargeListView(field) => {
            let values_data = self.get_single_valid_child_data(field.data_type())?;
            self.validate_offsets_and_sizes::<i64>(values_data.len)?;
            Ok(())
        }
        DataType::FixedSizeList(field, list_size) => {
            let values_data = self.get_single_valid_child_data(field.data_type())?;
            // `list_size` is a signed integer in the data type; a negative
            // value cannot be converted to `usize` and is rejected here.
            let list_size: usize = (*list_size).try_into().map_err(|_| {
                ArrowError::InvalidArgumentError(format!(
                    "{} has a negative list_size {}",
                    self.data_type, list_size
                ))
            })?;
            // Both operands come from the data being validated, so an
            // overflow is reported as a validation error instead of panicking.
            let expected_values_len = self.len.checked_mul(list_size).ok_or_else(|| {
                ArrowError::InvalidArgumentError(format!(
                    "Integer overflow computing expected number of values ({} * {}) for {}",
                    self.len, list_size, self.data_type
                ))
            })?;
            if values_data.len < expected_values_len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "Values length {} is less than the length ({}) multiplied by the value size ({}) for {}",
                    values_data.len, self.len, list_size, self.data_type
                )));
            }
            Ok(())
        }
        DataType::Struct(fields) => {
            self.validate_num_child_data(fields.len())?;
            for (i, field) in fields.iter().enumerate() {
                let field_data = self.get_valid_child_data(i, field.data_type())?;
                // Ensure child field has sufficient size
                if field_data.len < self.len {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
                        self.data_type, i, field.name(), field_data.len, self.len
                    )));
                }
            }
            Ok(())
        }
        DataType::RunEndEncoded(run_ends_field, values_field) => {
            self.validate_num_child_data(2)?;
            // Child 0 holds the run ends, child 1 the values.
            let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
            let values_data = self.get_valid_child_data(1, values_field.data_type())?;
            if run_ends_data.len != values_data.len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
                    run_ends_data.len, values_data.len
                )));
            }
            // The mere presence of a null buffer on the run ends is rejected.
            if run_ends_data.nulls.is_some() {
                return Err(ArrowError::InvalidArgumentError(
                    "Found null values in run_ends array. The run_ends array should not have null values.".to_string(),
                ));
            }
            Ok(())
        }
        DataType::Union(fields, mode) => {
            self.validate_num_child_data(fields.len())?;
            for (i, (_, field)) in fields.iter().enumerate() {
                let field_data = self.get_valid_child_data(i, field.data_type())?;
                // Only sparse unions have a length requirement on their
                // children; no length check is made for other modes.
                if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
                        i, field_data.len, self.len + self.offset
                    )));
                }
            }
            Ok(())
        }
        DataType::Dictionary(_key_type, value_type) => {
            // The single child is looked up with the dictionary value type.
            self.get_single_valid_child_data(value_type)?;
            Ok(())
        }
        _ => {
            // other types do not have child data
            if !self.child_data.is_empty() {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "Expected no child arrays for type {} but got {}",
                    self.data_type,
                    self.child_data.len()
                )));
            }
            Ok(())
        }
    }
}