in parquet/src/arrow/array_reader/struct_array.rs [103:178]
/// Assembles the arrays consumed from every child reader into one `StructArray`.
///
/// Returns an empty `StructArray` when this reader has no children. When
/// `self.nullable` is set, a validity bitmap is derived from the first child's
/// definition levels (and its repetition levels, if it exposes any): a slot is
/// valid when its definition level is at least `self.struct_def_level`, and
/// level entries whose repetition level exceeds `self.struct_rep_level` are
/// skipped.
///
/// # Errors
///
/// Returns an error if
/// * any child fails to consume its batch,
/// * the child arrays differ in length,
/// * the struct is nullable but the first child exposes no definition levels,
/// * the first child's repetition and definition level counts differ, or
/// * the decoded validity bitmap does not have one entry per struct slot.
fn consume_batch(&mut self) -> Result<ArrayRef> {
    if self.children.is_empty() {
        return Ok(Arc::new(StructArray::from(Vec::new())));
    }

    let children_array = self
        .children
        .iter_mut()
        .map(|reader| reader.consume_batch())
        .collect::<Result<Vec<_>>>()?;

    // Every child must produce the same number of slots as the struct itself.
    let children_array_len =
        children_array.first().map(|arr| arr.len()).ok_or_else(|| {
            general_err!("Struct array reader should have at least one child!")
        })?;

    let all_children_len_eq = children_array
        .iter()
        .all(|arr| arr.len() == children_array_len);
    if !all_children_len_eq {
        return Err(general_err!("Not all children array length are the same!"));
    }

    // Now we can build array data
    let mut array_data_builder = ArrayDataBuilder::new(self.data_type.clone())
        .len(children_array_len)
        .child_data(
            children_array
                .iter()
                .map(|x| x.to_data())
                .collect::<Vec<ArrayData>>(),
        );

    if self.nullable {
        // Children should have a consistent view of the parent, so only the
        // first child needs to be inspected.
        let first_child = &self.children[0];

        // Report a missing level buffer as an error rather than panicking:
        // this function already returns `Result`.
        let def_levels = first_child.get_def_levels().ok_or_else(|| {
            general_err!("child with nullable parents must have definition level")
        })?;

        // Calculate the validity bitmap for the current array.
        let mut bitmap_builder = BooleanBufferBuilder::new(children_array_len);

        match first_child.get_rep_levels() {
            Some(rep_levels) => {
                // Sanity check: `zip` below would silently truncate on a mismatch.
                if rep_levels.len() != def_levels.len() {
                    return Err(general_err!(
                        "Repetition and definition levels of struct child have different lengths"
                    ));
                }

                for (rep_level, def_level) in rep_levels.iter().zip(def_levels) {
                    if *rep_level > self.struct_rep_level {
                        // Already handled by inner list - SKIP
                        continue;
                    }
                    bitmap_builder.append(*def_level >= self.struct_def_level)
                }
            }
            None => {
                for def_level in def_levels {
                    bitmap_builder.append(*def_level >= self.struct_def_level)
                }
            }
        }

        if bitmap_builder.len() != children_array_len {
            return Err(general_err!("Failed to decode level data for struct array"));
        }

        array_data_builder =
            array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
    }

    // SAFETY: all child arrays were checked above to have `children_array_len`
    // slots, and the validity bitmap (if any) was checked to have exactly that
    // many bits. NOTE(review): this also relies on `self.data_type` describing
    // these children, which is not verified in this function - confirm it is
    // guaranteed where the reader is constructed.
    let array_data = unsafe { array_data_builder.build_unchecked() };
    Ok(Arc::new(StructArray::from(array_data)))
}