in parquet/src/arrow/arrow_writer/levels.rs [239:324]
/// Writes the levels for the list rows of `range` into every leaf of the child builder.
///
/// `offsets` are the list offsets: row `i` covers child values
/// `offsets[i]..offsets[i + 1]`. `nulls`, when present, is indexed by the same row
/// numbers as `range`. Each row is handled in one of three ways:
///
/// * null row: every leaf receives repetition level `ctx.rep_level - 1` and
///   definition level `ctx.def_level - 2`;
/// * valid but empty row: every leaf receives repetition level `ctx.rep_level - 1`
///   and definition level `ctx.def_level - 1`;
/// * valid, non-empty row: the child builder writes the row's value range, after
///   which the repetition level of the row's first element is lowered to
///   `ctx.rep_level - 1` in every leaf.
///
/// # Panics
///
/// Panics if `self` is not the `List` variant, if `range` does not fit within
/// `offsets`, or if a leaf has no repetition/definition level buffer.
fn write_list<O: OffsetSizeTrait>(
    &mut self,
    offsets: &[O],
    nulls: Option<&NullBuffer>,
    values: &dyn Array,
    range: Range<usize>,
) {
    let (child, ctx) = match self {
        Self::List(child, ctx) => (child, ctx),
        _ => unreachable!(),
    };

    // One more offset than rows: consecutive pairs delimit each row's values.
    let offsets = &offsets[range.start..range.end + 1];

    // Null list row: a single level pair per leaf.
    let push_null = |child: &mut LevelInfoBuilder| {
        child.visit_leaves(|leaf| {
            leaf.rep_levels.as_mut().unwrap().push(ctx.rep_level - 1);
            leaf.def_levels.as_mut().unwrap().push(ctx.def_level - 2);
        })
    };

    // Valid list row without elements: a single level pair per leaf.
    let push_empty = |child: &mut LevelInfoBuilder| {
        child.visit_leaves(|leaf| {
            leaf.rep_levels.as_mut().unwrap().push(ctx.rep_level - 1);
            leaf.def_levels.as_mut().unwrap().push(ctx.def_level - 1);
        })
    };

    // Valid list row with elements: delegate to the child, then patch the
    // repetition level of the row's first element.
    let push_populated = |child: &mut LevelInfoBuilder, first: usize, last: usize| {
        child.write(values, first..last);
        child.visit_leaves(|leaf| {
            let levels = leaf.rep_levels.as_mut().unwrap();
            let element_count = last - first;
            // Scan backwards from the end of the leaf's repetition levels.
            // Levels above `ctx.rep_level` belong to nested elements and are
            // not counted; the `element_count`-th remaining level is the one
            // for this row's first element.
            let head = levels
                .iter_mut()
                .rev()
                .filter(|level| **level <= ctx.rep_level)
                .nth(element_count - 1)
                .unwrap();
            *head = ctx.rep_level - 1;
        })
    };

    // Shared by both arms below: a valid row is either empty or populated.
    let push_valid = |child: &mut LevelInfoBuilder, first: usize, last: usize| {
        if first == last {
            push_empty(child)
        } else {
            push_populated(child, first, last)
        }
    };

    match nulls {
        None => {
            for bounds in offsets.windows(2) {
                push_valid(child, bounds[0].as_usize(), bounds[1].as_usize())
            }
        }
        Some(validity) => {
            // TODO: Faster bitmask iteration (#1757)
            // `range` yields the absolute row numbers that index the null buffer.
            for (row, bounds) in range.zip(offsets.windows(2)) {
                let row_is_valid = validity.is_valid(row);
                let first = bounds[0].as_usize();
                let last = bounds[1].as_usize();
                if row_is_valid {
                    push_valid(child, first, last)
                } else {
                    push_null(child)
                }
            }
        }
    }
}