in parquet/src/encodings/encoding/mod.rs [347:411]
/// Writes the currently buffered deltas out as one block and resets the
/// buffered-value counter.
///
/// The emitted layout is, in order: the block's minimum delta (zigzag VLQ),
/// one bit-width entry per mini block, and then each mini block's deltas
/// stored relative to the minimum delta using that mini block's bit width.
///
/// Does nothing when no values are buffered.
///
/// # Panics
///
/// Panics if, after every mini block has been processed, `values_in_block`
/// has not reached zero.
fn flush_block_values(&mut self) -> Result<()> {
    if self.values_in_block == 0 {
        return Ok(());
    }

    // The smallest delta in the block; every stored value is relative to it.
    let min_delta = self.deltas[..self.values_in_block]
        .iter()
        .copied()
        .fold(i64::MAX, cmp::min);
    self.bit_writer.put_zigzag_vlq_int(min_delta);

    // Reserve one entry per mini block for its bit width. The entries are
    // filled in below, once each width is known.
    let widths_offset = self.bit_writer.skip(self.num_mini_blocks);

    for mini_block in 0..self.num_mini_blocks {
        // Either a full mini block or whatever is left in the block.
        let count = cmp::min(self.mini_block_size, self.values_in_block);

        if count == 0 {
            // No values remain: fill the rest of the bit-width entries with
            // padding. Decoders should not care about the padding value, so
            // tests use 0xFF to verify that; outside of tests 0 is written
            // because not every implementation may handle other values.
            let pad_value = if cfg!(test) { 0xFF } else { 0 };
            for slot in mini_block..self.num_mini_blocks {
                self.bit_writer.write_at(widths_offset + slot, pad_value);
            }
            break;
        }

        let start = mini_block * self.mini_block_size;
        let end = start + count;

        // The largest delta in this mini block determines its bit width.
        let max_delta = self.deltas[start..end]
            .iter()
            .copied()
            .fold(i64::MIN, cmp::max);
        let bit_width =
            num_required_bits(self.subtract_u64(max_delta, min_delta)) as usize;
        self.bit_writer
            .write_at(widths_offset + mini_block, bit_width as u8);

        // Pack each delta as its offset from the block minimum.
        for idx in start..end {
            let packed_value = self.subtract_u64(self.deltas[idx], min_delta);
            self.bit_writer.put_value(packed_value, bit_width);
        }

        // A short final mini block is topped up with zeros to full size.
        for _ in count..self.mini_block_size {
            self.bit_writer.put_value(0, bit_width);
        }

        self.values_in_block -= count;
    }

    assert_eq!(
        self.values_in_block, 0,
        "Expected 0 values in block, found {}",
        self.values_in_block
    );
    Ok(())
}