in parquet/src/encodings/decoding.rs [643:706]
/// Resets the decoder to read from `data`, parsing and validating the header.
///
/// Four header fields are read from the start of `data`, in this order:
/// `block_size`, `mini_blocks_per_block` and `values_left` (each via
/// `get_vlq_int`), followed by the first value (via `get_zigzag_vlq_int`).
/// The `_index` argument is not used.
///
/// # Errors
///
/// Returns an error when:
/// * `data` ends before any of the four header fields could be read,
/// * a header field does not fit the type of the field it is stored in,
/// * `block_size` is not a multiple of 128,
/// * `mini_blocks_per_block` is zero,
/// * `block_size` is not a multiple of `mini_blocks_per_block`, or
/// * the resulting `values_per_mini_block` is not a multiple of 32.
///
/// Note that fields assigned before a failing check keep their new values.
fn set_data(&mut self, data: Bytes, _index: usize) -> Result<()> {
    self.bit_reader = BitReader::new(data);
    self.initialized = true;

    // Read header information
    self.block_size = self
        .bit_reader
        .get_vlq_int()
        .ok_or_else(|| eof_err!("Not enough data to decode 'block_size'"))?
        .try_into()
        .map_err(|_| general_err!("invalid 'block_size'"))?;
    self.mini_blocks_per_block = self
        .bit_reader
        .get_vlq_int()
        .ok_or_else(|| eof_err!("Not enough data to decode 'mini_blocks_per_block'"))?
        .try_into()
        .map_err(|_| general_err!("invalid 'mini_blocks_per_block'"))?;
    self.values_left = self
        .bit_reader
        .get_vlq_int()
        .ok_or_else(|| eof_err!("Not enough data to decode 'values_left'"))?
        .try_into()
        .map_err(|_| general_err!("invalid 'values_left'"))?;
    let first_value = self
        .bit_reader
        .get_zigzag_vlq_int()
        .ok_or_else(|| eof_err!("Not enough data to decode 'first_value'"))?;
    self.first_value =
        Some(T::T::from_i64(first_value).ok_or_else(|| general_err!("first value too large"))?);

    if self.block_size % 128 != 0 {
        return Err(general_err!(
            "'block_size' must be a multiple of 128, got {}",
            self.block_size
        ));
    }

    // `mini_blocks_per_block` comes straight from the input and is used as a
    // divisor below; reject zero here so malformed data yields an error
    // rather than a remainder-by-zero panic.
    if self.mini_blocks_per_block == 0 {
        return Err(general_err!("'mini_blocks_per_block' must be non-zero"));
    }

    if self.block_size % self.mini_blocks_per_block != 0 {
        return Err(general_err!(
            "'block_size' must be a multiple of 'mini_blocks_per_block' got {} and {}",
            self.block_size,
            self.mini_blocks_per_block
        ));
    }

    // Reset decoding state
    self.mini_block_idx = 0;
    self.values_per_mini_block = self.block_size / self.mini_blocks_per_block;
    self.mini_block_remaining = 0;
    self.mini_block_bit_widths.clear();

    if self.values_per_mini_block % 32 != 0 {
        return Err(general_err!(
            "'values_per_mini_block' must be a multiple of 32 got {}",
            self.values_per_mini_block
        ));
    }

    Ok(())
}