fn update_column_offset_index()

in parquet/src/column/writer/mod.rs [778:865]


    /// Folds the current page's metrics into the column index and offset index builders.
    ///
    /// * `page_statistics` - min/max statistics for the page, if any were collected.
    /// * `page_variable_length_bytes` - forwarded unchanged to the offset index builder
    ///   as the page's unencoded byte-array data size.
    ///
    /// While the column index builder is still valid:
    /// * a page whose null count equals its buffered row count is recorded with empty
    ///   min/max values;
    /// * any other page is recorded with its (optionally truncated) min/max bytes, and the
    ///   ascending/descending page-boundary flags are refreshed;
    /// * if the statistics, or either of their bounds, are missing, the builder is marked
    ///   invalid instead of panicking.
    ///
    /// Level histograms and the offset index entry are appended regardless of the column
    /// index builder's validity.
    fn update_column_offset_index(
        &mut self,
        page_statistics: Option<&ValueStatistics<E::T>>,
        page_variable_length_bytes: Option<i64>,
    ) {
        // A page is a "null page" when every buffered row is null.
        let null_page =
            (self.page_metrics.num_buffered_rows as u64) == self.page_metrics.num_page_nulls;

        if self.column_index_builder.valid() {
            if null_page {
                // A page containing only null values: writers have to set the corresponding
                // entries in min_values and max_values to byte[0].
                self.column_index_builder.append(
                    null_page,
                    vec![],
                    vec![],
                    self.page_metrics.num_page_nulls as i64,
                );
            } else {
                // Gather everything needed from the page statistics up front. A missing
                // statistics object, or one without a min or max, yields `None` here rather
                // than a panic further down.
                let bounds = page_statistics.and_then(|stat| {
                    Some((
                        stat.min_opt()?,
                        stat.max_opt()?,
                        stat.min_bytes_opt()?,
                        stat.max_bytes_opt()?,
                    ))
                });

                match bounds {
                    // If we can't get usable page statistics, ignore the column index for
                    // this column chunk.
                    None => {
                        self.column_index_builder.to_invalid();
                    }
                    Some((new_min, new_max, min_bytes, max_bytes)) => {
                        // Check if min/max are still ascending/descending across pages.
                        if let Some((last_min, last_max)) = &self.last_non_null_data_page_min_max
                        {
                            // If the last min/max are greater than the new min/max then the
                            // boundaries are not ascending anymore.
                            if self.data_page_boundary_ascending
                                && (compare_greater(&self.descr, last_min, new_min)
                                    || compare_greater(&self.descr, last_max, new_max))
                            {
                                self.data_page_boundary_ascending = false;
                            }

                            // If the new min/max are greater than the last min/max then the
                            // boundaries are not descending anymore.
                            if self.data_page_boundary_descending
                                && (compare_greater(&self.descr, new_min, last_min)
                                    || compare_greater(&self.descr, new_max, last_max))
                            {
                                self.data_page_boundary_descending = false;
                            }
                        }
                        self.last_non_null_data_page_min_max =
                            Some((new_min.clone(), new_max.clone()));

                        // Truncate the serialized bounds when this column allows it;
                        // otherwise store them verbatim.
                        let (min_value, max_value) = if self.can_truncate_value() {
                            let truncate_length = self.props.column_index_truncate_length();
                            (
                                self.truncate_min_value(truncate_length, min_bytes).0,
                                self.truncate_max_value(truncate_length, max_bytes).0,
                            )
                        } else {
                            (min_bytes.to_vec(), max_bytes.to_vec())
                        };

                        self.column_index_builder.append(
                            null_page,
                            min_value,
                            max_value,
                            self.page_metrics.num_page_nulls as i64,
                        );
                    }
                }
            }
        }

        // Append page histograms to the `ColumnIndex` histograms
        self.column_index_builder.append_histograms(
            &self.page_metrics.repetition_level_histogram,
            &self.page_metrics.definition_level_histogram,
        );

        // Update the offset index
        if let Some(builder) = self.offset_index_builder.as_mut() {
            builder.append_row_count(self.page_metrics.num_buffered_rows as i64);
            builder.append_unencoded_byte_array_data_bytes(page_variable_length_bytes);
        }
    }