in parquet/src/file/serialized_reader.rs [1361:1542]
/// Opens `alltypes_tiny_pages_plain.parquet` with page-index reading enabled and
/// verifies the page index of columns 0 through 12 of its single row group.
///
/// For every column the test checks:
/// * whether the column index reports itself as sorted,
/// * which `Index` variant was decoded,
/// * the expected entry count of the offset index, and (through
///   `check_native_page_index`, defined elsewhere in this file) the column index
///   against the row-group min/max bytes and the expected boundary order.
///
/// The `//colN->...` comments reproduce a metadata dump of the test file; `N` is the
/// index used to address the column in `column_index[0]` / `row_group_offset_indexes`.
fn test_page_index_reader_all_type() {
    let test_file = get_test_file("alltypes_tiny_pages_plain.parquet");
    // Page indexes are only loaded when explicitly requested.
    let options = ReadOptionsBuilder::new().with_page_index().build();
    let reader = SerializedFileReader::new_with_options(test_file, options).unwrap();

    // Test contents in Parquet metadata
    let metadata = reader.metadata();
    assert_eq!(metadata.num_row_groups(), 1);
    let column_index = metadata.column_index().unwrap();
    let offset_index = metadata.offset_index().unwrap();
    // Only one row group, so both indexes hold exactly one outer entry.
    assert_eq!(column_index.len(), 1);
    assert_eq!(offset_index.len(), 1);
    let row_group_offset_indexes = &offset_index[0];
    let row_group_metadata = metadata.row_group(0);

    //col0->id: INT32 UNCOMPRESSED DO:0 FPO:4 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 7299, num_nulls: 0]
    assert!(!column_index[0][0].is_sorted());
    let boundary_order = column_index[0][0].get_boundary_order();
    // `matches!` only evaluates to a bool; it must be wrapped in `assert!` to
    // actually fail the test. The `Some(..)` pattern also covers the `is_some` check.
    assert!(matches!(boundary_order, Some(BoundaryOrder::UNORDERED)));
    if let Index::INT32(index) = &column_index[0][0] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 0),
            BoundaryOrder::UNORDERED,
        );
        assert_eq!(row_group_offset_indexes[0].len(), 325);
    } else {
        unreachable!("column 0 should have an INT32 page index")
    }

    //col1->bool_col:BOOLEAN UNCOMPRESSED DO:0 FPO:37329 SZ:3022/3022/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: false, max: true, num_nulls: 0]
    assert!(column_index[0][1].is_sorted());
    if let Index::BOOLEAN(index) = &column_index[0][1] {
        assert_eq!(index.indexes.len(), 82);
        assert_eq!(row_group_offset_indexes[1].len(), 82);
    } else {
        unreachable!("column 1 should have a BOOLEAN page index")
    }

    //col2->tinyint_col: INT32 UNCOMPRESSED DO:0 FPO:40351 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
    assert!(column_index[0][2].is_sorted());
    if let Index::INT32(index) = &column_index[0][2] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 2),
            BoundaryOrder::ASCENDING,
        );
        assert_eq!(row_group_offset_indexes[2].len(), 325);
    } else {
        unreachable!("column 2 should have an INT32 page index")
    }

    //col3->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
    assert!(column_index[0][3].is_sorted());
    if let Index::INT32(index) = &column_index[0][3] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 3),
            BoundaryOrder::ASCENDING,
        );
        assert_eq!(row_group_offset_indexes[3].len(), 325);
    } else {
        unreachable!("column 3 should have an INT32 page index")
    }

    // NOTE(review): the dump line below is identical to the one for column 3
    // (same name and FPO), so it is presumably a copy-paste; column 4 is likely
    // `int_col` with its own offset - confirm against the file's schema.
    //col4->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
    assert!(column_index[0][4].is_sorted());
    if let Index::INT32(index) = &column_index[0][4] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 4),
            BoundaryOrder::ASCENDING,
        );
        assert_eq!(row_group_offset_indexes[4].len(), 325);
    } else {
        unreachable!("column 4 should have an INT32 page index")
    }

    //col5->bigint_col: INT64 UNCOMPRESSED DO:0 FPO:152326 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 90, num_nulls: 0]
    assert!(!column_index[0][5].is_sorted());
    if let Index::INT64(index) = &column_index[0][5] {
        check_native_page_index(
            index,
            528,
            get_row_group_min_max_bytes(row_group_metadata, 5),
            BoundaryOrder::UNORDERED,
        );
        assert_eq!(row_group_offset_indexes[5].len(), 528);
    } else {
        unreachable!("column 5 should have an INT64 page index")
    }

    //col6->float_col: FLOAT UNCOMPRESSED DO:0 FPO:223924 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 9.9, num_nulls: 0]
    assert!(column_index[0][6].is_sorted());
    if let Index::FLOAT(index) = &column_index[0][6] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 6),
            BoundaryOrder::ASCENDING,
        );
        assert_eq!(row_group_offset_indexes[6].len(), 325);
    } else {
        unreachable!("column 6 should have a FLOAT page index")
    }

    //col7->double_col: DOUBLE UNCOMPRESSED DO:0 FPO:261249 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 90.89999999999999, num_nulls: 0]
    assert!(!column_index[0][7].is_sorted());
    if let Index::DOUBLE(index) = &column_index[0][7] {
        check_native_page_index(
            index,
            528,
            get_row_group_min_max_bytes(row_group_metadata, 7),
            BoundaryOrder::UNORDERED,
        );
        assert_eq!(row_group_offset_indexes[7].len(), 528);
    } else {
        unreachable!("column 7 should have a DOUBLE page index")
    }

    //col8->date_string_col: BINARY UNCOMPRESSED DO:0 FPO:332847 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 01/01/09, max: 12/31/10, num_nulls: 0]
    assert!(!column_index[0][8].is_sorted());
    if let Index::BYTE_ARRAY(index) = &column_index[0][8] {
        check_native_page_index(
            index,
            974,
            get_row_group_min_max_bytes(row_group_metadata, 8),
            BoundaryOrder::UNORDERED,
        );
        assert_eq!(row_group_offset_indexes[8].len(), 974);
    } else {
        unreachable!("column 8 should have a BYTE_ARRAY page index")
    }

    //col9->string_col: BINARY UNCOMPRESSED DO:0 FPO:444795 SZ:45298/45298/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
    assert!(column_index[0][9].is_sorted());
    if let Index::BYTE_ARRAY(index) = &column_index[0][9] {
        check_native_page_index(
            index,
            352,
            get_row_group_min_max_bytes(row_group_metadata, 9),
            BoundaryOrder::ASCENDING,
        );
        assert_eq!(row_group_offset_indexes[9].len(), 352);
    } else {
        unreachable!("column 9 should have a BYTE_ARRAY page index")
    }

    //col10->timestamp_col: INT96 UNCOMPRESSED DO:0 FPO:490093 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[num_nulls: 0, min/max not defined]
    // Notice: per-page min/max values do not exist for this column, so no column
    // index is decoded for it (`Index::NONE`); the offset index is still present.
    assert!(!column_index[0][10].is_sorted());
    if let Index::NONE = &column_index[0][10] {
        assert_eq!(row_group_offset_indexes[10].len(), 974);
    } else {
        unreachable!("column 10 should have no page index (Index::NONE)")
    }

    //col11->year: INT32 UNCOMPRESSED DO:0 FPO:602041 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 2009, max: 2010, num_nulls: 0]
    assert!(column_index[0][11].is_sorted());
    if let Index::INT32(index) = &column_index[0][11] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 11),
            BoundaryOrder::ASCENDING,
        );
        assert_eq!(row_group_offset_indexes[11].len(), 325);
    } else {
        unreachable!("column 11 should have an INT32 page index")
    }

    //col12->month: INT32 UNCOMPRESSED DO:0 FPO:639366 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 1, max: 12, num_nulls: 0]
    assert!(!column_index[0][12].is_sorted());
    if let Index::INT32(index) = &column_index[0][12] {
        check_native_page_index(
            index,
            325,
            get_row_group_min_max_bytes(row_group_metadata, 12),
            BoundaryOrder::UNORDERED,
        );
        assert_eq!(row_group_offset_indexes[12].len(), 325);
    } else {
        unreachable!("column 12 should have an INT32 page index")
    }
}