in parquet/src/arrow/arrow_reader/mod.rs [2373:2517]
/// Drives `single_column_reader_test` over a fixed matrix of reader scenarios.
///
/// Every entry of the scenario list below is run once per writer version
/// (`PARQUET_1_0`, then `PARQUET_2_0`) and, within each version, once per
/// entry of `encodings`, in slice order.
///
/// * `rand_max`, `converted_type` and `arrow_type` are forwarded unchanged to
///   `single_column_reader_test` (`arrow_type` is cloned for each call).
/// * `converter` turns a slice of optional values into an `ArrayRef`; it is
///   passed on by reference.
/// * `encodings` lists the encodings to exercise for every scenario.
fn run_single_column_reader_tests<T, F, G>(
    rand_max: i32,
    converted_type: ConvertedType,
    arrow_type: Option<ArrowDataType>,
    converter: F,
    encodings: &[Encoding],
) where
    T: DataType,
    G: RandGen<T>,
    F: Fn(&[Option<T::T>]) -> ArrayRef,
{
    // NOTE(review): judging by the original notes, the arguments of
    // `TestOptions::new` are (row group count, row count, record batch size);
    // confirm against the `TestOptions` definition.
    let scenarios = vec![
        // ---- Plain reads ----
        // Batch size 15 does not divide 100, so batches are meant to straddle
        // row group boundaries.
        TestOptions::new(2, 100, 15),
        // Batch size 5 with three row groups: batches only sometimes end on a
        // row group boundary. Exercises buffer refilling edge cases.
        TestOptions::new(3, 25, 5),
        // Batch size 25 divides 100 evenly, so batches are meant to end
        // exactly on row group boundaries. Exercises buffer refilling edge
        // cases.
        TestOptions::new(4, 100, 25),
        // Cap the data page size at 128 so row groups hold several pages.
        TestOptions::new(3, 256, 73).with_max_data_page_size(128),
        // Cap the dictionary page size at 128 to provoke dictionary fallback.
        TestOptions::new(3, 256, 57).with_max_dict_page_size(128),
        // Null percentage of 0.
        TestOptions::new(2, 256, 127).with_null_percent(0),
        // Null percentage of 25.
        TestOptions::new(2, 256, 93).with_null_percent(25),
        // ---- Limit / offset ----
        // Limit of 0.
        TestOptions::new(4, 100, 25).with_limit(0),
        // Limit of 50.
        TestOptions::new(4, 100, 25).with_limit(50),
        // Limit of 10.
        TestOptions::new(4, 100, 25).with_limit(10),
        // Limit of 101 (one more than 100).
        TestOptions::new(4, 100, 25).with_limit(101),
        // Offset 30 with limit 20 (offset + limit = 50).
        TestOptions::new(4, 100, 25).with_offset(30).with_limit(20),
        // Offset 20 with limit 80 (offset + limit = 100).
        TestOptions::new(4, 100, 25).with_offset(20).with_limit(80),
        // Offset 20 with limit 81 (offset + limit = 101).
        TestOptions::new(4, 100, 25).with_offset(20).with_limit(81),
        // ---- Statistics settings ----
        // Chunk-level statistics only, 25% nulls.
        TestOptions::new(2, 256, 91)
            .with_null_percent(25)
            .with_enabled_statistics(EnabledStatistics::Chunk),
        // Statistics disabled, 25% nulls.
        TestOptions::new(2, 256, 91)
            .with_null_percent(25)
            .with_enabled_statistics(EnabledStatistics::None),
        // Statistics disabled, 100% nulls.
        TestOptions::new(2, 128, 91)
            .with_null_percent(100)
            .with_enabled_statistics(EnabledStatistics::None),
        // ---- Row selections (skipping) ----
        // The plain-read scenarios above, repeated with row selections.
        // Batches straddling row group boundaries.
        TestOptions::new(2, 100, 15).with_row_selections(),
        // Batches that only sometimes end on a row group boundary.
        TestOptions::new(3, 25, 5).with_row_selections(),
        // Batches ending exactly on row group boundaries.
        TestOptions::new(4, 100, 25).with_row_selections(),
        // Data page size capped at 128 (several pages per row group).
        TestOptions::new(3, 256, 73)
            .with_max_data_page_size(128)
            .with_row_selections(),
        // Dictionary page size capped at 128 (dictionary fallback).
        TestOptions::new(3, 256, 57)
            .with_max_dict_page_size(128)
            .with_row_selections(),
        // Null percentage of 0.
        TestOptions::new(2, 256, 127)
            .with_null_percent(0)
            .with_row_selections(),
        // Null percentage of 25.
        TestOptions::new(2, 256, 93)
            .with_null_percent(25)
            .with_row_selections(),
        // Null percentage of 25, row selections plus a limit of 10.
        TestOptions::new(2, 256, 93)
            .with_null_percent(25)
            .with_row_selections()
            .with_limit(10),
        // Null percentage of 25, row selections plus offset 20 and limit 10.
        TestOptions::new(2, 256, 93)
            .with_null_percent(25)
            .with_row_selections()
            .with_offset(20)
            .with_limit(10),
        // ---- Row filters ----
        // Row filter only.
        TestOptions::new(4, 100, 25).with_row_filter(),
        // Row selections combined with a row filter.
        TestOptions::new(4, 100, 25)
            .with_row_selections()
            .with_row_filter(),
        // Row filter with 25% nulls and data pages capped at 10.
        TestOptions::new(2, 256, 93)
            .with_null_percent(25)
            .with_max_data_page_size(10)
            .with_row_filter(),
        // Row selections and row filter with 25% nulls and data pages capped
        // at 10.
        TestOptions::new(2, 256, 93)
            .with_null_percent(25)
            .with_max_data_page_size(10)
            .with_row_selections()
            .with_row_filter(),
        // Row selections with statistics disabled and data pages capped at
        // 10 (the original note describes this as the "no offset index" case).
        TestOptions::new(2, 256, 93)
            .with_enabled_statistics(EnabledStatistics::None)
            .with_max_data_page_size(10)
            .with_row_selections(),
    ];

    // Expand each scenario across writer versions and encodings; the nesting
    // order (scenario, then version, then encoding) fixes the run order.
    for base in &scenarios {
        for writer_version in [WriterVersion::PARQUET_1_0, WriterVersion::PARQUET_2_0] {
            for &encoding in encodings {
                let opts = TestOptions {
                    writer_version,
                    encoding,
                    ..base.clone()
                };
                single_column_reader_test::<T, _, G>(
                    opts,
                    rand_max,
                    converted_type,
                    arrow_type.clone(),
                    &converter,
                );
            }
        }
    }
}