in crates/iceberg/src/arrow/reader.rs [1698:1802]
// Checks that `ArrowReader::build_deletes_row_selection` converts a set of
// positional deletes into the expected `RowSelection`, both when only a subset
// of row groups is selected and when row-group selection is disabled (`None`).
//
// Scenarios covered:
//   * {skip|select} {first|intermediate|last} {one row|multiple rows} in
//     {first|intermediate|last} {skipped|selected} row group
//   * row group selection disabled
fn test_build_deletes_row_selection() {
    let schema_descr = get_test_schema_descr();

    // One default column chunk per column of the test schema.
    let columns: Vec<_> = schema_descr
        .columns()
        .iter()
        .map(|col| ColumnChunkMetaData::builder(col.clone()).build().unwrap())
        .collect();

    // Five row groups; each pair is forwarded as the last two arguments of
    // `build_test_row_group_meta` (presumably row count and ordinal - the
    // expected selections below are consistent with that reading):
    //   rg0: rows    0..=999
    //   rg1: rows 1000..=1499
    //   rg2: rows 1500..=1999
    //   rg3: rows 2000..=2999
    //   rg4: rows 3000..=3499
    let row_groups_metadata: Vec<_> = [(1000, 0), (500, 1), (500, 2), (1000, 3), (500, 4)]
        .iter()
        .map(|&(num_rows, ordinal)| {
            build_test_row_group_meta(schema_descr.clone(), columns.clone(), num_rows, ordinal)
        })
        .collect();

    let selected_row_groups = Some(vec![1, 3]);

    let delete_positions = RoaringTreemap::from_iter(&[
        // rg0 (not selected in the first scenario)
        1, // isolated row
        3, 4, 5, // run of three consecutive rows
        998, 999, // run of two rows at the very end of rg0
        // rg1 (selected)
        1000, // single row at the start of rg1 -> skip(1), select(9)
        1010, 1011, 1012, // run of three rows -> skip(3), select(485)
        1498, 1499, // run of two rows at the end of rg1
        // rg2 (not selected in the first scenario)
        1500, 1501, // run of two rows at the start of rg2
        1600, // isolated row
        1999, // single row at the end of rg2
        // rg3 (selected)
        2000, 2001, // run of two rows at the start of rg3; adjoins rg1's
        // trailing pair once rg2 is dropped -> skip(4), select(98)
        2100, // single row -> skip(1), select(99)
        2200, 2201, 2202, // run of three rows -> skip(3), select(796)
        2999, // single row at the end of rg3 -> skip(1)
        // rg4 (not selected in the first scenario)
        3000, // single row at the start of rg4
    ]);
    let delete_vector = DeleteVector::new(delete_positions);

    // Scenario 1: only row groups 1 and 3 are selected, so deletes that fall
    // in rg0, rg2 and rg4 must not show up in the selection.
    let selection_for_subset = ArrowReader::build_deletes_row_selection(
        &row_groups_metadata,
        &selected_row_groups,
        &delete_vector,
    )
    .unwrap();

    let expected_for_subset = RowSelection::from(vec![
        RowSelector::skip(1),
        RowSelector::select(9),
        RowSelector::skip(3),
        RowSelector::select(485),
        RowSelector::skip(4),
        RowSelector::select(98),
        RowSelector::skip(1),
        RowSelector::select(99),
        RowSelector::skip(3),
        RowSelector::select(796),
        RowSelector::skip(1),
    ]);

    assert_eq!(selection_for_subset, expected_for_subset);

    // Scenario 2: no row group filter, so every delete position contributes
    // and runs that straddle a row group boundary are merged.
    let selection_for_all =
        ArrowReader::build_deletes_row_selection(&row_groups_metadata, &None, &delete_vector)
            .unwrap();

    let expected_for_all = RowSelection::from(vec![
        RowSelector::select(1),
        RowSelector::skip(1),
        RowSelector::select(1),
        RowSelector::skip(3),
        RowSelector::select(992),
        RowSelector::skip(3),
        RowSelector::select(9),
        RowSelector::skip(3),
        RowSelector::select(485),
        RowSelector::skip(4),
        RowSelector::select(98),
        RowSelector::skip(1),
        RowSelector::select(398),
        RowSelector::skip(3),
        RowSelector::select(98),
        RowSelector::skip(1),
        RowSelector::select(99),
        RowSelector::skip(3),
        RowSelector::select(796),
        RowSelector::skip(2),
        RowSelector::select(499),
    ]);

    assert_eq!(selection_for_all, expected_for_all);
}