in datafusion/core/src/datasource/physical_plan/mod.rs [1072:1242]
fn partition_column_projector() {
let file_batch = build_table_i32(
("a", &vec![0, 1, 2]),
("b", &vec![-2, -1, 0]),
("c", &vec![10, 11, 12]),
);
let partition_cols = vec![
(
"year".to_owned(),
wrap_partition_type_in_dict(DataType::Utf8),
),
(
"month".to_owned(),
wrap_partition_type_in_dict(DataType::Utf8),
),
(
"day".to_owned(),
wrap_partition_type_in_dict(DataType::Utf8),
),
];
// create a projected schema
let conf = config_for_projection(
file_batch.schema(),
// keep all cols from file and 2 from partitioning
Some(vec![
0,
1,
2,
file_batch.schema().fields().len(),
file_batch.schema().fields().len() + 2,
]),
Statistics::default(),
partition_cols.clone(),
);
let (proj_schema, ..) = conf.project();
// created a projector for that projected schema
let mut proj = PartitionColumnProjector::new(
proj_schema,
&partition_cols
.iter()
.map(|x| x.0.clone())
.collect::<Vec<_>>(),
);
// project first batch
let projected_batch = proj
.project(
// file_batch is ok here because we kept all the file cols in the projection
file_batch,
&[
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"2021".to_owned(),
))),
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"10".to_owned(),
))),
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"26".to_owned(),
))),
],
)
.expect("Projection of partition columns into record batch failed");
let expected = vec![
"+---+----+----+------+-----+",
"| a | b | c | year | day |",
"+---+----+----+------+-----+",
"| 0 | -2 | 10 | 2021 | 26 |",
"| 1 | -1 | 11 | 2021 | 26 |",
"| 2 | 0 | 12 | 2021 | 26 |",
"+---+----+----+------+-----+",
];
crate::assert_batches_eq!(expected, &[projected_batch]);
// project another batch that is larger than the previous one
let file_batch = build_table_i32(
("a", &vec![5, 6, 7, 8, 9]),
("b", &vec![-10, -9, -8, -7, -6]),
("c", &vec![12, 13, 14, 15, 16]),
);
let projected_batch = proj
.project(
// file_batch is ok here because we kept all the file cols in the projection
file_batch,
&[
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"2021".to_owned(),
))),
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"10".to_owned(),
))),
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"27".to_owned(),
))),
],
)
.expect("Projection of partition columns into record batch failed");
let expected = vec![
"+---+-----+----+------+-----+",
"| a | b | c | year | day |",
"+---+-----+----+------+-----+",
"| 5 | -10 | 12 | 2021 | 27 |",
"| 6 | -9 | 13 | 2021 | 27 |",
"| 7 | -8 | 14 | 2021 | 27 |",
"| 8 | -7 | 15 | 2021 | 27 |",
"| 9 | -6 | 16 | 2021 | 27 |",
"+---+-----+----+------+-----+",
];
crate::assert_batches_eq!(expected, &[projected_batch]);
// project another batch that is smaller than the previous one
let file_batch = build_table_i32(
("a", &vec![0, 1, 3]),
("b", &vec![2, 3, 4]),
("c", &vec![4, 5, 6]),
);
let projected_batch = proj
.project(
// file_batch is ok here because we kept all the file cols in the projection
file_batch,
&[
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"2021".to_owned(),
))),
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"10".to_owned(),
))),
wrap_partition_value_in_dict(ScalarValue::Utf8(Some(
"28".to_owned(),
))),
],
)
.expect("Projection of partition columns into record batch failed");
let expected = vec![
"+---+---+---+------+-----+",
"| a | b | c | year | day |",
"+---+---+---+------+-----+",
"| 0 | 2 | 4 | 2021 | 28 |",
"| 1 | 3 | 5 | 2021 | 28 |",
"| 3 | 4 | 6 | 2021 | 28 |",
"+---+---+---+------+-----+",
];
crate::assert_batches_eq!(expected, &[projected_batch]);
// forgot to dictionary-wrap the scalar value
let file_batch = build_table_i32(
("a", &vec![0, 1, 2]),
("b", &vec![-2, -1, 0]),
("c", &vec![10, 11, 12]),
);
let projected_batch = proj
.project(
// file_batch is ok here because we kept all the file cols in the projection
file_batch,
&[
ScalarValue::Utf8(Some("2021".to_owned())),
ScalarValue::Utf8(Some("10".to_owned())),
ScalarValue::Utf8(Some("26".to_owned())),
],
)
.expect("Projection of partition columns into record batch failed");
let expected = vec![
"+---+----+----+------+-----+",
"| a | b | c | year | day |",
"+---+----+----+------+-----+",
"| 0 | -2 | 10 | 2021 | 26 |",
"| 1 | -1 | 11 | 2021 | 26 |",
"| 2 | 0 | 12 | 2021 | 26 |",
"+---+----+----+------+-----+",
];
crate::assert_batches_eq!(expected, &[projected_batch]);
}