in crates/iceberg/src/spec/view_metadata_builder.rs [1132:1219]
// Checks that applying the same view versions and schemas more than once does
// not create duplicate entries in the built metadata.
//
// Three schemas (ids 5, 7, 9) and three versions (all declared with version
// id 1) are applied in order and then again in reverse order. The assertions
// expect the versions to come back as ids 2..=4 with schema ids 2..=4, four
// versions in total, and the first version to be the current one again.
// NOTE(review): the total of four and the skipped schema id 1 presumably come
// from what `builder_without_changes()` already contains — confirm against
// that helper.
fn test_view_version_and_schema_deduplication() {
    // Every schema used here has a single required `long` column with field
    // id 1; only the schema id and the column name differ.
    let single_long_column = |schema_id, column: &str| {
        Schema::builder()
            .with_schema_id(schema_id)
            .with_fields(vec![NestedField::required(
                1,
                column,
                Type::Primitive(PrimitiveType::Long),
            )
            .into()])
            .build()
            .unwrap()
    };
    let schema_x = single_long_column(5, "x");
    let schema_y = single_long_column(7, "y");
    let schema_z = single_long_column(9, "z");

    let select_all = new_view_version(1, 5, "select * from ns.tbl");
    let count_rows = new_view_version(1, 7, "select count(*) from ns.tbl");
    let count_aliased = new_view_version(1, 9, "select count(*) as count from ns.tbl");

    // The versions as the assertions below expect them to be stored after the
    // build: version ids and schema ids renumbered to 2, 3 and 4.
    let expected_select_all = select_all.clone().with_version_id(2).with_schema_id(2);
    let expected_count_rows = count_rows.clone().with_version_id(3).with_schema_id(3);
    let expected_count_aliased = count_aliased.clone().with_version_id(4).with_schema_id(4);

    let mut builder = builder_without_changes()
        .add_schema(schema_x.clone())
        .add_schema(schema_y.clone())
        .add_schema(schema_z.clone());

    // Walk forward through the versions, then back again, so that each
    // (version, schema) pair is applied exactly twice.
    for (version, schema) in [
        (&select_all, &schema_x),
        (&count_rows, &schema_y),
        (&count_aliased, &schema_z),
        (&count_aliased, &schema_z),
        (&count_rows, &schema_y),
        (&select_all, &schema_x),
    ] {
        builder = builder
            .set_current_version(version.clone(), schema.clone())
            .unwrap();
    }
    let build_result = builder.build().unwrap();

    // The version applied last must be the current one.
    let current = Arc::unwrap_or_clone(build_result.metadata.current_version().clone());
    assert_eq!(current, expected_select_all);

    assert_eq!(build_result.metadata.versions.len(), 4);
    for (version_id, expected) in [
        (2, expected_select_all),
        (3, expected_count_rows),
        (4, expected_count_aliased),
    ] {
        assert_eq!(*build_result.metadata.versions[&version_id], expected);
    }

    // Ignore schema id 1 and compare only the struct part of the remaining
    // schemas, ordered by schema id.
    let added_structs = build_result
        .metadata
        .schemas_iter()
        .filter(|s| s.schema_id() != 1)
        .sorted_by_key(|s| s.schema_id())
        .map(|s| s.as_struct())
        .collect::<Vec<_>>();
    assert_eq!(
        added_structs,
        vec![
            schema_x.as_struct(),
            schema_y.as_struct(),
            schema_z.as_struct()
        ]
    );
}