fn test_snapshot_summary_collector_build()

in crates/iceberg/src/spec/snapshot_summary.rs [719:828]


    fn test_snapshot_summary_collector_build() {
        let schema = Arc::new(
            Schema::builder()
                .with_fields(vec![
                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
                    NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
                ])
                .build()
                .unwrap(),
        );

        let partition_spec = Arc::new(
            PartitionSpec::builder(schema.clone())
                .add_unbound_fields(vec![UnboundPartitionField::builder()
                    .source_id(2)
                    .name("year".to_string())
                    .transform(Transform::Identity)
                    .build()])
                .unwrap()
                .with_spec_id(1)
                .build()
                .unwrap(),
        );

        let mut collector = SnapshotSummaryCollector::default();
        collector.set_partition_summary_limit(10);

        let file1 = DataFile {
            content: DataContentType::Data,
            file_path: "s3://testbucket/path/to/file1.parquet".to_string(),
            file_format: DataFileFormat::Parquet,
            partition: Struct::from_iter(vec![]),
            record_count: 10,
            file_size_in_bytes: 100,
            column_sizes: HashMap::from([(1, 46), (2, 48), (3, 48)]),
            value_counts: HashMap::from([(1, 10), (2, 10), (3, 10)]),
            null_value_counts: HashMap::from([(1, 0), (2, 0), (3, 0)]),
            nan_value_counts: HashMap::new(),
            lower_bounds: HashMap::from([
                (1, Datum::long(1)),
                (2, Datum::string("a")),
                (3, Datum::string("x")),
            ]),
            upper_bounds: HashMap::from([
                (1, Datum::long(1)),
                (2, Datum::string("a")),
                (3, Datum::string("x")),
            ]),
            key_metadata: None,
            split_offsets: vec![4],
            equality_ids: vec![],
            sort_order_id: Some(0),
            partition_spec_id: 0,
            first_row_id: None,
            referenced_data_file: None,
            content_offset: None,
            content_size_in_bytes: None,
        };

        let file2 = DataFile {
            content: DataContentType::Data,
            file_path: "s3://testbucket/path/to/file2.parquet".to_string(),
            file_format: DataFileFormat::Parquet,
            partition: Struct::from_iter(vec![Some(Literal::string("2025"))]),
            record_count: 20,
            file_size_in_bytes: 200,
            column_sizes: HashMap::from([(1, 46), (2, 48), (3, 48)]),
            value_counts: HashMap::from([(1, 20), (2, 20), (3, 20)]),
            null_value_counts: HashMap::from([(1, 0), (2, 0), (3, 0)]),
            nan_value_counts: HashMap::new(),
            lower_bounds: HashMap::from([
                (1, Datum::long(1)),
                (2, Datum::string("a")),
                (3, Datum::string("x")),
            ]),
            upper_bounds: HashMap::from([
                (1, Datum::long(1)),
                (2, Datum::string("a")),
                (3, Datum::string("x")),
            ]),
            key_metadata: None,
            split_offsets: vec![4],
            equality_ids: vec![],
            sort_order_id: Some(0),
            partition_spec_id: 0,
            first_row_id: None,
            referenced_data_file: None,
            content_offset: None,
            content_size_in_bytes: None,
        };

        collector.add_file(&file1, schema.clone(), partition_spec.clone());
        collector.add_file(&file2, schema.clone(), partition_spec.clone());

        collector.remove_file(&file1, schema.clone(), partition_spec.clone());

        let props = collector.build();

        assert_eq!(props.get(ADDED_FILE_SIZE).unwrap(), "300");
        assert_eq!(props.get(REMOVED_FILE_SIZE).unwrap(), "100");

        let partition_key = format!("{}{}", CHANGED_PARTITION_PREFIX, "year=\"2025\"");

        assert!(props.contains_key(&partition_key));

        let partition_summary = props.get(&partition_key).unwrap();
        assert!(partition_summary.contains(&format!("{}=200", ADDED_FILE_SIZE)));
        assert!(partition_summary.contains(&format!("{}=1", ADDED_DATA_FILES)));
        assert!(partition_summary.contains(&format!("{}=20", ADDED_RECORDS)));
    }