pyiceberg/table/statistics.py (21 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from typing import Dict, List, Literal, Optional from pydantic import Field from pyiceberg.typedef import IcebergBaseModel class BlobMetadata(IcebergBaseModel): type: Literal["apache-datasketches-theta-v1", "deletion-vector-v1"] snapshot_id: int = Field(alias="snapshot-id") sequence_number: int = Field(alias="sequence-number") fields: List[int] properties: Optional[Dict[str, str]] = None class StatisticsFile(IcebergBaseModel): snapshot_id: int = Field(alias="snapshot-id") statistics_path: str = Field(alias="statistics-path") file_size_in_bytes: int = Field(alias="file-size-in-bytes") file_footer_size_in_bytes: int = Field(alias="file-footer-size-in-bytes") key_metadata: Optional[str] = Field(alias="key-metadata", default=None) blob_metadata: List[BlobMetadata] = Field(alias="blob-metadata") def filter_statistics_by_snapshot_id( statistics: List[StatisticsFile], reject_snapshot_id: int, ) -> List[StatisticsFile]: return [stat for stat in statistics if stat.snapshot_id != reject_snapshot_id]