in pyiceberg/table/inspect.py [0:0]
def _generate_manifests_table(self, snapshot: Optional[Snapshot], is_all_manifests_table: bool = False) -> "pa.Table":
    """Build an Arrow table with one row per manifest referenced by ``snapshot``.

    Args:
        snapshot: Snapshot whose manifests are listed. When it is falsy
            (e.g. ``None``), an empty table with the selected schema is returned.
        is_all_manifests_table: When True, every row additionally carries
            ``reference_snapshot_id`` and the table is built with
            ``self._get_all_manifests_schema()`` instead of
            ``self._get_manifests_schema()``.

    Returns:
        A ``pa.Table`` built from the collected manifest rows.
    """
    import pyarrow as pa

    def _bound_to_human(field: Any, field_type: Any, raw_bound: Optional[bytes]) -> Optional[str]:
        """Render a serialized partition bound via the field's transform, or None if absent."""
        # Compare against None explicitly: a present-but-empty payload (b"")
        # is still a bound and must not be reported as missing.
        if raw_bound is None:
            return None
        return field.transform.to_human_string(field_type, from_bytes(field_type, raw_bound))

    def _partition_summaries_to_rows(
        spec: PartitionSpec, partition_summaries: List[PartitionFieldSummary]
    ) -> List[Dict[str, Any]]:
        """Convert the per-field partition summaries of one manifest into plain dicts."""
        if not partition_summaries:
            return []

        # The partition struct depends only on the spec and the table schema,
        # so resolve it once instead of once per partition field.
        partition_fields = spec.partition_type(self.tbl.schema()).fields

        rows = []
        for i, field_summary in enumerate(partition_summaries):
            field = spec.fields[i]
            partition_field_type = partition_fields[i].field_type
            rows.append(
                {
                    "contains_null": field_summary.contains_null,
                    "contains_nan": field_summary.contains_nan,
                    "lower_bound": _bound_to_human(field, partition_field_type, field_summary.lower_bound),
                    "upper_bound": _bound_to_human(field, partition_field_type, field_summary.upper_bound),
                }
            )
        return rows

    specs = self.tbl.metadata.specs()
    manifests = []
    if snapshot:
        for manifest in snapshot.manifests(self.tbl.io):
            is_data_file = manifest.content == ManifestContent.DATA
            is_delete_file = manifest.content == ManifestContent.DELETES
            # The manifest exposes a single set of added/existing/deleted counters;
            # they are reported under the data or delete columns depending on the
            # manifest content, and the other column family is filled with 0.
            manifest_row = {
                "content": manifest.content,
                "path": manifest.manifest_path,
                "length": manifest.manifest_length,
                "partition_spec_id": manifest.partition_spec_id,
                "added_snapshot_id": manifest.added_snapshot_id,
                "added_data_files_count": manifest.added_files_count if is_data_file else 0,
                "existing_data_files_count": manifest.existing_files_count if is_data_file else 0,
                "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0,
                "added_delete_files_count": manifest.added_files_count if is_delete_file else 0,
                "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0,
                "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0,
                # Only look up the spec when the manifest actually carries summaries.
                "partition_summaries": _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions)
                if manifest.partitions
                else [],
            }
            if is_all_manifests_table:
                manifest_row["reference_snapshot_id"] = snapshot.snapshot_id
            manifests.append(manifest_row)

    return pa.Table.from_pylist(
        manifests,
        schema=self._get_all_manifests_schema() if is_all_manifests_table else self._get_manifests_schema(),
    )