in de/fileutils.py [0:0]
def get_page_chunk_sizes(paths):
    """Yield the size of every data page in the given Parquet files.

    Each file is inspected by running the external ``parquet-layout``
    command on it and parsing the command's standard output as JSON.
    This is a generator: the command is only run for a file once iteration
    reaches it.

    Args:
        paths: Iterable of file paths, each handed to ``parquet-layout`` as
            its single argument. A lone path (``str``, ``bytes`` or an
            ``os.PathLike`` object) is also accepted and treated as a
            one-element iterable.

    Yields:
        A ``(uncompressed_bytes, num_values)`` tuple for each page whose
        ``page_type`` starts with ``"data"``, in the order the pages appear
        in the command output (file, row group, column, page).

    Raises:
        subprocess.CalledProcessError: If ``parquet-layout`` exits with a
            non-zero status.
        json.JSONDecodeError: If the command output is not valid JSON.
        KeyError: If the parsed output lacks one of the expected keys.
    """
    # A bare path is itself iterable (a str yields single characters), which
    # would launch the tool once per character; wrap it instead.
    if isinstance(paths, (str, bytes)) or hasattr(paths, "__fspath__"):
        paths = [paths]

    for path in paths:
        # JSON is UTF-8 by specification, so decode explicitly rather than
        # relying on the locale's preferred encoding.
        output = subprocess.check_output(
            ["parquet-layout", path], encoding="utf-8"
        )
        layout = json.loads(output)
        for row_group in layout["row_groups"]:
            for column in row_group["columns"]:
                for page in column["pages"]:
                    # Only pages whose type begins with "data" are reported;
                    # every other page type (presumably dictionary pages --
                    # confirm against the tool's output) is skipped.
                    if page["page_type"].startswith("data"):
                        yield page["uncompressed_bytes"], page["num_values"]