def get_page_chunk

def get_page_chunk_sizes()

in de/fileutils.py [0:0]

9 lines of code
6 McCabe index (conditional complexity)


def get_page_chunk_sizes(paths):
    """Lazily yield size information for the data pages of each given file.

    For every entry in *paths*, the external ``parquet-layout`` command is
    run with that entry as its only argument and its standard output is
    parsed as JSON. The parsed document is walked through
    ``row_groups`` -> ``columns`` -> ``pages``, and one tuple is produced for
    each page whose ``page_type`` string starts with ``"data"``.

    Args:
        paths: Iterable of file paths handed to ``parquet-layout`` one at a
            time (presumably Parquet files -- confirm against callers).

    Yields:
        ``(uncompressed_bytes, num_values)`` taken from each matching page
        entry, in the order the pages appear in the command output.

    Raises:
        subprocess.CalledProcessError: If ``parquet-layout`` exits with a
            non-zero status.
        json.JSONDecodeError: If the command output is not valid JSON.
        KeyError: If an expected key is missing from the parsed layout.
    """
    for parquet_path in paths:
        # The command runs only when the consumer reaches this path, so a
        # partially consumed generator never launches the later processes.
        raw_layout = subprocess.check_output(
            ["parquet-layout", parquet_path],
            text=True,
        )
        layout = json.loads(raw_layout)
        yield from (
            (entry["uncompressed_bytes"], entry["num_values"])
            for group in layout["row_groups"]
            for chunk in group["columns"]
            for entry in chunk["pages"]
            if entry["page_type"].startswith("data")
        )