def _get_parquet_writer_kwargs()

in pyiceberg/io/pyarrow.py

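Translates Iceberg table properties into the keyword arguments accepted by
pyarrow.parquet.ParquetWriter, warning about recognized properties that are
not yet implemented for the PyArrow write path.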

import fnmatch
import warnings
from typing import Any, Dict

# Import paths assume pyiceberg's current module layout.
from pyiceberg.typedef import Properties
from pyiceberg.utils.properties import property_as_int

# Defined earlier in the same module: Iceberg and PyArrow spell the
# "no compression" codec differently.
ICEBERG_UNCOMPRESSED_CODEC = "uncompressed"
PYARROW_UNCOMPRESSED_CODEC = "none"


def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
    # Imported locally to avoid a circular import with pyiceberg.table.
    from pyiceberg.table import TableProperties

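    # These table properties are recognized but not wired through to the
    # PyArrow writer yet, so emit a warning that they will be ignored.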
    for key_pattern in [
        TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
        TableProperties.PARQUET_BLOOM_FILTER_MAX_BYTES,
        f"{TableProperties.PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX}.*",
    ]:
        if unsupported_keys := fnmatch.filter(table_properties, key_pattern):
            warnings.warn(f"Parquet writer option(s) {unsupported_keys} not implemented")

    compression_codec = table_properties.get(TableProperties.PARQUET_COMPRESSION, TableProperties.PARQUET_COMPRESSION_DEFAULT)
    compression_level = property_as_int(
        properties=table_properties,
        property_name=TableProperties.PARQUET_COMPRESSION_LEVEL,
        default=TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT,
    )
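    # Iceberg names the "no compression" codec "uncompressed", while PyArrow
    # expects "none"; translate between the two.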
    if compression_codec == ICEBERG_UNCOMPRESSED_CODEC:
        compression_codec = PYARROW_UNCOMPRESSED_CODEC

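    # Map the remaining Iceberg table properties onto keyword arguments of
    # pyarrow.parquet.ParquetWriter.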
    return {
        "compression": compression_codec,
        "compression_level": compression_level,
        "data_page_size": property_as_int(
            properties=table_properties,
            property_name=TableProperties.PARQUET_PAGE_SIZE_BYTES,
            default=TableProperties.PARQUET_PAGE_SIZE_BYTES_DEFAULT,
        ),
        "dictionary_pagesize_limit": property_as_int(
            properties=table_properties,
            property_name=TableProperties.PARQUET_DICT_SIZE_BYTES,
            default=TableProperties.PARQUET_DICT_SIZE_BYTES_DEFAULT,
        ),
        "write_batch_size": property_as_int(
            properties=table_properties,
            property_name=TableProperties.PARQUET_PAGE_ROW_LIMIT,
            default=TableProperties.PARQUET_PAGE_ROW_LIMIT_DEFAULT,
        ),
    }
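
A minimal usage sketch (not from the source): the returned dict can be passed
directly to pyarrow.parquet.ParquetWriter. The table-property keys below follow
the Iceberg spec ("write.parquet.*"); the values, file name, and schema are
made up for illustration.

import pyarrow as pa
import pyarrow.parquet as pq

# Hypothetical properties as stored (stringly typed) in Iceberg table metadata.
table_properties = {
    "write.parquet.compression-codec": "zstd",
    "write.parquet.compression-level": "3",
}

writer_kwargs = _get_parquet_writer_kwargs(table_properties)
# -> {"compression": "zstd", "compression_level": 3, "data_page_size": ..., ...}

schema = pa.schema([pa.field("id", pa.int64())])
with pq.ParquetWriter("example.parquet", schema, **writer_kwargs) as writer:
    writer.write_table(pa.table({"id": [1, 2, 3]}))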