in datafusion/common/src/file_options/parquet_writer.rs [92:173]
/// Builds a writer-properties builder from the table-level Parquet options.
///
/// The builder is seeded from the `global` options, the key/value metadata
/// is attached when it is non-empty, and finally every entry of
/// `column_specific_options` is applied as a per-column override. Only the
/// per-column settings that are `Some` are applied.
///
/// # Errors
///
/// - Propagates any error from `global.into_writer_properties_builder()`.
/// - Returns an internal error when `global.skip_arrow_metadata` is `false`
///   but `key_value_metadata` has no `ARROW_SCHEMA_META_KEY` entry.
/// - Propagates parse failures for a column's `encoding`, `compression`, or
///   `statistics_enabled` string.
fn try_from(table_parquet_options: &TableParquetOptions) -> Result<Self> {
    // Table options include kv_metadata and col-specific options
    let TableParquetOptions {
        global,
        column_specific_options,
        key_value_metadata,
    } = table_parquet_options;

    let mut builder = global.into_writer_properties_builder()?;

    // check that the arrow schema is present in the kv_metadata, if configured to do so
    if !global.skip_arrow_metadata
        && !key_value_metadata.contains_key(ARROW_SCHEMA_META_KEY)
    {
        return Err(_internal_datafusion_err!("arrow schema was not added to the kv_metadata, even though it is required by configuration settings"));
    }

    // add kv_meta, if any
    if !key_value_metadata.is_empty() {
        // Iterate the borrowed map and clone entry by entry; there is no need
        // to clone the whole map only to drain the temporary copy.
        builder = builder.set_key_value_metadata(Some(
            key_value_metadata
                .iter()
                .map(|(key, value)| KeyValue {
                    key: key.clone(),
                    value: value.clone(),
                })
                .collect(),
        ));
    }

    // Apply column-specific options:
    for (column, options) in column_specific_options {
        // The column name is split on '.' to form the column path segments.
        let path = ColumnPath::new(column.split('.').map(|s| s.to_owned()).collect());

        if let Some(bloom_filter_enabled) = options.bloom_filter_enabled {
            builder = builder
                .set_column_bloom_filter_enabled(path.clone(), bloom_filter_enabled);
        }

        if let Some(encoding) = &options.encoding {
            let parsed_encoding = parse_encoding_string(encoding)?;
            builder = builder.set_column_encoding(path.clone(), parsed_encoding);
        }

        if let Some(dictionary_enabled) = options.dictionary_enabled {
            builder = builder
                .set_column_dictionary_enabled(path.clone(), dictionary_enabled);
        }

        if let Some(compression) = &options.compression {
            let parsed_compression = parse_compression_string(compression)?;
            builder =
                builder.set_column_compression(path.clone(), parsed_compression);
        }

        if let Some(statistics_enabled) = &options.statistics_enabled {
            let parsed_value = parse_statistics_string(statistics_enabled)?;
            builder =
                builder.set_column_statistics_enabled(path.clone(), parsed_value);
        }

        if let Some(bloom_filter_fpp) = options.bloom_filter_fpp {
            builder =
                builder.set_column_bloom_filter_fpp(path.clone(), bloom_filter_fpp);
        }

        if let Some(bloom_filter_ndv) = options.bloom_filter_ndv {
            builder =
                builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv);
        }

        // max_statistics_size is deprecated, currently it is not being used
        // TODO: remove once deprecated
        //
        // This is the last use of `path` in the iteration, so it is moved
        // rather than cloned. Both `allow` attributes are kept as in the
        // original so the deprecation lint stays silenced on the option read
        // as well as on the setter call.
        #[allow(deprecated)]
        if let Some(max_statistics_size) = options.max_statistics_size {
            builder = {
                #[allow(deprecated)]
                builder.set_column_max_statistics_size(path, max_statistics_size)
            }
        }
    }

    Ok(builder)
}