in src/parquet/metadata.cc [818:876]
std::unique_ptr<FileMetaData> Finish() {
int64_t total_rows = 0;
std::vector<format::RowGroup> row_groups;
for (auto row_group = row_groups_.begin(); row_group != row_groups_.end();
row_group++) {
auto rowgroup = *((*row_group).get());
row_groups.push_back(rowgroup);
total_rows += rowgroup.num_rows;
}
metadata_->__set_num_rows(total_rows);
metadata_->__set_row_groups(row_groups);
if (key_value_metadata_) {
metadata_->key_value_metadata.clear();
metadata_->key_value_metadata.reserve(key_value_metadata_->size());
for (int64_t i = 0; i < key_value_metadata_->size(); ++i) {
format::KeyValue kv_pair;
kv_pair.__set_key(key_value_metadata_->key(i));
kv_pair.__set_value(key_value_metadata_->value(i));
metadata_->key_value_metadata.push_back(kv_pair);
}
metadata_->__isset.key_value_metadata = true;
}
int32_t file_version = 0;
switch (properties_->version()) {
case ParquetVersion::PARQUET_1_0:
file_version = 1;
break;
case ParquetVersion::PARQUET_2_0:
file_version = 2;
break;
default:
break;
}
metadata_->__set_version(file_version);
metadata_->__set_created_by(properties_->created_by());
// Users cannot set the `ColumnOrder` since we donot not have user defined sort order
// in the spec yet.
// We always default to `TYPE_DEFINED_ORDER`. We can expose it in
// the API once we have user defined sort orders in the Parquet format.
// TypeDefinedOrder implies choose SortOrder based on LogicalType/PhysicalType
format::TypeDefinedOrder type_defined_order;
format::ColumnOrder column_order;
column_order.__set_TYPE_ORDER(type_defined_order);
column_order.__isset.TYPE_ORDER = true;
metadata_->column_orders.resize(schema_->num_columns(), column_order);
metadata_->__isset.column_orders = true;
parquet::schema::SchemaFlattener flattener(
static_cast<parquet::schema::GroupNode*>(schema_->schema_root().get()),
&metadata_->schema);
flattener.Flatten();
auto file_meta_data = std::unique_ptr<FileMetaData>(new FileMetaData());
file_meta_data->impl_->metadata_ = std::move(metadata_);
file_meta_data->impl_->InitSchema();
return file_meta_data;
}