in parquet/src/arrow/schema/mod.rs [1354:1515]
fn test_field_to_column_desc() {
let message_type = "
message arrow_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 int16 (INTEGER(16,true));
REQUIRED INT32 int32;
REQUIRED INT64 int64;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (STRING);
OPTIONAL GROUP bools (LIST) {
REPEATED GROUP list {
OPTIONAL BOOLEAN element;
}
}
REQUIRED GROUP bools_non_null (LIST) {
REPEATED GROUP list {
REQUIRED BOOLEAN element;
}
}
OPTIONAL INT32 date (DATE);
OPTIONAL INT32 time_milli (TIME(MILLIS,false));
OPTIONAL INT64 time_micro (TIME_MICROS);
OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
REQUIRED INT64 ts_micro (TIMESTAMP(MICROS,false));
REQUIRED INT64 ts_seconds;
REQUIRED INT64 ts_micro_utc (TIMESTAMP(MICROS, true));
REQUIRED INT64 ts_millis_zero_offset (TIMESTAMP(MILLIS, true));
REQUIRED INT64 ts_millis_zero_negative_offset (TIMESTAMP(MILLIS, true));
REQUIRED INT64 ts_micro_non_utc (TIMESTAMP(MICROS, true));
REQUIRED GROUP struct {
REQUIRED BOOLEAN bools;
REQUIRED INT32 uint32 (INTEGER(32,false));
REQUIRED GROUP int32 (LIST) {
REPEATED GROUP list {
OPTIONAL INT32 element;
}
}
}
REQUIRED BINARY dictionary_strings (STRING);
REQUIRED INT32 decimal_int32 (DECIMAL(8,2));
REQUIRED INT64 decimal_int64 (DECIMAL(16,2));
REQUIRED FIXED_LEN_BYTE_ARRAY (13) decimal_fix_length (DECIMAL(30,2));
REQUIRED FIXED_LEN_BYTE_ARRAY (16) decimal128 (DECIMAL(38,2));
REQUIRED FIXED_LEN_BYTE_ARRAY (17) decimal256 (DECIMAL(39,2));
}
";
let parquet_group_type = parse_message_type(message_type).unwrap();
let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
let arrow_fields = vec![
Field::new("boolean", DataType::Boolean, false),
Field::new("int8", DataType::Int8, false),
Field::new("int16", DataType::Int16, false),
Field::new("int32", DataType::Int32, false),
Field::new("int64", DataType::Int64, false),
Field::new("double", DataType::Float64, true),
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
Field::new_list(
"bools",
Field::new("element", DataType::Boolean, true),
true,
),
Field::new_list(
"bools_non_null",
Field::new("element", DataType::Boolean, false),
false,
),
Field::new("date", DataType::Date32, true),
Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micro", DataType::Time64(TimeUnit::Microsecond), true),
Field::new(
"ts_milli",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micro",
DataType::Timestamp(TimeUnit::Microsecond, None),
false,
),
Field::new(
"ts_seconds",
DataType::Timestamp(TimeUnit::Second, Some("UTC".into())),
false,
),
Field::new(
"ts_micro_utc",
DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())),
false,
),
Field::new(
"ts_millis_zero_offset",
DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
false,
),
Field::new(
"ts_millis_zero_negative_offset",
DataType::Timestamp(TimeUnit::Millisecond, Some("-00:00".into())),
false,
),
Field::new(
"ts_micro_non_utc",
DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".into())),
false,
),
Field::new_struct(
"struct",
vec![
Field::new("bools", DataType::Boolean, false),
Field::new("uint32", DataType::UInt32, false),
Field::new_list(
"int32",
Field::new("element", DataType::Int32, true),
false,
),
],
false,
),
Field::new_dictionary(
"dictionary_strings",
DataType::Int32,
DataType::Utf8,
false,
),
Field::new("decimal_int32", DataType::Decimal128(8, 2), false),
Field::new("decimal_int64", DataType::Decimal128(16, 2), false),
Field::new("decimal_fix_length", DataType::Decimal128(30, 2), false),
Field::new("decimal128", DataType::Decimal128(38, 2), false),
Field::new("decimal256", DataType::Decimal256(39, 2), false),
];
let arrow_schema = Schema::new(arrow_fields);
let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema).unwrap();
assert_eq!(
parquet_schema.columns().len(),
converted_arrow_schema.columns().len()
);
parquet_schema
.columns()
.iter()
.zip(converted_arrow_schema.columns())
.for_each(|(a, b)| {
// Only check logical type if it's set on the Parquet side.
// This is because the Arrow conversion always sets logical type,
// even if there wasn't originally one.
// This is not an issue, but is an inconvenience for this test.
match a.logical_type() {
Some(_) => {
assert_eq!(a, b)
}
None => {
assert_eq!(a.name(), b.name());
assert_eq!(a.physical_type(), b.physical_type());
assert_eq!(a.converted_type(), b.converted_type());
}
};
});
}