fn test_field_to_column_desc()

in parquet/src/arrow/schema/mod.rs [1354:1515]


    fn test_field_to_column_desc() {
        let message_type = "
        message arrow_schema {
            REQUIRED BOOLEAN boolean;
            REQUIRED INT32   int8  (INT_8);
            REQUIRED INT32   int16 (INTEGER(16,true));
            REQUIRED INT32   int32;
            REQUIRED INT64   int64;
            OPTIONAL DOUBLE  double;
            OPTIONAL FLOAT   float;
            OPTIONAL BINARY  string (STRING);
            OPTIONAL GROUP   bools (LIST) {
                REPEATED GROUP list {
                    OPTIONAL BOOLEAN element;
                }
            }
            REQUIRED GROUP   bools_non_null (LIST) {
                REPEATED GROUP list {
                    REQUIRED BOOLEAN element;
                }
            }
            OPTIONAL INT32   date       (DATE);
            OPTIONAL INT32   time_milli (TIME(MILLIS,false));
            OPTIONAL INT64   time_micro (TIME_MICROS);
            OPTIONAL INT64   ts_milli (TIMESTAMP_MILLIS);
            REQUIRED INT64   ts_micro (TIMESTAMP(MICROS,false));
            REQUIRED INT64   ts_seconds;
            REQUIRED INT64   ts_micro_utc (TIMESTAMP(MICROS, true));
            REQUIRED INT64   ts_millis_zero_offset (TIMESTAMP(MILLIS, true));
            REQUIRED INT64   ts_millis_zero_negative_offset (TIMESTAMP(MILLIS, true));
            REQUIRED INT64   ts_micro_non_utc (TIMESTAMP(MICROS, true));
            REQUIRED GROUP struct {
                REQUIRED BOOLEAN bools;
                REQUIRED INT32 uint32 (INTEGER(32,false));
                REQUIRED GROUP   int32 (LIST) {
                    REPEATED GROUP list {
                        OPTIONAL INT32 element;
                    }
                }
            }
            REQUIRED BINARY  dictionary_strings (STRING);
            REQUIRED INT32 decimal_int32 (DECIMAL(8,2));
            REQUIRED INT64 decimal_int64 (DECIMAL(16,2));
            REQUIRED FIXED_LEN_BYTE_ARRAY (13) decimal_fix_length (DECIMAL(30,2));
            REQUIRED FIXED_LEN_BYTE_ARRAY (16) decimal128 (DECIMAL(38,2));
            REQUIRED FIXED_LEN_BYTE_ARRAY (17) decimal256 (DECIMAL(39,2));
        }
        ";
        let parquet_group_type = parse_message_type(message_type).unwrap();

        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_group_type));

        let arrow_fields = vec![
            Field::new("boolean", DataType::Boolean, false),
            Field::new("int8", DataType::Int8, false),
            Field::new("int16", DataType::Int16, false),
            Field::new("int32", DataType::Int32, false),
            Field::new("int64", DataType::Int64, false),
            Field::new("double", DataType::Float64, true),
            Field::new("float", DataType::Float32, true),
            Field::new("string", DataType::Utf8, true),
            Field::new_list(
                "bools",
                Field::new("element", DataType::Boolean, true),
                true,
            ),
            Field::new_list(
                "bools_non_null",
                Field::new("element", DataType::Boolean, false),
                false,
            ),
            Field::new("date", DataType::Date32, true),
            Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),
            Field::new("time_micro", DataType::Time64(TimeUnit::Microsecond), true),
            Field::new(
                "ts_milli",
                DataType::Timestamp(TimeUnit::Millisecond, None),
                true,
            ),
            Field::new(
                "ts_micro",
                DataType::Timestamp(TimeUnit::Microsecond, None),
                false,
            ),
            Field::new(
                "ts_seconds",
                DataType::Timestamp(TimeUnit::Second, Some("UTC".into())),
                false,
            ),
            Field::new(
                "ts_micro_utc",
                DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())),
                false,
            ),
            Field::new(
                "ts_millis_zero_offset",
                DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
                false,
            ),
            Field::new(
                "ts_millis_zero_negative_offset",
                DataType::Timestamp(TimeUnit::Millisecond, Some("-00:00".into())),
                false,
            ),
            Field::new(
                "ts_micro_non_utc",
                DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".into())),
                false,
            ),
            Field::new_struct(
                "struct",
                vec![
                    Field::new("bools", DataType::Boolean, false),
                    Field::new("uint32", DataType::UInt32, false),
                    Field::new_list(
                        "int32",
                        Field::new("element", DataType::Int32, true),
                        false,
                    ),
                ],
                false,
            ),
            Field::new_dictionary(
                "dictionary_strings",
                DataType::Int32,
                DataType::Utf8,
                false,
            ),
            Field::new("decimal_int32", DataType::Decimal128(8, 2), false),
            Field::new("decimal_int64", DataType::Decimal128(16, 2), false),
            Field::new("decimal_fix_length", DataType::Decimal128(30, 2), false),
            Field::new("decimal128", DataType::Decimal128(38, 2), false),
            Field::new("decimal256", DataType::Decimal256(39, 2), false),
        ];
        let arrow_schema = Schema::new(arrow_fields);
        let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema).unwrap();

        assert_eq!(
            parquet_schema.columns().len(),
            converted_arrow_schema.columns().len()
        );
        parquet_schema
            .columns()
            .iter()
            .zip(converted_arrow_schema.columns())
            .for_each(|(a, b)| {
                // Only check logical type if it's set on the Parquet side.
                // This is because the Arrow conversion always sets logical type,
                // even if there wasn't originally one.
                // This is not an issue, but is an inconvenience for this test.
                match a.logical_type() {
                    Some(_) => {
                        assert_eq!(a, b)
                    }
                    None => {
                        assert_eq!(a.name(), b.name());
                        assert_eq!(a.physical_type(), b.physical_type());
                        assert_eq!(a.converted_type(), b.converted_type());
                    }
                };
            });
    }