in parquet/schema/reflection.go [322:549]
// typeToNode builds the schema Node describing the Go type typ, dispatching on
// its reflection kind.
//
// name and repType are the defaults for the node's name and repetition. When
// info (the parsed struct tag; may be nil) is non-nil, its field id, logical
// type, physical type, length, precision and scale are applied, its converted
// type is applied unless it is NA, and a non-empty tag name or a repetition
// other than Undefined replaces the corresponding default.
//
// Kind handling:
//   - Map: a MapOf node built from "key" and "value" children; the logical
//     type must be none or MAP and the key must come out Required.
//   - Struct: a group node with one child per field whose tag does not set
//     Exclude; float16.Num is special-cased as a 2-byte fixed-length byte
//     array with the Float16 logical type. Physical, converted and logical
//     type tags are rejected.
//   - Ptr: the node of the pointed-to type, defaulting to Optional.
//   - Array: parquet.Int96 becomes an Int96 node and byte arrays become a
//     fixed-length byte array whose length defaults to the array length; any
//     other array is handled like a slice.
//   - Slice: with Repeated repetition the element node itself is returned as
//     repeated; byte slices become byte arrays; everything else becomes a
//     ListOf node.
//   - String: a byte array or fixed-length byte array.
//   - Signed/unsigned integers: Int32 or Int64 chosen by bit width unless the
//     tag names a physical type, annotated with an int logical type unless the
//     tag supplies a logical or converted type.
//   - Bool, Float32, Float64: Boolean, Float and Double primitives.
//
// Invalid combinations detected here panic. Any kind not listed above yields
// nil.
func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info *taggedInfo) Node {
	// Defaults used when there is no struct tag, or the tag leaves a setting out.
	var logType LogicalType = NoLogicalType{}
	convType := ConvertedTypes.None
	physType := parquet.Types.Undefined
	id := int32(-1)
	length, prec, scl := 0, 0, 0

	if info != nil {
		id = info.FieldID
		logType = info.LogicalType
		physType = info.Type
		length, prec, scl = int(info.Length), int(info.Precision), int(info.Scale)
		if info.Converted != ConvertedTypes.NA {
			convType = info.Converted
		}
		if info.Name != "" {
			name = info.Name
		}
		if info.RepetitionType != parquet.Repetitions.Undefined {
			repType = info.RepetitionType
		}
	}

	byteType := reflect.TypeOf(byte(0))

	// buildLeaf creates the primitive node for the physical type and length
	// resolved so far in physType and length. A tagged logical type takes
	// precedence over the converted type.
	buildLeaf := func() Node {
		if logType.IsNone() {
			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, physType, convType, length, prec, scl, id))
		}
		return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logType, physType, length, id))
	}

	// buildInt creates the node for an integer kind. The signed and unsigned
	// paths differ only in the signedness of the default int logical type.
	buildInt := func(signed bool) Node {
		bits := typ.Bits()

		// A physical type from the tag wins; otherwise pick by bit width.
		ptyp := physType
		if ptyp == parquet.Types.Undefined {
			ptyp = parquet.Types.Int32
			if bits == 64 {
				ptyp = parquet.Types.Int64
			}
		}

		switch {
		case !logType.IsNone():
			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logType, ptyp, length, id))
		case convType != ConvertedTypes.None:
			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, convType, 0, prec, scl, id))
		}

		// No annotation was tagged: default to an int logical type. Its width
		// is the Go type's width unless the tag forced an Int32/Int64 physical
		// type, in which case the width follows that physical type.
		width := int8(bits)
		if physType != parquet.Types.Undefined {
			switch ptyp {
			case parquet.Types.Int32:
				width = 32
			case parquet.Types.Int64:
				width = 64
			}
		}
		return MustPrimitive(NewPrimitiveNodeLogical(name, repType, NewIntLogicalType(width, signed), ptyp, 0, id))
	}

	switch typ.Kind() {
	case reflect.Map:
		// Only MAP (or no logical type at all, which implies MAP) is valid here.
		if !(logType.IsNone() || logType.Equals(MapLogicalType{})) {
			panic("cannot set logical type to something other than map for a map")
		}

		// The value node is built first from the value-specific tags, then
		// the same variable is reused for the key-specific tags.
		childInfo := newTaggedInfo()
		if info != nil {
			childInfo = info.CopyForValue()
		}
		valNode := typeToNode("value", typ.Elem(), parquet.Repetitions.Required, &childInfo)

		if info != nil {
			childInfo = info.CopyForKey()
		}
		keyNode := typeToNode("key", typ.Key(), parquet.Repetitions.Required, &childInfo)
		if keyNode.RepetitionType() != parquet.Repetitions.Required {
			// the key tags may have overridden the repetition; that is not allowed
			panic("key type of map must be Required")
		}
		return Must(MapOf(name, keyNode, valNode, repType, id))

	case reflect.Struct:
		if typ == reflect.TypeOf(float16.Num{}) {
			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2, id))
		}

		// Any other struct is a group node with one child per non-excluded field.
		fields := FieldList{}
		for i, n := 0, typ.NumField(); i < n; i++ {
			field := typ.Field(i)
			tags := infoFromTags(field.Tag)
			if tags != nil && tags.Exclude {
				continue
			}
			fields = append(fields, typeToNode(field.Name, field.Type, parquet.Repetitions.Required, tags))
		}

		// Group nodes carry no physical, converted or logical type.
		switch {
		case physType != parquet.Types.Undefined:
			panic("cannot specify custom type on struct")
		case convType != ConvertedTypes.None:
			panic("cannot specify converted types for a struct")
		case !logType.IsNone():
			panic("cannot specify logicaltype for a struct")
		}
		return Must(NewGroupNode(name, repType, fields, id))

	case reflect.Ptr:
		// A pointer is described by its pointee, defaulting to optional.
		return typeToNode(name, typ.Elem(), parquet.Repetitions.Optional, info)

	case reflect.Array:
		if typ == reflect.TypeOf(parquet.Int96{}) {
			return NewInt96Node(name, repType, id)
		}
		if typ.Elem() == byteType {
			// e.g. [12]byte: fixed-length byte array of length 12 unless the
			// tag supplies its own physical type or length.
			if physType == parquet.Types.Undefined {
				physType = parquet.Types.FixedLenByteArray
			}
			if length == 0 {
				length = typ.Len()
			}
			return buildLeaf()
		}
		// Arrays of anything else are handled exactly like slices.
		fallthrough

	case reflect.Slice:
		// An explicitly repeated field is just its element type, repeated.
		if repType == parquet.Repetitions.Repeated {
			return typeToNode(name, typ.Elem(), parquet.Repetitions.Repeated, info)
		}

		elem := typ.Elem()
		isBytes := elem == byteType
		wantsBytes := physType == parquet.Types.FixedLenByteArray || physType == parquet.Types.ByteArray
		if wantsBytes && !isBytes {
			panic("slice with physical type ByteArray or FixedLenByteArray must be []byte")
		}
		if isBytes {
			if physType == parquet.Types.Undefined {
				physType = parquet.Types.ByteArray
			}
			return buildLeaf()
		}

		// Everything else is a LIST whose element uses the value-specific tags.
		var elemInfo *taggedInfo
		if info != nil {
			valueTags := info.CopyForValue()
			elemInfo = &valueTags
		}
		if !(logType.IsNone() || logType.Equals(ListLogicalType{})) {
			panic("slice must either be repeated or a List type")
		}
		if !(convType == ConvertedTypes.None || convType == ConvertedTypes.List) {
			panic("slice must either be repeated or a List type")
		}
		element := typeToNode(name, elem, parquet.Repetitions.Required, elemInfo)
		return Must(ListOf(element, repType, id))

	case reflect.String:
		// Strings default to ByteArray; FixedLenByteArray is the only other
		// physical type accepted.
		switch physType {
		case parquet.Types.Undefined:
			physType = parquet.Types.ByteArray
		case parquet.Types.ByteArray, parquet.Types.FixedLenByteArray:
			// keep what the tag asked for
		default:
			panic("string fields should be of type bytearray or fixedlenbytearray only")
		}
		return buildLeaf()

	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return buildInt(true)

	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return buildInt(false)

	case reflect.Bool:
		physType = parquet.Types.Boolean
		return buildLeaf()

	case reflect.Float32:
		physType = parquet.Types.Float
		return buildLeaf()

	case reflect.Float64:
		physType = parquet.Types.Double
		return buildLeaf()
	}

	// No node is produced for any other kind.
	return nil
}