func typeToNode()

in parquet/schema/reflection.go [322:549]


// typeToNode builds the schema Node that describes the Go type typ.
//
// name and repType are the defaults for the node's name and repetition. When
// info is non-nil, its non-empty Name and non-Undefined RepetitionType replace
// those defaults, and its field ID, converted type (unless NA), logical type,
// physical type, length, precision and scale are applied as well.
//
// The mapping is chosen from typ.Kind():
//   - Map: MapOf over child nodes named "key" and "value".
//   - Struct: float16.Num becomes a 2-byte FixedLenByteArray carrying
//     Float16LogicalType; any other struct becomes a group node with one child
//     per field whose tags do not mark it as excluded.
//   - Ptr: the node of the pointed-to type, requested as Optional.
//   - Array: parquet.Int96 gets an Int96 node, arrays of byte become a single
//     primitive node, and every other array is treated like a slice.
//   - Slice: the element node itself when the repetition is Repeated, a single
//     primitive node for []byte, otherwise ListOf over the element node.
//   - String, Bool, integer and float kinds: primitive nodes.
//
// The function panics on the tag combinations rejected in the cases below;
// errors from the node constructors are handed to Must/MustPrimitive
// (presumably panicking as well -- confirm against their definitions).
// Kinds without a case yield a nil Node, which callers must be prepared for.
func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info *taggedInfo) Node {
	// defaults that apply when no struct tag information is supplied
	var (
		converted             = ConvertedTypes.None
		logical   LogicalType = NoLogicalType{}
		fieldID               = int32(-1)
		physical              = parquet.Types.Undefined
		typeLen               = 0
		precision             = 0
		scale                 = 0
	)
	if info != nil { // struct tag info overrides the defaults
		fieldID = info.FieldID
		if info.Converted != ConvertedTypes.NA {
			converted = info.Converted
		}
		logical = info.LogicalType
		physical = info.Type
		typeLen = int(info.Length)
		precision = int(info.Precision)
		scale = int(info.Scale)

		if info.Name != "" {
			name = info.Name
		}
		if info.RepetitionType != parquet.Repetitions.Undefined {
			repType = info.RepetitionType
		}
	}

	byteType := reflect.TypeOf(byte(0))

	// leaf builds a primitive node with the given physical type and length,
	// using the tagged logical type when there is one and the converted type
	// (with precision and scale) otherwise.
	leaf := func(ptyp parquet.Type, length int) Node {
		if !logical.IsNone() {
			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, ptyp, length, fieldID))
		}
		return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, converted, length, precision, scale, fieldID))
	}

	// intLeaf builds the node for a signed or unsigned Go integer kind. It must
	// only be called when typ is an integer kind, since it relies on typ.Bits().
	intLeaf := func(signed bool) Node {
		// 64-bit Go integers default to Int64, everything narrower to Int32
		ptyp := parquet.Types.Int32
		if typ.Bits() == 64 {
			ptyp = parquet.Types.Int64
		}

		// The default annotation uses the Go bit width, unless the tag forces
		// an Int32/Int64 physical type, in which case that width is used.
		bitwidth := int8(typ.Bits())
		if physical != parquet.Types.Undefined {
			ptyp = physical
			if ptyp == parquet.Types.Int32 {
				bitwidth = 32
			} else if ptyp == parquet.Types.Int64 {
				bitwidth = 64
			}
		}

		switch {
		case !logical.IsNone():
			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, ptyp, typeLen, fieldID))
		case converted != ConvertedTypes.None:
			return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, converted, 0, precision, scale, fieldID))
		}
		// nothing was tagged: annotate with the matching integer logical type
		return MustPrimitive(NewPrimitiveNodeLogical(name, repType, NewIntLogicalType(bitwidth, signed), ptyp, 0, fieldID))
	}

	// simplify the logic by switching based on the reflection Kind
	switch typ.Kind() {
	case reflect.Map:
		// a map must have a logical type of MAP or have no tag for logical type in which case
		// we assume MAP logical type.
		if !logical.IsNone() && !logical.Equals(MapLogicalType{}) {
			panic("cannot set logical type to something other than map for a map")
		}

		childInfo := newTaggedInfo()
		if info != nil { // propagate any value specific tags to the value type
			childInfo = info.CopyForValue()
		}
		value := typeToNode("value", typ.Elem(), parquet.Repetitions.Required, &childInfo)

		if info != nil { // switch over to the key specific tags, if any
			childInfo = info.CopyForKey()
		}
		key := typeToNode("key", typ.Key(), parquet.Repetitions.Required, &childInfo)
		if key.RepetitionType() != parquet.Repetitions.Required { // key cannot be optional
			panic("key type of map must be Required")
		}
		return Must(MapOf(name, key, value, repType, fieldID))
	case reflect.Struct:
		if typ == reflect.TypeOf(float16.Num{}) {
			return MustPrimitive(NewPrimitiveNodeLogical(name, repType, Float16LogicalType{}, parquet.Types.FixedLenByteArray, 2, fieldID))
		}

		// structs are Group nodes; children are built before the tag checks
		// below so that the order in which panics surface is unchanged
		numFields := typ.NumField()
		fields := make(FieldList, 0, numFields)
		for i := 0; i < numFields; i++ {
			f := typ.Field(i)
			tags := infoFromTags(f.Tag)
			if tags != nil && tags.Exclude {
				continue
			}
			fields = append(fields, typeToNode(f.Name, f.Type, parquet.Repetitions.Required, tags))
		}

		// group nodes don't have a physical type
		if physical != parquet.Types.Undefined {
			panic("cannot specify custom type on struct")
		}
		// group nodes don't have converted or logical types
		if converted != ConvertedTypes.None {
			panic("cannot specify converted types for a struct")
		}
		if !logical.IsNone() {
			panic("cannot specify logicaltype for a struct")
		}
		return Must(NewGroupNode(name, repType, fields, fieldID))
	case reflect.Ptr:
		// a pointer yields the node of the type it points to, marked as optional
		return typeToNode(name, typ.Elem(), parquet.Repetitions.Optional, info)
	case reflect.Array:
		// arrays are repeated or fixed size
		if typ == reflect.TypeOf(parquet.Int96{}) {
			return NewInt96Node(name, repType, fieldID)
		}

		if typ.Elem() == byteType { // something like [12]byte translates to FixedLenByteArray with length 12
			if physical == parquet.Types.Undefined {
				physical = parquet.Types.FixedLenByteArray
			}
			if typeLen == 0 { // no length in the tag: use the length of the array type
				typeLen = typ.Len()
			}
			return leaf(physical, typeLen)
		}
		fallthrough // if it's not a fixed len byte array type, then just treat it like a slice
	case reflect.Slice:
		// for slices, we default to treating them as lists unless the repetition type is set to REPEATED or they are
		// a bytearray/fixedlenbytearray
		switch {
		case repType == parquet.Repetitions.Repeated:
			return typeToNode(name, typ.Elem(), parquet.Repetitions.Repeated, info)
		case physical == parquet.Types.FixedLenByteArray || physical == parquet.Types.ByteArray:
			if typ.Elem() != byteType {
				panic("slice with physical type ByteArray or FixedLenByteArray must be []byte")
			}
			fallthrough
		case typ.Elem() == byteType:
			if physical == parquet.Types.Undefined {
				physical = parquet.Types.ByteArray
			}
			return leaf(physical, typeLen)
		default:
			// the element node only receives the value specific tags
			var elemInfo *taggedInfo
			if info != nil {
				valueInfo := info.CopyForValue()
				elemInfo = &valueInfo
			}

			if !logical.IsNone() && !logical.Equals(ListLogicalType{}) {
				panic("slice must either be repeated or a List type")
			}
			if converted != ConvertedTypes.None && converted != ConvertedTypes.List {
				panic("slice must either be repeated or a List type")
			}
			element := typeToNode(name, typ.Elem(), parquet.Repetitions.Required, elemInfo)
			return Must(ListOf(element, repType, fieldID))
		}
	case reflect.String:
		// strings are byte arrays unless the tag asks for a fixedlen byte array
		switch physical {
		case parquet.Types.Undefined, parquet.Types.ByteArray:
			return leaf(parquet.Types.ByteArray, typeLen)
		case parquet.Types.FixedLenByteArray:
			return leaf(parquet.Types.FixedLenByteArray, typeLen)
		}
		panic("string fields should be of type bytearray or fixedlenbytearray only")
	case reflect.Int, reflect.Int32, reflect.Int8, reflect.Int16, reflect.Int64:
		return intLeaf(true)
	case reflect.Uint, reflect.Uint32, reflect.Uint8, reflect.Uint16, reflect.Uint64:
		return intLeaf(false)
	case reflect.Bool:
		return leaf(parquet.Types.Boolean, typeLen)
	case reflect.Float32:
		return leaf(parquet.Types.Float, typeLen)
	case reflect.Float64:
		return leaf(parquet.Types.Double, typeLen)
	}

	// NOTE(review): any kind without a case above falls through to a nil Node
	// instead of a descriptive failure; the struct case appends such a nil to
	// its field list unchecked. Kept as-is to preserve existing behaviour.
	return nil
}