func importSchema()

in arrow/cdata/cdata.go [158:351]


// importSchema converts a C Data Interface ArrowSchema into an arrow.Field.
//
// Children are imported recursively first, then the name, nullability and
// metadata are copied, and finally the format string is decoded into an
// arrow.DataType. A simple (non-parameterized) format that carries a
// dictionary is imported as an arrow.DictionaryType whose index type is the
// simple type and whose value type comes from the dictionary schema.
//
// The schema is released before returning, on success and on error alike.
//
// The format string and child layout come from a foreign producer, so they
// are validated rather than trusted: a malformed format string or a missing
// or mistyped child yields an error instead of an index-out-of-range or
// type-assertion panic. On error the returned field may be partially filled
// in and must not be used.
func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) {
	// always release, even on error
	defer C.ArrowSchemaRelease(schema)

	var childFields []arrow.Field
	if schema.n_children > 0 {
		// call ourselves recursively if there are children.
		// set up a slice to reference safely
		schemaChildren := unsafe.Slice(schema.children, schema.n_children)
		childFields = make([]arrow.Field, schema.n_children)
		for i, c := range schemaChildren {
			childFields[i], err = importSchema((*CArrowSchema)(c))
			if err != nil {
				return ret, err
			}
		}
	}

	// copy the schema name from the c-string
	ret.Name = C.GoString(schema.name)
	ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0
	ret.Metadata = decodeCMetadata(schema.metadata)

	// copies the c-string here, but it's very small
	f := C.GoString(schema.format)

	// handle our non-parameterized simple types.
	dt, ok := formatToSimpleType[f]
	if ok {
		ret.Type = dt

		if schema.dictionary != nil {
			var valueField arrow.Field
			valueField, err = importSchema(schema.dictionary)
			if err != nil {
				return ret, err
			}

			ret.Type = &arrow.DictionaryType{
				IndexType: ret.Type,
				ValueType: valueField.Type,
				// The ordered flag is a property of the dictionary-encoded
				// field itself, so it is read from this schema's flags. The
				// dictionary schema has also already been released by the
				// recursive call above and must not be read again.
				Ordered: schema.flags&C.ARROW_FLAG_DICTIONARY_ORDERED != 0,
			}
		}

		return ret, nil
	}

	// handle types with params via colon. Only the first colon separates the
	// type name from its parameters: a timestamp timezone such as "+05:30"
	// contains a colon of its own and must be kept intact.
	name, params, hasParams := strings.Cut(f, ":")
	switch name {
	case "tss", "tsm", "tsu", "tsn":
		if !hasParams {
			return ret, fmt.Errorf("%w: timestamp format '%s' is missing the ':' timezone separator", arrow.ErrInvalid, f)
		}

		var unit arrow.TimeUnit
		switch name {
		case "tss":
			unit = arrow.Second
		case "tsm":
			unit = arrow.Millisecond
		case "tsu":
			unit = arrow.Microsecond
		case "tsn":
			unit = arrow.Nanosecond
		}
		// an empty timezone ("tss:") is kept as the empty string
		dt = &arrow.TimestampType{Unit: unit, TimeZone: params}
	case "w": // fixed size binary is "w:##" where ## is the byteWidth
		byteWidth, convErr := strconv.Atoi(params)
		if convErr != nil {
			return ret, convErr
		}
		dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth}
	case "d": // decimal types are d:<precision>,<scale>[,<bitsize>] size is assumed 128 if left out
		propList := strings.Split(params, ",")
		bitwidth := 128
		var precision, scale int

		if len(propList) < 2 || len(propList) > 3 {
			return ret, xerrors.Errorf("invalid decimal spec '%s': wrong number of properties", f)
		}
		if len(propList) == 3 {
			bitwidth, err = strconv.Atoi(propList[2])
			if err != nil {
				return ret, xerrors.Errorf("could not parse decimal bitwidth in '%s': %s", f, err.Error())
			}
		}

		precision, err = strconv.Atoi(propList[0])
		if err != nil {
			return ret, xerrors.Errorf("could not parse decimal precision in '%s': %s", f, err.Error())
		}

		scale, err = strconv.Atoi(propList[1])
		if err != nil {
			return ret, xerrors.Errorf("could not parse decimal scale in '%s': %s", f, err.Error())
		}

		switch bitwidth {
		case 32:
			dt = &arrow.Decimal32Type{Precision: int32(precision), Scale: int32(scale)}
		case 64:
			dt = &arrow.Decimal64Type{Precision: int32(precision), Scale: int32(scale)}
		case 128:
			dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)}
		case 256:
			dt = &arrow.Decimal256Type{Precision: int32(precision), Scale: int32(scale)}
		default:
			return ret, xerrors.Errorf("unsupported decimal bitwidth, got '%s'", f)
		}
	}

	// types with children. A bare "+" (or an empty format) carries no type
	// character and falls through to the "unimplemented type" error below.
	if len(f) >= 2 && f[0] == '+' {
		missingChild := func() error {
			return fmt.Errorf("%w: format '%s' requires at least one child", arrow.ErrInvalid, f)
		}

		switch f[1] {
		case 'l': // list
			if len(childFields) == 0 {
				return ret, missingChild()
			}
			dt = arrow.ListOfField(childFields[0])
		case 'L': // large list
			if len(childFields) == 0 {
				return ret, missingChild()
			}
			dt = arrow.LargeListOfField(childFields[0])
		case 'v': // list view/large list view
			// anything other than "+vl"/"+vL" leaves dt nil and is reported
			// as unimplemented below
			if len(f) >= 3 && (f[2] == 'l' || f[2] == 'L') {
				if len(childFields) == 0 {
					return ret, missingChild()
				}
				if f[2] == 'l' {
					dt = arrow.ListViewOfField(childFields[0])
				} else {
					dt = arrow.LargeListViewOfField(childFields[0])
				}
			}
		case 'w': // fixed size list is w:# where # is the list size.
			// parse as 32-bit so an oversized value is an error rather than
			// being silently truncated by the int32 conversion
			listSize, convErr := strconv.ParseInt(params, 10, 32)
			if convErr != nil {
				return ret, convErr
			}
			// NOTE(review): FixedSizeListOfField is expected to panic on a
			// non-positive size, so reject it here instead.
			if listSize <= 0 {
				return ret, fmt.Errorf("%w: invalid fixed size list size in format '%s'", arrow.ErrInvalid, f)
			}
			if len(childFields) == 0 {
				return ret, missingChild()
			}

			dt = arrow.FixedSizeListOfField(int32(listSize), childFields[0])
		case 's': // struct
			dt = arrow.StructOf(childFields...)
		case 'r': // run-end encoded
			if len(childFields) != 2 {
				return ret, fmt.Errorf("%w: run-end encoded arrays must have 2 children", arrow.ErrInvalid)
			}
			dt = arrow.RunEndEncodedOf(childFields[0].Type, childFields[1].Type)
		case 'm': // map type is basically a list of structs.
			if len(childFields) == 0 {
				return ret, missingChild()
			}
			st, isStruct := childFields[0].Type.(*arrow.StructType)
			if !isStruct || st.NumFields() < 2 {
				return ret, fmt.Errorf("%w: map child must be a struct with key and item fields", arrow.ErrInvalid)
			}

			mapType := arrow.MapOf(st.Field(0).Type, st.Field(1).Type)
			mapType.KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0
			dt = mapType
		case 'u': // union
			// format is "+ud:<codes>" or "+us:<codes>"; f[2] selects the mode
			if len(f) < 3 {
				return ret, fmt.Errorf("%w: invalid union type", arrow.ErrInvalid)
			}

			var mode arrow.UnionMode
			switch f[2] {
			case 'd':
				mode = arrow.DenseMode
			case 's':
				mode = arrow.SparseMode
			default:
				return ret, fmt.Errorf("%w: invalid union type", arrow.ErrInvalid)
			}

			if !hasParams {
				return ret, fmt.Errorf("%w: union format '%s' is missing its type codes", arrow.ErrInvalid, f)
			}

			codes := strings.Split(params, ",")
			typeCodes := make([]arrow.UnionTypeCode, 0, len(codes))
			for _, code := range codes {
				v, e := strconv.ParseInt(code, 10, 8)
				if e != nil {
					return ret, fmt.Errorf("%w: invalid type code: %s", arrow.ErrInvalid, e)
				}
				if v < 0 {
					return ret, fmt.Errorf("%w: negative type code in union: format string %s", arrow.ErrInvalid, f)
				}
				typeCodes = append(typeCodes, arrow.UnionTypeCode(v))
			}

			if len(childFields) != len(typeCodes) {
				return ret, fmt.Errorf("%w: ArrowArray struct number of children incompatible with format string", arrow.ErrInvalid)
			}

			dt = arrow.UnionOf(mode, childFields, typeCodes)
		}
	}

	if dt == nil {
		// if we didn't find a type, then it's something we haven't implemented.
		return ret, xerrors.New("unimplemented type")
	}

	ret.Type = dt
	return ret, nil
}