in arrow/cdata/cdata.go [158:351]
// importSchema converts a C Data Interface ArrowSchema into an arrow.Field.
//
// Children (and the dictionary schema, if present) are imported recursively.
// The schema is always released before returning, whether or not the import
// succeeded. On error the returned field may be partially populated and must
// not be relied upon.
//
// Malformed format strings and inconsistent child layouts are reported as
// errors instead of causing index-out-of-range or type-assertion panics.
func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) {
	// always release, even on error
	defer C.ArrowSchemaRelease(schema)

	var childFields []arrow.Field
	if schema.n_children > 0 {
		// call ourselves recursively if there are children.
		// set up a slice to reference safely
		schemaChildren := unsafe.Slice(schema.children, schema.n_children)
		childFields = make([]arrow.Field, schema.n_children)
		for i, c := range schemaChildren {
			childFields[i], err = importSchema((*CArrowSchema)(c))
			if err != nil {
				return ret, err
			}
		}
	}

	// copy the schema name from the c-string
	ret.Name = C.GoString(schema.name)
	ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0
	ret.Metadata = decodeCMetadata(schema.metadata)

	// copies the c-string here, but it's very small
	f := C.GoString(schema.format)
	if f == "" {
		// f is indexed byte-wise below, so an empty format must be rejected here.
		return ret, fmt.Errorf("%w: empty or missing format string", arrow.ErrInvalid)
	}

	// handle our non-parameterized simple types.
	dt, ok := formatToSimpleType[f]
	if ok {
		ret.Type = dt
		if schema.dictionary != nil {
			// The ordered flag belongs to the dictionary-encoded (parent)
			// schema. The flag on the dictionary values schema is also
			// honored for compatibility with earlier behavior; it is read
			// before the recursive import releases that schema.
			ordered := (schema.flags & C.ARROW_FLAG_DICTIONARY_ORDERED) != 0
			legacyOrdered := (schema.dictionary.flags & C.ARROW_FLAG_DICTIONARY_ORDERED) != 0

			valueField, err := importSchema(schema.dictionary)
			if err != nil {
				return ret, err
			}
			ret.Type = &arrow.DictionaryType{
				IndexType: ret.Type,
				ValueType: valueField.Type,
				Ordered:   ordered || legacyOrdered,
			}
		}
		return ret, nil
	}

	// handle types with params via colon. Split only on the first colon so
	// that parameters which themselves contain colons (e.g. a "+05:30"
	// timezone offset) are preserved intact.
	typs := strings.SplitN(f, ":", 2)
	hasParams := len(typs) == 2
	params := ""
	if hasParams {
		params = typs[1]
	}

	switch typs[0] {
	case "tss", "tsm", "tsu", "tsn": // timestamps are "ts<unit>:<timezone>", timezone may be empty
		if !hasParams {
			return ret, fmt.Errorf("%w: invalid timestamp format string %q: missing ':'", arrow.ErrInvalid, f)
		}
		unit := arrow.Second
		switch typs[0] {
		case "tsm":
			unit = arrow.Millisecond
		case "tsu":
			unit = arrow.Microsecond
		case "tsn":
			unit = arrow.Nanosecond
		}
		dt = &arrow.TimestampType{Unit: unit, TimeZone: params}
	case "w": // fixed size binary is "w:##" where ## is the byteWidth
		if !hasParams {
			return ret, fmt.Errorf("%w: invalid fixed size binary format string %q: missing byte width", arrow.ErrInvalid, f)
		}
		byteWidth, err := strconv.Atoi(params)
		if err != nil {
			return ret, fmt.Errorf("could not parse fixed size binary width in %q: %w", f, err)
		}
		dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth}
	case "d": // decimal types are d:<precision>,<scale>[,<bitsize>] size is assumed 128 if left out
		propList := strings.Split(params, ",")
		if len(propList) < 2 || len(propList) > 3 {
			return ret, xerrors.Errorf("invalid decimal spec '%s': wrong number of properties", f)
		}
		bitwidth := 128
		if len(propList) == 3 {
			bitwidth, err = strconv.Atoi(propList[2])
			if err != nil {
				return ret, xerrors.Errorf("could not parse decimal bitwidth in '%s': %s", f, err.Error())
			}
		}
		precision, err := strconv.Atoi(propList[0])
		if err != nil {
			return ret, xerrors.Errorf("could not parse decimal precision in '%s': %s", f, err.Error())
		}
		scale, err := strconv.Atoi(propList[1])
		if err != nil {
			return ret, xerrors.Errorf("could not parse decimal scale in '%s': %s", f, err.Error())
		}
		switch bitwidth {
		case 32:
			dt = &arrow.Decimal32Type{Precision: int32(precision), Scale: int32(scale)}
		case 64:
			dt = &arrow.Decimal64Type{Precision: int32(precision), Scale: int32(scale)}
		case 128:
			dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)}
		case 256:
			dt = &arrow.Decimal256Type{Precision: int32(precision), Scale: int32(scale)}
		default:
			return ret, xerrors.Errorf("unsupported decimal bitwidth, got '%s'", f)
		}
	}

	// firstChild returns the first imported child, or an error when the
	// producer supplied none for a type that requires one.
	firstChild := func(what string) (arrow.Field, error) {
		if len(childFields) == 0 {
			return arrow.Field{}, fmt.Errorf("%w: %s must have a child, format string %q", arrow.ErrInvalid, what, f)
		}
		return childFields[0], nil
	}

	if len(f) >= 2 && f[0] == '+' { // types with children
		switch f[1] {
		case 'l': // list
			child, cerr := firstChild("list")
			if cerr != nil {
				return ret, cerr
			}
			dt = arrow.ListOfField(child)
		case 'L': // large list
			child, cerr := firstChild("large list")
			if cerr != nil {
				return ret, cerr
			}
			dt = arrow.LargeListOfField(child)
		case 'v': // list view/large list view
			// an unknown or missing view kind leaves dt nil and is reported
			// as unimplemented below.
			var view byte
			if len(f) > 2 {
				view = f[2]
			}
			switch view {
			case 'l':
				child, cerr := firstChild("list view")
				if cerr != nil {
					return ret, cerr
				}
				dt = arrow.ListViewOfField(child)
			case 'L':
				child, cerr := firstChild("large list view")
				if cerr != nil {
					return ret, cerr
				}
				dt = arrow.LargeListViewOfField(child)
			}
		case 'w': // fixed size list is w:# where # is the list size.
			if !hasParams {
				return ret, fmt.Errorf("%w: invalid fixed size list format string %q: missing list size", arrow.ErrInvalid, f)
			}
			listSize, err := strconv.Atoi(params)
			if err != nil {
				return ret, fmt.Errorf("could not parse fixed size list size in %q: %w", f, err)
			}
			child, cerr := firstChild("fixed size list")
			if cerr != nil {
				return ret, cerr
			}
			dt = arrow.FixedSizeListOfField(int32(listSize), child)
		case 's': // struct
			dt = arrow.StructOf(childFields...)
		case 'r': // run-end encoded
			if len(childFields) != 2 {
				return ret, fmt.Errorf("%w: run-end encoded arrays must have 2 children", arrow.ErrInvalid)
			}
			dt = arrow.RunEndEncodedOf(childFields[0].Type, childFields[1].Type)
		case 'm': // map type is basically a list of structs.
			child, cerr := firstChild("map")
			if cerr != nil {
				return ret, cerr
			}
			st, isStruct := child.Type.(*arrow.StructType)
			if !isStruct || st.NumFields() < 2 {
				return ret, fmt.Errorf("%w: map child must be a struct with key and item fields", arrow.ErrInvalid)
			}
			mapType := arrow.MapOf(st.Field(0).Type, st.Field(1).Type)
			mapType.KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0
			dt = mapType
		case 'u': // union is "+ud:<codes>" (dense) or "+us:<codes>" (sparse)
			var modeChar byte
			if len(f) > 2 {
				modeChar = f[2]
			}
			var mode arrow.UnionMode
			switch modeChar {
			case 'd':
				mode = arrow.DenseMode
			case 's':
				mode = arrow.SparseMode
			default:
				return ret, fmt.Errorf("%w: invalid union type", arrow.ErrInvalid)
			}
			if !hasParams {
				return ret, fmt.Errorf("%w: invalid union format string %q: missing type codes", arrow.ErrInvalid, f)
			}

			codes := strings.Split(params, ",")
			typeCodes := make([]arrow.UnionTypeCode, 0, len(codes))
			for _, code := range codes {
				v, e := strconv.ParseInt(code, 10, 8)
				if e != nil {
					return ret, fmt.Errorf("%w: invalid type code: %s", arrow.ErrInvalid, e)
				}
				if v < 0 {
					return ret, fmt.Errorf("%w: negative type code in union: format string %s", arrow.ErrInvalid, f)
				}
				typeCodes = append(typeCodes, arrow.UnionTypeCode(v))
			}
			if len(childFields) != len(typeCodes) {
				return ret, fmt.Errorf("%w: ArrowArray struct number of children incompatible with format string", arrow.ErrInvalid)
			}
			dt = arrow.UnionOf(mode, childFields, typeCodes)
		}
	}

	if dt == nil {
		// if we didn't find a type, then it's something we haven't implemented.
		return ret, xerrors.New("unimplemented type")
	}
	ret.Type = dt
	return ret, nil
}