arrow/datatype_nested.go (275 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package arrow import ( "fmt" "strings" ) // ListType describes a nested type in which each array slot contains // a variable-size sequence of values, all having the same relative type. type ListType struct { elem Field } func ListOfField(f Field) *ListType { if f.Type == nil { panic("arrow: nil type for list field") } return &ListType{elem: f} } // ListOf returns the list type with element type t. // For example, if t represents int32, ListOf(t) represents []int32. // // ListOf panics if t is nil or invalid. NullableElem defaults to true func ListOf(t DataType) *ListType { if t == nil { panic("arrow: nil DataType") } return &ListType{elem: Field{Name: "item", Type: t, Nullable: true}} } // ListOfNonNullable is like ListOf but NullableElem defaults to false, indicating // that the child type should be marked as non-nullable. func ListOfNonNullable(t DataType) *ListType { if t == nil { panic("arrow: nil DataType") } return &ListType{elem: Field{Name: "item", Type: t, Nullable: false}} } func (*ListType) ID() Type { return LIST } func (*ListType) Name() string { return "list" } func (t *ListType) String() string { if t.elem.Nullable { return fmt.Sprintf("list<%s: %s, nullable>", t.elem.Name, t.elem.Type) } return fmt.Sprintf("list<%s: %s>", t.elem.Name, t.elem.Type) } func (t *ListType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return typeFingerprint(t) + "{" + child + "}" } return "" } func (t *ListType) SetElemMetadata(md Metadata) { t.elem.Metadata = md } func (t *ListType) SetElemNullable(n bool) { t.elem.Nullable = n } // Elem returns the ListType's element type. func (t *ListType) Elem() DataType { return t.elem.Type } func (t *ListType) ElemField() Field { return t.elem } // FixedSizeListType describes a nested type in which each array slot contains // a fixed-size sequence of values, all having the same relative type. type FixedSizeListType struct { n int32 // number of elements in the list elem Field } func FixedSizeListOfField(n int32, f Field) *FixedSizeListType { if f.Type == nil { panic("arrow: nil DataType") } if n <= 0 { panic("arrow: invalid size") } return &FixedSizeListType{n: n, elem: f} } // FixedSizeListOf returns the list type with element type t. // For example, if t represents int32, FixedSizeListOf(10, t) represents [10]int32. // // FixedSizeListOf panics if t is nil or invalid. // FixedSizeListOf panics if n is <= 0. // NullableElem defaults to true func FixedSizeListOf(n int32, t DataType) *FixedSizeListType { if t == nil { panic("arrow: nil DataType") } if n <= 0 { panic("arrow: invalid size") } return &FixedSizeListType{n: n, elem: Field{Name: "item", Type: t, Nullable: true}} } // FixedSizeListOfNonNullable is like FixedSizeListOf but NullableElem defaults to false // indicating that the child type should be marked as non-nullable. func FixedSizeListOfNonNullable(n int32, t DataType) *FixedSizeListType { if t == nil { panic("arrow: nil DataType") } if n <= 0 { panic("arrow: invalid size") } return &FixedSizeListType{n: n, elem: Field{Name: "item", Type: t, Nullable: false}} } func (*FixedSizeListType) ID() Type { return FIXED_SIZE_LIST } func (*FixedSizeListType) Name() string { return "fixed_size_list" } func (t *FixedSizeListType) String() string { if t.elem.Nullable { return fmt.Sprintf("fixed_size_list<%s: %s, nullable>[%d]", t.elem.Name, t.elem.Type, t.n) } return fmt.Sprintf("fixed_size_list<%s: %s>[%d]", t.elem.Name, t.elem.Type, t.n) } // Elem returns the FixedSizeListType's element type. func (t *FixedSizeListType) Elem() DataType { return t.elem.Type } // Len returns the FixedSizeListType's size. func (t *FixedSizeListType) Len() int32 { return t.n } func (t *FixedSizeListType) ElemField() Field { return t.elem } func (t *FixedSizeListType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return fmt.Sprintf("%s[%d]{%s}", typeFingerprint(t), t.n, child) } return "" } // StructType describes a nested type parameterized by an ordered sequence // of relative types, called its fields. type StructType struct { fields []Field index map[string]int meta Metadata } // StructOf returns the struct type with fields fs. // // StructOf panics if there are duplicated fields. // StructOf panics if there is a field with an invalid DataType. func StructOf(fs ...Field) *StructType { n := len(fs) if n == 0 { return &StructType{} } t := &StructType{ fields: make([]Field, n), index: make(map[string]int, n), } for i, f := range fs { if f.Type == nil { panic("arrow: field with nil DataType") } t.fields[i] = Field{ Name: f.Name, Type: f.Type, Nullable: f.Nullable, Metadata: f.Metadata.clone(), } if _, dup := t.index[f.Name]; dup { panic(fmt.Errorf("arrow: duplicate field with name %q", f.Name)) } t.index[f.Name] = i } return t } func (*StructType) ID() Type { return STRUCT } func (*StructType) Name() string { return "struct" } func (t *StructType) String() string { o := new(strings.Builder) o.WriteString("struct<") for i, f := range t.fields { if i > 0 { o.WriteString(", ") } o.WriteString(fmt.Sprintf("%s: %v", f.Name, f.Type)) } o.WriteString(">") return o.String() } func (t *StructType) Fields() []Field { return t.fields } func (t *StructType) Field(i int) Field { return t.fields[i] } func (t *StructType) FieldByName(name string) (Field, bool) { i, ok := t.index[name] if !ok { return Field{}, false } return t.fields[i], true } func (t *StructType) FieldIdx(name string) (int, bool) { i, ok := t.index[name] return i, ok } func (t *StructType) Fingerprint() string { var b strings.Builder b.WriteString(typeFingerprint(t)) b.WriteByte('{') for _, c := range t.fields { child := c.Fingerprint() if len(child) == 0 { return "" } b.WriteString(child) b.WriteByte(';') } b.WriteByte('}') return b.String() } type MapType struct { value *ListType KeysSorted bool } func MapOf(key, item DataType) *MapType { if key == nil || item == nil { panic("arrow: nil key or item type for MapType") } return &MapType{value: ListOf(StructOf(Field{Name: "key", Type: key}, Field{Name: "value", Type: item, Nullable: true}))} } func (*MapType) ID() Type { return MAP } func (*MapType) Name() string { return "map" } func (t *MapType) String() string { var o strings.Builder o.WriteString(fmt.Sprintf("map<%s, %s", t.value.Elem().(*StructType).Field(0).Type, t.value.Elem().(*StructType).Field(1).Type)) if t.KeysSorted { o.WriteString(", keys_sorted") } o.WriteString(">") return o.String() } func (t *MapType) KeyField() Field { return t.value.Elem().(*StructType).Field(0) } func (t *MapType) KeyType() DataType { return t.KeyField().Type } func (t *MapType) ItemField() Field { return t.value.Elem().(*StructType).Field(1) } func (t *MapType) ItemType() DataType { return t.ItemField().Type } func (t *MapType) ValueType() *StructType { return t.value.Elem().(*StructType) } func (t *MapType) ValueField() Field { return Field{ Name: "entries", Type: t.ValueType(), } } func (t *MapType) SetItemNullable(nullable bool) { t.value.Elem().(*StructType).fields[1].Nullable = nullable } func (t *MapType) Fingerprint() string { keyFingerprint := t.KeyType().Fingerprint() itemFingerprint := t.ItemType().Fingerprint() if keyFingerprint == "" || itemFingerprint == "" { return "" } fingerprint := typeFingerprint(t) if t.KeysSorted { fingerprint += "s" } return fingerprint + "{" + keyFingerprint + itemFingerprint + "}" } type Field struct { Name string // Field name Type DataType // The field's data type Nullable bool // Fields can be nullable Metadata Metadata // The field's metadata, if any } func (f Field) Fingerprint() string { typeFingerprint := f.Type.Fingerprint() if typeFingerprint == "" { return "" } var b strings.Builder b.WriteByte('F') if f.Nullable { b.WriteByte('n') } else { b.WriteByte('N') } b.WriteString(f.Name) b.WriteByte('{') b.WriteString(typeFingerprint) b.WriteByte('}') return b.String() } func (f Field) HasMetadata() bool { return f.Metadata.Len() != 0 } func (f Field) Equal(o Field) bool { switch { case f.Name != o.Name: return false case f.Nullable != o.Nullable: return false case !TypeEqual(f.Type, o.Type, CheckMetadata()): return false case !f.Metadata.Equal(o.Metadata): return false } return true } func (f Field) String() string { o := new(strings.Builder) nullable := "" if f.Nullable { nullable = ", nullable" } fmt.Fprintf(o, "%s: type=%v%v", f.Name, f.Type, nullable) if f.HasMetadata() { fmt.Fprintf(o, "\n%*.smetadata: %v", len(f.Name)+2, "", f.Metadata) } return o.String() } var ( _ DataType = (*ListType)(nil) _ DataType = (*StructType)(nil) _ DataType = (*MapType)(nil) )