arrow/datatype_nested.go (703 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package arrow import ( "errors" "fmt" "strconv" "strings" "github.com/apache/arrow-go/v18/arrow/internal/debug" ) type ( NestedType interface { DataType // Fields method provides a copy of NestedType fields // (so it can be safely mutated and will not result in updating the NestedType). Fields() []Field // NumFields provides the number of fields without allocating. NumFields() int } ListLikeType interface { DataType Elem() DataType ElemField() Field } VarLenListLikeType interface { ListLikeType } ) // ListType describes a nested type in which each array slot contains // a variable-size sequence of values, all having the same relative type. type ListType struct { elem Field } func ListOfField(f Field) *ListType { if f.Type == nil { panic("arrow: nil type for list field") } return &ListType{elem: f} } // ListOf returns the list type with element type t. // For example, if t represents int32, ListOf(t) represents []int32. // // ListOf panics if t is nil or invalid. NullableElem defaults to true func ListOf(t DataType) *ListType { if t == nil { panic("arrow: nil DataType") } return &ListType{elem: Field{Name: "item", Type: t, Nullable: true}} } // ListOfNonNullable is like ListOf but NullableElem defaults to false, indicating // that the child type should be marked as non-nullable. func ListOfNonNullable(t DataType) *ListType { if t == nil { panic("arrow: nil DataType") } return &ListType{elem: Field{Name: "item", Type: t, Nullable: false}} } func (*ListType) ID() Type { return LIST } func (*ListType) Name() string { return "list" } func (t *ListType) String() string { if t.elem.Nullable { return fmt.Sprintf("list<%s: %s, nullable>", t.elem.Name, t.elem.Type) } return fmt.Sprintf("list<%s: %s>", t.elem.Name, t.elem.Type) } func (t *ListType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return typeFingerprint(t) + "{" + child + "}" } return "" } func (t *ListType) SetElemMetadata(md Metadata) { t.elem.Metadata = md } func (t *ListType) SetElemNullable(n bool) { t.elem.Nullable = n } // Elem returns the ListType's element type. func (t *ListType) Elem() DataType { return t.elem.Type } func (t *ListType) ElemField() Field { return t.elem } func (t *ListType) Fields() []Field { return []Field{t.ElemField()} } func (t *ListType) NumFields() int { return 1 } func (*ListType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes)}} } func (*ListType) OffsetTypeTraits() OffsetTraits { return Int32Traits } type LargeListType struct { ListType } func (LargeListType) ID() Type { return LARGE_LIST } func (LargeListType) Name() string { return "large_list" } func (t *LargeListType) String() string { return "large_" + t.ListType.String() } func (t *LargeListType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return typeFingerprint(t) + "{" + child + "}" } return "" } func (*LargeListType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int64SizeBytes)}} } func (*LargeListType) OffsetTypeTraits() OffsetTraits { return Int64Traits } func LargeListOfField(f Field) *LargeListType { if f.Type == nil { panic("arrow: nil type for list field") } return &LargeListType{ListType{elem: f}} } // LargeListOf returns the list type with element type t. // For example, if t represents int32, LargeListOf(t) represents []int32. // // LargeListOf panics if t is nil or invalid. NullableElem defaults to true func LargeListOf(t DataType) *LargeListType { if t == nil { panic("arrow: nil DataType") } return &LargeListType{ListType{elem: Field{Name: "item", Type: t, Nullable: true}}} } // LargeListOfNonNullable is like ListOf but NullableElem defaults to false, indicating // that the child type should be marked as non-nullable. func LargeListOfNonNullable(t DataType) *LargeListType { if t == nil { panic("arrow: nil DataType") } return &LargeListType{ListType{elem: Field{Name: "item", Type: t, Nullable: false}}} } // FixedSizeListType describes a nested type in which each array slot contains // a fixed-size sequence of values, all having the same relative type. type FixedSizeListType struct { n int32 // number of elements in the list elem Field } func FixedSizeListOfField(n int32, f Field) *FixedSizeListType { if f.Type == nil { panic("arrow: nil DataType") } if n <= 0 { panic("arrow: invalid size") } return &FixedSizeListType{n: n, elem: f} } // FixedSizeListOf returns the list type with element type t. // For example, if t represents int32, FixedSizeListOf(10, t) represents [10]int32. // // FixedSizeListOf panics if t is nil or invalid. // FixedSizeListOf panics if n is <= 0. // NullableElem defaults to true func FixedSizeListOf(n int32, t DataType) *FixedSizeListType { if t == nil { panic("arrow: nil DataType") } if n <= 0 { panic("arrow: invalid size") } return &FixedSizeListType{n: n, elem: Field{Name: "item", Type: t, Nullable: true}} } // FixedSizeListOfNonNullable is like FixedSizeListOf but NullableElem defaults to false // indicating that the child type should be marked as non-nullable. func FixedSizeListOfNonNullable(n int32, t DataType) *FixedSizeListType { if t == nil { panic("arrow: nil DataType") } if n <= 0 { panic("arrow: invalid size") } return &FixedSizeListType{n: n, elem: Field{Name: "item", Type: t, Nullable: false}} } func (*FixedSizeListType) ID() Type { return FIXED_SIZE_LIST } func (*FixedSizeListType) Name() string { return "fixed_size_list" } func (t *FixedSizeListType) String() string { if t.elem.Nullable { return fmt.Sprintf("fixed_size_list<%s: %s, nullable>[%d]", t.elem.Name, t.elem.Type, t.n) } return fmt.Sprintf("fixed_size_list<%s: %s>[%d]", t.elem.Name, t.elem.Type, t.n) } func (t *FixedSizeListType) SetElemNullable(n bool) { t.elem.Nullable = n } // Elem returns the FixedSizeListType's element type. func (t *FixedSizeListType) Elem() DataType { return t.elem.Type } // Len returns the FixedSizeListType's size. func (t *FixedSizeListType) Len() int32 { return t.n } func (t *FixedSizeListType) ElemField() Field { return t.elem } func (t *FixedSizeListType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return fmt.Sprintf("%s[%d]{%s}", typeFingerprint(t), t.n, child) } return "" } func (t *FixedSizeListType) Fields() []Field { return []Field{t.ElemField()} } func (t *FixedSizeListType) NumFields() int { return 1 } func (*FixedSizeListType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap()}} } type ListViewType struct { elem Field } func ListViewOfField(f Field) *ListViewType { if f.Type == nil { panic("arrow: nil DataType") } return &ListViewType{elem: f} } // ListViewOf returns the list-view type with element type t. // For example, if t represents int32, ListViewOf(t) represents []int32. // // ListViewOf panics if t is nil or invalid. NullableElem defaults to true func ListViewOf(t DataType) *ListViewType { if t == nil { panic("arrow: nil DataType") } return &ListViewType{elem: Field{Name: "item", Type: t, Nullable: true}} } // ListViewOfNonNullable is like ListViewOf but NullableElem defaults to false, indicating // that the child type should be marked as non-nullable. func ListViewOfNonNullable(t DataType) *ListViewType { if t == nil { panic("arrow: nil DataType") } return &ListViewType{elem: Field{Name: "item", Type: t, Nullable: false}} } func (*ListViewType) ID() Type { return LIST_VIEW } func (*ListViewType) Name() string { return "list_view" } func (t *ListViewType) String() string { if t.elem.Nullable { return fmt.Sprintf("list_view<%s: %s, nullable>", t.elem.Name, t.elem.Type) } return fmt.Sprintf("list_view<%s: %s>", t.elem.Name, t.elem.Type) } func (t *ListViewType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return typeFingerprint(t) + "{" + child + "}" } return "" } func (t *ListViewType) SetElemMetadata(md Metadata) { t.elem.Metadata = md } func (t *ListViewType) SetElemNullable(n bool) { t.elem.Nullable = n } // Elem returns the ListViewType's element type. func (t *ListViewType) Elem() DataType { return t.elem.Type } func (t *ListViewType) ElemField() Field { return t.elem } func (t *ListViewType) Fields() []Field { return []Field{t.ElemField()} } func (t *ListViewType) NumFields() int { return 1 } func (*ListViewType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes), SpecFixedWidth(Int32SizeBytes)}} } func (*ListViewType) OffsetTypeTraits() OffsetTraits { return Int32Traits } type LargeListViewType struct { elem Field } func LargeListViewOfField(f Field) *LargeListViewType { if f.Type == nil { panic("arrow: nil DataType") } return &LargeListViewType{elem: f} } // LargeListViewOf returns the list-view type with element type t. // For example, if t represents int32, LargeListViewOf(t) represents []int32. // // LargeListViewOf panics if t is nil or invalid. NullableElem defaults to true func LargeListViewOf(t DataType) *LargeListViewType { if t == nil { panic("arrow: nil DataType") } return &LargeListViewType{elem: Field{Name: "item", Type: t, Nullable: true}} } // LargeListViewOfNonNullable is like LargeListViewOf but NullableElem defaults // to false, indicating that the child type should be marked as non-nullable. func LargeListViewOfNonNullable(t DataType) *LargeListViewType { if t == nil { panic("arrow: nil DataType") } return &LargeListViewType{elem: Field{Name: "item", Type: t, Nullable: false}} } func (*LargeListViewType) ID() Type { return LARGE_LIST_VIEW } func (*LargeListViewType) Name() string { return "large_list_view" } func (t *LargeListViewType) String() string { if t.elem.Nullable { return fmt.Sprintf("large_list_view<%s: %s, nullable>", t.elem.Name, t.elem.Type) } return fmt.Sprintf("large_list_view<%s: %s>", t.elem.Name, t.elem.Type) } func (t *LargeListViewType) Fingerprint() string { child := t.elem.Type.Fingerprint() if len(child) > 0 { return typeFingerprint(t) + "{" + child + "}" } return "" } func (t *LargeListViewType) SetElemMetadata(md Metadata) { t.elem.Metadata = md } func (t *LargeListViewType) SetElemNullable(n bool) { t.elem.Nullable = n } // Elem returns the LargeListViewType's element type. func (t *LargeListViewType) Elem() DataType { return t.elem.Type } func (t *LargeListViewType) ElemField() Field { return t.elem } func (t *LargeListViewType) Fields() []Field { return []Field{t.ElemField()} } func (t *LargeListViewType) NumFields() int { return 1 } func (*LargeListViewType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int64SizeBytes), SpecFixedWidth(Int64SizeBytes)}} } func (*LargeListViewType) OffsetTypeTraits() OffsetTraits { return Int64Traits } // StructType describes a nested type parameterized by an ordered sequence // of relative types, called its fields. type StructType struct { fields []Field index map[string][]int meta Metadata } // StructOf returns the struct type with fields fs. // // StructOf panics if there is a field with an invalid DataType. func StructOf(fs ...Field) *StructType { n := len(fs) if n == 0 { return &StructType{} } t := &StructType{ fields: make([]Field, n), index: make(map[string][]int, n), } for i, f := range fs { if f.Type == nil { panic("arrow: field with nil DataType") } t.fields[i] = Field{ Name: f.Name, Type: f.Type, Nullable: f.Nullable, Metadata: f.Metadata.clone(), } if indices, exists := t.index[f.Name]; exists { t.index[f.Name] = append(indices, i) } else { t.index[f.Name] = []int{i} } } return t } func (*StructType) ID() Type { return STRUCT } func (*StructType) Name() string { return "struct" } func (t *StructType) String() string { var o strings.Builder o.WriteString("struct<") for i, f := range t.fields { if i > 0 { o.WriteString(", ") } o.WriteString(fmt.Sprintf("%s: %v", f.Name, f.Type)) } o.WriteString(">") return o.String() } // Fields method provides a copy of StructType fields // (so it can be safely mutated and will not result in updating the StructType). func (t *StructType) Fields() []Field { fields := make([]Field, len(t.fields)) copy(fields, t.fields) return fields } func (t *StructType) NumFields() int { return len(t.fields) } func (t *StructType) Field(i int) Field { return t.fields[i] } // FieldByName gets the field with the given name. // // If there are multiple fields with the given name, FieldByName // returns the first such field. func (t *StructType) FieldByName(name string) (Field, bool) { i, ok := t.index[name] if !ok { return Field{}, false } return t.fields[i[0]], true } // FieldIdx gets the index of the field with the given name. // // If there are multiple fields with the given name, FieldIdx returns // the index of the first such field. func (t *StructType) FieldIdx(name string) (int, bool) { i, ok := t.index[name] if ok { return i[0], true } return -1, false } // FieldsByName returns all fields with the given name. func (t *StructType) FieldsByName(n string) ([]Field, bool) { indices, ok := t.index[n] if !ok { return nil, ok } fields := make([]Field, 0, len(indices)) for _, v := range indices { fields = append(fields, t.fields[v]) } return fields, ok } // FieldIndices returns indices of all fields with the given name, or nil. func (t *StructType) FieldIndices(name string) []int { return t.index[name] } func (t *StructType) Fingerprint() string { var b strings.Builder b.WriteString(typeFingerprint(t)) b.WriteByte('{') for _, c := range t.fields { child := c.Fingerprint() if len(child) == 0 { return "" } b.WriteString(child) b.WriteByte(';') } b.WriteByte('}') return b.String() } func (*StructType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap()}} } type MapType struct { value *ListType KeysSorted bool } func MapOf(key, item DataType) *MapType { if key == nil || item == nil { panic("arrow: nil key or item type for MapType") } return &MapType{value: ListOf(StructOf(Field{Name: "key", Type: key}, Field{Name: "value", Type: item, Nullable: true}))} } func MapOfFields(key, item Field) *MapType { if key.Type == nil || item.Type == nil { panic("arrow: nil key or item type for MapType") } if key.Nullable { panic("arrow: key field must be non-nullable") } key.Name = "key" item.Name = "value" return &MapType{value: ListOfField(Field{ Name: "entries", Type: StructOf(key, item), })} } func MapOfWithMetadata(key DataType, keyMetadata Metadata, item DataType, itemMetadata Metadata) *MapType { if key == nil || item == nil { panic("arrow: nil key or item type for MapType") } return &MapType{value: ListOf(StructOf(Field{ Name: "key", Type: key, Metadata: keyMetadata, }, Field{ Name: "value", Type: item, Nullable: true, Metadata: itemMetadata, }))} } func (*MapType) ID() Type { return MAP } func (*MapType) Name() string { return "map" } func (t *MapType) String() string { var o strings.Builder o.WriteString(fmt.Sprintf("map<%s, %s", t.value.Elem().(*StructType).Field(0).Type, t.value.Elem().(*StructType).Field(1).Type)) if t.KeysSorted { o.WriteString(", keys_sorted") } if t.ItemField().Nullable { o.WriteString(", items_nullable") } else { o.WriteString(", items_non_nullable") } o.WriteString(">") return o.String() } func (t *MapType) KeyField() Field { return t.value.Elem().(*StructType).Field(0) } func (t *MapType) KeyType() DataType { return t.KeyField().Type } func (t *MapType) ItemField() Field { return t.value.Elem().(*StructType).Field(1) } func (t *MapType) ItemType() DataType { return t.ItemField().Type } // Deprecated: use MapType.Elem().(*StructType) instead func (t *MapType) ValueType() *StructType { return t.Elem().(*StructType) } // Deprecated: use MapType.ElemField() instead func (t *MapType) ValueField() Field { return t.ElemField() } // Elem returns the MapType's element type (if treating MapType as ListLikeType) func (t *MapType) Elem() DataType { return t.value.Elem() } // ElemField returns the MapType's element field (if treating MapType as ListLikeType) func (t *MapType) ElemField() Field { return Field{Name: "entries", Type: t.Elem()} } func (t *MapType) SetItemNullable(nullable bool) { t.value.Elem().(*StructType).fields[1].Nullable = nullable } func (t *MapType) Fingerprint() string { keyFingerprint := t.KeyType().Fingerprint() itemFingerprint := t.ItemType().Fingerprint() if keyFingerprint == "" || itemFingerprint == "" { return "" } fingerprint := typeFingerprint(t) if t.KeysSorted { fingerprint += "s" } return fingerprint + "{" + keyFingerprint + itemFingerprint + "}" } func (t *MapType) Fields() []Field { return []Field{t.ElemField()} } func (t *MapType) NumFields() int { return 1 } func (t *MapType) Layout() DataTypeLayout { return t.value.Layout() } func (*MapType) OffsetTypeTraits() OffsetTraits { return Int32Traits } type ( // UnionTypeCode is an alias to int8 which is the type of the ids // used for union arrays. UnionTypeCode = int8 UnionMode int8 ) const ( MaxUnionTypeCode UnionTypeCode = 127 InvalidUnionChildID int = -1 SparseMode UnionMode = iota // SPARSE DenseMode // DENSE ) // UnionType is an interface to encompass both Dense and Sparse Union types. // // A UnionType is a nested type where each logical value is taken // from a single child. A buffer of 8-bit type ids (typed as UnionTypeCode) // indicates which child a given logical value is to be taken from. This is // represented as the "child id" or "child index", which is the index into the // list of child fields for a given child. type UnionType interface { NestedType // Mode returns either SparseMode or DenseMode depending on the current // concrete data type. Mode() UnionMode // ChildIDs returns a slice of ints to map UnionTypeCode values to // the index in the Fields that represents the given Type. It is // initialized with all values being InvalidUnionChildID (-1) // before being populated based on the TypeCodes and fields of the type. // The field for a given type can be retrieved by Fields()[ChildIDs()[typeCode]] ChildIDs() []int // TypeCodes returns the list of available type codes for this union type // which will correspond to indexes into the ChildIDs slice to locate the // appropriate child. A union Array contains a buffer of these type codes // which indicate for a given index, which child has the value for that index. TypeCodes() []UnionTypeCode // MaxTypeCode returns the value of the largest TypeCode in the list of typecodes // that are defined by this Union type MaxTypeCode() UnionTypeCode } // UnionOf returns an appropriate union type for the given Mode (Sparse or Dense), // child fields, and type codes. len(fields) == len(typeCodes) must be true, or else // this will panic. len(fields) can be 0. func UnionOf(mode UnionMode, fields []Field, typeCodes []UnionTypeCode) UnionType { switch mode { case SparseMode: return SparseUnionOf(fields, typeCodes) case DenseMode: return DenseUnionOf(fields, typeCodes) default: panic("arrow: invalid union mode") } } type unionType struct { children []Field typeCodes []UnionTypeCode childIDs [int(MaxUnionTypeCode) + 1]int } func (t *unionType) init(fields []Field, typeCodes []UnionTypeCode) { // initialize all child IDs to -1 t.childIDs[0] = InvalidUnionChildID for i := 1; i < len(t.childIDs); i *= 2 { copy(t.childIDs[i:], t.childIDs[:i]) } t.children = fields t.typeCodes = typeCodes for i, tc := range t.typeCodes { t.childIDs[tc] = i } } // Fields method provides a copy of union type fields // (so it can be safely mutated and will not result in updating the union type). func (t *unionType) Fields() []Field { fields := make([]Field, len(t.children)) copy(fields, t.children) return fields } func (t *unionType) NumFields() int { return len(t.children) } func (t *unionType) TypeCodes() []UnionTypeCode { return t.typeCodes } func (t *unionType) ChildIDs() []int { return t.childIDs[:] } func (t *unionType) validate(fields []Field, typeCodes []UnionTypeCode, _ UnionMode) error { if len(fields) != len(typeCodes) { return errors.New("arrow: union types should have the same number of fields as type codes") } for _, c := range typeCodes { if c < 0 || c > MaxUnionTypeCode { return errors.New("arrow: union type code out of bounds") } } return nil } func (t *unionType) MaxTypeCode() (max UnionTypeCode) { if len(t.typeCodes) == 0 { return } max = t.typeCodes[0] for _, c := range t.typeCodes[1:] { if c > max { max = c } } return } func (t *unionType) String() string { var b strings.Builder b.WriteByte('<') for i := range t.typeCodes { if i != 0 { b.WriteString(", ") } fmt.Fprintf(&b, "%s=%d", t.children[i], t.typeCodes[i]) } b.WriteByte('>') return b.String() } func (t *unionType) fingerprint() string { var b strings.Builder for _, c := range t.typeCodes { fmt.Fprintf(&b, ":%d", c) } b.WriteString("]{") for _, c := range t.children { fingerprint := c.Fingerprint() if len(fingerprint) == 0 { return "" } b.WriteString(fingerprint) b.WriteByte(';') } b.WriteByte('}') return b.String() } func fieldsFromArrays(arrays []Array, names ...string) (ret []Field) { ret = make([]Field, len(arrays)) if len(names) == 0 { for i, c := range arrays { ret[i] = Field{Name: strconv.Itoa(i), Type: c.DataType(), Nullable: true} } } else { debug.Assert(len(names) == len(arrays), "mismatch of arrays and names") for i, c := range arrays { ret[i] = Field{Name: names[i], Type: c.DataType(), Nullable: true} } } return } // SparseUnionType is the concrete type for Sparse union data. // // A sparse union is a nested type where each logical value is taken // from a single child. A buffer of 8-bit type ids indicates which child // a given logical value is to be taken from. // // In a sparse union, each child array will have the same length as the // union array itself, regardless of the actual number of union values which // refer to it. // // Unlike most other types, unions do not have a top-level validity bitmap. type SparseUnionType struct { unionType } // SparseUnionFromArrays enables creating a union type from a list of Arrays, // field names, and type codes. len(fields) should be either 0 or equal to len(children). // len(codes) should also be either 0, or equal to len(children). // // If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... // and so on. If len(codes) == 0, then the type codes will be constructed as // [0, 1, 2, ..., n]. func SparseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *SparseUnionType { if len(codes) == 0 { codes = make([]UnionTypeCode, len(children)) for i := range children { codes[i] = UnionTypeCode(i) } } return SparseUnionOf(fieldsFromArrays(children, fields...), codes) } // SparseUnionOf is equivalent to UnionOf(arrow.SparseMode, fields, typeCodes), // constructing a SparseUnionType from a list of fields and type codes. // // If len(fields) != len(typeCodes) this will panic. They are allowed to be // of length 0. func SparseUnionOf(fields []Field, typeCodes []UnionTypeCode) *SparseUnionType { ret := &SparseUnionType{} if err := ret.validate(fields, typeCodes, ret.Mode()); err != nil { panic(err) } ret.init(fields, typeCodes) return ret } func (SparseUnionType) ID() Type { return SPARSE_UNION } func (SparseUnionType) Name() string { return "sparse_union" } func (SparseUnionType) Mode() UnionMode { return SparseMode } func (t *SparseUnionType) Fingerprint() string { return typeFingerprint(t) + "[s" + t.fingerprint() } func (SparseUnionType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecFixedWidth(Uint8SizeBytes)}} } func (t *SparseUnionType) String() string { return t.Name() + t.unionType.String() } // DenseUnionType is the concrete type for dense union data. // // A dense union is a nested type where each logical value is taken from a // single child, at a specific offset. A buffer of 8-bit type ids (typed // as UnionTypeCode) indicates which child a given logical value is to be // taken from and a buffer of 32-bit offsets indicating which physical position // in the given child array has the logical value for that index. // // Unlike a sparse union, a dense union allows encoding only the child values // which are actually referred to by the union array. This is counterbalanced // by the additional footprint of the offsets buffer, and the additional // indirection cost when looking up values. // // Unlike most other types, unions don't have a top-level validity bitmap type DenseUnionType struct { unionType } // DenseUnionFromArrays enables creating a union type from a list of Arrays, // field names, and type codes. len(fields) should be either 0 or equal to len(children). // len(codes) should also be either 0, or equal to len(children). // // If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... // and so on. If len(codes) == 0, then the type codes will be constructed as // [0, 1, 2, ..., n]. func DenseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *DenseUnionType { if len(codes) == 0 { codes = make([]UnionTypeCode, len(children)) for i := range children { codes[i] = UnionTypeCode(i) } } return DenseUnionOf(fieldsFromArrays(children, fields...), codes) } // DenseUnionOf is equivalent to UnionOf(arrow.DenseMode, fields, typeCodes), // constructing a DenseUnionType from a list of fields and type codes. // // If len(fields) != len(typeCodes) this will panic. They are allowed to be // of length 0. func DenseUnionOf(fields []Field, typeCodes []UnionTypeCode) *DenseUnionType { ret := &DenseUnionType{} if err := ret.validate(fields, typeCodes, ret.Mode()); err != nil { panic(err) } ret.init(fields, typeCodes) return ret } func (DenseUnionType) ID() Type { return DENSE_UNION } func (DenseUnionType) Name() string { return "dense_union" } func (DenseUnionType) Mode() UnionMode { return DenseMode } func (t *DenseUnionType) Fingerprint() string { return typeFingerprint(t) + "[s" + t.fingerprint() } func (DenseUnionType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecFixedWidth(Uint8SizeBytes), SpecFixedWidth(Int32SizeBytes)}} } func (DenseUnionType) OffsetTypeTraits() OffsetTraits { return Int32Traits } func (t *DenseUnionType) String() string { return t.Name() + t.unionType.String() } type Field struct { Name string // Field name Type DataType // The field's data type Nullable bool // Fields can be nullable Metadata Metadata // The field's metadata, if any } func (f Field) Fingerprint() string { typeFingerprint := f.Type.Fingerprint() if typeFingerprint == "" { return "" } var b strings.Builder b.WriteByte('F') if f.Nullable { b.WriteByte('n') } else { b.WriteByte('N') } b.WriteString(f.Name) b.WriteByte('{') b.WriteString(typeFingerprint) b.WriteByte('}') return b.String() } func (f Field) HasMetadata() bool { return f.Metadata.Len() != 0 } func (f Field) Equal(o Field) bool { switch { case f.Name != o.Name: return false case f.Nullable != o.Nullable: return false case !TypeEqual(f.Type, o.Type, CheckMetadata()): return false case !f.Metadata.Equal(o.Metadata): return false } return true } func (f Field) String() string { var o strings.Builder nullable := "" if f.Nullable { nullable = ", nullable" } fmt.Fprintf(&o, "%s: type=%v%v", f.Name, f.Type, nullable) if f.HasMetadata() { fmt.Fprintf(&o, "\n%*.smetadata: %v", len(f.Name)+2, "", f.Metadata) } return o.String() } var ( _ DataType = (*ListType)(nil) _ DataType = (*LargeListType)(nil) _ DataType = (*FixedSizeListType)(nil) _ DataType = (*StructType)(nil) _ DataType = (*MapType)(nil) _ DataType = (*DenseUnionType)(nil) _ DataType = (*SparseUnionType)(nil) _ NestedType = (*ListType)(nil) _ NestedType = (*LargeListType)(nil) _ NestedType = (*FixedSizeListType)(nil) _ NestedType = (*MapType)(nil) _ NestedType = (*DenseUnionType)(nil) _ NestedType = (*SparseUnionType)(nil) _ ListLikeType = (*ListType)(nil) _ ListLikeType = (*LargeListType)(nil) _ ListLikeType = (*FixedSizeListType)(nil) _ ListLikeType = (*MapType)(nil) _ VarLenListLikeType = (*ListType)(nil) _ VarLenListLikeType = (*LargeListType)(nil) _ VarLenListLikeType = (*ListViewType)(nil) _ VarLenListLikeType = (*LargeListViewType)(nil) _ VarLenListLikeType = (*FixedSizeListType)(nil) _ VarLenListLikeType = (*MapType)(nil) )