arrow/array/fixed_size_list.go (299 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package array

import (
	"bytes"
	"fmt"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/bitutil"
	"github.com/apache/arrow-go/v18/arrow/internal/debug"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/internal/json"
)

// FixedSizeList represents an immutable sequence of N array values.
type FixedSizeList struct {
	array
	// n is the number of child values per list element; setData reads it
	// from the array's *arrow.FixedSizeListType.
	n int32
	// values is the child array; setData builds it from the first child of
	// the array data.
	values arrow.Array
}

// Compile-time check that *FixedSizeList satisfies ListLike.
var _ ListLike = (*FixedSizeList)(nil)

// NewFixedSizeListData returns a new FixedSizeList array value, from data.
// data must be a *Data; the unchecked type assertion panics otherwise.
func NewFixedSizeListData(data arrow.ArrayData) *FixedSizeList { a := &FixedSizeList{} a.refCount.Add(1) a.setData(data.(*Data)) return a } func (a *FixedSizeList) ListValues() arrow.Array { return a.values } func (a *FixedSizeList) ValueStr(i int) string { if a.IsNull(i) { return NullValueStr } return string(a.GetOneForMarshal(i).(json.RawMessage)) } func (a *FixedSizeList) String() string { o := new(strings.Builder) o.WriteString("[") for i := 0; i < a.Len(); i++ { if i > 0 { o.WriteString(" ") } if !a.IsValid(i) { o.WriteString(NullValueStr) continue } sub := a.newListValue(i) fmt.Fprintf(o, "%v", sub) sub.Release() } o.WriteString("]") return o.String() } func (a *FixedSizeList) newListValue(i int) arrow.Array { beg, end := a.ValueOffsets(i) return NewSlice(a.values, beg, end) } func (a *FixedSizeList) setData(data *Data) { a.array.setData(data) a.n = a.DataType().(*arrow.FixedSizeListType).Len() a.values = MakeFromData(data.childData[0]) } func arrayEqualFixedSizeList(left, right *FixedSizeList) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { continue } o := func() bool { l := left.newListValue(i) defer l.Release() r := right.newListValue(i) defer r.Release() return Equal(l, r) }() if !o { return false } } return true } // Len returns the number of elements in the array. 
func (a *FixedSizeList) Len() int { return a.array.Len() } func (a *FixedSizeList) ValueOffsets(i int) (start, end int64) { n := int64(a.n) off := int64(a.array.data.offset) start, end = (off+int64(i))*n, (off+int64(i+1))*n return } func (a *FixedSizeList) Retain() { a.array.Retain() a.values.Retain() } func (a *FixedSizeList) Release() { a.array.Release() a.values.Release() } func (a *FixedSizeList) GetOneForMarshal(i int) interface{} { if a.IsNull(i) { return nil } slice := a.newListValue(i) defer slice.Release() v, err := json.Marshal(slice) if err != nil { panic(err) } return json.RawMessage(v) } func (a *FixedSizeList) MarshalJSON() ([]byte, error) { var buf bytes.Buffer enc := json.NewEncoder(&buf) buf.WriteByte('[') for i := 0; i < a.Len(); i++ { if i != 0 { buf.WriteByte(',') } if a.IsNull(i) { enc.Encode(nil) continue } slice := a.newListValue(i) if err := enc.Encode(slice); err != nil { return nil, err } slice.Release() } buf.WriteByte(']') return buf.Bytes(), nil } type FixedSizeListBuilder struct { baseListBuilder n int32 // number of elements in the fixed-size list. } // NewFixedSizeListBuilder returns a builder, using the provided memory allocator. // The created list builder will create a list whose elements will be of type etype. func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType) *FixedSizeListBuilder { fslb := &FixedSizeListBuilder{ baseListBuilder{ builder: builder{mem: mem}, values: NewBuilder(mem, etype), dt: arrow.FixedSizeListOf(n, etype), }, n, } fslb.baseListBuilder.builder.refCount.Add(1) return fslb } // NewFixedSizeListBuilderWithField returns a builder similarly to // NewFixedSizeListBuilder, but it accepts a child rather than just a datatype // to ensure nullability context is preserved. 
func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field arrow.Field) *FixedSizeListBuilder { fslb := &FixedSizeListBuilder{ baseListBuilder{ builder: builder{mem: mem}, values: NewBuilder(mem, field.Type), dt: arrow.FixedSizeListOfField(n, field), }, n, } fslb.baseListBuilder.builder.refCount.Add(1) return fslb } func (b *FixedSizeListBuilder) Type() arrow.DataType { return b.dt } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *FixedSizeListBuilder) Release() { debug.Assert(b.refCount.Load() > 0, "too many releases") if b.refCount.Add(-1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.values != nil { b.values.Release() b.values = nil } } } func (b *FixedSizeListBuilder) Append(v bool) { b.Reserve(1) b.unsafeAppendBoolToBitmap(v) } // AppendNull will append null values to the underlying values by itself func (b *FixedSizeListBuilder) AppendNull() { b.Reserve(1) b.unsafeAppendBoolToBitmap(false) // require to append this due to value indexes for i := int32(0); i < b.n; i++ { b.values.AppendNull() } } // AppendNulls will append n null values to the underlying values by itself func (b *FixedSizeListBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *FixedSizeListBuilder) AppendEmptyValue() { b.Append(true) for i := int32(0); i < b.n; i++ { b.values.AppendEmptyValue() } } func (b *FixedSizeListBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *FixedSizeListBuilder) AppendValues(valid []bool) { b.Reserve(len(valid)) b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) } func (b *FixedSizeListBuilder) unsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { b.nulls++ } b.length++ } func (b *FixedSizeListBuilder) init(capacity int) { b.builder.init(capacity) } // Reserve ensures there is enough space for appending n 
elements // by checking the capacity and calling Resize if necessary. func (b *FixedSizeListBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *FixedSizeListBuilder) Resize(n int) { if n < minBuilderCapacity { n = minBuilderCapacity } if b.capacity == 0 { b.init(n) } else { b.builder.resize(n, b.builder.init) } } func (b *FixedSizeListBuilder) ValueBuilder() Builder { return b.values } // NewArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder // so it can be used to build a new array. func (b *FixedSizeListBuilder) NewArray() arrow.Array { return b.NewListArray() } // NewListArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder // so it can be used to build a new array. func (b *FixedSizeListBuilder) NewListArray() (a *FixedSizeList) { data := b.newData() a = NewFixedSizeListData(data) data.Release() return } func (b *FixedSizeListBuilder) newData() (data *Data) { values := b.values.NewArray() defer values.Release() data = NewData( b.dt, b.length, []*memory.Buffer{b.nullBitmap}, []arrow.ArrayData{values.Data()}, b.nulls, 0, ) b.reset() return } func (b *FixedSizeListBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } dec := json.NewDecoder(strings.NewReader(s)) return b.UnmarshalOne(dec) } func (b *FixedSizeListBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } switch t { case json.Delim('['): b.Append(true) if err := b.values.Unmarshal(dec); err != nil { return err } // consume ']' _, err := dec.Token() return err case nil: b.AppendNull() default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Struct: b.dt.String(), } } return nil } func (b *FixedSizeListBuilder) 
Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *FixedSizeListBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("fixed size list builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var ( _ arrow.Array = (*FixedSizeList)(nil) _ Builder = (*FixedSizeListBuilder)(nil) )