arrow/array/fixedsize_binarybuilder.go (189 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package array import ( "bytes" "encoding/base64" "fmt" "reflect" "github.com/apache/arrow-go/v18/arrow" "github.com/apache/arrow-go/v18/arrow/internal/debug" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/apache/arrow-go/v18/internal/json" ) // A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. type FixedSizeBinaryBuilder struct { builder dtype *arrow.FixedSizeBinaryType values *byteBufferBuilder } func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinaryType) *FixedSizeBinaryBuilder { b := &FixedSizeBinaryBuilder{ builder: builder{mem: mem}, dtype: dtype, values: newByteBufferBuilder(mem), } b.builder.refCount.Add(1) return b } func (b *FixedSizeBinaryBuilder) Type() arrow.DataType { return b.dtype } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. func (b *FixedSizeBinaryBuilder) Release() { debug.Assert(b.refCount.Load() > 0, "too many releases") if b.refCount.Add(-1) == 0 { if b.nullBitmap != nil { b.nullBitmap.Release() b.nullBitmap = nil } if b.values != nil { b.values.Release() b.values = nil } } } func (b *FixedSizeBinaryBuilder) Append(v []byte) { if len(v) != b.dtype.ByteWidth { // TODO(alexandre): should we return an error instead? panic("len(v) != b.dtype.ByteWidth") } b.Reserve(1) b.values.Append(v) b.UnsafeAppendBoolToBitmap(true) } func (b *FixedSizeBinaryBuilder) AppendNull() { b.Reserve(1) b.values.Advance(b.dtype.ByteWidth) b.UnsafeAppendBoolToBitmap(false) } func (b *FixedSizeBinaryBuilder) AppendNulls(n int) { for i := 0; i < n; i++ { b.AppendNull() } } func (b *FixedSizeBinaryBuilder) AppendEmptyValue() { b.Reserve(1) b.values.Advance(b.dtype.ByteWidth) b.UnsafeAppendBoolToBitmap(true) } func (b *FixedSizeBinaryBuilder) AppendEmptyValues(n int) { for i := 0; i < n; i++ { b.AppendEmptyValue() } } func (b *FixedSizeBinaryBuilder) UnsafeAppend(v []byte) { b.values.unsafeAppend(v) b.UnsafeAppendBoolToBitmap(true) } // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. func (b *FixedSizeBinaryBuilder) AppendValues(v [][]byte, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } if len(v) == 0 { return } b.Reserve(len(v)) for _, vv := range v { switch len(vv) { case 0: b.values.Advance(b.dtype.ByteWidth) case b.dtype.ByteWidth: b.values.Append(vv) default: panic(fmt.Errorf("array: invalid binary length (got=%d, want=%d)", len(vv), b.dtype.ByteWidth)) } } b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) } func (b *FixedSizeBinaryBuilder) init(capacity int) { b.builder.init(capacity) b.values.resize(capacity * b.dtype.ByteWidth) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. func (b *FixedSizeBinaryBuilder) Reserve(n int) { b.builder.reserve(n, b.Resize) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. func (b *FixedSizeBinaryBuilder) Resize(n int) { b.builder.resize(n, b.init) } // NewArray creates a FixedSizeBinary array from the memory buffers used by the // builder and resets the FixedSizeBinaryBuilder so it can be used to build a new array. func (b *FixedSizeBinaryBuilder) NewArray() arrow.Array { return b.NewFixedSizeBinaryArray() } // NewFixedSizeBinaryArray creates a FixedSizeBinary array from the memory buffers used by the builder and resets the FixedSizeBinaryBuilder // so it can be used to build a new array. func (b *FixedSizeBinaryBuilder) NewFixedSizeBinaryArray() (a *FixedSizeBinary) { data := b.newData() a = NewFixedSizeBinaryData(data) data.Release() return } func (b *FixedSizeBinaryBuilder) newData() (data *Data) { values := b.values.Finish() data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, values}, nil, b.nulls, 0) if values != nil { values.Release() } b.builder.reset() return } func (b *FixedSizeBinaryBuilder) AppendValueFromString(s string) error { if s == NullValueStr { b.AppendNull() return nil } data, err := base64.StdEncoding.DecodeString(s) if err != nil { b.AppendNull() return err } b.Append(data) return nil } func (b *FixedSizeBinaryBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } var val []byte switch v := t.(type) { case string: data, err := base64.StdEncoding.DecodeString(v) if err != nil { return err } val = data case []byte: val = v case nil: b.AppendNull() return nil default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), } } if len(val) != b.dtype.ByteWidth { return &json.UnmarshalTypeError{ Value: fmt.Sprint(val), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), } } b.Append(val) return nil } func (b *FixedSizeBinaryBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *FixedSizeBinaryBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("fixed size binary builder must unpack from json array, found %s", delim) } return b.Unmarshal(dec) } var _ Builder = (*FixedSizeBinaryBuilder)(nil)