arrow/extensions/uuid.go (203 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package extensions

import (
	"bytes"
	"fmt"
	"reflect"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/memory"
	"github.com/apache/arrow-go/v18/internal/json"
	"github.com/apache/arrow-go/v18/parquet/schema"
	"github.com/google/uuid"
)

// UUIDBuilder is an array builder for UUID values. It embeds
// *array.ExtensionBuilder, so the generic builder methods are promoted onto
// it, and adds UUID-typed append helpers on top.
type UUIDBuilder struct {
	*array.ExtensionBuilder
}

// NewUUIDBuilder creates a new UUIDBuilder, exposing a convenient and efficient interface
// for writing uuid.UUID (or [16]byte) values to the underlying FixedSizeBinary storage array.
func NewUUIDBuilder(mem memory.Allocator) *UUIDBuilder { return &UUIDBuilder{ExtensionBuilder: array.NewExtensionBuilder(mem, NewUUIDType())} } func (b *UUIDBuilder) Append(v uuid.UUID) { b.AppendBytes(v) } func (b *UUIDBuilder) AppendBytes(v [16]byte) { b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).Append(v[:]) } func (b *UUIDBuilder) UnsafeAppend(v uuid.UUID) { b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).UnsafeAppend(v[:]) } func (b *UUIDBuilder) AppendValueFromString(s string) error { if s == array.NullValueStr { b.AppendNull() return nil } uid, err := uuid.Parse(s) if err != nil { return err } b.Append(uid) return nil } func (b *UUIDBuilder) AppendValues(v []uuid.UUID, valid []bool) { if len(v) != len(valid) && len(valid) != 0 { panic("len(v) != len(valid) && len(valid) != 0") } data := make([][]byte, len(v)) for i := range v { if len(valid) > 0 && !valid[i] { continue } data[i] = v[i][:] } b.ExtensionBuilder.Builder.(*array.FixedSizeBinaryBuilder).AppendValues(data, valid) } func (b *UUIDBuilder) UnmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err } var val uuid.UUID switch v := t.(type) { case string: val, err = uuid.Parse(v) if err != nil { return err } case []byte: val, err = uuid.ParseBytes(v) if err != nil { return err } case nil: b.AppendNull() return nil default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), Type: reflect.TypeOf([]byte{}), Offset: dec.InputOffset(), Struct: fmt.Sprintf("FixedSizeBinary[%d]", 16), } } b.Append(val) return nil } func (b *UUIDBuilder) Unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.UnmarshalOne(dec); err != nil { return err } } return nil } func (b *UUIDBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { return err } if delim, ok := t.(json.Delim); !ok || delim != '[' { return fmt.Errorf("uuid builder must unpack from json array, found %s", delim) } return 
b.Unmarshal(dec) } // UUIDArray is a simple array which is a FixedSizeBinary(16) type UUIDArray struct { array.ExtensionArrayBase } func (a *UUIDArray) String() string { arr := a.Storage().(*array.FixedSizeBinary) o := new(strings.Builder) o.WriteString("[") for i := 0; i < arr.Len(); i++ { if i > 0 { o.WriteString(" ") } switch { case a.IsNull(i): o.WriteString(array.NullValueStr) default: fmt.Fprintf(o, "%q", a.Value(i)) } } o.WriteString("]") return o.String() } func (a *UUIDArray) Value(i int) uuid.UUID { if a.IsNull(i) { return uuid.Nil } return uuid.Must(uuid.FromBytes(a.Storage().(*array.FixedSizeBinary).Value(i))) } func (a *UUIDArray) Values() []uuid.UUID { values := make([]uuid.UUID, a.Len()) for i := range values { values[i] = a.Value(i) } return values } func (a *UUIDArray) ValueStr(i int) string { switch { case a.IsNull(i): return array.NullValueStr default: return a.Value(i).String() } } func (a *UUIDArray) MarshalJSON() ([]byte, error) { vals := make([]any, a.Len()) for i := range vals { vals[i] = a.GetOneForMarshal(i) } return json.Marshal(vals) } func (a *UUIDArray) GetOneForMarshal(i int) interface{} { if a.IsValid(i) { return a.Value(i) } return nil } // UUIDType is a simple extension type that represents a FixedSizeBinary(16) // to be used for representing UUIDs type UUIDType struct { arrow.ExtensionBase } // ParquetLogicalType implements pqarrow.ExtensionCustomParquetType. 
func (e *UUIDType) ParquetLogicalType() schema.LogicalType {
	return schema.UUIDLogicalType{}
}

// NewUUIDType is a convenience function to create an instance of UUIDType
// with the correct storage type
func NewUUIDType() *UUIDType {
	return &UUIDType{ExtensionBase: arrow.ExtensionBase{Storage: &arrow.FixedSizeBinaryType{ByteWidth: 16}}}
}

// ArrayType returns TypeOf(UUIDArray{}) for constructing UUID arrays
func (*UUIDType) ArrayType() reflect.Type {
	return reflect.TypeOf(UUIDArray{})
}

// ExtensionName returns the extension name of this type, "arrow.uuid".
func (*UUIDType) ExtensionName() string {
	return "arrow.uuid"
}

// Bytes returns the width of a single value in bytes (16).
func (*UUIDType) Bytes() int {
	return 16
}

// BitWidth returns the width of a single value in bits (128).
func (*UUIDType) BitWidth() int {
	return 128
}

// String returns the type formatted as "extension<NAME>", where NAME is
// the value of ExtensionName.
func (e *UUIDType) String() string {
	return fmt.Sprintf("extension<%s>", e.ExtensionName())
}

// MarshalJSON encodes the type as a JSON object with "name" and "metadata"
// members, in that order. The metadata is written as a JSON string, so the
// empty value returned by Serialize yields "metadata":"" and the result is
// always valid JSON.
func (e *UUIDType) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Name     string `json:"name"`
		Metadata string `json:"metadata"`
	}{
		Name:     e.ExtensionName(),
		Metadata: e.Serialize(),
	})
}

// Serialize returns the serialized extension metadata, which is always the
// empty string for this type.
func (*UUIDType) Serialize() string {
	return ""
}

// Deserialize expects storageType to be FixedSizeBinaryType{ByteWidth: 16}
// and returns an error for any other storage type. The data argument is
// not inspected.
func (*UUIDType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
	if !arrow.TypeEqual(storageType, &arrow.FixedSizeBinaryType{ByteWidth: 16}) {
		return nil, fmt.Errorf("invalid storage type for UUIDType: %s", storageType.Name())
	}
	return NewUUIDType(), nil
}

// ExtensionEquals returns true if both extensions have the same name
func (e *UUIDType) ExtensionEquals(other arrow.ExtensionType) bool {
	return e.ExtensionName() == other.ExtensionName()
}

// NewBuilder returns a UUIDBuilder backed by mem.
func (*UUIDType) NewBuilder(mem memory.Allocator) array.Builder {
	return NewUUIDBuilder(mem)
}

// Compile-time checks that the types in this file satisfy the interfaces
// they are meant to implement.
var (
	_ arrow.ExtensionType          = (*UUIDType)(nil)
	_ array.CustomExtensionBuilder = (*UUIDType)(nil)
	_ array.ExtensionArray         = (*UUIDArray)(nil)
	_ array.Builder                = (*UUIDBuilder)(nil)
	_ arrow.FixedWidthDataType     = (*UUIDType)(nil)
)