arrow/extensions/json.go (92 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package extensions

import (
	"fmt"
	"reflect"
	"slices"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/internal/json"
	"github.com/apache/arrow-go/v18/parquet/schema"
)

// jsonSupportedStorageTypes lists the storage types that NewJSONType
// accepts for the JSON extension type.
var jsonSupportedStorageTypes = []arrow.DataType{
	arrow.BinaryTypes.String,
	arrow.BinaryTypes.LargeString,
	arrow.BinaryTypes.StringView,
}

// JSONType represents a UTF-8 encoded JSON string as specified in RFC8259.
// It embeds arrow.ExtensionBase, whose Storage field carries the underlying
// storage type.
type JSONType struct {
	arrow.ExtensionBase
}

// ParquetLogicalType implements pqarrow.ExtensionCustomParquetType.
// It always returns schema.JSONLogicalType{}, independent of the storage type.
func (b *JSONType) ParquetLogicalType() schema.LogicalType {
	return schema.JSONLogicalType{}
}

// NewJSONType creates a new JSONType with the specified storage type.
// storageType must be one of String, LargeString, StringView; any other
// storage type results in an error.
func NewJSONType(storageType arrow.DataType) (*JSONType, error) { if !slices.Contains(jsonSupportedStorageTypes, storageType) { return nil, fmt.Errorf("unsupported storage type for JSON extension type: %s", storageType) } return &JSONType{ExtensionBase: arrow.ExtensionBase{Storage: storageType}}, nil } func (b *JSONType) ArrayType() reflect.Type { return reflect.TypeOf(JSONArray{}) } func (b *JSONType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) { if !(data == "" || data == "{}") { return nil, fmt.Errorf("serialized metadata for JSON extension type must be '' or '{}', found: %s", data) } return NewJSONType(storageType) } func (b *JSONType) ExtensionEquals(other arrow.ExtensionType) bool { return b.ExtensionName() == other.ExtensionName() && arrow.TypeEqual(b.Storage, other.StorageType()) } func (b *JSONType) ExtensionName() string { return "arrow.json" } func (b *JSONType) Serialize() string { return "" } func (b *JSONType) String() string { return fmt.Sprintf("extension<%s[storage_type=%s]>", b.ExtensionName(), b.Storage) } // JSONArray is logically an array of UTF-8 encoded JSON strings. // Its values are unmarshaled to native Go values. type JSONArray struct { array.ExtensionArrayBase } func (a *JSONArray) String() string { b, err := a.MarshalJSON() if err != nil { panic(fmt.Sprintf("failed marshal JSONArray: %s", err)) } return string(b) } func (a *JSONArray) Value(i int) any { val := a.ValueBytes(i) var res any if err := json.Unmarshal(val, &res); err != nil { panic(err) } return res } func (a *JSONArray) ValueStr(i int) string { return string(a.ValueBytes(i)) } func (a *JSONArray) ValueBytes(i int) []byte { // convert to json.RawMessage, set to nil if elem isNull. val := a.ValueJSON(i) // simply returns wrapped bytes, or null if val is nil. b, err := val.MarshalJSON() if err != nil { panic(err) } return b } // ValueJSON wraps the underlying string value as a json.RawMessage, // or returns nil if the array value is null. 
func (a *JSONArray) ValueJSON(i int) json.RawMessage { var val json.RawMessage if a.IsValid(i) { val = json.RawMessage(a.Storage().(array.StringLike).Value(i)) } return val } // MarshalJSON implements json.Marshaler. // Marshaling json.RawMessage is a no-op, except that nil values will // be marshaled as a JSON null. func (a *JSONArray) MarshalJSON() ([]byte, error) { values := make([]json.RawMessage, a.Len()) for i := 0; i < a.Len(); i++ { values[i] = a.ValueJSON(i) } return json.Marshal(values) } // GetOneForMarshal implements arrow.Array. func (a *JSONArray) GetOneForMarshal(i int) interface{} { return a.ValueJSON(i) } var ( _ arrow.ExtensionType = (*JSONType)(nil) _ array.ExtensionArray = (*JSONArray)(nil) )