src/arraytypes/struct.jl (105 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
    Arrow.Struct

An `ArrowVector` whose elements are "struct"-like values with ordered, named
fields, such as a `NamedTuple{names, types}` or a regular julia `struct`.

Fields:
- `validity`: the `ValidityBitmap` consulted by `getindex` when `T` admits `missing`
- `data`: a tuple holding one child vector per field
- `ℓ`: the number of elements (this is what `size` reports)
- `metadata`: optional `String => String` metadata, or `nothing`
"""
struct Struct{T,S} <: ArrowVector{T}
    validity::ValidityBitmap
    data::S # Tuple of ArrowVector
    ℓ::Int
    metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end

# A `Struct` is one-dimensional; its length is stored explicitly in `ℓ`.
function Base.size(s::Struct)
    return (s.ℓ,)
end

# Type-level predicates used by `getindex` to choose how an element is rebuilt.
isnamedtuple(::Any) = false
isnamedtuple(::Type{<:NamedTuple}) = true
istuple(::Any) = false
istuple(::Type{<:Tuple}) = true

# Materialize element `i` by reading position `i` of every child vector.
# - When `T` admits `missing` and the validity bit for `i` is unset, `missing` is returned.
# - When the non-missing element type is a `NamedTuple` or `Tuple`, it is constructed
#   directly from the tuple of child values.
# - Otherwise the child values are splatted into `ArrowTypes.fromarrow`.
@propagate_inbounds function Base.getindex(s::Struct{T,S}, i::Integer) where {T,S}
    @boundscheck checkbounds(s, i)
    NT = Base.nonmissingtype(T)
    # `NT !== T` holds exactly when `T` includes `Missing`; only then is validity read.
    if NT !== T && !s.validity[i]
        return missing
    end
    if isnamedtuple(NT) || istuple(NT)
        return NT(ntuple(j -> s.data[j][i], fieldcount(S)))
    end
    return ArrowTypes.fromarrow(NT, (s.data[j][i] for j = 1:fieldcount(S))...)
end

# @propagate_inbounds function Base.setindex!(s::Struct{T}, v::T, i::Integer) where {T}
#     @boundscheck checkbounds(s, i)
#     if v === missing
#         @inbounds s.validity[i] = false
#     else
#         NT = Base.nonmissingtype(T)
#         N = fieldcount(NT)
#         foreach(1:N) do j
#             @inbounds s.data[j][i] = getfield(v, j)
#         end
#     end
#     return v
# end

# Lazy vector exposing field number `i` (a type parameter) of every element of `data`.
struct ToStruct{T,i,A} <: AbstractVector{T}
    data::A # eltype is NamedTuple or some struct
end

# Build the view over field `j` of `x`; the element type is the declared type of that
# field in the non-missing element type of `x`.
function ToStruct(x::A, j::Integer) where {A}
    FT = fieldtype(Base.nonmissingtype(eltype(A)), j)
    return ToStruct{FT,j,A}(x)
end

Base.IndexStyle(::Type{<:ToStruct}) = Base.IndexLinear()
Base.size(x::ToStruct) = (length(x.data),)

# Return field `j` of the `i`-th wrapped element; a `missing` element yields
# `ArrowTypes.default(T)` instead.
Base.@propagate_inbounds function Base.getindex(A::ToStruct{T,j}, i::Integer) where {T,j}
    @boundscheck checkbounds(A, i)
    @inbounds elem = A.data[i]
    if elem === missing
        return ArrowTypes.default(T)
    end
    return getfield(elem, j)
end

# An existing `Struct` is returned as-is.
function arrowvector(::StructKind, x::Struct, i, nl, fi, de, ded, meta; kw...)
    return x
end

# Compute the `NamedTuple` type describing a struct column: names come from the source
# type, field types come from the element types of the child vectors in `data`.
function namedtupletype(::Type{NamedTuple{names,types}}, data) where {names,types}
    return NamedTuple{names,Tuple{map(eltype, data)...}}
end

function namedtupletype(::Type{T}, data) where {T}
    return NamedTuple{fieldnames(T),Tuple{map(eltype, data)...}}
end

# Tuple field names are converted to `Symbol`s so they can serve as NamedTuple names.
function namedtupletype(::Type{T}, data) where {T<:Tuple}
    return NamedTuple{map(Symbol, fieldnames(T)),Tuple{map(eltype, data)...}}
end

# Convert `x` into a `Struct`: build the validity bitmap from `x`, then one child
# vector per field of the non-missing element type. Each child is created one nesting
# level deeper (`nl + 1`), with the field number passed in the `fi` position and
# `nothing` as its metadata; `meta` is attached to the resulting `Struct` only.
function arrowvector(::StructKind, x, i, nl, fi, de, ded, meta; kw...)
    nelems = length(x)
    bitmap = ValidityBitmap(x)
    ET = Base.nonmissingtype(eltype(x))
    children = Tuple(
        arrowvector(ToStruct(x, j), i, nl + 1, j, de, ded, nothing; kw...) for
        j = 1:fieldcount(ET)
    )
    StructT = withmissing(eltype(x), namedtupletype(ET, children))
    return Struct{StructT,typeof(children)}(bitmap, children, nelems, meta)
end

# Compress the validity bitmap and every child vector of `x`, wrapping the results
# in a `Compressed{Z,A}` alongside the length and null count of `x`.
function compress(Z::Meta.CompressionType.T, comp, x::A) where {A<:Struct}
    nelems = length(x)
    nnull = nullcount(x)
    bitmap = compress(Z, comp, x.validity)
    kids = Compressed[compress(Z, comp, child) for child in x.data]
    return Compressed{Z,A}(x, [bitmap], nelems, nnull, kids)
end

# Append the field node and validity buffer entry for `col`, then recurse into each
# child vector. Returns the buffer offset after all entries have been added.
function makenodesbuffers!(
    col::Struct{T},
    fieldnodes,
    fieldbuffers,
    bufferoffset,
    alignment,
) where {T}
    nelems = length(col)
    nnull = nullcount(col)
    push!(fieldnodes, FieldNode(nelems, nnull))
    @debugv 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
    # validity bitmap: recorded with zero length when the column has no nulls
    bitmapbytes = if nnull == 0
        0
    else
        bitpackedbytes(nelems, alignment)
    end
    push!(fieldbuffers, Buffer(bufferoffset, bitmapbytes))
    @debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
    offset = bufferoffset + bitmapbytes
    for child in col.data
        offset = makenodesbuffers!(child, fieldnodes, fieldbuffers, offset, alignment)
    end
    return offset
end

# Write the validity bitmap of `col` to `io`, followed by each child vector in order.
function writebuffer(io, col::Struct, alignment)
    @debugv 1 "writebuffer: col = $(typeof(col))"
    @debugv 2 col
    writebitmap(io, col, alignment)
    # write values arrays
    foreach(child -> writebuffer(io, child, alignment), col.data)
    return nothing
end