memstore/vectors/vector.go (203 lines of code) (raw):

// Copyright (c) 2017-2018 Uber Technologies, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vectors // #include <stdlib.h> // #include <string.h> import ( "github.com/uber/aresdb/cgoutils" "github.com/uber/aresdb/memstore/common" "math" "unsafe" "fmt" "github.com/uber/aresdb/utils" ) // Vector stores a batch of columnar data (values, nulls, or counts) for a column. type Vector struct { // The data type of the value stored in the vector. DataType common.DataType CmpFunc common.CompareFunc // Max number of values that can be stored in the vector. Size int // Allocated size of the vector in bytes. Bytes int // Number of bits occupied per unit, possible values: 1, 8, 16, 32, 64, 128. unitBits int // Pointer to the vector buffer. buffer uintptr // **All following fields only works for live batch's vectors.** // Min and Max values seen, only used for time columns of fact tables. minValue uint32 maxValue uint32 // Number of trues in a bool typed vector. numTrues int } // NewVector creates a vector with the specified bits per unit and size(capacity). // The majority of its storage space is managed in C. func NewVector(dataType common.DataType, size int) *Vector { unitBits := common.DataTypeBits(dataType) bytes := CalculateVectorBytes(dataType, size) buffer := cgoutils.HostAlloc(bytes) return &Vector{ DataType: dataType, CmpFunc: common.GetCompareFunc(dataType), unitBits: unitBits, Size: size, Bytes: bytes, buffer: uintptr(buffer), minValue: math.MaxUint32, } } // CalculateVectorBytes calculates bytes the vector will occupy given data type and size without actual allocation. func CalculateVectorBytes(dataType common.DataType, size int) int { unitBits := common.DataTypeBits(dataType) bits := unitBits * size // Round up to 512 bits (64 bytes). remainder := bits % 512 if remainder > 0 { bits += 512 - remainder } return bits / 8 } // CalculateVectorPartyBytes calculates bytes the vector party will occupy. // Note: data type supported in go memory will report memory usage when value is actually set, therefore report 0 here func CalculateVectorPartyBytes(dataType common.DataType, size int, hasNulls bool, hasCounts bool) int { if common.IsGoType(dataType) { // batchSize * size of golang pointer return size * 8 } if common.IsArrayType(dataType) { // this only calculates the offset and caps for list live vector party, value vector is controlled inside vp // list archive vector party can not use this either offsets := CalculateVectorBytes(common.Uint32, 2*size) caps := CalculateVectorBytes(common.Uint32, size) return offsets + caps } bytes := CalculateVectorBytes(dataType, size) if hasNulls { bytes += CalculateVectorBytes(common.Bool, size) } if hasCounts { bytes += CalculateVectorBytes(common.Uint32, size+1) } return bytes } // SafeDestruct destructs this vector's storage space managed in C. func (v *Vector) SafeDestruct() { if v != nil { cgoutils.HostFree(unsafe.Pointer(v.buffer)) } } // Buffer returns the pointer to the underlying buffer. func (v *Vector) Buffer() unsafe.Pointer { return unsafe.Pointer(v.buffer) } // SetBool sets the bool value for the specified index. func (v *Vector) SetBool(index int, value bool) { if index >= v.Size { panic(fmt.Sprintf("SetBool index %d access out of bound %d", index, v.Size)) } wordOffset := uintptr(index / 32 * 4) localBit := uint(index % 32) wordAddr := (*uint32)(unsafe.Pointer(v.buffer + wordOffset)) oldValue := *wordAddr if value { *wordAddr |= 1 << localBit } else { *wordAddr &^= 1 << localBit } if *wordAddr != oldValue { if value { v.numTrues++ } else { v.numTrues-- } } } // SetValue sets the data value for the specified index. // index bound is not checked! // data points to a buffer (in UpsertBatch for instance) that contains the value to be set. func (v *Vector) SetValue(index int, data unsafe.Pointer) { if index >= v.Size { panic(fmt.Sprintf("SetValue index %d access out of bound %d", index, v.Size)) } idx := uintptr(index) switch v.unitBits { case 8: *(*uint8)(unsafe.Pointer(v.buffer + idx)) = *(*uint8)(data) case 16: *(*uint16)(unsafe.Pointer(v.buffer + idx*2)) = *(*uint16)(data) case 32: value := *(*uint32)(data) *(*uint32)(unsafe.Pointer(v.buffer + idx*4)) = value if value > v.maxValue { v.maxValue = value } if value < v.minValue { v.minValue = value } case 64: value := *(*uint64)(data) *(*uint64)(unsafe.Pointer(v.buffer + idx*8)) = value case 128: *(*uint64)(unsafe.Pointer(v.buffer + idx*16)) = *(*uint64)(data) *(*uint64)(unsafe.Pointer(v.buffer + idx*16 + 8)) = *(*uint64)(unsafe.Pointer(uintptr(data) + 8)) } } // GetBool returns the bool value for the specified index. // index bound is not checked! func (v *Vector) GetBool(index int) bool { wordOffset := uintptr(index / 32 * 4) localBit := uint(index % 32) wordAddr := (*uint32)(unsafe.Pointer(v.buffer + wordOffset)) return *wordAddr&(1<<localBit) != 0 } // GetValue returns the data value for the specified index. // index bound is not checked! // The return value points to the internal buffer location that stores the value. func (v *Vector) GetValue(index int) unsafe.Pointer { return unsafe.Pointer(v.buffer + uintptr(v.unitBits/8*index)) } // GetMinValue return the min value of the Vector Party func (v *Vector) GetMinValue() uint32 { return v.minValue } // GetMaxValue return the max value of the Vector Party func (v *Vector) GetMaxValue() uint32 { return v.maxValue } // LowerBound returns the index of the first element in vector[first, last) that is greater or equal // to the given value. The result is only valid if vector[first, last) is fully sorted in ascendant // order. If all values in the given range is less than the given value, LowerBound // returns last. // Note that first/last is not checked against vector bound. func (v *Vector) LowerBound(first int, last int, value unsafe.Pointer) int { for first < last { mid := (last + first) / 2 cmpRes := 0 if v.DataType == common.Bool { cmpRes = common.CompareBool(v.GetBool(mid), *(*uint32)(value) != 0) } else { cmpRes = v.CmpFunc(v.GetValue(mid), value) } if cmpRes >= 0 { last = mid } else { first = mid + 1 } } return first } // UpperBound returns the index of the first element in vector[first, last) that is greater than the // given value. The result is only valid if vector[first, last) is fully sorted in ascendant // order. If all values in the given range is less than the given value, LowerBound returns last. // Note that first/last is not checked against vector bound. func (v *Vector) UpperBound(first int, last int, value unsafe.Pointer) int { for first < last { mid := (last + first) / 2 cmpRes := 0 if v.DataType == common.Bool { cmpRes = common.CompareBool(v.GetBool(mid), *(*uint32)(value) != 0) } else { cmpRes = v.CmpFunc(v.GetValue(mid), value) } if cmpRes > 0 { last = mid } else { first = mid + 1 } } return first } // GetSliceBytesAligned calculate the number of bytes of a slice of the vector, // represented by [lowerBound, upperBound), // aligned to 64-byte // return the buffer pointer, new start index (start entry in vector), and length in bytes func (v *Vector) GetSliceBytesAligned(lowerBound int, upperBound int) (buffer unsafe.Pointer, startIndex int, bytes int) { if v == nil || lowerBound == upperBound { return } // find the latest 64-byte aligned boundary startByte := v.unitBits * lowerBound / 8 startByte -= startByte % 64 startIndex = lowerBound - startByte*8/v.unitBits endByte := (v.unitBits*upperBound + 7) / 8 bytes = utils.AlignOffset(endByte-startByte, 64) return utils.MemAccess(v.Buffer(), startByte), startIndex, bytes } // SetAllValid set all bits to be 1 in a bool typed vector. func (v *Vector) SetAllValid() { // buffer are 64 bytes aligned so we can assign word by word. for i := 0; i < v.Bytes; i += 4 { *(*uint32)(utils.MemAccess(unsafe.Pointer(v.buffer), i)) = 0xFFFFFFFF } } // CheckAllValid checks whether all bits are 1 in a bool typed vector. func (v *Vector) CheckAllValid() bool { i := 0 totalCompleteBytes := v.Size / 8 remainingBits := v.Size % 8 wordBoundary := totalCompleteBytes - totalCompleteBytes%4 // First check word by word. for ; i < wordBoundary; i += 4 { if !(*(*uint32)(utils.MemAccess(unsafe.Pointer(v.buffer), i)) == 0xFFFFFFFF) { return false } } // then we check byte by byte. for ; i < totalCompleteBytes; i++ { if !(*(*uint8)(utils.MemAccess(unsafe.Pointer(v.buffer), i)) == 0xFF) { return false } } if remainingBits == 0 { return true } // check remaining bits. var mask uint8 = (1 << uint(remainingBits)) - 1 return ((*(*uint8)(utils.MemAccess(unsafe.Pointer(v.buffer), i))) & mask) == mask }