query/common/dimval.go (162 lines of code) (raw):
// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package common
import (
memCom "github.com/uber/aresdb/memstore/common"
"github.com/uber/aresdb/utils"
"strconv"
"time"
"unsafe"
)
// NULLString is the string used to represent null dimension values.
const NULLString = "NULL"
// DimCountsPerDimWidth holds the number of dimensions for each dimension value width.
// Slots are ordered from the widest to the narrowest width:
// 16-byte 8-byte 4-byte 2-byte 1-byte
type DimCountsPerDimWidth [5]uint8
// ReadDimension reads the dimension value stored at index and renders it as a string.
//
// nullStart points at the validity bytes (one byte per index, 0 meaning null) and
// valueStart points at the values, each memCom.DataTypeBytes(dataType) bytes wide.
// The function returns nil when the value is null or when dataType is not handled.
//
// Rendering rules:
//   - Float32 is formatted as a float, unless meta is non-nil, in which case the value is
//     truncated to an integer and treated like an unsigned value below.
//   - Signed integers and Bool are always formatted as decimal integers.
//   - Unsigned integers and enums are translated through enumReverseDict when the value is a
//     valid index into it; otherwise they are formatted as a time dimension when meta is
//     non-nil (cache is handed to formatTimeDimension), or as a decimal integer.
//   - UUID and GeoPoint are delegated to formatWithDataValue.
func ReadDimension(valueStart, nullStart unsafe.Pointer,
	index int, dataType memCom.DataType, enumReverseDict []string, meta *TimeDimensionMeta, cache map[TimeDimensionMeta]map[int64]string) *string {
	// A zero validity byte marks the value as null.
	if validity := *(*uint8)(memAccess(nullStart, index)); validity == 0 {
		return nil
	}

	// Locate the value using the byte width of its data type.
	width := memCom.DataTypeBytes(dataType)
	ptr := memAccess(valueStart, width*index)

	var raw int64
	switch dataType {
	case memCom.UUID, memCom.GeoPoint:
		return formatWithDataValue(ptr, dataType)
	case memCom.Float32:
		f := *(*float32)(ptr)
		if meta == nil {
			text := strconv.FormatFloat(float64(f), 'g', -1, 32)
			return &text
		}
		// A time dimension value may have been converted to float for division.
		raw = int64(f)
	case memCom.Int64, memCom.Int32, memCom.Int16, memCom.Int8, memCom.Bool:
		switch width {
		case 8:
			raw = *(*int64)(ptr)
		case 4:
			raw = int64(*(*int32)(ptr))
		case 2:
			raw = int64(*(*int16)(ptr))
		case 1:
			raw = int64(*(*int8)(ptr))
		}
		text := strconv.FormatInt(raw, 10)
		return &text
	case memCom.Uint32, memCom.Uint16, memCom.BigEnum, memCom.Uint8, memCom.SmallEnum:
		switch width {
		case 4:
			raw = int64(*(*uint32)(ptr))
		case 2:
			raw = int64(*(*uint16)(ptr))
		case 1:
			raw = int64(*(*uint8)(ptr))
		}
	default:
		// Should never happen.
		return nil
	}

	// Only unsigned values and time dimensions converted from float reach this point.
	var text string
	switch {
	case raw >= 0 && raw < int64(len(enumReverseDict)):
		// Translate the enum case back to its string.
		text = enumReverseDict[int(raw)]
	case meta != nil:
		text = formatTimeDimension(raw, *meta, cache)
	default:
		text = strconv.FormatInt(raw, 10)
	}
	return &text
}
// formatWithDataValue wraps valuePtr in a valid memCom.DataValue of the given type and
// returns its human readable form. It returns nil when the conversion yields nothing or
// yields something other than a string.
func formatWithDataValue(valuePtr unsafe.Pointer, dataType memCom.DataType) *string {
	value := memCom.DataValue{
		Valid:    true,
		DataType: dataType,
		OtherVal: valuePtr,
	}
	// The comma-ok assertion also fails for a nil interface, so no separate nil check is needed.
	text, isString := value.ConvertToHumanReadable(dataType).(string)
	if !isString {
		return nil
	}
	return &text
}
// GetDimensionStartOffsets computes where the values and the null markers of one dimension
// start inside a dimension vector.
//
// numDimsPerDimWidth gives the number of dimensions per value width (widest first),
// dimIndex is the ordered index of the dimension inside the dimension vector, and length is
// the number of entries held for each dimension. Values of all dimensions are laid out
// first, widest dimensions first; the null markers follow, one byte per entry per dimension.
func GetDimensionStartOffsets(numDimsPerDimWidth DimCountsPerDimWidth, dimIndex int, length int) (valueOffset, nullOffset int) {
	widthClasses := len(numDimsPerDimWidth)

	// bytesPerEntry accumulates the value bytes of all dimensions for a single entry;
	// bytesBefore accumulates only those of the dimensions placed ahead of dimIndex.
	bytesPerEntry, bytesBefore := 0, 0
	firstDim := 0
	located := false
	for i := 0; i < widthClasses; i++ {
		width := 1 << uint(widthClasses-1-i)
		count := int(numDimsPerDimWidth[i])
		bytesPerEntry += width * count

		if located {
			continue
		}
		if dimIndex < firstDim+count {
			// The dimension lives in this width class.
			bytesBefore += (dimIndex - firstDim) * width
			located = true
			continue
		}
		bytesBefore += count * width
		firstDim += count
	}

	valueOffset = bytesBefore * length
	nullOffset = (bytesPerEntry + dimIndex) * length
	return valueOffset, nullOffset
}
// formatTimeDimension renders the time dimension value val as a string as described by meta.
//
// When meta.TimeUnit is set, val is adjusted with utils.AdjustOffset, scaled to the requested
// unit and returned as a decimal integer; cache is not consulted on this path.
//
// Otherwise val is formatted in UTC according to meta.TimeBucketizer. If the bucketizer
// cannot be parsed, or its unit is not one of "m", "h" or "d", the decimal form of val is
// returned instead.
//
// cache memoizes formatted strings per meta and per value. It may be nil, in which case
// nothing is cached. Timezone table dimensions never use the cache.
func formatTimeDimension(val int64, meta TimeDimensionMeta, cache map[TimeDimensionMeta]map[int64]string) (result string) {
	// We will not process timeUnit for application/hll because application/hll holds the raw
	// uint32 value. If we convert it to milliseconds, it will overflow.
	if meta.TimeUnit != "" {
		val = utils.AdjustOffset(meta.FromOffset, meta.ToOffset,
			meta.DSTSwitchTs, val)
		switch meta.TimeUnit {
		case "day":
			val /= SecondsPerDay
		case "hour":
			val /= SecondsPerHour
		case "minute":
			val /= SecondsPerMinute
		case "millisecond":
			val *= 1000
		}
		return strconv.FormatInt(val, 10)
	}

	// skip timezone table dims
	// TODO(shz): support timezone table dims
	//
	// formatted stays nil when caching is disabled, so the write below can be guarded
	// without risking an assignment to a nil map.
	var formatted map[int64]string
	if !meta.IsTimezoneTable && cache != nil {
		formatted = cache[meta]
		if formatted == nil {
			formatted = make(map[int64]string)
			cache[meta] = formatted
		}
		if hit, ok := formatted[val]; ok {
			return hit
		}
	}

	// Every branch assigns result instead of returning, so the value reaches the cache below.
	switch meta.TimeBucketizer {
	case "time of day":
		result = time.Unix(val, 0).UTC().Format("15:04")
	case "hour of day":
		result = time.Unix(val-val%3600, 0).UTC().Format("15:04")
	case "hour of week":
		result = time.Unix(val+SecondsPer4Day, 0).UTC().Format("Monday 15:04")
	case "day of week":
		// 1970-01-01 was a Thursday
		result = time.Unix(((val+4)%7)*SecondsPerDay, 0).UTC().Format("Monday")
	default:
		bucket, err := ParseRegularTimeBucketizer(meta.TimeBucketizer)
		if err != nil {
			return strconv.FormatInt(val, 10)
		}
		switch bucket.Unit {
		case "m":
			result = time.Unix(val, 0).UTC().Format("2006-01-02 15:04")
		case "h":
			result = time.Unix(val-val%3600, 0).UTC().Format("2006-01-02 15:00")
		case "d":
			result = time.Unix(val-val%(24*60*60), 0).UTC().Format("2006-01-02")
		default:
			// Unrecognised unit: fall back to the raw value, as for a parse failure,
			// rather than returning an empty string.
			result = strconv.FormatInt(val, 10)
		}
	}

	if formatted != nil {
		formatted[val] = result
	}
	return result
}