memstore/common/schema.go (174 lines of code) (raw):
// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package common
import (
"encoding/json"
"sync"
"unsafe"
metaCom "github.com/uber/aresdb/metastore/common"
"github.com/uber/aresdb/utils"
)
// TableSchema stores metadata of the table such as columns and primary keys.
// It also stores the dictionaries for enum columns.
type TableSchema struct {
sync.RWMutex `json:"-"`
// Main schema of the table. Mutable.
Schema metaCom.Table `json:"schema"`
// Maps from column names to their IDs. Mutable.
ColumnIDs map[string]int `json:"columnIDs"`
// Maps from enum column names to their case dictionaries. Mutable.
EnumDicts map[string]EnumDict `json:"enumDicts"`
// DataType for each column ordered by column ID. Mutable.
ValueTypeByColumn []DataType `json:"valueTypeByColumn"`
// Number of bytes in the primary key. Immutable.
PrimaryKeyBytes int `json:"primaryKeyBytes"`
// Types of each primary key column. Immutable.
PrimaryKeyColumnTypes []DataType `json:"primaryKeyColumnTypes"`
// Default values of each column. Mutable. Nil means default value is not set.
DefaultValues []*DataValue `json:"-"`
}
// EnumDict contains mapping from and to enum strings to numbers.
type EnumDict struct {
// Either 0x100 for small_enum, or 0x10000 for big_enum.
Capacity int `json:"capacity"`
Dict map[string]int `json:"dict"`
ReverseDict []string `json:"reverseDict"`
}
// NewTableSchema creates a new table schema object from metaStore table object,
// this does not set enum cases.
func NewTableSchema(table *metaCom.Table) *TableSchema {
tableSchema := &TableSchema{
Schema: *table,
ColumnIDs: make(map[string]int),
EnumDicts: make(map[string]EnumDict),
ValueTypeByColumn: make([]DataType, len(table.Columns)),
PrimaryKeyColumnTypes: make([]DataType, len(table.PrimaryKeyColumns)),
DefaultValues: make([]*DataValue, len(table.Columns)),
}
for id, column := range table.Columns {
if !column.Deleted {
tableSchema.ColumnIDs[column.Name] = id
}
tableSchema.ValueTypeByColumn[id] = DataTypeForColumn(column)
}
for i, columnID := range table.PrimaryKeyColumns {
columnType := tableSchema.ValueTypeByColumn[columnID]
tableSchema.PrimaryKeyColumnTypes[i] = columnType
dataBits := DataTypeBits(columnType)
if dataBits < 8 {
dataBits = 8
}
tableSchema.PrimaryKeyBytes += dataBits / 8
}
return tableSchema
}
// MarshalJSON marshals TableSchema into json.
func (t *TableSchema) MarshalJSON() ([]byte, error) {
// Avoid loop json.Marshal calls.
type alias TableSchema
t.RLock()
defer t.RUnlock()
return json.Marshal((*alias)(t))
}
// SetTable sets a updated table and update TableSchema,
// should acquire lock before calling.
func (t *TableSchema) SetTable(table *metaCom.Table) {
t.Schema = *table
for id, column := range table.Columns {
if !column.Deleted {
t.ColumnIDs[column.Name] = id
} else {
delete(t.ColumnIDs, column.Name)
}
if id >= len(t.ValueTypeByColumn) {
t.ValueTypeByColumn = append(t.ValueTypeByColumn, DataTypeForColumn(column))
}
if id >= len(t.DefaultValues) {
t.DefaultValues = append(t.DefaultValues, nil)
}
}
}
// SetDefaultValue parses the default value string if present and sets to TableSchema.
// Schema lock should be acquired and release by caller and enum dict should already be
// created/update before this function.
func (t *TableSchema) SetDefaultValue(columnID int) {
// Default values are already set.
if t.DefaultValues[columnID] != nil {
return
}
column := t.Schema.Columns[columnID]
defStrVal := column.DefaultValue
if defStrVal == nil || column.Deleted {
t.DefaultValues[columnID] = &NullDataValue
return
}
dataType := t.ValueTypeByColumn[columnID]
dataTypeName := DataTypeName[dataType]
val := DataValue{
Valid: true,
DataType: dataType,
}
if dataType == SmallEnum || dataType == BigEnum {
enumDict, ok := t.EnumDicts[column.Name]
if !ok {
// Should no happen since the enum dict should already be created.
utils.GetLogger().With(
"data_type", dataTypeName,
"default_value", *defStrVal,
"column", t.Schema.Columns[columnID].Name,
).Panic("Cannot find EnumDict for column")
}
enumVal, ok := enumDict.Dict[*defStrVal]
if !ok {
// Should no happen since the enum value should already be created.
utils.GetLogger().With(
"data_type", dataTypeName,
"default_value", *defStrVal,
"column", t.Schema.Columns[columnID].Name,
).Panic("Cannot find enum value for column")
}
if dataType == SmallEnum {
enumValUint8 := uint8(enumVal)
val.OtherVal = unsafe.Pointer(&enumValUint8)
} else {
enumValUint16 := uint16(enumVal)
val.OtherVal = unsafe.Pointer(&enumValUint16)
}
} else {
dataValue, err := ValueFromString(*defStrVal, dataType)
if err != nil {
// Should not happen since the string value is already validated by schema handler.
utils.GetLogger().With(
"data_type", dataTypeName,
"default_value", *defStrVal,
"column", t.Schema.Columns[columnID].Name,
).Panic("Cannot parse default value")
}
if dataType == Bool {
val.IsBool = true
val.BoolVal = dataValue.BoolVal
} else {
val.OtherVal = dataValue.OtherVal
}
}
val.CmpFunc = GetCompareFunc(dataType)
t.DefaultValues[columnID] = &val
return
}
// createEnumDict creates the enum dictionary for the specified column with the
// specified initial cases, and attaches it to TableSchema object.
// Caller should acquire the schema lock before calling this function.
func (t *TableSchema) CreateEnumDict(columnName string, enumCases []string) {
columnID := t.ColumnIDs[columnName]
dataType := t.ValueTypeByColumn[columnID]
enumCapacity := 1 << uint(DataTypeBits(dataType))
enumDict := map[string]int{}
for id, enumCase := range enumCases {
enumDict[enumCase] = id
}
t.EnumDicts[columnName] = EnumDict{
Capacity: enumCapacity,
Dict: enumDict,
ReverseDict: enumCases,
}
}
// GetValueTypeByColumn makes a copy of the ValueTypeByColumn so callers don't have to hold a read
// lock to access it.
func (t *TableSchema) GetValueTypeByColumn() []DataType {
t.RLock()
defer t.RUnlock()
return t.ValueTypeByColumn
}
// GetPrimaryKeyColumns makes a copy of the Schema.PrimaryKeyColumns so callers don't have to hold
// a read lock to access it.
func (t *TableSchema) GetPrimaryKeyColumns() []int {
t.RLock()
defer t.RUnlock()
return t.Schema.PrimaryKeyColumns
}
// GetColumnDeletions returns a boolean slice that indicates whether a column has been deleted. Callers
// need to hold a read lock.
func (t *TableSchema) GetColumnDeletions() []bool {
deletedByColumn := make([]bool, len(t.Schema.Columns))
for columnID, column := range t.Schema.Columns {
deletedByColumn[columnID] = column.Deleted
}
return deletedByColumn
}
// GetColumnIfNonNilDefault returns a boolean slice that indicates whether a column has non nil default value. Callers
// need to hold a read lock.
func (t *TableSchema) GetColumnIfNonNilDefault() []bool {
nonNilDefaultByColumn := make([]bool, len(t.Schema.Columns))
for columnID, column := range t.Schema.Columns {
nonNilDefaultByColumn[columnID] = column.DefaultValue != nil
}
return nonNilDefaultByColumn
}
// GetArchivingSortColumns makes a copy of the Schema.ArchivingSortColumns so
// callers don't have to hold a read lock to access it.
func (t *TableSchema) GetArchivingSortColumns() []int {
t.RLock()
defer t.RUnlock()
return t.Schema.ArchivingSortColumns
}