metastore/common/model.go (76 lines of code) (raw):

// Copyright (c) 2017-2018 Uber Technologies, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package common // ColumnConfig defines the schema of a column config that can be mutated by // UpdateColumn API call. // swagger:model columnConfig type ColumnConfig struct { // ColumnEvictionConfig : For column level in-memory eviction, it’s the best // effort TTL for in-memory data. // Column level eviction has nothing to do with data availability, but based // on how much data we pre-loaded, the major impact will be there for query // performance. Here we bring in two priorities configs: Preloading days and // Priority. // - Preloading days is defined at each column level to indicate how many // recent days data we want to preload to host memory. This is best effort // operation. // - Priority is defined at each column level to indicate the priority of // each column. When data eviction happens, we will rely on column priority // to decide which column will be evicted first. // High number implies high priority. PreloadingDays int `json:"preloadingDays,omitempty"` Priority int64 `json:"priority,omitempty"` } // Column defines the schema of a column from MetaStore. // swagger:model column type Column struct { // Immutable, columns cannot be renamed. Name string `json:"name"` // Immutable, columns cannot have their types changed. Type string `json:"type"` // Deleted columns are kept as placeholders in Table.Columns. // read only: true Deleted bool `json:"deleted,omitempty"` // We store the default value as string here since it's from user input. // Nil means the default value is NULL. Actual default value of column data type // should be stored in memstore. DefaultValue *string `json:"defaultValue,omitempty"` // Whether to compare characters case insensitively for enum columns. It only matters // for ingestion client as it's the place to concert enum strings to enum values. CaseInsensitive bool `json:"caseInsensitive,omitempty"` // Whether disable enum cases auto expansion. DisableAutoExpand bool `json:"disableAutoExpand,omitempty"` // Mutable column configs. Config ColumnConfig `json:"config,omitempty"` // HLLEnabled determines whether a column is enabled for hll cardinality estimation // HLLConfig is immutable HLLConfig HLLConfig `json:"hllConfig,omitempty"` } // HLLConfig defines hll configuration // swagger:model hllConfig type HLLConfig struct { IsHLLColumn bool `json:"isHLLColumn,omitempty"` } // TableConfig defines the table configurations that can be changed // swagger:model tableConfig type TableConfig struct { // Common table configs // Initial setting of number of buckets for primary key // if equals to 0, default will be used InitialPrimaryKeyNumBuckets int `json:"initPrimaryKeyNumBuckets,omitempty"` // Size of each live batch, should be sufficiently large. BatchSize int `json:"batchSize,omitempty" validate:"min=1"` // Specifies how often to create a new redo log file. RedoLogRotationInterval int `json:"redoLogRotationInterval,omitempty" validate:"min=1"` // Specifies the size limit of a single redo log file. MaxRedoLogFileSize int `json:"maxRedoLogFileSize,omitempty" validate:"min=1"` // Fact table specific configs // Number of minutes after event time before a record can be archived. ArchivingDelayMinutes uint32 `json:"archivingDelayMinutes,omitempty" validate:"min=1"` // Specifies how often archiving runs. ArchivingIntervalMinutes uint32 `json:"archivingIntervalMinutes,omitempty" validate:"min=1"` // Specifies how often backfill runs. BackfillIntervalMinutes uint32 `json:"backfillIntervalMinutes,omitempty" validate:"min=1"` // Upper limit of current backfill buffer size + backfilling buffer size. BackfillMaxBufferSize int64 `json:"backfillMaxBufferSize,omitempty" validate:"min=1"` // Backfill buffer size in bytes that will trigger a backfill job. BackfillThresholdInBytes int64 `json:"backfillThresholdInBytes,omitempty" validate:"min=1"` // Size of each live batch used by backfill job. BackfillStoreBatchSize int `json:"backfillStoreBatchSize,omitempty" validate:"min=1"` // Records with timestamp older than now - RecordRetentionInDays will be skipped // during ingestion and backfill. 0 means unlimited days. RecordRetentionInDays int `json:"recordRetentionInDays,omitempty" validate:"min=0"` // Dimension table specific configs // Number of mutations to accumulate before creating a new snapshot. SnapshotThreshold int `json:"snapshotThreshold,omitempty" validate:"min=1"` // Specifies how often snapshot runs. SnapshotIntervalMinutes int `json:"snapshotIntervalMinutes,omitempty" validate:"min=1"` AllowMissingEventTime bool `json:"allowMissingEventTime,omitempty"` } // Table defines the schema and configurations of a table from MetaStore. // swagger:model table type Table struct { // Name of the table, immutable. Name string `json:"name"` // Index to Columns also serves as column IDs. Columns []Column `json:"columns"` // IDs of primary key columns. This field is immutable. PrimaryKeyColumns []int `json:"primaryKeyColumns"` // Whether this is a fact table. IsFactTable bool `json:"isFactTable"` // table configurations Config TableConfig `json:"config"` // Fact table only. // IDs of columns to sort based upon. ArchivingSortColumns []int `json:"archivingSortColumns,omitempty"` // Incarnation gets incremented every time an table name is reused // only used for controller managed schema in cluster setting Incarnation int `json:"incarnation"` // Version gets incremented every time when schema is updated // only used for controller managed schema in cluster setting Version int `json:"version"` } // IsEnumColumn checks whether a column is enum column func (c *Column) IsEnumColumn() bool { return c.Type == BigEnum || c.Type == SmallEnum } // IsEnumArrayColumn checks whether a column is of enum array column func (c *Column) IsEnumArrayColumn() bool { return c.Type == ArrayBigEnum || c.Type == ArraySmallEnum } // IsEnumBasedColumn checks whether a column whose value is enum based // including both simple enum columns and arry enum columns func (c *Column) IsEnumBasedColumn() bool { return c.IsEnumArrayColumn() || c.IsEnumColumn() } // IsOverwriteOnlyDataType checks whether a column is overwrite only func (c *Column) IsOverwriteOnlyDataType() bool { switch c.Type { case Uint8, Int8, Uint16, Int16, Uint32, Int32, Float32, Int64: return false default: return true } } // EnumCardinality returns cardinality for enum type func EnumCardinality(columnType string) int { switch columnType { case SmallEnum, ArraySmallEnum: return 1 << 8 case BigEnum, ArrayBigEnum: return 1 << 16 default: return 0 } } // ShardOwnership defines an instruction on whether the receiving instance // should start to own or disown the specified table shard. type ShardOwnership struct { TableName string Shard int ShouldOwn bool }