be/src/olap/tablet_schema.cpp (1,393 lines of code) (raw):

// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "olap/tablet_schema.h" #include <gen_cpp/Descriptors_types.h> #include <gen_cpp/olap_file.pb.h> #include <glog/logging.h> #include <google/protobuf/io/coded_stream.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/io/zero_copy_stream_impl_lite.h> #include <algorithm> #include <cctype> // IWYU pragma: no_include <bits/std_abs.h> #include <cmath> // IWYU pragma: keep #include <memory> #include <ostream> #include <vector> #include "common/compiler_util.h" // IWYU pragma: keep #include "common/consts.h" #include "common/status.h" #include "exec/tablet_info.h" #include "olap/inverted_index_parser.h" #include "olap/olap_define.h" #include "olap/tablet_column_object_pool.h" #include "olap/types.h" #include "olap/utils.h" #include "tablet_meta.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/aggregate_functions/aggregate_function_state_union.h" #include "vec/common/hex.h" #include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_factory.hpp" #include "vec/json/path_in_data.h" namespace doris { FieldType 
TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { switch (primitiveType) { case PrimitiveType::INVALID_TYPE: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; case PrimitiveType::TYPE_NULL: return FieldType::OLAP_FIELD_TYPE_NONE; case PrimitiveType::TYPE_BOOLEAN: return FieldType::OLAP_FIELD_TYPE_BOOL; case PrimitiveType::TYPE_TINYINT: return FieldType::OLAP_FIELD_TYPE_TINYINT; case PrimitiveType::TYPE_SMALLINT: return FieldType::OLAP_FIELD_TYPE_SMALLINT; case PrimitiveType::TYPE_INT: return FieldType::OLAP_FIELD_TYPE_INT; case PrimitiveType::TYPE_BIGINT: return FieldType::OLAP_FIELD_TYPE_BIGINT; case PrimitiveType::TYPE_LARGEINT: return FieldType::OLAP_FIELD_TYPE_LARGEINT; case PrimitiveType::TYPE_FLOAT: return FieldType::OLAP_FIELD_TYPE_FLOAT; case PrimitiveType::TYPE_DOUBLE: return FieldType::OLAP_FIELD_TYPE_DOUBLE; case PrimitiveType::TYPE_VARCHAR: return FieldType::OLAP_FIELD_TYPE_VARCHAR; case PrimitiveType::TYPE_DATE: return FieldType::OLAP_FIELD_TYPE_DATE; case PrimitiveType::TYPE_DATETIME: return FieldType::OLAP_FIELD_TYPE_DATETIME; case PrimitiveType::TYPE_BINARY: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented case PrimitiveType::TYPE_CHAR: return FieldType::OLAP_FIELD_TYPE_CHAR; case PrimitiveType::TYPE_STRUCT: return FieldType::OLAP_FIELD_TYPE_STRUCT; case PrimitiveType::TYPE_ARRAY: return FieldType::OLAP_FIELD_TYPE_ARRAY; case PrimitiveType::TYPE_MAP: return FieldType::OLAP_FIELD_TYPE_MAP; case PrimitiveType::TYPE_HLL: return FieldType::OLAP_FIELD_TYPE_HLL; case PrimitiveType::TYPE_DECIMALV2: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented case PrimitiveType::TYPE_OBJECT: return FieldType::OLAP_FIELD_TYPE_OBJECT; case PrimitiveType::TYPE_STRING: return FieldType::OLAP_FIELD_TYPE_STRING; case PrimitiveType::TYPE_QUANTILE_STATE: return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; case PrimitiveType::TYPE_DATEV2: return FieldType::OLAP_FIELD_TYPE_DATEV2; case PrimitiveType::TYPE_DATETIMEV2: return 
FieldType::OLAP_FIELD_TYPE_DATETIMEV2; case PrimitiveType::TYPE_TIMEV2: return FieldType::OLAP_FIELD_TYPE_TIMEV2; case PrimitiveType::TYPE_DECIMAL32: return FieldType::OLAP_FIELD_TYPE_DECIMAL32; case PrimitiveType::TYPE_DECIMAL64: return FieldType::OLAP_FIELD_TYPE_DECIMAL64; case PrimitiveType::TYPE_DECIMAL128I: return FieldType::OLAP_FIELD_TYPE_DECIMAL128I; case PrimitiveType::TYPE_JSONB: return FieldType::OLAP_FIELD_TYPE_JSONB; case PrimitiveType::TYPE_VARIANT: return FieldType::OLAP_FIELD_TYPE_VARIANT; case PrimitiveType::TYPE_LAMBDA_FUNCTION: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented case PrimitiveType::TYPE_AGG_STATE: return FieldType::OLAP_FIELD_TYPE_AGG_STATE; default: return FieldType::OLAP_FIELD_TYPE_UNKNOWN; } } FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { std::string upper_type_str = type_str; std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(), [](auto c) { return std::toupper(c); }); FieldType type; if (0 == upper_type_str.compare("TINYINT")) { type = FieldType::OLAP_FIELD_TYPE_TINYINT; } else if (0 == upper_type_str.compare("SMALLINT")) { type = FieldType::OLAP_FIELD_TYPE_SMALLINT; } else if (0 == upper_type_str.compare("INT")) { type = FieldType::OLAP_FIELD_TYPE_INT; } else if (0 == upper_type_str.compare("BIGINT")) { type = FieldType::OLAP_FIELD_TYPE_BIGINT; } else if (0 == upper_type_str.compare("LARGEINT")) { type = FieldType::OLAP_FIELD_TYPE_LARGEINT; } else if (0 == upper_type_str.compare("UNSIGNED_TINYINT")) { type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT; } else if (0 == upper_type_str.compare("UNSIGNED_SMALLINT")) { type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT; } else if (0 == upper_type_str.compare("UNSIGNED_INT")) { type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT; } else if (0 == upper_type_str.compare("UNSIGNED_BIGINT")) { type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; } else if (0 == upper_type_str.compare("IPV4")) { type = 
FieldType::OLAP_FIELD_TYPE_IPV4; } else if (0 == upper_type_str.compare("IPV6")) { type = FieldType::OLAP_FIELD_TYPE_IPV6; } else if (0 == upper_type_str.compare("FLOAT")) { type = FieldType::OLAP_FIELD_TYPE_FLOAT; } else if (0 == upper_type_str.compare("DISCRETE_DOUBLE")) { type = FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE; } else if (0 == upper_type_str.compare("DOUBLE")) { type = FieldType::OLAP_FIELD_TYPE_DOUBLE; } else if (0 == upper_type_str.compare("CHAR")) { type = FieldType::OLAP_FIELD_TYPE_CHAR; } else if (0 == upper_type_str.compare("DATE")) { type = FieldType::OLAP_FIELD_TYPE_DATE; } else if (0 == upper_type_str.compare("DATEV2")) { type = FieldType::OLAP_FIELD_TYPE_DATEV2; } else if (0 == upper_type_str.compare("DATETIMEV2")) { type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2; } else if (0 == upper_type_str.compare("DATETIME")) { type = FieldType::OLAP_FIELD_TYPE_DATETIME; } else if (0 == upper_type_str.compare("DECIMAL32")) { type = FieldType::OLAP_FIELD_TYPE_DECIMAL32; } else if (0 == upper_type_str.compare("DECIMAL64")) { type = FieldType::OLAP_FIELD_TYPE_DECIMAL64; } else if (0 == upper_type_str.compare("DECIMAL128I")) { type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I; } else if (0 == upper_type_str.compare("DECIMAL256")) { type = FieldType::OLAP_FIELD_TYPE_DECIMAL256; } else if (0 == upper_type_str.compare(0, 7, "DECIMAL")) { type = FieldType::OLAP_FIELD_TYPE_DECIMAL; } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) { type = FieldType::OLAP_FIELD_TYPE_VARCHAR; } else if (0 == upper_type_str.compare("STRING")) { type = FieldType::OLAP_FIELD_TYPE_STRING; } else if (0 == upper_type_str.compare("JSONB")) { type = FieldType::OLAP_FIELD_TYPE_JSONB; } else if (0 == upper_type_str.compare("VARIANT")) { type = FieldType::OLAP_FIELD_TYPE_VARIANT; } else if (0 == upper_type_str.compare("BOOLEAN")) { type = FieldType::OLAP_FIELD_TYPE_BOOL; } else if (0 == upper_type_str.compare(0, 3, "HLL")) { type = FieldType::OLAP_FIELD_TYPE_HLL; } else if (0 == 
upper_type_str.compare("STRUCT")) { type = FieldType::OLAP_FIELD_TYPE_STRUCT; } else if (0 == upper_type_str.compare("LIST")) { type = FieldType::OLAP_FIELD_TYPE_ARRAY; } else if (0 == upper_type_str.compare("MAP")) { type = FieldType::OLAP_FIELD_TYPE_MAP; } else if (0 == upper_type_str.compare("OBJECT")) { type = FieldType::OLAP_FIELD_TYPE_OBJECT; } else if (0 == upper_type_str.compare("ARRAY")) { type = FieldType::OLAP_FIELD_TYPE_ARRAY; } else if (0 == upper_type_str.compare("QUANTILE_STATE")) { type = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; } else if (0 == upper_type_str.compare("AGG_STATE")) { type = FieldType::OLAP_FIELD_TYPE_AGG_STATE; } else { LOG(WARNING) << "invalid type string. [type='" << type_str << "']"; type = FieldType::OLAP_FIELD_TYPE_UNKNOWN; } return type; } FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) { std::string upper_str = str; std::transform(str.begin(), str.end(), upper_str.begin(), [](auto c) { return std::toupper(c); }); FieldAggregationMethod aggregation_type; if (0 == upper_str.compare("NONE")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE; } else if (0 == upper_str.compare("SUM")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM; } else if (0 == upper_str.compare("MIN")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN; } else if (0 == upper_str.compare("MAX")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX; } else if (0 == upper_str.compare("REPLACE")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE; } else if (0 == upper_str.compare("REPLACE_IF_NOT_NULL")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL; } else if (0 == upper_str.compare("HLL_UNION")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION; } else if (0 == upper_str.compare("BITMAP_UNION")) { aggregation_type = 
FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION; } else if (0 == upper_str.compare("QUANTILE_UNION")) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION; } else if (!upper_str.empty()) { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC; } else { aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN; } return aggregation_type; } std::string TabletColumn::get_string_by_field_type(FieldType type) { switch (type) { case FieldType::OLAP_FIELD_TYPE_TINYINT: return "TINYINT"; case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: return "UNSIGNED_TINYINT"; case FieldType::OLAP_FIELD_TYPE_SMALLINT: return "SMALLINT"; case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: return "UNSIGNED_SMALLINT"; case FieldType::OLAP_FIELD_TYPE_INT: return "INT"; case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: return "UNSIGNED_INT"; case FieldType::OLAP_FIELD_TYPE_BIGINT: return "BIGINT"; case FieldType::OLAP_FIELD_TYPE_LARGEINT: return "LARGEINT"; case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: return "UNSIGNED_BIGINT"; case FieldType::OLAP_FIELD_TYPE_IPV4: return "IPV4"; case FieldType::OLAP_FIELD_TYPE_IPV6: return "IPV6"; case FieldType::OLAP_FIELD_TYPE_FLOAT: return "FLOAT"; case FieldType::OLAP_FIELD_TYPE_DOUBLE: return "DOUBLE"; case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: return "DISCRETE_DOUBLE"; case FieldType::OLAP_FIELD_TYPE_CHAR: return "CHAR"; case FieldType::OLAP_FIELD_TYPE_DATE: return "DATE"; case FieldType::OLAP_FIELD_TYPE_DATEV2: return "DATEV2"; case FieldType::OLAP_FIELD_TYPE_DATETIME: return "DATETIME"; case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: return "DATETIMEV2"; case FieldType::OLAP_FIELD_TYPE_DECIMAL: return "DECIMAL"; case FieldType::OLAP_FIELD_TYPE_DECIMAL32: return "DECIMAL32"; case FieldType::OLAP_FIELD_TYPE_DECIMAL64: return "DECIMAL64"; case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: return "DECIMAL128I"; case FieldType::OLAP_FIELD_TYPE_DECIMAL256: return "DECIMAL256"; case 
FieldType::OLAP_FIELD_TYPE_VARCHAR: return "VARCHAR"; case FieldType::OLAP_FIELD_TYPE_JSONB: return "JSONB"; case FieldType::OLAP_FIELD_TYPE_VARIANT: return "VARIANT"; case FieldType::OLAP_FIELD_TYPE_STRING: return "STRING"; case FieldType::OLAP_FIELD_TYPE_BOOL: return "BOOLEAN"; case FieldType::OLAP_FIELD_TYPE_HLL: return "HLL"; case FieldType::OLAP_FIELD_TYPE_STRUCT: return "STRUCT"; case FieldType::OLAP_FIELD_TYPE_ARRAY: return "ARRAY"; case FieldType::OLAP_FIELD_TYPE_MAP: return "MAP"; case FieldType::OLAP_FIELD_TYPE_OBJECT: return "OBJECT"; case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE: return "QUANTILE_STATE"; case FieldType::OLAP_FIELD_TYPE_AGG_STATE: return "AGG_STATE"; default: return "UNKNOWN"; } } std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) { switch (type) { case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE: return "NONE"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM: return "SUM"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN: return "MIN"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX: return "MAX"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE: return "REPLACE"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: return "REPLACE_IF_NOT_NULL"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION: return "HLL_UNION"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION: return "BITMAP_UNION"; case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION: return "QUANTILE_UNION"; default: return "UNKNOWN"; } } uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) { switch (type) { case TPrimitiveType::TINYINT: case TPrimitiveType::BOOLEAN: return 1; case TPrimitiveType::SMALLINT: return 2; case TPrimitiveType::INT: return 4; case TPrimitiveType::BIGINT: return 8; case TPrimitiveType::LARGEINT: return 16; case TPrimitiveType::IPV4: return 4; case TPrimitiveType::IPV6: 
return 16; case TPrimitiveType::DATE: return 3; case TPrimitiveType::DATEV2: return 4; case TPrimitiveType::DATETIME: return 8; case TPrimitiveType::DATETIMEV2: return 8; case TPrimitiveType::FLOAT: return 4; case TPrimitiveType::DOUBLE: return 8; case TPrimitiveType::QUANTILE_STATE: case TPrimitiveType::OBJECT: return 16; case TPrimitiveType::CHAR: return string_length; case TPrimitiveType::VARCHAR: case TPrimitiveType::HLL: case TPrimitiveType::AGG_STATE: return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH); case TPrimitiveType::STRING: case TPrimitiveType::VARIANT: return string_length + sizeof(OLAP_STRING_MAX_LENGTH); case TPrimitiveType::JSONB: return string_length + sizeof(OLAP_JSONB_MAX_LENGTH); case TPrimitiveType::STRUCT: // Note that(xy): this is the length of struct type itself, // the length of its subtypes are not included. return OLAP_STRUCT_MAX_LENGTH; case TPrimitiveType::ARRAY: return OLAP_ARRAY_MAX_LENGTH; case TPrimitiveType::MAP: return OLAP_MAP_MAX_LENGTH; case TPrimitiveType::DECIMAL32: return 4; case TPrimitiveType::DECIMAL64: return 8; case TPrimitiveType::DECIMAL128I: return 16; case TPrimitiveType::DECIMAL256: return 32; case TPrimitiveType::DECIMALV2: return 12; // use 12 bytes in olap engine. default: LOG(WARNING) << "unknown field type. 
[type=" << type << "]"; return 0; } } TabletColumn::TabletColumn() : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {} TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) { _aggregation = agg; _type = type; } TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) { _aggregation = agg; _type = filed_type; _length = get_scalar_type_info(filed_type)->size(); _is_nullable = is_nullable; } TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable, int32_t unique_id, size_t length) { _aggregation = agg; _type = filed_type; _is_nullable = is_nullable; _unique_id = unique_id; _length = length; } TabletColumn::TabletColumn(const ColumnPB& column) { init_from_pb(column); } TabletColumn::TabletColumn(const TColumn& column) { init_from_thrift(column); } void TabletColumn::init_from_thrift(const TColumn& tcolumn) { ColumnPB column_pb; TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb); init_from_pb(column_pb); } void TabletColumn::init_from_pb(const ColumnPB& column) { _unique_id = column.unique_id(); _col_name = column.name(); _col_name_lower_case = to_lower(_col_name); _type = TabletColumn::get_field_type_by_string(column.type()); _is_key = column.is_key(); _is_nullable = column.is_nullable(); _is_auto_increment = column.is_auto_increment(); _has_default_value = column.has_default_value(); if (_has_default_value) { _default_value = column.default_value(); } if (column.has_precision()) { _is_decimal = true; _precision = column.precision(); } else { _is_decimal = false; } if (column.has_frac()) { _frac = column.frac(); } _length = column.length(); _index_length = column.index_length(); if (column.has_is_bf_column()) { _is_bf_column = column.is_bf_column(); } else { _is_bf_column = false; } if (column.has_has_bitmap_index()) { _has_bitmap_index = column.has_bitmap_index(); } else { _has_bitmap_index = false; } if 
(column.has_aggregation()) { _aggregation = get_aggregation_type_by_string(column.aggregation()); _aggregation_name = column.aggregation(); } if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) { _result_is_nullable = column.result_is_nullable(); _be_exec_version = column.be_exec_version(); } if (column.has_visible()) { _visible = column.visible(); } if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { CHECK(column.children_columns_size() == 1) << "ARRAY type should has 1 children types, but got " << column.children_columns_size(); } if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { DCHECK(column.children_columns_size() == 2) << "MAP type should has 2 children types, but got " << column.children_columns_size(); if (UNLIKELY(column.children_columns_size() != 2)) { LOG(WARNING) << "MAP type should has 2 children types, but got " << column.children_columns_size(); } } for (size_t i = 0; i < column.children_columns_size(); i++) { TabletColumn child_column; child_column.init_from_pb(column.children_columns(i)); add_sub_column(child_column); } if (column.has_column_path_info()) { _column_path = std::make_shared<vectorized::PathInData>(); _column_path->from_protobuf(column.column_path_info()); _parent_col_unique_id = column.column_path_info().parrent_column_unique_id(); } if (is_variant_type() && !column.has_column_path_info()) { // set path info for variant root column, to prevent from missing _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case); } for (const auto& column_pb : column.sparse_columns()) { TabletColumn column; column.init_from_pb(column_pb); _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column))); _num_sparse_columns++; } } TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root, const std::vector<std::string>& paths, int32_t parent_unique_id) { TabletColumn subcol; subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); subcol.set_is_nullable(true); subcol.set_unique_id(-1); 
subcol.set_parent_unique_id(parent_unique_id); vectorized::PathInData path(root, paths); subcol.set_path_info(path); subcol.set_name(path.get_path()); return subcol; } void TabletColumn::to_schema_pb(ColumnPB* column) const { column->set_unique_id(_unique_id); column->set_name(_col_name); column->set_type(get_string_by_field_type(_type)); column->set_is_key(_is_key); column->set_is_nullable(_is_nullable); if (_has_default_value) { column->set_default_value(_default_value); } if (_is_decimal) { column->set_precision(_precision); column->set_frac(_frac); } column->set_length(_length); column->set_index_length(_index_length); if (_is_bf_column) { column->set_is_bf_column(_is_bf_column); } if (!_aggregation_name.empty()) { column->set_aggregation(_aggregation_name); } column->set_result_is_nullable(_result_is_nullable); column->set_be_exec_version(_be_exec_version); if (_has_bitmap_index) { column->set_has_bitmap_index(_has_bitmap_index); } column->set_visible(_visible); if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { CHECK(_sub_columns.size() == 1) << "ARRAY type should has 1 children types, but got " << _sub_columns.size(); } if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { DCHECK(_sub_columns.size() == 2) << "MAP type should has 2 children types, but got " << _sub_columns.size(); if (UNLIKELY(_sub_columns.size() != 2)) { LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size(); } } for (size_t i = 0; i < _sub_columns.size(); i++) { ColumnPB* child = column->add_children_columns(); _sub_columns[i]->to_schema_pb(child); } // set parts info if (has_path_info()) { // CHECK_GT(_parent_col_unique_id, 0); _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id); // Update unstable information for variant columns. Some of the fields in the tablet schema // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth // in the number of tablet schema cache entries. 
if (_type == FieldType::OLAP_FIELD_TYPE_STRING) { column->set_length(INT_MAX); } column->set_index_length(0); } for (auto& col : _sparse_cols) { ColumnPB* sparse_column = column->add_sparse_columns(); col->to_schema_pb(sparse_column); } } void TabletColumn::add_sub_column(TabletColumn& sub_column) { _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column)); sub_column._parent_col_unique_id = this->_unique_id; _sub_column_count += 1; } bool TabletColumn::is_row_store_column() const { return _col_name == BeConsts::ROW_STORE_COL; } vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union( vectorized::DataTypePtr type, int current_be_exec_version) const { const auto* state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get()); BeExecVersionManager::check_function_compatibility( current_be_exec_version, _be_exec_version, state_type->get_nested_function()->get_name()); return vectorized::AggregateStateUnion::create(state_type->get_nested_function(), {type}, type); } vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function( std::string suffix, int current_be_exec_version) const { vectorized::AggregateFunctionPtr function = nullptr; auto type = vectorized::DataTypeFactory::instance().create_data_type(*this); if (type && type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE) { function = get_aggregate_function_union(type, current_be_exec_version); } else { std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation); std::string agg_name = origin_name + suffix; std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), [](unsigned char c) { return std::tolower(c); }); function = vectorized::AggregateFunctionSimpleFactory::instance().get( agg_name, {type}, type->is_nullable(), BeExecVersionManager::get_newest_version()); if (!function) { LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name << ", column_type=" << type->get_name(); } } if (function) 
{ function->set_version(_be_exec_version); return function; } return nullptr; } void TabletColumn::set_path_info(const vectorized::PathInData& path) { _column_path = std::make_shared<vectorized::PathInData>(path); } vectorized::DataTypePtr TabletColumn::get_vec_type() const { return vectorized::DataTypeFactory::instance().create_data_type(*this); } // escape '.' and '_' std::string escape_for_path_name(const std::string& s) { std::string res; const char* pos = s.data(); const char* end = pos + s.size(); while (pos != end) { unsigned char c = *pos; if (c == '.' || c == '_') { res += '%'; res += vectorized::hex_digit_uppercase(c / 16); res += vectorized::hex_digit_uppercase(c % 16); } else { res += c; } ++pos; } return res; } void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) { std::string escaped_path = escape_for_path_name(path_name); _escaped_index_suffix_path = escaped_path; } void TabletIndex::init_from_thrift(const TOlapTableIndex& index, const TabletSchema& tablet_schema) { _index_id = index.index_id; _index_name = index.index_name; // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side // get column unique id by name std::vector<int32_t> col_unique_ids(index.columns.size()); for (size_t i = 0; i < index.columns.size(); i++) { auto column_idx = tablet_schema.field_index(index.columns[i]); if (column_idx >= 0) { col_unique_ids[i] = tablet_schema.column(column_idx).unique_id(); } else { // if column unique id not found by column name, find by column unique id // column unique id can not found means this column is a new column added by light schema change if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() && tablet_schema.has_column_unique_id(index.column_unique_ids[i])) { col_unique_ids[i] = index.column_unique_ids[i]; } else { col_unique_ids[i] = -1; } } } _col_unique_ids = std::move(col_unique_ids); switch (index.index_type) { case TIndexType::BITMAP: _index_type = 
IndexType::BITMAP; break; case TIndexType::INVERTED: _index_type = IndexType::INVERTED; break; case TIndexType::BLOOMFILTER: _index_type = IndexType::BLOOMFILTER; break; case TIndexType::NGRAM_BF: _index_type = IndexType::NGRAM_BF; break; } if (index.__isset.properties) { for (auto kv : index.properties) { _properties[kv.first] = kv.second; } } } void TabletIndex::init_from_thrift(const TOlapTableIndex& index, const std::vector<int32_t>& column_uids) { _index_id = index.index_id; _index_name = index.index_name; _col_unique_ids = column_uids; switch (index.index_type) { case TIndexType::BITMAP: _index_type = IndexType::BITMAP; break; case TIndexType::INVERTED: _index_type = IndexType::INVERTED; break; case TIndexType::BLOOMFILTER: _index_type = IndexType::BLOOMFILTER; break; case TIndexType::NGRAM_BF: _index_type = IndexType::NGRAM_BF; break; } if (index.__isset.properties) { for (auto kv : index.properties) { _properties[kv.first] = kv.second; } } } void TabletIndex::init_from_pb(const TabletIndexPB& index) { _index_id = index.index_id(); _index_name = index.index_name(); _col_unique_ids.clear(); for (auto col_unique_id : index.col_unique_id()) { _col_unique_ids.push_back(col_unique_id); } _index_type = index.index_type(); for (const auto& kv : index.properties()) { _properties[kv.first] = kv.second; } _escaped_index_suffix_path = index.index_suffix_name(); } void TabletIndex::to_schema_pb(TabletIndexPB* index) const { index->set_index_id(_index_id); index->set_index_name(_index_name); index->clear_col_unique_id(); for (auto col_unique_id : _col_unique_ids) { index->add_col_unique_id(col_unique_id); } index->set_index_type(_index_type); for (const auto& kv : _properties) { DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) { continue; } }) (*index->mutable_properties())[kv.first] = kv.second; } index->set_index_suffix_name(_escaped_index_suffix_path); DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; }) // 
lowercase by default if (!_properties.empty()) { if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) { (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE; } } } TabletSchema::TabletSchema() = default; TabletSchema::~TabletSchema() { clear_column_cache_handlers(); } int64_t TabletSchema::get_metadata_size() const { return sizeof(TabletSchema); } void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { if (column.is_key()) { _num_key_columns++; } if (column.is_nullable()) { _num_null_columns++; } if (column.is_variant_type()) { ++_num_variant_columns; if (!column.has_path_info()) { const std::string& col_name = column.name_lower_case(); vectorized::PathInData path(col_name); column.set_path_info(path); } } if (UNLIKELY(column.name() == DELETE_SIGN)) { _delete_sign_idx = _num_columns; } else if (UNLIKELY(column.name() == SEQUENCE_COL)) { _sequence_col_idx = _num_columns; } else if (UNLIKELY(column.name() == VERSION_COL)) { _version_col_idx = _num_columns; } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) { _skip_bitmap_col_idx = _num_columns; } _field_id_to_index[column.unique_id()] = _num_columns; _cols.push_back(std::make_shared<TabletColumn>(std::move(column))); // The dropped column may have same name with exsiting column, so that // not add to name to index map, only for uid to index map if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() || _cols.back()->is_extracted_column()) { _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns; } else if (col_type == ColumnType::NORMAL) { _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); } _num_columns++; } void TabletColumn::append_sparse_column(TabletColumn column) { _sparse_cols.push_back(std::make_shared<TabletColumn>(column)); _num_sparse_columns++; } void TabletSchema::append_index(TabletIndex&& 
index) { for (int32_t id : index.col_unique_ids()) { _col_id_suffix_to_index.emplace( std::make_tuple(index.index_type(), id, index.get_index_suffix()), _indexes.size()); } _indexes.push_back(std::make_shared<TabletIndex>(index)); } void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type, TabletIndex&& index) { int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); const std::string& suffix_path = escape_for_path_name(col.suffix_path()); IndexKey key(index_type, col_unique_id, suffix_path); auto iter = _col_id_suffix_to_index.find(key); if (iter != _col_id_suffix_to_index.end()) { _indexes[iter->second] = std::make_shared<TabletIndex>(std::move(index)); return; } LOG(WARNING) << " failed to update_index: " << index_type << " " << col_unique_id << " " << suffix_path; } void TabletSchema::replace_column(size_t pos, TabletColumn new_col) { CHECK_LT(pos, num_columns()) << " outof range"; _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col)); } void TabletSchema::clear_index_cache_handlers() { for (auto* handle : _index_cache_handlers) { TabletColumnObjectPool::instance()->release(handle); } _index_cache_handlers.clear(); } void TabletSchema::clear_index() { clear_index_cache_handlers(); _indexes.clear(); _col_id_suffix_to_index.clear(); } void TabletSchema::remove_index(int64_t index_id) { std::vector<TabletIndexPtr> indexes; std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index; for (auto index : _indexes) { if (index->index_id() == index_id) { continue; } for (int32_t col_uid : index->col_unique_ids()) { col_id_suffix_to_index.emplace( std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()), indexes.size()); } indexes.emplace_back(std::move(index)); } _indexes = std::move(indexes); _col_id_suffix_to_index = std::move(col_id_suffix_to_index); } void TabletSchema::clear_columns() { _field_path_to_index.clear(); _field_name_to_index.clear(); 
_field_id_to_index.clear(); _num_columns = 0; _num_variant_columns = 0; _num_null_columns = 0; _num_key_columns = 0; _cols.clear(); clear_column_cache_handlers(); } void TabletSchema::clear_column_cache_handlers() { for (auto* cache_handle : _column_cache_handlers) { TabletColumnObjectPool::instance()->release(cache_handle); } _column_cache_handlers.clear(); } void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns, bool reuse_cache_column) { _keys_type = schema.keys_type(); _num_columns = 0; _num_variant_columns = 0; _num_key_columns = 0; _num_null_columns = 0; _cols.clear(); _indexes.clear(); _col_id_suffix_to_index.clear(); _field_name_to_index.clear(); _field_id_to_index.clear(); _cluster_key_uids.clear(); clear_column_cache_handlers(); clear_index_cache_handlers(); for (const auto& i : schema.cluster_key_uids()) { _cluster_key_uids.push_back(i); } for (auto& column_pb : schema.column()) { TabletColumnPtr column; if (reuse_cache_column) { auto pair = TabletColumnObjectPool::instance()->insert( deterministic_string_serialize(column_pb)); column = pair.second; _column_cache_handlers.push_back(pair.first); } else { column = std::make_shared<TabletColumn>(); column->init_from_pb(column_pb); } if (ignore_extracted_columns && column->is_extracted_column()) { continue; } if (column->is_key()) { _num_key_columns++; } if (column->is_nullable()) { _num_null_columns++; } if (column->is_variant_type()) { ++_num_variant_columns; } _cols.emplace_back(std::move(column)); if (!_cols.back()->is_extracted_column()) { _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); _field_id_to_index[_cols.back()->unique_id()] = _num_columns; } _num_columns++; } for (const auto& index_pb : schema.index()) { TabletIndexPtr index; if (reuse_cache_column) { auto pair = TabletColumnObjectPool::instance()->insert_index( deterministic_string_serialize(index_pb)); index = pair.second; _index_cache_handlers.push_back(pair.first); } else { 
index = std::make_shared<TabletIndex>();
            index->init_from_pb(index_pb);
        }
        // Register the index under every column it covers; _indexes.size() is the position
        // the index takes in the emplace_back below.
        for (int32_t col_uid : index->col_unique_ids()) {
            _col_id_suffix_to_index.emplace(
                    std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
                    _indexes.size());
        }
        _indexes.emplace_back(std::move(index));
    }
    // Scalar properties are copied one-to-one from the PB message.
    _num_short_key_columns = schema.num_short_key_columns();
    _num_rows_per_row_block = schema.num_rows_per_row_block();
    _compress_kind = schema.compress_kind();
    _next_column_unique_id = schema.next_column_unique_id();
    // bf_fpp is optional: remember whether it was present and fall back to
    // BLOOM_FILTER_DEFAULT_FPP when it is not.
    if (schema.has_bf_fpp()) {
        _has_bf_fpp = true;
        _bf_fpp = schema.bf_fpp();
    } else {
        _has_bf_fpp = false;
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
    }
    _is_in_memory = schema.is_in_memory();
    _disable_auto_compaction = schema.disable_auto_compaction();
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
    _store_row_column = schema.store_row_column();
    _skip_write_index_on_load = schema.skip_write_index_on_load();
    _delete_sign_idx = schema.delete_sign_idx();
    _sequence_col_idx = schema.sequence_col_idx();
    _version_col_idx = schema.version_col_idx();
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
    _sort_type = schema.sort_type();
    _sort_col_num = schema.sort_col_num();
    _compression_type = schema.compression_type();
    _row_store_page_size = schema.row_store_page_size();
    _storage_page_size = schema.storage_page_size();
    _schema_version = schema.schema_version();
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
if (!schema.has_inverted_index_storage_format()) {
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
    } else {
        _inverted_index_storage_format = schema.inverted_index_storage_format();
    }

    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
                                        schema.row_store_column_unique_ids().end());
    _enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
    update_metadata_size();
}

// Makes this schema a copy of `tablet_schema` by serializing it to a TabletSchemaPB and
// re-initializing from that message (default init_from_pb arguments). _table_id is copied
// separately afterwards.
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
    TabletSchemaPB tablet_schema_pb;
    tablet_schema.to_schema_pb(&tablet_schema_pb);
    init_from_pb(tablet_schema_pb);
    _table_id = tablet_schema.table_id();
}

// Member-wise copies `tablet_schema` and then strips everything column related: the
// path/name/id lookup maps, the column counters and the column list.
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
    *this = tablet_schema;
    _field_path_to_index.clear();
    _field_name_to_index.clear();
    _field_id_to_index.clear();
    _num_columns = 0;
    _num_variant_columns = 0;
    _num_null_columns = 0;
    _num_key_columns = 0;
    _cols.clear();
    // notice : do not ref columns
    // The copied handle list is cleared directly rather than through
    // clear_column_cache_handlers(), so no handle is released here.
    // NOTE(review): _index_cache_handlers copied by the assignment above is kept as is —
    // confirm the source and the copy may both hold (and later release) the same handles.
    _column_cache_handlers.clear();
}

// For every column of this schema with a non-negative unique id that also exists (by unique
// id) in `tablet_schema`, copies the bloom-filter and bitmap-index flags from the matching
// column of `tablet_schema`. Columns without a match are left untouched.
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
    for (auto& col : _cols) {
        if (col->unique_id() < 0) {
            continue;
        }
        const auto iter = tablet_schema._field_id_to_index.find(col->unique_id());
        if (iter == tablet_schema._field_id_to_index.end()) {
            continue;
        }
        auto col_idx = iter->second;
        // Guard against a mapped position that does not address an element of _cols.
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
            continue;
        }
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
        col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index());
    }
}

// Returns the deterministic serialization of this schema's TabletSchemaPB form.
std::string TabletSchema::to_key() const {
    TabletSchemaPB pb;
    to_schema_pb(&pb);
    return TabletSchema::deterministic_string_serialize(pb);
}

// Rebuilds this schema from two sources: table-level properties are taken from
// `ori_tablet_schema`, while the columns and indexes are copied from `index` and the schema
// version is set to `version`. `index_id` is not used in this function body.
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
                                               const OlapTableIndexSchema* index,
                                               const TabletSchema& ori_tablet_schema) {
    // copy from ori_tablet_schema
    _keys_type = ori_tablet_schema.keys_type();
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
_num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
    _compress_kind = ori_tablet_schema.compress_kind();

    // todo(yixiu): unique_id
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
    _is_in_memory = ori_tablet_schema.is_in_memory();
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
    _sort_type = ori_tablet_schema.sort_type();
    _sort_col_num = ori_tablet_schema.sort_col_num();
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
    _storage_page_size = ori_tablet_schema.storage_page_size();
    _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();

    // copy from table_schema_param
    _schema_version = version;
    // Reset all column/index state before it is rebuilt from `index`.
    _num_columns = 0;
    _num_variant_columns = 0;
    _num_key_columns = 0;
    _num_null_columns = 0;
    bool has_bf_columns = false;
    // NOTE(review): _indexes is cleared without clear_index_cache_handlers(), and
    // _field_path_to_index is not cleared here although clear_columns() clears it — confirm
    // this function is only used on schemas where both are already empty.
    _cols.clear();
    _indexes.clear();
    _col_id_suffix_to_index.clear();
    _field_name_to_index.clear();
    _field_id_to_index.clear();
    // Special-column positions start as "absent" (-1) and are filled in by the loop below.
    _delete_sign_idx = -1;
    _sequence_col_idx = -1;
    _version_col_idx = -1;
    _skip_bitmap_col_idx = -1;
    _cluster_key_uids.clear();
    clear_column_cache_handlers();
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
        _cluster_key_uids.push_back(i);
    }
    for (auto& column : index->columns) {
        if (column->is_key()) {
            _num_key_columns++;
        }
        if (column->is_nullable()) {
            _num_null_columns++;
        }
        if (column->is_bf_column()) {
            has_bf_columns = true;
        }
        if (column->is_variant_type()) {
            ++_num_variant_columns;
        }
        // Record the position of the special columns, recognized by name. _num_columns is
        // the position the current column is about to take in _cols.
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
            _delete_sign_idx = _num_columns;
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
            _sequence_col_idx = _num_columns;
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
            _version_col_idx = _num_columns;
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
            _skip_bitmap_col_idx = _num_columns;
        }
_cols.emplace_back(std::make_shared<TabletColumn>(*column));
        // Every column copied here is registered by name and by unique id.
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
        _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
        _num_columns++;
    }

    // Copy the indexes and register each under every column it covers; _indexes.size() is
    // the position the index takes in the emplace_back that follows.
    for (const auto& i : index->indexes) {
        for (int32_t col_uid : i->col_unique_ids()) {
            _col_id_suffix_to_index.emplace(
                    std::make_tuple(i->index_type(), col_uid, i->get_index_suffix()),
                    _indexes.size());
        }
        _indexes.emplace_back(std::make_shared<TabletIndex>(*i));
    }

    // The bloom filter fpp is only taken from the original schema when at least one of the
    // copied columns is a bloom filter column; otherwise the default is used.
    if (has_bf_columns) {
        _has_bf_fpp = true;
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
    } else {
        _has_bf_fpp = false;
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
    }
}

// Appends, as DROPPED columns, every column of `src_schema` whose unique id is unknown to
// this schema. CHECK-fails if such a column is a key column.
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
    // If they are the same tablet schema object, then just return
    if (this == &src_schema) {
        return;
    }
    for (const auto& src_col : src_schema.columns()) {
        if (_field_id_to_index.find(src_col->unique_id()) == _field_id_to_index.end()) {
            CHECK(!src_col->is_key())
                    << src_col->name() << " is key column, should not be dropped.";
            ColumnPB src_col_pb;
            // TabletColumn holds pointers whose ownership relations are unclear, so the
            // column is deep copied through its PB form instead of being copied directly.
src_col->to_schema_pb(&src_col_pb); TabletColumn new_col(src_col_pb); append_column(new_col, TabletSchema::ColumnType::DROPPED); } } } TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() { TabletSchemaSPtr copy = std::make_shared<TabletSchema>(); TabletSchemaPB tablet_schema_pb; this->to_schema_pb(&tablet_schema_pb); copy->init_from_pb(tablet_schema_pb, true /*ignore extracted_columns*/); return copy; } // Dropped column is in _field_id_to_index but not in _field_name_to_index // Could refer to append_column method bool TabletSchema::is_dropped_column(const TabletColumn& col) const { CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end()) << "could not find col with unique id = " << col.unique_id() << " and name = " << col.name() << " table_id=" << _table_id; auto it = _field_name_to_index.find(StringRef {col.name()}); return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id(); } void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) { std::unordered_set<int32_t> variant_columns; for (const auto& col : columns()) { if (col->is_variant_type()) { variant_columns.insert(col->unique_id()); } } for (const TabletColumnPtr& col : src_schema.columns()) { if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) { ColumnPB col_pb; col->to_schema_pb(&col_pb); TabletColumn new_col(col_pb); append_column(new_col, ColumnType::VARIANT); } } } void TabletSchema::reserve_extracted_columns() { for (auto it = _cols.begin(); it != _cols.end();) { if (!(*it)->is_extracted_column()) { it = _cols.erase(it); } else { ++it; } } } void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { for (const auto& i : _cluster_key_uids) { tablet_schema_pb->add_cluster_key_uids(i); } tablet_schema_pb->set_keys_type(_keys_type); for (const auto& col : _cols) { ColumnPB* column = tablet_schema_pb->add_column(); col->to_schema_pb(column); } for (const auto& index : 
_indexes) {
        auto* index_pb = tablet_schema_pb->add_index();
        index->to_schema_pb(index_pb);
    }
    tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns);
    tablet_schema_pb->set_num_rows_per_row_block(_num_rows_per_row_block);
    tablet_schema_pb->set_compress_kind(_compress_kind);
    // bf_fpp is only written when it was explicitly set on this schema.
    if (_has_bf_fpp) {
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
    }
    tablet_schema_pb->set_next_column_unique_id(_next_column_unique_id);
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
    tablet_schema_pb->set_store_row_column(_store_row_column);
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
    tablet_schema_pb->set_sort_type(_sort_type);
    tablet_schema_pb->set_sort_col_num(_sort_col_num);
    tablet_schema_pb->set_schema_version(_schema_version);
    tablet_schema_pb->set_compression_type(_compression_type);
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
    tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx);
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
    tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
}

// Returns the sum of length() over all columns plus (_num_columns + 7) / 8.
// NOTE(review): the extra term looks like one bit per column rounded up to whole bytes
// (e.g. a null bitmap) — confirm against the row format.
size_t TabletSchema::row_size() const {
    size_t size = 0;
    for (const auto& column : _cols) {
        size += column->length();
    }
    size += (_num_columns + 7) / 8;
    return size;
}

// Returns the position registered for the column named `field_name`, or -1 when the name
// is not registered.
int32_t TabletSchema::field_index(const std::string& field_name) const {
    const auto& found = _field_name_to_index.find(StringRef(field_name));
    return (found ==
_field_name_to_index.end()) ? -1 : found->second; } int32_t TabletSchema::field_index(const vectorized::PathInData& path) const { const auto& found = _field_path_to_index.find(vectorized::PathInDataRef(&path)); return (found == _field_path_to_index.end()) ? -1 : found->second; } int32_t TabletSchema::field_index(int32_t col_unique_id) const { const auto& found = _field_id_to_index.find(col_unique_id); return (found == _field_id_to_index.end()) ? -1 : found->second; } const std::vector<TabletColumnPtr>& TabletSchema::columns() const { return _cols; } const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const { return _sparse_cols; } const TabletColumn& TabletSchema::column(size_t ordinal) const { DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns; return *_cols[ordinal]; } const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const { DCHECK(ordinal < _sparse_cols.size()) << "ordinal:" << ordinal << ", _num_columns:" << _sparse_cols.size(); return *_sparse_cols[ordinal]; } const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const { return *_cols.at(_field_id_to_index.at(col_unique_id)); } TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) { return *_cols.at(_field_id_to_index.at(col_unique_id)); } TabletColumn& TabletSchema::mutable_column(size_t ordinal) { return *_cols.at(ordinal); } void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) { std::vector<TabletIndexPtr> indexes; for (const auto& tindex : tindexes) { TabletIndex index; index.init_from_thrift(tindex, *this); indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index))); } _indexes = std::move(indexes); std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index; for (size_t i = 0; i < _indexes.size(); i++) { for (int32_t col_uid : _indexes[i]->col_unique_ids()) { 
col_id_suffix_to_index.emplace(std::make_tuple(_indexes[i]->index_type(), col_uid,
                                                           _indexes[i]->get_index_suffix()),
                                           i);
        }
    }
    _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
}

// Returns true when a column is registered under the name `field_name`.
bool TabletSchema::exist_column(const std::string& field_name) const {
    return _field_name_to_index.contains(StringRef {field_name});
}

// Returns true when a column is registered under the unique id `col_unique_id`.
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
    return _field_id_to_index.contains(col_unique_id);
}

// Status flavoured variant of exist_column(): OK when the name is registered, otherwise an
// INTERNAL_ERROR status that carries the name and the list of all field names.
Status TabletSchema::have_column(const std::string& field_name) const {
    if (!_field_name_to_index.contains(StringRef(field_name))) {
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
                "Not found field_name, field_name:{}, schema:{}", field_name,
                get_all_field_names());
    }
    return Status::OK();
}

// Returns a pointer to the column registered under `field_name`. When the name is unknown
// this DCHECK-fails, and otherwise returns an InternalError result.
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
    auto it = _field_name_to_index.find(StringRef {field_name});
    if (it == _field_name_to_index.end()) {
        DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
                      << ", field_name_to_index=" << get_all_field_names();
        return ResultError(
                Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
                                      field_name, _table_id, _schema_version));
    }
    return _cols[it->second].get();
}

// Copies `tablet_schema` into this schema; then, when `t_columns` is non-empty and its first
// entry has a non-negative col_unique_id, replaces all columns with ones built from
// `t_columns`.
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
                                         const std::vector<TColumn>& t_columns) {
    copy_from(tablet_schema);
    // Only the first entry's col_unique_id is inspected to decide whether to replace.
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
        clear_columns();
        for (const auto& column : t_columns) {
            append_column(TabletColumn(column));
        }
    }
}

// Returns true when some index has type INVERTED and index id `index_id` (linear scan).
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
    for (size_t i = 0; i < _indexes.size(); i++) {
        if (_indexes[i]->index_type() == IndexType::INVERTED &&
            _indexes[i]->index_id() == index_id) {
            return true;
        }
    }
    return false;
}

// Returns the INVERTED index registered for (`col_unique_id`, escaped `suffix_path`), or
// nullptr when there is none. `suffix_path` is escaped here before the lookup.
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
                                                const std::string& suffix_path) const {
    const std::string escaped_suffix = escape_for_path_name(suffix_path);
    auto it =
_col_id_suffix_to_index.find(
            std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix));
    if (it != _col_id_suffix_to_index.end()) {
        return _indexes[it->second].get();
    }
    return nullptr;
}

// Returns the inverted index that applies to `col`, or nullptr when the column type does
// not support inverted indexes or no index is registered for it.
const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
    // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
    if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
        return nullptr;
    }
    // TODO use more efficient impl
    // Extracted columns are looked up through their parent's unique id; this happens when
    // accessing sub-columns of variants.
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
    // NOTE(review): the suffix is escaped here and the (uid, suffix) overload escapes its
    // argument again — confirm the double escaping matches how index suffixes are stored.
    return inverted_index(col_unique_id, escape_for_path_name(col.suffix_path()));
}

// Returns true when an NGRAM_BF index is registered for `col_unique_id` with an empty suffix.
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
    auto it = _col_id_suffix_to_index.find(index_key);
    return it != _col_id_suffix_to_index.end();
}

// Returns the NGRAM_BF index registered for `col_unique_id` with an empty suffix, or nullptr
// when there is none.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    // Get the ngram bf index for the given column unique id
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
    auto it = _col_id_suffix_to_index.find(index_key);
    if (it != _col_id_suffix_to_index.end()) {
        return _indexes[it->second].get();
    }
    return nullptr;
}

// Builds a block with one empty column per entry of `return_columns` (positions into _cols,
// used without bounds checking). A column is created as nullable when its position is
// contained in `tablet_columns_need_convert_null`; that set may be nullptr.
vectorized::Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    vectorized::Block block;
    for (int i = 0; i < return_columns.size(); ++i) {
        const auto& col = *_cols[return_columns[i]];
        bool is_nullable = (tablet_columns_need_convert_null != nullptr &&
                            tablet_columns_need_convert_null->find(return_columns[i]) !=
                                    tablet_columns_need_convert_null->end());
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);
        auto column = data_type->create_column();
        block.insert({std::move(column), data_type, col.name()});
} return block; } vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const { vectorized::Block block; for (const auto& col : _cols) { if (ignore_dropped_col && is_dropped_column(*col)) { continue; } auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*col); block.insert({data_type->create_column(), data_type, col->name()}); } return block; } vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const { vectorized::Block block; for (const auto& cid : cids) { const auto& col = *_cols[cid]; auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col); block.insert({data_type->create_column(), data_type, col.name()}); } return block; } bool operator==(const TabletColumn& a, const TabletColumn& b) { if (a._unique_id != b._unique_id) return false; if (a._col_name != b._col_name) return false; if (a._type != b._type) return false; if (a._is_key != b._is_key) return false; if (a._aggregation != b._aggregation) return false; if (a._is_nullable != b._is_nullable) return false; if (a._has_default_value != b._has_default_value) return false; if (a._has_default_value) { if (a._default_value != b._default_value) return false; } if (a._is_decimal != b._is_decimal) return false; if (a._is_decimal) { if (a._precision != b._precision) return false; if (a._frac != b._frac) return false; } if (a._length != b._length) return false; if (a._index_length != b._index_length) return false; if (a._is_bf_column != b._is_bf_column) return false; if (a._has_bitmap_index != b._has_bitmap_index) return false; if (a._column_path == nullptr && a._column_path != nullptr) return false; if (b._column_path == nullptr && a._column_path != nullptr) return false; if (b._column_path != nullptr && a._column_path != nullptr && *a._column_path != *b._column_path) return false; return true; } bool operator!=(const TabletColumn& a, const TabletColumn& b) { return !(a == b); } bool operator==(const TabletSchema& a, const 
TabletSchema& b) { if (a._keys_type != b._keys_type) return false; if (a._cols.size() != b._cols.size()) return false; for (int i = 0; i < a._cols.size(); ++i) { if (*a._cols[i] != *b._cols[i]) return false; } if (a._num_columns != b._num_columns) return false; if (a._num_key_columns != b._num_key_columns) return false; if (a._num_null_columns != b._num_null_columns) return false; if (a._num_short_key_columns != b._num_short_key_columns) return false; if (a._num_rows_per_row_block != b._num_rows_per_row_block) return false; if (a._compress_kind != b._compress_kind) return false; if (a._next_column_unique_id != b._next_column_unique_id) return false; if (a._has_bf_fpp != b._has_bf_fpp) return false; if (a._has_bf_fpp) { if (std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) return false; } if (a._is_in_memory != b._is_in_memory) return false; if (a._delete_sign_idx != b._delete_sign_idx) return false; if (a._disable_auto_compaction != b._disable_auto_compaction) return false; if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false; if (a._store_row_column != b._store_row_column) return false; if (a._row_store_page_size != b._row_store_page_size) return false; if (a._storage_page_size != b._storage_page_size) return false; if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false; if (a._enable_variant_flatten_nested != b._enable_variant_flatten_nested) return false; return true; } bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } } // namespace doris