backend/schema/validators/column_validator.cc (464 lines of code) (raw):

// // Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "backend/schema/validators/column_validator.h" #include <string> #include <vector> #include "zetasql/public/options.pb.h" #include "zetasql/public/simple_catalog.h" #include "zetasql/public/type.pb.h" #include "zetasql/public/types/type.h" #include "zetasql/public/types/type_factory.h" #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/ascii.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "backend/datamodel/types.h" #include "backend/schema/backfills/column_value_backfill.h" #include "backend/schema/catalog/change_stream.h" #include "backend/schema/catalog/column.h" #include "backend/schema/catalog/proto_bundle.h" #include "backend/schema/catalog/table.h" #include "backend/schema/catalog/udf.h" #include "backend/schema/graph/schema_node.h" #include "backend/schema/updater/schema_validation_context.h" #include "backend/schema/updater/sql_expression_validators.h" #include "backend/schema/verifiers/column_value_verifiers.h" #include "common/errors.h" #include "common/feature_flags.h" #include "common/limits.h" #include "google/protobuf/descriptor.h" #include "zetasql/base/ret_check.h" #include "zetasql/base/status_macros.h" namespace google { namespace spanner { namespace emulator { namespace backend { namespace { bool IsResizeable(const zetasql::Type* type) { if (type->IsString() || type->IsBytes()) { return true; } return false; } bool IsAllowedTypeChange(const zetasql::Type* old_column_type, const zetasql::Type* new_column_type) { if (old_column_type->Equals(new_column_type)) { return true; } if (old_column_type->IsArray() != new_column_type->IsArray()) { return false; } if (old_column_type->IsArray()) { return IsAllowedTypeChange(BaseType(old_column_type), BaseType(new_column_type)); } // Allow conversions from BYTES to STRING and STRING to BYTES. if ((new_column_type->IsString() && old_column_type->IsBytes()) || (new_column_type->IsBytes() && old_column_type->IsString())) { return true; } // Allow conversions from PROTO to BYTES and BYTES to PROTO if ((new_column_type->IsProto() && old_column_type->IsBytes()) || (new_column_type->IsBytes() && old_column_type->IsProto())) { return true; } // Allow conversion from PROTO to PROTO or enum to enum if ((new_column_type->IsProto() && old_column_type->IsProto()) || (new_column_type->IsEnum() && old_column_type->IsEnum())) { return true; } // Allow conversion from enum to INT64 and INT64 to enum (this should ideally // be INT32 but since cloud spanner doesn't support INT32 columns we support // INT64 here) if ((new_column_type->IsInt64() && old_column_type->IsEnum()) || (new_column_type->IsEnum() && old_column_type->IsInt64())) { return true; } return false; } // Validates size reductions and column type changes. absl::Status CheckAllowedColumnTypeChange( const Column* old_column, const Column* new_column, const zetasql::Type* old_column_type, const zetasql::Type* new_column_type, SchemaValidationContext* context) { if (!IsAllowedTypeChange(old_column_type, new_column_type)) { return error::CannotChangeColumnType(new_column->Name(), ToString(old_column_type), ToString(new_column_type)); } const auto* old_base_type = BaseType(old_column_type); const auto* new_base_type = BaseType(new_column_type); if (new_base_type->Equals(old_base_type)) { if (IsResizeable(old_base_type) && new_column->effective_max_length() < old_column->effective_max_length()) { context->AddAction([old_column, new_column](const SchemaValidationContext* context) { return VerifyColumnLength(old_column->table(), old_column, new_column->effective_max_length(), context); }); } } else { context->AddAction( [old_column, new_column](const SchemaValidationContext* context) { return VerifyColumnTypeChange(old_column->table(), old_column, new_column, context); }); // After verifying that the type change is acceptable, run a backfill // to apply the type change to the column values in storage. context->AddAction( [old_column, new_column](const SchemaValidationContext* context) { return BackfillColumnValue(old_column, new_column, context); }); } return absl::OkStatus(); } absl::Status ValidateColumnSignatureChange( absl::string_view modify_action, absl::string_view dependency_name, const Column* dependent_column, const Table* dependent_table, const Schema* temp_new_schema, zetasql::TypeFactory* type_factory) { // Re-analyze the dependent view based on the new definition of the // dependency in the temporary new schema. if (!dependent_column->expression().has_value()) { return absl::OkStatus(); } absl::flat_hash_set<const SchemaNode*> unused_new_deps; absl::flat_hash_set<const SchemaNode*> unused_udf_dependencies; absl::flat_hash_set<std::string> dependent_column_names; std::vector<zetasql::SimpleTable::NameAndType> name_and_types; for (const Column* column : dependent_table->columns()) { name_and_types.emplace_back(column->Name(), column->GetType()); } auto status = AnalyzeColumnExpression( dependent_column->expression().value(), dependent_column->GetType(), dependent_table, temp_new_schema, type_factory, name_and_types, "check constraints", &dependent_column_names, /*dependent_sequences=*/nullptr, /*allow_volatile_expression=*/false, &unused_udf_dependencies); if (!status.ok()) { return error::DependentColumnBecomesInvalid(modify_action, dependency_name, dependent_column->Name(), status.message()); } return absl::OkStatus(); } } // namespace bool ColumnValidator::TypeExistsInProtoBundle(const zetasql::Type* type, const ProtoBundle* proto_bundle) { if (type->IsProto()) { const google::protobuf::Descriptor* type_descriptor = type->AsProto()->descriptor(); absl::StatusOr<const google::protobuf::Descriptor*> descriptor = proto_bundle->GetTypeDescriptor(type_descriptor->full_name()); return descriptor.ok(); } if (type->IsEnum()) { const google::protobuf::EnumDescriptor* enum_descriptor = type->AsEnum()->enum_descriptor(); absl::StatusOr<const google::protobuf::EnumDescriptor*> descriptor = proto_bundle->GetEnumTypeDescriptor(enum_descriptor->full_name()); return descriptor.ok(); } return false; } absl::Status ColumnValidator::ValidateTypeExistsInProtoBundle( const zetasql::Type* type, const ProtoBundle* proto_bundle, const std::string& column_name) { ZETASQL_RET_CHECK(proto_bundle != nullptr && (type->IsProto() || type->IsEnum())); return TypeExistsInProtoBundle(type, proto_bundle) ? absl::OkStatus() : error::DeletedTypeStillInUse( type->TypeName(zetasql::PRODUCT_EXTERNAL, /*use_external_float32=*/true), column_name); } absl::Status ColumnValidator::Validate(const Column* column, SchemaValidationContext* context) { ZETASQL_RET_CHECK_NE(column->table_, nullptr); ZETASQL_RET_CHECK(!column->name_.empty()); ZETASQL_RET_CHECK(!column->id_.empty()); ZETASQL_RET_CHECK(column->type_ != nullptr && IsSupportedColumnType(column->type_)); const zetasql::Type* base_type = BaseType(column->type_); ZETASQL_RET_CHECK(!column->declared_max_length_.has_value() || base_type->IsString() || base_type->IsBytes()); if (column->name_.length() > limits::kMaxSchemaIdentifierLength) { return error::InvalidSchemaName("Column", column->Name()); } if (column->source_column_) { ZETASQL_RET_CHECK(column->type_->Equals(column->source_column_->type_)); ZETASQL_RET_CHECK(column->declared_max_length_ == column->source_column_->declared_max_length_); } if (column->declared_max_length_.has_value()) { if (base_type->IsString() && (column->declared_max_length_.value() == 0 || column->declared_max_length_.value() > limits::kMaxStringColumnLength)) { return error::InvalidColumnLength(column->FullName(), column->declared_max_length_.value(), 1, limits::kMaxStringColumnLength); } if (base_type->IsBytes() && (column->declared_max_length_.value() == 0 || column->declared_max_length_.value() > limits::kMaxBytesColumnLength)) { return error::InvalidColumnLength(column->FullName(), column->declared_max_length_.value(), 1, limits::kMaxBytesColumnLength); } } if (column->has_vector_length()) { if (!column->type_->IsArray() || (column->type_->IsArray() && !base_type->IsFloat() && !base_type->IsDouble())) { return error::InvalidTypeForVectorLength(column->FullName()); } else if (column->is_generated() || column->has_default_value()) { return error::VectorLengthOnGeneratedOrDefaultColumn(column->FullName()); } } if (base_type->IsProto() || base_type->IsEnum()) { ZETASQL_RETURN_IF_ERROR(ValidateTypeExistsInProtoBundle( base_type, context->proto_bundle(), column->FullName())); } if (column->has_allows_commit_timestamp() && !column->type_->IsTimestamp()) { return error::UnallowedCommitTimestampOption(column->FullName()); } if (column->has_default_value()) { if (column->allows_commit_timestamp()) { return error::CannotUseCommitTimestampWithColumnDefaultValue( column->Name()); } if (context->is_postgresql_dialect()) { ZETASQL_RET_CHECK(column->postgresql_oid().has_value()); } else { ZETASQL_RET_CHECK(!column->postgresql_oid().has_value()); } } if (column->is_generated()) { if (context->is_postgresql_dialect()) { ZETASQL_RET_CHECK(column->postgresql_oid().has_value()); } else { ZETASQL_RET_CHECK(!column->postgresql_oid().has_value()); } if (!EmulatorFeatureFlags::instance().flags().enable_generated_pk && column->table()->FindKeyColumn(column->Name())) { return error::CannotUseGeneratedColumnInPrimaryKey( column->table()->Name(), column->Name()); } for (const Column* dep : column->dependent_columns()) { if (dep->allows_commit_timestamp()) { return error::CannotUseCommitTimestampOnGeneratedColumnDependency( dep->Name()); } } } return absl::OkStatus(); } absl::Status ColumnValidator::ValidateUpdate(const Column* column, const Column* old_column, SchemaValidationContext* context) { // if column has row deletion policy, then can't delete the column or change // type. bool has_row_deletion_policy = column->table()->row_deletion_policy().has_value() && column->table()->row_deletion_policy()->column_name() == column->Name(); if (has_row_deletion_policy && !column->table_->is_deleted() && (column->is_deleted() || !column->GetType()->IsTimestamp())) { return error::RowDeletionPolicyWillBreak(column->Name(), column->table()->Name()); } if (!column->change_streams_explicitly_tracking_column().empty() && column->is_deleted()) { std::string change_stream_names; for (int i = 0; i < column->change_streams_explicitly_tracking_column().size(); ++i) { change_stream_names.append( column->change_streams_explicitly_tracking_column()[i]->Name()); } return error::DropColumnWithChangeStream( column->table()->Name(), column->Name(), column->change_streams_explicitly_tracking_column().size(), change_stream_names); } if (column->is_deleted()) { return absl::OkStatus(); } // Once set, column ID should never change. ZETASQL_RET_CHECK_EQ(column->id(), old_column->id()); // For a non-deleted column, the objects it depends on should // also be alive. ZETASQL_RET_CHECK(!column->table_->is_deleted()); // It is invalid to drop a column which is referenced by a generated column. for (const Column* dep : column->dependent_columns()) { if (dep->is_deleted()) { return error::InvalidDropColumnReferencedByGeneratedColumn( dep->Name(), column->table()->Name(), column->Name()); } } if (column->is_generated() && !old_column->is_generated()) { return error::CannotConvertRegularColumnToGeneratedColumn( column->table()->Name(), column->Name()); } if (!column->is_generated() && old_column->is_generated()) { return error::CannotConvertGeneratedColumnToRegularColumn( column->table()->Name(), column->Name()); } if (column->is_generated() && old_column->is_generated()) { if (!column->GetType()->Equals(old_column->GetType())) { return error::CannotAlterStoredGeneratedColumnDataType( column->table()->Name(), column->Name()); } if (column->expression().value() != old_column->expression().value()) { return error::CannotAlterGeneratedColumnExpression( column->table()->Name(), column->Name()); } if (column->is_stored() != old_column->is_stored()) { return error::CannotAlterGeneratedColumnStoredAttribute( column->table()->Name(), column->Name()); } } if (!column->GetType()->Equals(old_column->GetType())) { for (const Column* generated_column : column->table()->columns()) { if (generated_column->is_generated()) { for (const Column* dep : generated_column->dependent_columns()) { if (column == dep) { return error:: CannotAlterColumnDataTypeWithDependentStoredGeneratedColumn( column->Name()); } } } } } if (column->source_column_) { // There is no valid scenario under which a source column drop should // trigger a cascading drop on referencing column. if (column->source_column_->is_deleted()) { ZETASQL_RET_CHECK_NE(column->table_->owner_index(), nullptr); return error::InvalidDropColumnWithDependency( column->name_, column->table_->owner_index()->indexed_table()->Name(), column->table_->owner_index()->Name()); } } if (old_column->is_nullable_ && !column->is_nullable_) { context->AddAction([old_column](const SchemaValidationContext* context) { return VerifyColumnNotNull(old_column->table(), old_column, context); }); } // Check for size reduction and type change. ZETASQL_RETURN_IF_ERROR(CheckAllowedColumnTypeChange( old_column, column, old_column->GetType(), column->type_, context)); if (column->type_->IsTimestamp()) { if (column->allows_commit_timestamp() && !old_column->allows_commit_timestamp()) { context->AddAction([column](const SchemaValidationContext* context) { return VerifyColumnCommitTimestamp(column->table_, column, context); }); } } for (const SchemaNode* dependency : column->sequences_used()) { // Cannot drop a sequence if a column depends on it. if (dependency->is_deleted()) { const auto& dep_info = dependency->GetSchemaNameInfo(); std::string dependency_type = (dep_info->global ? absl::AsciiStrToUpper(dep_info->kind) : absl::AsciiStrToLower(dep_info->kind)); return error::InvalidDropDependentColumn(dependency_type, dep_info->name, column->FullName()); } } for (const SchemaNode* dependency : column->udf_dependencies()) { if (dependency->is_deleted()) { const auto& dep_info = dependency->GetSchemaNameInfo(); std::string dependency_type = (dep_info->global ? absl::AsciiStrToUpper(dep_info->kind) : absl::AsciiStrToLower(dep_info->kind)); return error::InvalidDropDependentColumn(dependency_type, dep_info->name, column->FullName()); } } if (context->is_postgresql_dialect()) { // Default and generated columns must have OIDs. if (old_column->is_generated() || old_column->has_default_value()) { ZETASQL_RET_CHECK(old_column->postgresql_oid().has_value()); } if (column->is_generated() || column->has_default_value()) { ZETASQL_RET_CHECK(column->postgresql_oid().has_value()); } // Alter statement may change the default value which would be assigned a // new OID so don't assert that the OIDs are the same. } else { ZETASQL_RET_CHECK(!old_column->postgresql_oid().has_value()); ZETASQL_RET_CHECK(!column->postgresql_oid().has_value()); } for (const SchemaNode* dep : column->udf_dependencies()) { // TODO When dropping support is added, a check should be added // to ensure that this column references a UDF that is also being dropped. if (context->IsModifiedNode(dep)) { const auto& dep_info = dep->GetSchemaNameInfo(); std::string dependency_type = (dep_info->global ? absl::AsciiStrToUpper(dep_info->kind) : absl::AsciiStrToLower(dep_info->kind)); std::string modify_action = absl::StrCat("alter ", dependency_type); std::string dependency_name; if (auto dep_udf = dep->As<const Udf>(); dep_udf != nullptr) { dependency_name = dep_udf->Name(); } // No need to check modifications on index dependencies as indexes // cannot currently be altered. ZETASQL_RETURN_IF_ERROR(ValidateColumnSignatureChange( modify_action, dependency_name, column, column->table(), context->tmp_new_schema(), context->type_factory())); } } return absl::OkStatus(); } absl::Status KeyColumnValidator::Validate(const KeyColumn* key_column, SchemaValidationContext* context) { ZETASQL_RET_CHECK_NE(key_column->column_, nullptr); std::string type_name = key_column->column_->GetType()->IsArray() ? "ARRAY" : key_column->column_->GetType()->ShortTypeName( zetasql::PRODUCT_EXTERNAL, /*use_external_float32=*/true); if (!IsSupportedKeyColumnType(key_column->column_->GetType())) { auto owner_index = key_column->column()->table()->owner_index(); if (owner_index != nullptr && owner_index->is_search_index() && key_column->column_->GetType()->IsTokenListType()) { return absl::OkStatus(); } if (owner_index != nullptr && owner_index->is_vector_index() && key_column->column_->GetType()->IsArray()) { const zetasql::Type* element_type = key_column->column_->GetType()->AsArray()->element_type(); if (element_type->IsFloat() || element_type->IsDouble()) { return absl::OkStatus(); } type_name = key_column->column_->GetType()->ShortTypeName( zetasql::PRODUCT_EXTERNAL, true); } if (owner_index != nullptr) { return error::CannotCreateIndexOnColumn( key_column->column()->table()->owner_index()->Name(), key_column->column()->Name(), type_name); } return error::InvalidPrimaryKeyColumnType(key_column->column_->FullName(), type_name); } return absl::OkStatus(); } absl::Status KeyColumnValidator::ValidateUpdate( const KeyColumn* key_column, const KeyColumn* old_key_column, SchemaValidationContext* context) { if (key_column->is_deleted()) { return absl::OkStatus(); } const auto* column = key_column->column_; // If the underlying column of the key column has been altered, // reject the update if the column is also a parent key column, // unless it is a timestamp typed column and the update involves // chaging the allow timestamp option. if (context->IsModifiedNode(column)) { bool is_commit_timestamp_option_change = column->allows_commit_timestamp() != old_key_column->column_->allows_commit_timestamp(); if (!is_commit_timestamp_option_change) { // If the key column is a child table column. const auto* table_parent = column->table()->parent(); if (table_parent != nullptr) { const auto* parent_column = table_parent->FindColumn(column->Name()); if (parent_column != nullptr) { return error::AlteringParentColumn(column->FullName()); } } // If the key column is a parent table column. for (const auto* child_table : column->table()->children()) { if (child_table->FindKeyColumn(column->Name())) { return error::CannotChangeKeyColumnWithChildTables( column->FullName()); } } } } ZETASQL_RET_CHECK(!key_column->column_->is_deleted()); ZETASQL_RET_CHECK_EQ(key_column->is_descending_, old_key_column->is_descending_); if (context->is_postgresql_dialect()) { ZETASQL_RET_CHECK_EQ(old_key_column->postgresql_oid().has_value(), key_column->postgresql_oid().has_value()); if (old_key_column->postgresql_oid().has_value() && key_column->postgresql_oid().has_value()) { ZETASQL_RET_CHECK_EQ(old_key_column->postgresql_oid().value(), key_column->postgresql_oid().value()); } } else { ZETASQL_RET_CHECK(!old_key_column->postgresql_oid().has_value()); ZETASQL_RET_CHECK(!key_column->postgresql_oid().has_value()); } return absl::OkStatus(); } } // namespace backend } // namespace emulator } // namespace spanner } // namespace google