backend/actions/generated_column.cc (272 lines of code) (raw):

// // Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "backend/actions/generated_column.h" #include <algorithm> #include <cstddef> #include <map> #include <memory> #include <string> #include <utility> #include <vector> #include "zetasql/public/analyzer_options.h" #include "zetasql/public/catalog.h" #include "zetasql/public/evaluator.h" #include "zetasql/public/value.h" #include "absl/container/flat_hash_set.h" #include "absl/log/check.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "absl/strings/substitute.h" #include "backend/access/write.h" #include "backend/actions/context.h" #include "backend/actions/ops.h" #include "backend/common/graph_dependency_helper.h" #include "backend/common/ids.h" #include "backend/datamodel/key.h" #include "backend/datamodel/key_range.h" #include "backend/query/analyzer_options.h" #include "backend/schema/catalog/column.h" #include "backend/schema/catalog/table.h" #include "backend/storage/iterator.h" #include "common/errors.h" #include "zetasql/base/ret_check.h" #include "zetasql/base/status_macros.h" namespace google { namespace spanner { namespace emulator { namespace backend { namespace { absl::string_view GetColumnName(const Column* const& column) { return column->Name(); } bool IsKeyColumn(const Column* column) { return column->table()->FindKeyColumn(column->Name()) != nullptr; } absl::Status GetGeneratedColumnsInTopologicalOrder( const Table* table, std::vector<const Column*>* generated_columns) { GraphDependencyHelper<const Column*, GetColumnName> sorter( /*object_type=*/"generated column"); for (const Column* column : table->columns()) { if (column->is_generated() || column->has_default_value()) { ZETASQL_RETURN_IF_ERROR(sorter.AddNodeIfNotExists(column)); } } for (const Column* column : table->columns()) { if (column->is_generated()) { for (const Column* dep : column->dependent_columns()) { if (dep->is_generated() || dep->has_default_value()) { ZETASQL_RETURN_IF_ERROR( sorter.AddEdgeIfNotExists(column->Name(), dep->Name())); } } } } return sorter.TopologicalOrder(generated_columns); } // Check if any dependent column is present in the user supplied columns for the // generated key column. bool IsAnyDependentColumnPresent( const Column* generated_column, std::vector<std::string> user_supplied_columns) { ABSL_DCHECK(generated_column->is_generated()); for (const auto& dep_col : generated_column->dependent_columns()) { if (dep_col->is_generated() && IsKeyColumn(dep_col)) { return IsAnyDependentColumnPresent(dep_col, user_supplied_columns); } if (std::find(user_supplied_columns.begin(), user_supplied_columns.end(), dep_col->Name()) != user_supplied_columns.end()) { return true; } } return false; } absl::StatusOr<std::unique_ptr<zetasql::PreparedExpression>> PrepareExpression(const Column* generated_column, const zetasql::AnalyzerOptions& analyzer_options, zetasql::Catalog* function_catalog) { constexpr char kExpression[] = "CAST (($0) AS $1)"; std::string sql = absl::Substitute( kExpression, generated_column->expression().value(), generated_column->GetType()->TypeName(zetasql::PRODUCT_EXTERNAL, /*use_external_float32=*/true)); auto expr = std::make_unique<zetasql::PreparedExpression>(sql); zetasql::AnalyzerOptions options = analyzer_options; for (const Column* dep : generated_column->dependent_columns()) { ZETASQL_RETURN_IF_ERROR(options.AddExpressionColumn(dep->Name(), dep->GetType())); } ZETASQL_RETURN_IF_ERROR(expr->Prepare(options, function_catalog)); ZETASQL_RET_CHECK(generated_column->GetType()->Equals(expr->output_type())); return std::move(expr); } } // namespace GeneratedColumnEffector::GeneratedColumnEffector( const Table* table, const zetasql::AnalyzerOptions& analyzer_options, zetasql::Catalog* function_catalog, bool for_keys) : table_(table), for_keys_(for_keys) { absl::Status s = Initialize(analyzer_options, function_catalog); ABSL_DCHECK(s.ok()) << "Failed to initialize GeneratedColumnEffector: " << s; } absl::Status GeneratedColumnEffector::Initialize( const zetasql::AnalyzerOptions& analyzer_options, zetasql::Catalog* function_catalog) { ZETASQL_RETURN_IF_ERROR( GetGeneratedColumnsInTopologicalOrder(table_, &generated_columns_)); absl::flat_hash_set<ColumnID> unique_dependent_column; expressions_.reserve(generated_columns_.size()); for (const Column* generated_column : generated_columns_) { ZETASQL_ASSIGN_OR_RETURN(auto expr, PrepareExpression(generated_column, analyzer_options, function_catalog)); expressions_[generated_column] = std::move(expr); for (const Column* dep : generated_column->dependent_columns()) { if (unique_dependent_column.insert(dep->id()).second) { dependent_columns_.push_back(dep); } } } return absl::OkStatus(); } absl::StatusOr<zetasql::Value> GeneratedColumnEffector::ComputeGeneratedColumnValue( const Column* generated_column, const zetasql::ParameterValueMap& row_column_values) const { ZETASQL_RET_CHECK(generated_column != nullptr && (generated_column->is_generated() || generated_column->has_default_value())); ZETASQL_ASSIGN_OR_RETURN( zetasql::Value value, expressions_.at(generated_column)->Execute(row_column_values)); if (value.is_null() && !generated_column->is_nullable()) { return error::NullValueForNotNullColumn(table_->Name(), generated_column->FullName()); } return value; } absl::Status GeneratedColumnEffector::Effect( const MutationOp& op, std::vector<std::vector<zetasql::Value>>* generated_values, std::vector<const Column*>* columns_with_generated_values) const { ZETASQL_RET_CHECK(for_keys_ == true); columns_with_generated_values->reserve(generated_columns_.size()); // This vector stores column values for each row that can be used to evaluate // generated columns. std::vector<zetasql::ParameterValueMap> row_column_values( op.rows.size(), zetasql::ParameterValueMap()); // Evaluate generated columns in topological order. for (int i = 0; i < generated_columns_.size(); ++i) { const Column* generated_column = generated_columns_[i]; if (!generated_column->has_default_value() && !IsKeyColumn(generated_column)) { // skip non-key columns except default columns since generated key columns // may be depended by default columns values of which would need to be // evaluated. continue; } auto column_supplied_itr = std::find(op.columns.begin(), op.columns.end(), generated_column->Name()); bool is_user_supplied_value = column_supplied_itr != op.columns.end(); if (generated_column->has_default_value()) { // If this column has a default value and the user is supplying a value // for it, then we don't need to compute its default value. if (is_user_supplied_value) { continue; } if (IsKeyColumn(generated_column) && (op.type == MutationOpType::kUpdate || op.type == MutationOpType::kDelete)) { return error::DefaultPKNeedsExplicitValue(generated_column->FullName(), "Update/Delete"); } } else if (generated_column->is_generated() && is_user_supplied_value) { // If this column is generated column and user is supplying a value for it // and the user is not supplying values for dependent column values to // evaluate generated column value, we don't need to compute its generated // value. if (!IsAnyDependentColumnPresent(generated_column, op.columns)) { continue; } // Users should supply values for generated columns only in update // operations. if (op.type != MutationOpType::kUpdate) { return error::UserSuppliedValueInNonUpdateGpk( generated_column->FullName()); } } for (int i = 0; i < op.rows.size(); ++i) { // Calculate values of generated columns for each row. for (int j = 0; j < op.columns.size(); ++j) { row_column_values[i][op.columns[j]] = op.rows[i][j]; } ZETASQL_ASSIGN_OR_RETURN( zetasql::Value value, ComputeGeneratedColumnValue(generated_column, row_column_values[i])); if (generated_column->is_generated() && is_user_supplied_value) { size_t index = column_supplied_itr - op.columns.begin(); zetasql::Value provided_value = op.rows[i][index]; if (provided_value != value) { return error::GeneratedPkModified(generated_column->FullName()); } } // Update row_column_values so that other dependent columns on this // generated column can use the value. row_column_values[i][generated_column->Name()] = value; generated_values->at(i).push_back(value); } columns_with_generated_values->push_back(generated_column); } return absl::OkStatus(); } absl::Status GeneratedColumnEffector::Effect(const ActionContext* ctx, const InsertOp& op) const { zetasql::ParameterValueMap column_values; ZETASQL_RET_CHECK_EQ(op.columns.size(), op.values.size()); for (int i = 0; i < op.columns.size(); ++i) { column_values[op.columns[i]->Name()] = op.values[i]; } for (const Column* column : table_->columns()) { // If the column doesn't appear in the list and doesn't have a default // value, we fill in Null value for it, so it can be used to compute // generated column values. // Columns with default values will be computed the same way as generated // columns. if (column_values.find(column->Name()) == column_values.end() && !column->has_default_value()) { column_values[column->Name()] = zetasql::Value::Null(column->GetType()); } } return Effect(ctx, op.key, &column_values, /*skip_default_values=*/false); } absl::Status GeneratedColumnEffector::Effect(const ActionContext* ctx, const UpdateOp& op) const { for (const Column* column : op.columns) { // If any non-key generated columns appear then this is a generated column // effect and we do not need to process it again. The non-key requirement is // needed because user-generated updates are expected to include key values, // including generated ones. if (column->is_generated() && !IsKeyColumn(column)) { return absl::OkStatus(); } } zetasql::ParameterValueMap column_values; ZETASQL_ASSIGN_OR_RETURN( std::unique_ptr<StorageIterator> itr, ctx->store()->Read(table_, KeyRange::Point(op.key), dependent_columns_)); ZETASQL_RET_CHECK(itr->Next()); ZETASQL_RETURN_IF_ERROR(itr->Status()); ZETASQL_RET_CHECK_EQ(op.columns.size(), op.values.size()); ZETASQL_RET_CHECK_EQ(itr->NumColumns(), dependent_columns_.size()); for (int i = 0; i < dependent_columns_.size(); ++i) { column_values[dependent_columns_[i]->Name()] = itr->ColumnValue(i); } for (int i = 0; i < op.columns.size(); ++i) { column_values[op.columns[i]->Name()] = op.values[i]; } return Effect(ctx, op.key, &column_values, /*skip_default_values=*/true); } absl::Status GeneratedColumnEffector::Effect( const ActionContext* ctx, const Key& key, zetasql::ParameterValueMap* column_values, bool skip_default_values) const { ZETASQL_RET_CHECK(for_keys_ == false); std::vector<zetasql::Value> generated_values; generated_values.reserve(generated_columns_.size()); std::vector<const Column*> columns_with_generated_values; columns_with_generated_values.reserve(generated_columns_.size()); // Evaluate generated columns in topological order. for (int i = 0; i < generated_columns_.size(); ++i) { const Column* generated_column = generated_columns_[i]; // Default values should only be populated for inserts where no value is // supplied by the user. For updates, skip_default_values is set to true // and all default value computations are skipped (the column will thus // either be set to a new user-provided value or left at its current value). if (generated_column->has_default_value() && (skip_default_values || column_values->find(generated_column->Name()) != column_values->end())) { continue; } // Keys are handled by a separate effector initialized with for_keys_=true. // Skipping these columns here is not simply an optimization; if we include // them then we may find ourselves in an infinite loop repeatedly generating // effects for generated PKs. if (IsKeyColumn(generated_column)) { continue; } ZETASQL_ASSIGN_OR_RETURN( zetasql::Value value, ComputeGeneratedColumnValue(generated_column, *column_values)); // Update the column value so that it can be used to evaluate other // generated columns that depend on it. (*column_values)[generated_column->Name()] = value; generated_values.push_back(value); columns_with_generated_values.push_back(generated_column); } if (!columns_with_generated_values.empty()) { ctx->effects()->Update(table_, key, columns_with_generated_values, generated_values); } return absl::OkStatus(); } } // namespace backend } // namespace emulator } // namespace spanner } // namespace google