backend/schema/graph/schema_graph_editor.cc (206 lines of code) (raw):

// // Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "backend/schema/graph/schema_graph_editor.h" #include <algorithm> #include <memory> #include <utility> #include "zetasql/base/logging.h" #include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_join.h" #include "backend/schema/graph/schema_node.h" #include "zetasql/base/ret_check.h" #include "absl/status/status.h" #include "zetasql/base/status_macros.h" namespace google { namespace spanner { namespace emulator { namespace backend { SchemaNode* SchemaGraphEditor::MakeNewClone(const SchemaNode* node) { std::unique_ptr<SchemaNode> clone = node->ShallowClone(); SchemaNode* mutable_clone = clone.get(); cloned_pool_->Add(std::move(clone)); clone_map_[node] = mutable_clone; return mutable_clone; } absl::Status SchemaGraphEditor::InitCloneMap() { // First, make a clone of the graph. ZETASQL_VLOG(2) << "First cloning pass"; for (const auto* schema_node : original_graph_->GetSchemaNodes()) { ZETASQL_ASSIGN_OR_RETURN(const auto* cloned_node, Clone(schema_node)); new_nodes_.push_back(cloned_node); } ZETASQL_RET_CHECK_EQ(clone_map_.size(), num_original_nodes()); return absl::OkStatus(); } absl::Status SchemaGraphEditor::FixupInternal(const SchemaNode* original, SchemaNode* mutable_clone) { ZETASQL_VLOG(4) << std::string(depth_, ' ') << "Fixing " << NodeKindString(mutable_clone) << " node :" << mutable_clone; ++depth_; ZETASQL_RETURN_IF_ERROR(mutable_clone->DeepClone(this, original)); --depth_; ZETASQL_VLOG(4) << std::string(depth_, ' ') << "Finished fixing node: " << mutable_clone->DebugString(); return absl::OkStatus(); } // The cloning process is guaranteed to terminate even in case of cycles in the // graph because: // - Besides the 'kOriginal' nodes, Clone() is called on only 3 other kinds of // nodes : kEdited, kAdded and kCloned. // - Each of these adds itself to the clone map before calling FixupInternal // which in turn calls DeepClone which may call Clone() recursively on other // nodes. // - At some point every node has had Clone() called on itself and therefore // added itself to the clone map. After that point, no recursive calls to // Clone() are made, guaranteeing termination of the cloning procedure. absl::StatusOr<const SchemaNode*> SchemaGraphEditor::Clone( const SchemaNode* node) { ZETASQL_RET_CHECK_NE(node, nullptr); // During the delete_fixup_ phase, we don't do any recursive calls. if (delete_fixup_) { return node; } const SchemaNode* ret = nullptr; NodeKind kind = GetNodeKind(node); const SchemaNode* clone = FindClone(node); if (clone != nullptr) { ZETASQL_VLOG(5) << std::string(depth_, ' ') << "Found already visited " << NodeKindString(clone) << " node :" << clone->DebugString(); ret = clone; } else if (kind == kAdded || kind == kEdited || kind == kCloned) { SchemaNode* mutable_node = const_cast<SchemaNode*>(node); // When called with non-original nodes, clone_map_ acts as a 'visited' set. clone_map_[node] = node; ZETASQL_RETURN_IF_ERROR(FixupInternal(node, mutable_node)); ret = node; } else { ZETASQL_RET_CHECK_EQ(kind, kOriginal); ZETASQL_RET_CHECK(!node->is_deleted()); ZETASQL_VLOG(3) << std::string(depth_, ' ') << "Cloning " << NodeKindString(node) << " node: " << node->DebugString(); SchemaNode* mutable_clone = MakeNewClone(node); ZETASQL_RETURN_IF_ERROR(FixupInternal(node, mutable_clone)); ZETASQL_VLOG(3) << std::string(depth_, ' ') << "Finished cloning node: " << node->DebugString(); ret = mutable_clone; } return ret; } absl::Status SchemaGraphEditor::DeleteNode(const SchemaNode* node) { ZETASQL_RET_CHECK(edited_clones_.empty() && added_nodes_.empty()) << "Graph already contains modifications. It must be canonicalized " << "before making further changes."; ZETASQL_RET_CHECK(IsOriginalNode(node)); deleted_nodes_.emplace_back(node); return absl::OkStatus(); } absl::Status SchemaGraphEditor::AddNode( std::unique_ptr<const SchemaNode> node) { ZETASQL_RET_CHECK(deleted_nodes_.empty()) << "Graph already has deleted nodes. It must be canonicalized before " << "making further changes."; added_nodes_.emplace_back(std::move(node)); return absl::OkStatus(); } bool SchemaGraphEditor::IsOriginalNode(const SchemaNode* node) const { for (const auto* schema_node : original_graph_->GetSchemaNodes()) { if (schema_node == node) { return true; } } return false; } absl::StatusOr<std::unique_ptr<SchemaGraph>> SchemaGraphEditor::CanonicalizeGraph() { std::unique_ptr<SchemaGraph> cloned_graph = nullptr; if (!deleted_nodes_.empty()) { ZETASQL_VLOG(2) << "Canonicalizing deletes"; ZETASQL_RETURN_IF_ERROR(CanonicalizeDeletion()); } else { ZETASQL_VLOG(2) << "Canonicalizing edits and additions"; ZETASQL_RETURN_IF_ERROR(CanonicalizeEdits()); } // Set the edited nodes in the context so that nodes can check // if they were edited/cloned inside ValidateUpdate()/Validate(). context_->set_edited_nodes(&edited_clones_); // Create a temporary graph so that we can temporarily set a 'new schema' // on the validation context. At this point, deleted nodes in the old schema // have been marked for deletion (i.e. is_deleted() will return true). They // will be excluded from new_nodes_, but their memory will not yet be freed // so that ValidateUpdate() may be called on them. auto cloned_pool_ptr = cloned_pool_.get(); new_nodes_.erase( std::remove_if(new_nodes_.begin(), new_nodes_.end(), [](const SchemaNode* node) { return node->is_deleted(); }), new_nodes_.end()); cloned_graph = std::make_unique<SchemaGraph>(std::move(new_nodes_), std::move(cloned_pool_)); context_->MakeNewTempSchemaSnapshot(cloned_graph.get()); // Validate the update on cloned nodes which still includes edited and // deleted nodes. for (const auto* orig_node : original_graph_->GetSchemaNodes()) { auto clone = FindClone(orig_node); ZETASQL_RET_CHECK_NE(clone, nullptr); ZETASQL_RETURN_IF_ERROR(clone->ValidateUpdate(orig_node, context_)); } // Finally, erase deleted nodes from the new graph. if (!deleted_nodes_.empty()) { trimmed_ = cloned_pool_ptr->Trim(); } // Do a final pass on the canonicalized set of nodes to perform per-node // validation. for (const auto* node : cloned_graph->GetSchemaNodes()) { ZETASQL_RETURN_IF_ERROR(node->Validate(context_)); } context_->ClearNewTempSchemaSnapshot(); // Check the invariants around the nodes. ZETASQL_RET_CHECK_EQ(cloned_pool_ptr->size(), cloned_graph->GetSchemaNodes().size()) << "\nNodes:\n" << cloned_pool_ptr->DebugString(); ZETASQL_RET_CHECK_EQ(cloned_pool_ptr->size(), num_original_nodes() - trimmed_ + added_nodes_.size()) << "Internal error while cloning schema graph " << "Original: " << num_original_nodes() << "\n" << "Cloned: " << cloned_pool_ptr->size() << "\n" << "Added: " << added_nodes_.size() << "\n" << "Trimmed: " << trimmed_ << "\n" << "Nodes:\n" << cloned_pool_ptr->DebugString(); return cloned_graph; } absl::Status SchemaGraphEditor::CanonicalizeEdits() { if (clone_map_.empty()) { ZETASQL_RETURN_IF_ERROR(InitCloneMap()); } // Run a fixup/cloning pass so that changes from edit nodes in the cloned // graph are propagated to their neighbors. ZETASQL_VLOG(2) << "Fixing clones"; for (const auto* clone : new_nodes_) { ZETASQL_RETURN_IF_ERROR(Fixup(clone)); } // No new clones were added. ZETASQL_RET_CHECK_EQ(new_nodes_.size(), num_original_nodes()); ZETASQL_RET_CHECK_EQ(cloned_pool_->size(), num_original_nodes()); ZETASQL_VLOG(2) << "Fixing added nodes"; for (auto& added_node : added_nodes_) { ZETASQL_RETURN_IF_ERROR(Fixup(added_node.get())); new_nodes_.push_back(added_node.get()); cloned_pool_->Add(std::move(added_node)); } ZETASQL_RET_CHECK_EQ(cloned_pool_->size(), num_original_nodes() + added_nodes_.size()); return absl::OkStatus(); } absl::Status SchemaGraphEditor::Fixup(const SchemaNode* node) { ZETASQL_ASSIGN_OR_RETURN(const auto* cloned_node, Clone(node)); ZETASQL_RET_CHECK_NE(cloned_node, nullptr); return absl::OkStatus(); } absl::Status SchemaGraphEditor::CanonicalizeDeletion() { if (clone_map_.empty()) { ZETASQL_RETURN_IF_ERROR(InitCloneMap()); } // Mark the clone of the node as deleted. for (const auto* node : deleted_nodes_) { const SchemaNode* deleted_clone = FindClone(node); ZETASQL_RET_CHECK_NE(deleted_clone, nullptr); const_cast<SchemaNode*>(deleted_clone)->MarkDeleted(); } // To propagate the deletion information across the graph so that every node // can take action on deletion of its neighbor, we run Fixup as many times as // the number of nodes (the length of the longest non-cyclical path in the // graph or until the number of deletions has converged). delete_fixup_ = true; int deletions = 1; for (int i = 0; i < num_original_nodes(); ++i) { int new_deletions = 0; for (const auto* node : original_graph_->GetSchemaNodes()) { const SchemaNode* clone = FindClone(node); ZETASQL_RET_CHECK_NE(clone, nullptr); ZETASQL_RETURN_IF_ERROR(FixupInternal(clone, const_cast<SchemaNode*>(clone))); if (clone->is_deleted()) { ++new_deletions; } } ZETASQL_RET_CHECK_LE(deletions, new_deletions); // No need to run more fixup passes if the number of updates have converged. if (new_deletions == deletions) { break; } else { deletions = new_deletions; } ZETASQL_VLOG(5) << "Fixup pass " << i + 1 << " deletions: " << new_deletions; } delete_fixup_ = false; return absl::OkStatus(); } } // namespace backend } // namespace emulator } // namespace spanner } // namespace google