// backend/schema/catalog/column.h
//
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef THIRD_PARTY_CLOUD_SPANNER_EMULATOR_BACKEND_SCHEMA_CATALOG_COLUMN_H_
#define THIRD_PARTY_CLOUD_SPANNER_EMULATOR_BACKEND_SCHEMA_CATALOG_COLUMN_H_
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "zetasql/public/type.h"
#include "absl/log/check.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/substitute.h"
#include "absl/types/span.h"
#include "backend/common/case.h"
#include "backend/common/ids.h"
#include "backend/schema/catalog/change_stream.h"
#include "backend/schema/catalog/locality_group.h"
#include "backend/schema/catalog/sequence.h"
#include "backend/schema/catalog/udf.h"
#include "backend/schema/graph/schema_node.h"
#include "common/limits.h"
namespace google {
namespace spanner {
namespace emulator {
namespace backend {
// Forward declarations. Within this header these types are only referred to
// through pointers (`const Table*`, `const ChangeStream*`).
// NOTE(review): change_stream.h is also included above, so the ChangeStream
// declaration may be redundant -- confirm before removing.
class Table;
class ChangeStream;
// Column represents a column in a Table.
class Column : public SchemaNode {
public:
// Returns the name of the column.
const std::string& Name() const { return name_; }
// Qualified name of the column.
std::string FullName() const;
// Returns the type of the column.
const zetasql::Type* GetType() const { return type_; }
// Returns a unique id of this column.
const ColumnID id() const { return id_; }
// Returns true if this column allows commit timestamp to be atomically stored
// on Commit.
bool allows_commit_timestamp() const {
return allows_commit_timestamp_.has_value() &&
allows_commit_timestamp_.value();
}
// Returns true if the allow_commit_timestamp option was set explicitly on
// this column.
bool has_allows_commit_timestamp() const {
return allows_commit_timestamp_.has_value();
}
// Returns true if this column allows null values.
bool is_nullable() const { return is_nullable_; }
// The length of a STRING or BYTES column, as declared in the schema. A
// nullopt value represents the max allowed length for the column according to
// https://cloud.google.com/spanner/docs/data-definition-language#scalars/
std::optional<int64_t> declared_max_length() const {
return declared_max_length_;
}
// Return true if vector length was set explicitly on the column.
bool has_vector_length() const { return vector_length_.has_value(); }
// Return the vector length of the array column.
// A nullopt value means the vector length is not explicitly set.
std::optional<uint32_t> vector_length() const { return vector_length_; }
// Returns the effective maximum length of values allowed in this
// column, based on the type. Applicable only to STRING and BYTES types.
int64_t effective_max_length() const {
if (type_->IsString() ||
(type_->IsArray() && type_->AsArray()->element_type()->IsString())) {
return declared_max_length_.value_or(limits::kMaxStringColumnLength);
}
if (type_->IsBytes() ||
(type_->IsArray() && type_->AsArray()->element_type()->IsBytes())) {
return declared_max_length_.value_or(limits::kMaxBytesColumnLength);
}
if (type_->IsProto() ||
(type_->IsArray() && type_->AsArray()->element_type()->IsProto())) {
return declared_max_length_.value_or(limits::kMaxBytesColumnLength);
}
return 0;
}
// Returns whether the column is a generated column.
bool is_generated() const {
return expression_.has_value() && !has_default_value_;
}
// Returns whether the column is an identity column.
bool is_identity_column() const { return is_identity_column_; }
// Returns whether the column is a placement key column.
bool is_placement_key() const { return is_placement_key_; }
// Returns if a generated column is stored.
// Valid only if is_generated() is true.
bool is_stored() const { return is_stored_; }
// Returns whether the column has a default value.
bool has_default_value() const {
return expression_.has_value() && has_default_value_;
}
// Returns the expression if the column is a generated column or if it has
// a default value.
const std::optional<std::string>& expression() const { return expression_; }
// Returns the original dialect expression if the column is a generated column
// or if it has a default value.
const std::optional<std::string>& original_expression() const {
return original_expression_;
}
absl::Span<const Column* const> dependent_columns() const {
return dependent_columns_;
}
const std::vector<const SchemaNode*>& sequences_used() const {
return sequences_used_;
}
absl::Span<const SchemaNode* const> udf_dependencies() const {
return udf_dependencies_;
}
// The locality group this column belongs to.
const LocalityGroup* locality_group() const { return locality_group_; }
// Returns the source column.
const Column* source_column() const { return source_column_; }
// Returns the table containing the column.
const Table* table() const { return table_; }
// Returns the list of all change streams on this column.
absl::Span<const ChangeStream* const> change_streams() const {
return change_streams_;
}
// Returns the list of all change streams explicitly tracking this column by
// the column name.
absl::Span<const ChangeStream* const>
change_streams_explicitly_tracking_column() const {
return change_streams_explicitly_tracking_column_;
}
bool is_trackable_by_change_stream() const { return !is_generated(); }
// Finds a change stream on the column by its name. Name comparison is
// case-insensitive.
const ChangeStream* FindChangeStream(
const std::string& change_stream_name) const;
bool hidden() const { return hidden_; }
// SchemaNode interface implementation.
// ------------------------------------
std::optional<SchemaNameInfo> GetSchemaNameInfo() const override {
return SchemaNameInfo{.name = name_, .kind = "Column"};
}
absl::Status Validate(SchemaValidationContext* context) const override;
absl::Status ValidateUpdate(const SchemaNode* orig,
SchemaValidationContext* context) const override;
std::string DebugString() const override {
return absl::Substitute("C:$0[$1]($2)$3", Name(), id_, type_->DebugString(),
is_deleted() ? "[DELETED]" : "");
}
// Populates dependent_columns_.
void PopulateDependentColumns();
// TODO : Make external friend classes instead of nested classes.
class Builder;
class Editor;
private:
friend class ColumnValidator;
using ValidationFn =
std::function<absl::Status(const Column*, SchemaValidationContext*)>;
using UpdateValidationFn = std::function<absl::Status(
const Column*, const Column*, SchemaValidationContext*)>;
// Constructors are private and only friend classes are able to build /
// modify.
Column(const ValidationFn& validate,
const UpdateValidationFn& validate_update)
: validate_(validate), validate_update_(validate_update) {}
Column(const Column&) = default;
std::unique_ptr<SchemaNode> ShallowClone() const override {
return absl::WrapUnique(new Column(*this));
}
absl::Status DeepClone(SchemaGraphEditor* editor,
const SchemaNode* orig) override;
// Validation delegates.
const ValidationFn validate_;
const UpdateValidationFn validate_update_;
// Name of this column.
std::string name_;
// Unique ID of this column.
ColumnID id_;
// Type of this column.
const zetasql::Type* type_;
// The source column from the indexed table that this column is derived from.
// Only used by index columns.
const Column* source_column_ = nullptr;
// Whether null values are allowed.
bool is_nullable_ = true;
// Whether the column is a placement key column.
bool is_placement_key_ = false;
// A tri state boolean indicating whether commit timestamp can be stored.
// If allows_commit_timestamp is not set, it represents that the option isn't
// set for the columns in the schema serialized back to the user.
std::optional<bool> allows_commit_timestamp_ = std::nullopt;
// Length for STRING and BYTES. If unset, indicates the max allowed length.
std::optional<int64_t> declared_max_length_ = std::nullopt;
// Expression for generated column or default value.
std::optional<std::string> expression_ = std::nullopt;
// Enforce the size of a search vector. Currently it can only apply on ARRAY
// column "Embeddings ARRAY<FLOAT64>(vector_length=>128)".
std::optional<uint32_t> vector_length_ = std::nullopt;
// Original dialect expression for generated column or default value.
std::optional<std::string> original_expression_ = std::nullopt;
// Whether the column has a default value.
bool has_default_value_ = false;
// Whether the column is an identity column.
bool is_identity_column_ = false;
// For a generated column, this is the list of columns that this column
// references in its expression.
std::vector<std::string> dependent_column_names_;
std::vector<const Column*> dependent_columns_;
// List of sequences used by this column in its expression.
std::vector<const SchemaNode*> sequences_used_;
// List of UDFs used by this column in its expression.
std::vector<const SchemaNode*> udf_dependencies_;
// If a generated column is stored. Valid only if is_generated() is true.
bool is_stored_ = false;
// The table containing the column.
const Table* table_ = nullptr;
// List of change streams referring to this column. These are owned by the
// Schema, not by the Column.
std::vector<const ChangeStream*> change_streams_;
// List of change streams explicitly tracking this column by the column name.
// These are owned by the Schema, not by the column.
std::vector<const ChangeStream*> change_streams_explicitly_tracking_column_;
// Indicate if the column is hidden. If true, the column will be excluded from
// star expansion (SELECT *).
bool hidden_ = false;
// The locality group this column belongs to.
const LocalityGroup* locality_group_ = nullptr;
};
// KeyColumn is a single column that is part of the key of a table or an index.
//
// Constructors are private: instances are created through the nested Builder
// class or by friend classes.
class KeyColumn : public SchemaNode {
 public:
  // Returns the column that this key column is based on.
  const Column* column() const { return column_; }

  // Returns true if this key column is sorted in descending order.
  bool is_descending() const { return is_descending_; }

  // Returns true if NULLs are sorted last in this key column.
  bool is_nulls_last() const { return is_nulls_last_; }

  // SchemaNode interface implementation.
  // ------------------------------------

  absl::Status Validate(SchemaValidationContext* context) const override;

  absl::Status ValidateUpdate(const SchemaNode* orig,
                              SchemaValidationContext* context) const override;

  // Formats the key column as "PK:<column debug string>(desc:..., nulls_last:
  // ...)". Requires column_ to have been set, since it is dereferenced here.
  std::string DebugString() const override {
    return absl::Substitute("PK:$0(desc:$1, nulls_last:$2)",
                            column_->DebugString(), is_descending_,
                            is_nulls_last_);
  }

  class Builder;

 private:
  friend class KeyColumnValidator;

  using ValidationFn =
      std::function<absl::Status(const KeyColumn*, SchemaValidationContext*)>;
  using UpdateValidationFn = std::function<absl::Status(
      const KeyColumn*, const KeyColumn*, SchemaValidationContext*)>;

  // Stores copies of the supplied validation callbacks.
  KeyColumn(const ValidationFn& validate,
            const UpdateValidationFn& validate_update)
      : validate_(validate), validate_update_(validate_update) {}
  KeyColumn(const KeyColumn&) = default;

  // Returns a member-wise copy of this key column, made with the private copy
  // constructor.
  std::unique_ptr<SchemaNode> ShallowClone() const override {
    return absl::WrapUnique(new KeyColumn(*this));
  }

  absl::Status DeepClone(SchemaGraphEditor* editor,
                         const SchemaNode* orig) override;

  // Validation delegates.
  const ValidationFn validate_;
  const UpdateValidationFn validate_update_;

  // The column that this KeyColumn is based on. Defaults to nullptr so that a
  // key column whose column was never set is detectable rather than holding
  // an indeterminate pointer.
  const Column* column_ = nullptr;

  // Whether this key column is sorted in descending order.
  bool is_descending_ = false;

  // Whether NULLs are sorted last in this key column.
  bool is_nulls_last_ = false;
};
} // namespace backend
} // namespace emulator
} // namespace spanner
} // namespace google
#endif // THIRD_PARTY_CLOUD_SPANNER_EMULATOR_BACKEND_SCHEMA_CATALOG_COLUMN_H_