frontend/converters/values.cc (528 lines of code) (raw):

// // Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "frontend/converters/values.h" #include <cmath> #include <limits> #include <memory> #include <string> #include <utility> #include <vector> #include "google/protobuf/struct.pb.h" #include "zetasql/public/functions/date_time_util.h" #include "zetasql/public/interval_value.h" #include "zetasql/public/options.pb.h" #include "zetasql/public/type.pb.h" #include "zetasql/public/uuid_value.h" #include "zetasql/public/value.h" #include "absl/base/optimization.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/cord.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "absl/time/time.h" #include "backend/query/search/tokenizer.h" #include "common/constants.h" #include "common/errors.h" #include "third_party/spanner_pg/datatypes/extended/pg_jsonb_type.h" #include "third_party/spanner_pg/datatypes/extended/pg_numeric_type.h" #include "third_party/spanner_pg/datatypes/extended/pg_oid_type.h" #include "third_party/spanner_pg/datatypes/extended/spanner_extended_type.h" #include "third_party/spanner_pg/interface/pg_arena.h" #include "third_party/spanner_pg/interface/pg_arena_factory.h" #include "zetasql/base/status_macros.h" namespace google { namespace spanner { namespace emulator { namespace frontend { namespace { using backend::query::search::TokenListFromBytes; using google::spanner::v1::TypeAnnotationCode; using postgres_translator::spangres::datatypes::CreatePgJsonbValue; using postgres_translator::spangres::datatypes::CreatePgNumericValue; using postgres_translator::spangres::datatypes::CreatePgOidValue; using postgres_translator::spangres::datatypes::GetPgJsonbNormalizedValue; using postgres_translator::spangres::datatypes::GetPgNumericNormalizedValue; using postgres_translator::spangres::datatypes::GetPgOidValue; using postgres_translator::spangres::datatypes::SpannerExtendedType; // Time format used by Cloud Spanner to encode timestamps. constexpr char kRFC3339TimeFormatNoOffset[] = "%E4Y-%m-%dT%H:%M:%E*S"; // Create PG.JSONB value in a valid memory context which is required for calling // PG code. static absl::StatusOr<zetasql::Value> CreatePgJsonbValueWithMemoryContext( absl::string_view jsonb_string) { ZETASQL_ASSIGN_OR_RETURN( std::unique_ptr<postgres_translator::interfaces::PGArena> pg_arena, postgres_translator::interfaces::CreatePGArena(nullptr)); return postgres_translator::spangres::datatypes::CreatePgJsonbValue( jsonb_string); } // Create PG.NUMERIC value in a valid memory context which is required for // calling PG code. static absl::StatusOr<zetasql::Value> CreatePgNumericValueWithMemoryContext( absl::string_view numeric_string) { ZETASQL_ASSIGN_OR_RETURN( std::unique_ptr<postgres_translator::interfaces::PGArena> pg_arena, postgres_translator::interfaces::CreatePGArena(nullptr)); return postgres_translator::spangres::datatypes::CreatePgNumericValue( numeric_string); } static bool IsValidFloat(double value) { double float_lower_limit = static_cast<double>(std::numeric_limits<float>::lowest()); double float_upper_limit = static_cast<double>(std::numeric_limits<float>::max()); bool is_valid_finite_float = std::isfinite(value) && float_lower_limit <= value && value <= float_upper_limit; return is_valid_finite_float || std::isinf(value) || std::isnan(value); } } // namespace absl::StatusOr<zetasql::Value> ValueFromProto( const google::protobuf::Value& value_pb, const zetasql::Type* type) { if (value_pb.kind_case() == google::protobuf::Value::kNullValue) { return zetasql::values::Null(type); } switch (type->kind()) { case zetasql::TypeKind::TYPE_BOOL: { if (value_pb.kind_case() != google::protobuf::Value::kBoolValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } return zetasql::values::Bool(value_pb.bool_value()); } case zetasql::TypeKind::TYPE_INT64: { int64_t num = 0; if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } if (!absl::SimpleAtoi(value_pb.string_value(), &num)) { return error::CouldNotParseStringAsInteger(value_pb.string_value()); } return zetasql::values::Int64(num); } case zetasql::TypeKind::TYPE_FLOAT: { double val = 0; if (value_pb.kind_case() == google::protobuf::Value::kStringValue) { if (value_pb.string_value() == "Infinity") { val = std::numeric_limits<float>::infinity(); } else if (value_pb.string_value() == "-Infinity") { val = -std::numeric_limits<float>::infinity(); } else if (value_pb.string_value() == "NaN") { val = std::numeric_limits<float>::quiet_NaN(); } else { return error::CouldNotParseStringAsFloat(value_pb.string_value()); } } else if (ABSL_PREDICT_TRUE(IsValidFloat(value_pb.number_value()))) { val = value_pb.number_value(); } else { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } return zetasql::values::Float(val); } case zetasql::TypeKind::TYPE_DOUBLE: { double val = 0; if (value_pb.kind_case() == google::protobuf::Value::kStringValue) { if (value_pb.string_value() == "Infinity") { val = std::numeric_limits<double>::infinity(); } else if (value_pb.string_value() == "-Infinity") { val = -std::numeric_limits<double>::infinity(); } else if (value_pb.string_value() == "NaN") { val = std::numeric_limits<double>::quiet_NaN(); } else { return error::CouldNotParseStringAsDouble(value_pb.string_value()); } } else if (value_pb.kind_case() == google::protobuf::Value::kNumberValue) { val = value_pb.number_value(); } else { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } return zetasql::values::Double(val); } case zetasql::TypeKind::TYPE_EXTENDED: { auto type_code = static_cast<const SpannerExtendedType*>(type)->code(); switch (type_code) { case TypeAnnotationCode::PG_JSONB: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } auto pg_jsonb = CreatePgJsonbValueWithMemoryContext(value_pb.string_value()); if (!pg_jsonb.ok()) { return error::CouldNotParseStringAsPgJsonb(value_pb.string_value()); } return *pg_jsonb; } case TypeAnnotationCode::PG_NUMERIC: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } auto pg_numeric = CreatePgNumericValueWithMemoryContext(value_pb.string_value()); if (!pg_numeric.ok()) { return error::CouldNotParseStringAsPgNumeric( value_pb.string_value()); } return *pg_numeric; } case TypeAnnotationCode::PG_OID: { int64_t oid = 0; if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } if (!absl::SimpleAtoi(value_pb.string_value(), &oid)) { return error::CouldNotParseStringAsPgOid(value_pb.string_value()); } return CreatePgOidValue(oid); } default: return error::Internal(absl::StrCat( "Cloud Spanner unsupported type ", type->DebugString(), " passed to ValueFromProto when parsing ", value_pb.DebugString())); } break; } case zetasql::TypeKind::TYPE_TIMESTAMP: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } if (value_pb.string_value() == kCommitTimestampIdentifier) { return zetasql::values::String(value_pb.string_value()); } absl::string_view time_str(value_pb.string_value()); if (!absl::ConsumeSuffix(&time_str, "Z")) { return error::TimestampMustBeInUTCTimeZone(value_pb.string_value()); } absl::Time time; std::string error; if (!absl::ParseTime(kRFC3339TimeFormatNoOffset, time_str, &time, &error)) { return error::CouldNotParseStringAsTimestamp(value_pb.string_value(), error); } if (!zetasql::functions::IsValidTime(time)) { return error::TimestampOutOfRange( absl::FormatTime(time, absl::UTCTimeZone())); } return zetasql::values::Timestamp(time); } case zetasql::TypeKind::TYPE_DATE: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } absl::CivilDay date; if (!absl::ParseCivilTime(value_pb.string_value(), &date)) { return error::CouldNotParseStringAsDate(value_pb.string_value()); } if (date.year() < 1 || date.year() > 9999) { return error::InvalidDate(value_pb.string_value()); } absl::CivilDay epoch_date(1970, 1, 1); return zetasql::values::Date(static_cast<int32_t>(date - epoch_date)); } case zetasql::TypeKind::TYPE_STRING: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } return zetasql::values::String(value_pb.string_value()); } case zetasql::TypeKind::TYPE_BYTES: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } std::string bytes; if (!absl::Base64Unescape(value_pb.string_value(), &bytes)) { return error::CouldNotParseStringAsBytes(value_pb.string_value()); } return zetasql::values::Bytes(bytes); } case zetasql::TypeKind::TYPE_NUMERIC: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } auto status_or_numeric = zetasql::NumericValue::FromStringStrict(value_pb.string_value()); if (!status_or_numeric.ok()) { return error::CouldNotParseStringAsNumeric(value_pb.string_value()); } return zetasql::values::Numeric(status_or_numeric.value()); } case zetasql::TypeKind::TYPE_JSON: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } auto status_or_json = zetasql::JSONValue::ParseJSONString(value_pb.string_value()); if (!status_or_json.ok()) { return error::CouldNotParseStringAsJson(value_pb.string_value()); } return zetasql::values::Json(std::move(status_or_json.value())); } case zetasql::TypeKind::TYPE_TOKENLIST: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } std::string bytes; if (!absl::Base64Unescape(value_pb.string_value(), &bytes)) { return error::CouldNotParseStringAsBytes(value_pb.string_value()); } return TokenListFromBytes(bytes); } case zetasql::TypeKind::TYPE_INTERVAL: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } absl::StatusOr<zetasql::IntervalValue> interval_value = zetasql::IntervalValue::Parse(value_pb.string_value(), /*allow_nanos=*/true); if (!interval_value.ok()) { return error::CouldNotParseStringAsInterval( value_pb.string_value(), interval_value.status().message()); } return zetasql::values::Interval(interval_value.value()); } case zetasql::TypeKind::TYPE_ARRAY: { if (value_pb.kind_case() != google::protobuf::Value::kListValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } std::vector<zetasql::Value> values(value_pb.list_value().values_size()); for (int i = 0; i < value_pb.list_value().values_size(); ++i) { const google::protobuf::Value& element_pb = value_pb.list_value().values(i); ZETASQL_ASSIGN_OR_RETURN( values[i], ValueFromProto(element_pb, type->AsArray()->element_type()), _ << "\nWhen parsing array element #" << i << ": {" << element_pb.DebugString() << "} in " << value_pb.DebugString()); } return zetasql::values::Array(type->AsArray(), values); } case zetasql::TypeKind::TYPE_STRUCT: { if (value_pb.kind_case() != google::protobuf::Value::kListValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } std::vector<zetasql::Value> values(value_pb.list_value().values_size()); for (int i = 0; i < value_pb.list_value().values_size(); ++i) { const google::protobuf::Value& field_pb = value_pb.list_value().values(i); ZETASQL_ASSIGN_OR_RETURN( values[i], ValueFromProto(field_pb, type->AsStruct()->field(i).type), _ << "\nWhen parsing struct element #" << i << ": {" << field_pb.DebugString() << "} in " << value_pb.DebugString()); } return zetasql::values::Struct(type->AsStruct(), values); } case zetasql::TypeKind::TYPE_PROTO: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } std::string bytes; if (!absl::Base64Unescape(value_pb.string_value(), &bytes)) { return error::CouldNotParseStringAsBytes(value_pb.string_value()); } return zetasql::values::Proto(type->AsProto(), absl::Cord(bytes)); } case zetasql::TypeKind::TYPE_ENUM: { if (value_pb.kind_case() != google::protobuf::Value::kStringValue) { return error::ValueProtoTypeMismatch(value_pb.DebugString(), type->DebugString()); } int num = 0; if (!absl::SimpleAtoi(value_pb.string_value(), &num)) { return error::CouldNotParseStringAsInteger(value_pb.string_value()); } return zetasql::values::Enum(type->AsEnum(), num); } default: { return error::Internal(absl::StrCat( "Cloud Spanner unsupported type ", type->DebugString(), " passed to ValueFromProto when parsing ", value_pb.DebugString())); } } } absl::StatusOr<google::protobuf::Value> ValueToProto( const zetasql::Value& value) { if (!value.is_valid()) { return error::Internal( "Uninitialized ZetaSQL value passed to ValueToProto"); } google::protobuf::Value value_pb; if (value.is_null()) { value_pb.set_null_value(google::protobuf::NullValue()); return value_pb; } switch (value.type_kind()) { case zetasql::TypeKind::TYPE_BOOL: { value_pb.set_bool_value(value.bool_value()); break; } case zetasql::TypeKind::TYPE_INT64: { value_pb.set_string_value(absl::StrCat(value.int64_value())); break; } case zetasql::TypeKind::TYPE_FLOAT: { float val = value.float_value(); if (std::isfinite(val)) { value_pb.set_number_value(static_cast<double>(val)); } else if (val == std::numeric_limits<float>::infinity()) { value_pb.set_string_value("Infinity"); } else if (val == -std::numeric_limits<float>::infinity()) { value_pb.set_string_value("-Infinity"); } else if (std::isnan(val)) { value_pb.set_string_value("NaN"); } else { return error::Internal(absl::StrCat("Unsupported float value ", value.float_value(), " passed to ValueToProto")); } break; } case zetasql::TypeKind::TYPE_DOUBLE: { double val = value.double_value(); if (std::isfinite(val)) { value_pb.set_number_value(val); } else if (val == std::numeric_limits<double>::infinity()) { value_pb.set_string_value("Infinity"); } else if (val == -std::numeric_limits<double>::infinity()) { value_pb.set_string_value("-Infinity"); } else if (std::isnan(val)) { value_pb.set_string_value("NaN"); } else { return error::Internal(absl::StrCat("Unsupported double value ", value.double_value(), " passed to ValueToProto")); } break; } case zetasql::TypeKind::TYPE_EXTENDED: { auto type_code = static_cast<const SpannerExtendedType*>(value.type())->code(); switch (type_code) { case TypeAnnotationCode::PG_JSONB: { value_pb.set_string_value( std::string(*GetPgJsonbNormalizedValue(value))); break; } case TypeAnnotationCode::PG_NUMERIC: { value_pb.set_string_value( std::string(*GetPgNumericNormalizedValue(value))); break; } case TypeAnnotationCode::PG_OID: { value_pb.set_string_value(absl::StrCat(*GetPgOidValue(value))); break; } default: return error::Internal( absl::StrCat("Cloud Spanner unsupported ZetaSQL value ", value.DebugString(), " passed to ValueToProto")); } break; } case zetasql::TypeKind::TYPE_TIMESTAMP: { value_pb.set_string_value( absl::StrCat(absl::FormatTime(kRFC3339TimeFormatNoOffset, value.ToTime(), absl::UTCTimeZone()), "Z")); break; } case zetasql::TypeKind::TYPE_DATE: { int32_t days_since_epoch = value.date_value(); absl::CivilDay epoch_date(1970, 1, 1); absl::CivilDay date = epoch_date + days_since_epoch; if (date.year() > 9999 || date.year() < 1) { return error::Internal(absl::StrCat( "Unsupported date value ", value.DebugString(), " passed to ValueToProto. Year must be between 1 and 9999.")); } absl::StrAppendFormat(value_pb.mutable_string_value(), "%04d-%02d-%02d", date.year(), date.month(), date.day()); break; } case zetasql::TypeKind::TYPE_STRING: { value_pb.set_string_value(value.string_value()); break; } case zetasql::TypeKind::TYPE_NUMERIC: { value_pb.set_string_value(value.numeric_value().ToString()); break; } case zetasql::TypeKind::TYPE_JSON: { value_pb.set_string_value(value.json_string()); break; } case zetasql::TypeKind::TYPE_BYTES: { absl::Base64Escape(value.bytes_value(), value_pb.mutable_string_value()); break; } case zetasql::TypeKind::TYPE_ENUM: { value_pb.set_string_value(std::to_string(value.enum_value())); break; } case zetasql::TypeKind::TYPE_PROTO: { std::string strvalue; absl::CopyCordToString(value.ToCord(), &strvalue); absl::Base64Escape(strvalue, value_pb.mutable_string_value()); break; } case zetasql::TYPE_TOKENLIST: { absl::Base64Escape(value.tokenlist_value().GetBytes(), value_pb.mutable_string_value()); break; } case zetasql::TypeKind::TYPE_INTERVAL: { zetasql::IntervalValue interval_value = value.interval_value(); value_pb.set_string_value(interval_value.ToISO8601()); break; } case zetasql::TypeKind::TYPE_ARRAY: { google::protobuf::ListValue* list_value_pb = value_pb.mutable_list_value(); for (int i = 0; i < value.num_elements(); ++i) { ZETASQL_ASSIGN_OR_RETURN(*list_value_pb->add_values(), ValueToProto(value.element(i)), _ << "\nWhen encoding array element #" << i << ": " << value.element(i).DebugString() << " in " << value.DebugString()); } break; } case zetasql::TypeKind::TYPE_STRUCT: { google::protobuf::ListValue* list_value_pb = value_pb.mutable_list_value(); for (int i = 0; i < value.num_fields(); ++i) { ZETASQL_ASSIGN_OR_RETURN( *list_value_pb->add_values(), ValueToProto(value.field(i)), _ << "\nWhen encoding struct element #" << i << ": " << value.field(i).DebugString() << " in " << value.DebugString()); } break; } default: { return error::Internal( absl::StrCat("Cloud Spanner unsupported ZetaSQL value ", value.DebugString(), " passed to ValueToProto")); } } return value_pb; } } // namespace frontend } // namespace emulator } // namespace spanner } // namespace google