sql_utils/public/error_helpers.cc (243 lines of code) (raw):

/* * Copyright 2023 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "sql_utils/public/error_helpers.h" #include <ctype.h> #include <algorithm> #include <string> #include "absl/status/statusor.h" #include "absl/strings/cord.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/substitute.h" #include "sql_utils/base/logging.h" #include "sql_utils/base/ret_check.h" #include "sql_utils/base/status.h" #include "sql_utils/base/status_builder.h" #include "sql_utils/common/status_payload_utils.h" #include "sql_utils/common/utf_util.h" #include "sql_utils/proto/internal_error_location.pb.h" #include "sql_utils/public/error_location.pb.h" #include "sql_utils/public/parse_location.h" namespace bigquery_ml_utils { // Format an ErrorLocation using <format>, which is a string as in // strings::Substitute, with $0 being file, $1 being line, and $2 being column. // ErrorSource information is ignored (if present). static std::string FormatErrorLocation(const ErrorLocation& location, const absl::string_view format) { return absl::Substitute(format, location.filename(), location.line(), location.column()); } std::string FormatErrorLocation(const ErrorLocation& location) { return (location.has_filename() ? FormatErrorLocation(location, "$0:$1:$2") : FormatErrorLocation(location, "$1:$2")); } // Internal helper function to format the ErrorLocation string with format // [at file:line:column] static std::string FormatErrorLocationAtFileLineColumn( const ErrorLocation& location) { return absl::StrCat("[at ", FormatErrorLocation(location), "]"); } std::string FormatErrorLocation(const ErrorLocation& location, absl::string_view input_text, ErrorMessageMode mode) { std::string error_location_string = FormatErrorLocationAtFileLineColumn(location); if (mode == ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET) { absl::StrAppend(&error_location_string, "\n", GetErrorStringWithCaret(input_text, location)); } if (!location.error_source().empty()) { const std::string error_source_separator = (mode == ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET ? "\n" : "; "); std::string error_source_string; for (const ErrorSource& error_source : location.error_source()) { const std::string source_message = FormatErrorSource(error_source, mode); error_source_string = absl::StrCat( source_message, (!error_source_string.empty() ? error_source_separator : ""), error_source_string); } absl::StrAppend( &error_location_string, (!error_source_string.empty() ? error_source_separator : ""), error_source_string); } return error_location_string; } std::string FormatErrorSource(const ErrorSource& error_source, ErrorMessageMode mode) { if (mode == ErrorMessageMode::ERROR_MESSAGE_WITH_PAYLOAD) { return ""; } std::string message = error_source.error_message(); if (!message.empty() && error_source.has_error_location()) { // Note - if error_source.error_location has an ErrorSource, it is ignored. absl::StrAppend( &message, " ", FormatErrorLocationAtFileLineColumn(error_source.error_location())); } if (mode == ErrorMessageMode::ERROR_MESSAGE_MULTI_LINE_WITH_CARET && error_source.has_error_message_caret_string()) { absl::StrAppend(&message, (!message.empty() ? "\n" : ""), error_source.error_message_caret_string()); } return message; } std::string FormatError(const absl::Status& status) { if (status.code() != absl::StatusCode::kInvalidArgument) { return internal::StatusToString(status); } std::string message = std::string(status.message()); if (internal::HasPayload(status)) { std::string payload_string; std::string location_string; if (internal::HasPayloadWithType<ErrorLocation>(status)) { // Perform special formatting for location data. ErrorLocation location = internal::GetPayload<ErrorLocation>(status); location_string = absl::StrCat( " ", FormatErrorLocation(location, /*input_text=*/"", ErrorMessageMode::ERROR_MESSAGE_ONE_LINE)); absl::Status stripped_status = status; internal::ErasePayloadTyped<ErrorLocation>(&stripped_status); payload_string = internal::PayloadToString(stripped_status); } else { payload_string = internal::PayloadToString(status); } // Error messages with a caret look strange if the payload immediately // follows the caret, so put it on separate line in that case, being careful // to avoid turning a single-line error message into a multi-line one. absl::string_view payload_separator; if (!payload_string.empty()) { const bool multiline = absl::StrContains(message, '\n'); payload_separator = multiline ? "\n" : " "; } absl::StrAppend(&message, location_string, payload_separator, payload_string); } return message; } bool HasErrorLocation(const absl::Status& status) { return internal::HasPayloadWithType<ErrorLocation>(status); } bool GetErrorLocation(const absl::Status& status, ErrorLocation* location) { if (HasErrorLocation(status)) { *location = internal::GetPayload<ErrorLocation>(status); return true; } return false; } void ClearErrorLocation(absl::Status* status) { return internal::ErasePayloadTyped<ErrorLocation>(status); } static bool IsWordChar(char c) { return isalnum(c) || c == '_'; } // Return true if <column> (0-based) in <str> starts a word. static bool IsWordStart(const std::string& str, int column) { SQL_DCHECK_LT(column, str.size()); if (column == 0 || column >= str.size()) return true; return !IsWordChar(str[column - 1]) && IsWordChar(str[column]); } // Constructs and returns a truncated input string based on <input>, <location>, // and <max_width_in>. Also returns the error column. static void GetTruncatedInputStringInfo(absl::string_view input, const ErrorLocation& location, int max_width_in, std::string* truncated_input, int* error_column) { // We don't allow a max_width below a certain size. constexpr int kMinimumMaxWidth = 30; // If the error line is longer than max_width, give a substring of up // to max_width characters, with the caret near the middle of it. // We need some minimum width. const int max_width = std::max(max_width_in, kMinimumMaxWidth); SQL_DCHECK_GT(location.line(), 0); SQL_DCHECK_GT(location.column(), 0); ParseLocationTranslator translator(input); absl::StatusOr<absl::string_view> line_text = translator.GetLineText(location.line()); SQL_DCHECK_OK(line_text.status()); *truncated_input = translator.ExpandTabs(line_text.value_or("")); // location.column() may be one off the end of the line for EOF errors. SQL_DCHECK_LE(location.column(), truncated_input->size() + 1); // error_column is 0-based. *error_column = std::max(1, std::min(static_cast<int>(truncated_input->size() + 1), location.column())) - 1; if (truncated_input->size() > max_width) { const int one_half = max_width / 2; const int one_third = max_width / 3; // If the error is near the start, just use a prefix of the string. if (*error_column > max_width - one_third) { // Otherwise, try to find a word boundary to start the string on // that puts the caret in the middle third of the output line. int found_start = -1; for (int start_column = std::max(0, *error_column - 2 * one_third); start_column < std::max(0, *error_column - one_third); ++start_column) { if (IsWordStart(*truncated_input, start_column)) { found_start = start_column; break; } } if (found_start == -1) { // Didn't find a good separator. Just split in the middle. found_start = std::max(*error_column - one_half, 0); } // Add ... prefix if necessary. if (found_start < 3) { found_start = 0; } else { *truncated_input = absl::StrCat("...", truncated_input->substr(found_start)); *error_column -= found_start - 3; } } *truncated_input = PrettyTruncateUTF8(*truncated_input, max_width); SQL_DCHECK_LE(*error_column, truncated_input->size()); } } // Helper function to return an error string from an error line and column. static std::string GetErrorStringFromErrorLineAndColumn( const std::string& error_line, const int error_column) { return absl::StrFormat("%s\n%*s^", error_line, error_column, ""); } std::string GetErrorStringWithCaret(absl::string_view input, const ErrorLocation& location, int max_width_in) { std::string error_line; int error_column; GetTruncatedInputStringInfo(input, location, max_width_in, &error_line, &error_column); return GetErrorStringFromErrorLineAndColumn(error_line, error_column); } // Updates the <status> error string based on <input_text> and <mode>. // See header comment for MaybeUpdateErrorFromPayload for details. static absl::Status UpdateErrorFromPayload(const absl::Status& status, absl::string_view input_text, ErrorMessageMode mode) { if (mode == ErrorMessageMode::ERROR_MESSAGE_WITH_PAYLOAD) { // In this case, we do not update the error message and the payload // remains on the Status. return status; } SQL_RET_CHECK(!internal::HasPayloadWithType<InternalErrorLocation>(status)) << "Status must not have InternalErrorLocation: " << internal::StatusToString(status); if (!status.ok()) { ErrorLocation location; if (GetErrorLocation(status, &location)) { std::string new_message = absl::StrCat(status.message(), " ", FormatErrorLocation(location, input_text, mode)); // Update the message. Leave everything else as is. absl::Status new_status = absl::Status(status.code(), new_message); // Copy payloads status.ForEachPayload([&new_status]( absl::string_view type_url, const absl::Cord& payload) { new_status.SetPayload(type_url, payload);}); ClearErrorLocation(&new_status); return new_status; } } return status; } absl::Status MaybeUpdateErrorFromPayload(ErrorMessageMode mode, absl::string_view input_text, const absl::Status& status) { SQL_RET_CHECK(!internal::HasPayloadWithType<InternalErrorLocation>(status)) << "Status must not have InternalErrorLocation: " << internal::StatusToString(status); if (status.ok() || mode == ErrorMessageMode::ERROR_MESSAGE_WITH_PAYLOAD) { // We do not update the error string with error payload, which // could include location and/or nested errors. We leave any payload // attached to the Status. return status; } return UpdateErrorFromPayload(status, input_text, mode); } absl::Status UpdateErrorLocationPayloadWithFilenameIfNotPresent( const absl::Status& status, const std::string& filename) { ErrorLocation error_location; if (filename.empty() || !GetErrorLocation(status, &error_location)) { return status; } if (error_location.has_filename()) { return status; } // The error location does not have a filename, so use the specified // 'module_filename'. error_location.set_filename(filename); absl::Status copy = status; ClearErrorLocation(&copy); internal::AttachPayload(&copy, error_location); return copy; } } // namespace bigquery_ml_utils