mysqlshdk/scripting/polyglot/utils/polyglot_error.cc (325 lines of code) (raw):
/*
* Copyright (c) 2024, 2025, Oracle and/or its affiliates.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2.0,
* as published by the Free Software Foundation.
*
* This program is designed to work with certain software (including
* but not limited to OpenSSL) that is licensed under separate terms,
* as designated in a particular file or component or in included license
* documentation. The authors of MySQL hereby grant you an additional
* permission to link the program and your derivative works with the
* separately licensed software that they have either included with
* the program or referenced in the documentation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License, version 2.0, for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "mysqlshdk/scripting/polyglot/utils/polyglot_error.h"
#include <limits>
#include <numeric>
#include <optional>
#include <regex>
#include <sstream>
#include <vector>
#include "mysqlshdk/scripting/polyglot/utils/polyglot_api_clean.h"
#include "mysqlshdk/libs/utils/utils_string.h"
#include "mysqlshdk/scripting/polyglot/native_wrappers/polyglot_collectable.h"
#include "mysqlshdk/scripting/polyglot/native_wrappers/polyglot_map_wrapper.h"
namespace shcore {
namespace polyglot {
namespace {
inline const char *k_key_type{"type"};
inline const char *k_key_backtrace{"backtrace"};
inline const char *k_key_source{"source"};
inline const char *k_key_line{"line"};
inline const char *k_key_column{"column"};
inline const char *k_key_source_line{"source_line"};
inline const char *k_key_code{"code"};
/**
* The backtrace may come from different sources:
* - The shell itself (internal code, user code, processed scripts)
* - graalvm - non JavaScript specific
* - graalvm - JavaScript specific
*
* This function will filter out the backtrace to only display the relevant
* elements to the user:
*
* - Excludes graalvm frames that are not JavaScript specific
* - Excludes shell internal frames
*/
std::vector<std::string> filter_backtrace(const std::string &backtrace,
std::string *header = nullptr,
int *column = nullptr,
std::string *src_line = nullptr) {
std::vector<std::string> filtered;
assert(!column || src_line);
// A backtrace frame comes in the following format:
// <call>(<source>:<line>:<start-end>)\n
// Where:
// - <call> is the function call at the given frame, examples:
// - module.function
// - function
// - :program (for an instruction given by the user in the shell prompt)
// - <source> is the name of the file containing the <call> or any of the
// shell sources:
// - (internal): for internal code
// - (shell): for code provided by the user in the shell prompt.
// <line>: the 1 based line number containing the code in the given <source>
// <start>: first byte of the sentence that generated the error (using
// <source> as reference) <end>: last byte of the sentence that generated the
// error
//
// This expression matches the sequence capturing the content between
// parenthesis:
// <source>:<line>:<start>
// Which is the only information included in the resulting backtrace
static const std::regex bt_source_location_message(
R"*(<[a-z]+> (.*?)\((.+:\d+):.*\).*)*",
std::regex::icase | std::regex::optimize);
std::vector<std::string> lines;
std::istringstream stream(backtrace);
for (std::string line; std::getline(stream, line);) {
#ifdef _WIN32
if (!line.empty() && '\r' == line.back()) {
line.pop_back();
}
#endif // _WIN32
lines.push_back(std::move(line));
}
auto it = lines.begin();
if (header) {
(*header) = std::move(*it);
++it;
}
std::vector<std::string>::iterator last_filtered_out;
for (; it != lines.end(); ++it) {
std::cmatch match;
if (std::regex_match((*it).c_str(), (*it).c_str() + (*it).size(), match,
bt_source_location_message)) {
// Excludes (internal) frames
auto source = match[2].str();
if (!shcore::str_beginswith(source, "(internal)")) {
filtered.push_back(match[1].str() + "(" + source + ")");
}
} else if (column && !(*it).empty() && (*it).back() == '^') {
// The column information is not available on the backtrace frames,
// however, in some cases, a last line including the ^ symbol is included
// and prepended with the number of spaces that would represent the column
*src_line = *last_filtered_out;
*column = (*it).size() - 1;
} else {
last_filtered_out = it;
}
}
return filtered;
}
std::string get_poly_exception_message(
poly_thread thread, poly_exception exc, int *column = nullptr,
std::string *src_line = nullptr,
std::vector<std::string> *backtrace = nullptr) {
size_t length{0};
std::string message;
if (poly_ok == poly_exception_get_message(thread, exc, nullptr, 0, &length)) {
message.resize(length++);
throw_if_error(poly_exception_get_message, thread, exc, &message[0], length,
&length);
if (message.find('\n') != std::string::npos) {
auto local_backtrace =
filter_backtrace(message, &message, column, src_line);
if (backtrace && !local_backtrace.empty()) {
*backtrace = std::move(local_backtrace);
}
}
}
return message;
}
std::vector<std::string> get_exception_stack_trace(poly_thread thread,
poly_exception exc) {
size_t length{0};
if (poly_ok ==
poly_exception_get_guest_stack_trace(thread, exc, nullptr, 0, &length)) {
std::string message;
message.resize(length++);
if (poly_ok == poly_exception_get_guest_stack_trace(
thread, exc, &message[0], length, &length)) {
return filter_backtrace(message);
}
}
return {};
}
} // namespace
void Polyglot_error::initialize(poly_thread thread) {
const poly_extended_error_info *err_info = nullptr;
poly_get_last_error_info(thread, &err_info);
// This is not expected to happen, but it is always a possibility so we handle
// the case
if (!err_info) {
throw Polyglot_generic_error(
"generic error occurred in the polyglot library");
}
set_message(err_info->error_message);
}
Polyglot_error::Polyglot_error(poly_thread thread, int64_t rc) {
if (rc == poly_pending_exception) {
poly_exception exc;
if (poly_ok == poly_get_last_exception(thread, &exc)) {
initialize(thread, exc);
} else {
throw Polyglot_generic_error(
"Error retrieving last exception in the polyglot library.");
}
} else {
initialize(thread);
}
}
Polyglot_error::Polyglot_error(poly_thread thread, poly_exception exc) {
initialize(thread, exc);
}
void Polyglot_error::initialize(poly_thread thread, poly_exception exc) {
if (bool result{false};
poly_ok == poly_exception_is_resource_exhausted(thread, exc, &result) &&
result) {
m_resource_exhausted = true;
return;
}
if (bool result{false};
poly_ok == poly_exception_is_interrupted(thread, exc, &result) &&
result) {
m_interrupted = true;
return;
}
int column = -1;
std::string src_line;
auto msg =
get_poly_exception_message(thread, exc, &column, &src_line, &m_backtrace);
if (column != -1) {
m_column = column;
m_source_line = src_line;
}
// Retrieve the exception data coming in the error message
parse_and_translate(msg);
// Sometimes, backtrace comes with the error message, in other cases it
// comes apart, we need to handle both cases
if (m_backtrace.empty()) {
m_backtrace = get_exception_stack_trace(thread, exc);
}
// If the initial message did not come with line (and source??) we locate
// them on the first backtrace line
if (!m_line.has_value() && !m_backtrace.empty()) {
parse_and_translate(m_backtrace[0]);
// If the first line provided the location, it gets deleted
if (m_line.has_value()) {
m_backtrace.erase(m_backtrace.begin());
}
}
if (message().empty()) {
set_message(msg);
}
// If an exception object is found, this would be the real initial
// exception, so we override whatever data contained in it
if (bool has_object = false;
poly_ok == poly_exception_has_object(thread, exc, &has_object) &&
has_object) {
poly_value error_obj = nullptr;
if (poly_ok != poly_exception_get_object(thread, exc, &error_obj)) {
throw Polyglot_generic_error(
"Error trying to retrieve an exception object");
}
poly_value error_cause = nullptr;
if (std::string class_name;
is_object(thread, error_obj, &class_name) && class_name == "Error") {
get_member(thread, error_obj, "cause", &error_cause);
}
auto data = make_dict();
if (error_cause &&
Polyglot_map_wrapper::unwrap(thread, error_cause, &data)) {
if (data->has_key(k_key_message)) {
set_message(data->get_string(k_key_message));
}
if (data->has_key(k_key_type)) {
m_type = data->get_string(k_key_type);
}
if (data->has_key(k_key_source)) {
m_source = data->get_string(k_key_source);
}
if (data->has_key(k_key_code)) {
auto code = data->get_int(k_key_code);
if (code > 0) {
m_code = code;
}
}
if (data->has_key(k_key_line)) {
m_line = data->get_uint(k_key_line);
}
if (data->has_key(k_key_column)) {
m_column = data->get_uint(k_key_column);
}
if (data->has_key(k_key_source_line)) {
m_source_line = data->get_string(k_key_source_line);
}
if (data->has_key(k_key_backtrace)) {
m_backtrace.clear();
for (const auto &frame : *data->at(k_key_backtrace).as_array()) {
m_backtrace.push_back(frame.as_string());
}
}
}
}
}
void Polyglot_error::set_message(const std::string &msg) {
Polyglot_generic_error::set_message(msg);
if ("Garbage-collected heap size exceeded." == msg) {
m_resource_exhausted = true;
}
}
/**
* This function is used to normalize as much as possible the exceptions, i.e.
* to try getting as much as possible the separate components for future unified
* formatting
*/
void Polyglot_error::parse_and_translate(const std::string &source) {
// This pattern identifies exceptions in the format of:
// <type>: <details> such as all the exceptions produced internally, i.e.
// - SyntaxError: (shell):2:20 Missing close quote
static const std::regex type_message(
R"*((\w*Error):\s(.*))*", std::regex::icase | std::regex::optimize);
// This pattern is used to parse the details found with the previous pattern
// which come in the following format:
// <source>:<line>:<start>-<end> <message>
// NOTE: The -<end> portion may come or not
static const std::regex source_location_message(
R"*((.*):(\d+):(\d+)(-\d+)?\s(.*))*",
std::regex::icase | std::regex::optimize);
// This pattern is used to parse the details on a backtrace entry which comes
// in the format of <source>:<line>:<offset> (as formatted in
// filter_backtrace)
static const std::regex bt_source_location_message(
R"*(^.*?\((.+):(\d+)\)$)*", std::regex::icase | std::regex::optimize);
std::string msg = {source};
std::cmatch m;
if (std::regex_match(msg.c_str(), msg.c_str() + msg.size(), m,
type_message)) {
std::string type = m[1];
msg = m[2];
if (type == k_syntax_error) {
if (std::regex_match(msg.c_str(), msg.c_str() + msg.size(), m,
source_location_message)) {
m_source = m[1];
m_line = static_cast<size_t>(std::stoi(m[2]));
msg = m[5];
}
}
// Set the values if not already defined, if already defined they are indeed
// more accurate
if (!m_type.has_value()) {
m_type = std::move(type);
}
if (message().empty()) {
set_message(msg);
}
} else if (std::regex_match(msg.c_str(), msg.c_str() + msg.size(), m,
bt_source_location_message)) {
m_source = m[1];
m_line = static_cast<size_t>(std::stoi(m[2]));
}
}
bool Polyglot_error::is_syntax_error() const {
return m_type.has_value() && (*m_type) == k_syntax_error;
}
std::string Polyglot_error::format(bool include_location) const {
std::string error_message;
if (!message().empty()) {
error_message += message();
if (m_type.has_value() && m_code.has_value()) {
error_message += " (" + *m_type + " " + std::to_string(*m_code) + ")\n";
} else if (m_type.has_value()) {
error_message += " (" + *m_type + ")\n";
} else if (m_code.has_value()) {
error_message += " (" + std::to_string(*m_code) + ") ";
}
if (!include_location || (m_backtrace.size() > 1)) {
std::string location;
if (m_source.has_value()) {
location.append(" at ").append(*m_source);
}
if (m_line.has_value()) {
location += ':';
location.append(std::to_string(*m_line));
std::string line_location;
if (m_column.has_value()) {
location += ':';
location.append(std::to_string(*m_column));
if (m_source_line.has_value()) {
line_location = "\n in " + *m_source_line + '\n' +
std::string(*m_column + 4, ' ') + '^';
}
}
if (!m_backtrace.empty()) {
location.append(std::accumulate(
m_backtrace.begin(), m_backtrace.end(), std::string(""),
[](const std::string &a, const std::string &b) {
return a + "\n at " + b;
}));
}
if (!line_location.empty()) {
location.append(line_location);
}
}
if (!location.empty()) error_message += location;
}
if (error_message.back() != '\n') {
error_message += '\n';
}
}
return error_message;
}
shcore::Dictionary_t Polyglot_error::data() const {
auto ret_val = shcore::make_dict();
ret_val->set(k_key_message, shcore::Value(message()));
if (!m_backtrace.empty()) {
auto bt = shcore::make_array(m_backtrace);
ret_val->set(k_key_backtrace, shcore::Value(std::move(bt)));
}
if (m_code.has_value()) {
ret_val->set(k_key_code, shcore::Value(*m_code));
}
if (m_column.has_value()) {
ret_val->set(k_key_column, shcore::Value(static_cast<uint64_t>(*m_column)));
}
if (m_source_line.has_value()) {
ret_val->set(k_key_source_line, shcore::Value(*m_source_line));
}
if (m_line.has_value()) {
ret_val->set(k_key_line, shcore::Value(static_cast<uint64_t>(*m_line)));
}
if (m_source.has_value()) {
ret_val->set(k_key_source, shcore::Value(*m_source));
}
if (m_type.has_value()) {
ret_val->set(k_key_type, shcore::Value(*m_type));
}
return ret_val;
}
} // namespace polyglot
} // namespace shcore