in mysqlshdk/libs/utils/utils_mysql_parsing.cc [223:598]
/**
 * Locates the next statement (or whole-line comment) in the current buffer.
 *
 * Scanning starts at m_ptr and proceeds one line at a time up to m_end, so
 * that line numbers can be tracked. A context stack (context()/push()/pop())
 * records whether the scanner is currently inside a statement, a quoted
 * string, a quoted identifier or a comment.
 *
 * Besides returning ranges, the scan may edit the buffer in place: text
 * consumed by the command callback is removed with memmove(), m_end is moved
 * back accordingly and the number of removed bytes is added to
 * m_shrinked_bytes.
 *
 * @param out_range receives {offset of the range from m_begin, length, line
 *        number}. When false is returned it describes the unfinished
 *        remainder of the buffer instead.
 * @param out_delim receives the text that terminated the statement: the
 *        current delimiter, the \command that the command callback reported
 *        as a delimiter, or "" for comment lines and for a trailing statement
 *        without delimiter in the last chunk. Not written when false is
 *        returned.
 *
 * @return true if a range was produced; false otherwise, i.e. the statement is
 *         not complete in the data seen so far or nothing is left. When the
 *         end of the buffer is reached in the last chunk with nothing left,
 *         m_eof is set.
 */
bool Sql_splitter::next_range(Sql_splitter::Range *out_range,
                              std::string *out_delim) {
  // Reports everything from bos to the end of the buffer as unfinished and
  // rewinds m_ptr to bos, so the statement is scanned again from its start.
  auto unfinished_stmt = [this](char *bos, Range *result_range) {
    Range range{static_cast<size_t>(bos - m_begin),
                static_cast<size_t>(m_end - bos), m_current_line};
    m_ptr = bos;
    *result_range = range;
    return false;
  };

  char *bos;  // beginning of statement
  bos = m_ptr;
  // skip leading blanks and empty lines, counting the lines skipped
  for (;;) {
    bos = skip_blanks(bos, m_end);
    if (*bos == '\n') {
      m_current_line++;
      ++bos;
    } else {
      break;
    }
  }

  // lines spanned by the statement being scanned; added to m_current_line
  // only once the statement is returned
  size_t line_count = 0;
  char *next_bol = bos;  // beginning of next line
  while (next_bol < m_end) {
    char *bol = next_bol;
    bool has_complete_line;
    bool command = false;
    bool last_line_was_delimiter = false;

    // process input per line so we can count line numbers
    char *eol = static_cast<char *>(memchr(bol, '\n', m_end - bol));
    if (!eol) {
      // no newline in the remaining data: the line is only complete if no
      // more data is going to follow
      eol = m_end;
      has_complete_line = m_last_chunk;
    } else {
      ++eol;  // skip the newline
      has_complete_line = true;
    }
    next_bol = eol;

    // find the end of the statement
    char *p = bol;

    // Hands the text [bos, p) to the command callback. If the callback
    // consumes it (skip != 0), the consumed bytes (plus the delimiter, when
    // delimited) are removed from the buffer and every pointer into the
    // buffer is shifted back by the same amount.
    const auto run_command = [&](bool delimited) {
      command = false;

      auto [skip, d] =
          m_cmd_callback(std::string_view{bos, static_cast<size_t>(p - bos)},
                         true, m_current_line);
      if (skip == 0) return false;

      if (delimited) skip += m_delimiter.size();

      p = bos + skip;
      // + 1 so that the byte at m_end is moved along with the data
      memmove(bos, p, m_end - p + 1);
      m_shrinked_bytes += skip;
      p = bos;
      eol -= skip;
      next_bol -= skip;
      m_end -= skip;
      pop();
      return true;
    };

    while (p && p < eol) {
      auto ctx = context();
      if (ctx == Context::kNone || ctx == Context::kStatement ||
          ctx == Context::kCommentConditional) {
        if (ctx == Context::kCommentConditional) {
          // The terminator is two bytes long, so two remaining bytes are
          // enough (the previous "> 2" missed a "*/" located at the very end
          // of the available data).
          if ((eol - p) >= 2 && *p == '*' && *(p + 1) == '/') {
            pop();
            p += 2;
            continue;
          }
        } else {
          // check for the current delimiter
          //
          // The (pos, count, const char *, count) overload of compare() is
          // used on purpose: passing the raw pointer to the overload taking
          // (str, pos2, count2) would first build a temporary std::string
          // out of the whole rest of the buffer.
          if (*p == m_delimiter[0] &&
              (p + m_delimiter.size() <= eol &&
               m_delimiter.compare(0, m_delimiter.size(), p,
                                   m_delimiter.size()) == 0)) {
            if (command && run_command(true)) continue;

            pop();

            Range range{static_cast<size_t>(bos - m_begin),
                        static_cast<size_t>(p - bos), m_current_line};
            m_ptr = p + m_delimiter.size();
            *out_range = range;
            *out_delim = m_delimiter;
            m_current_line += line_count;
            return true;
          }
        }

        switch (*p) {
          case '\\':  // \x
          {
            if (m_no_backslash_escapes) goto other;

            // commands within commands are not supported
            if (command) goto other;

            // a lone backslash at the end of the line
            if (eol - p == 1) return unfinished_stmt(bos, out_range);

            // Callback returns number of chars to skip and whether the
            // statement should be terminated.
            auto [skip, delim] = m_cmd_callback(
                std::string_view{p, static_cast<size_t>(eol - p)}, p == bos,
                m_current_line);
            if (skip == 0 && (p == bos && !has_complete_line)) {
              // if there's a \command at the beginning of the line and
              // skip is 0, then it means we need the full line because it's a
              // standalone shell \command
              return unfinished_stmt(bos, out_range);
            }
            if (delim) {
              // if this cmd acts as a delimiter (like \g or \G, return the
              // statement)
              *out_delim = std::string(p, skip);
              m_ptr = p + skip;
              *out_range = Range{static_cast<size_t>(bos - m_begin),
                                 static_cast<size_t>(p - bos), m_current_line};
              pop();
              m_current_line += line_count;
              return true;
            } else {
              // pack together the rest of the buffer removing the command;
              // as in run_command(), the byte at m_end is moved too and
              // next_bol is shifted together with eol, otherwise the scan of
              // the following line would start skip bytes past its beginning
              memmove(p, p + skip, (m_end - p) - skip + 1);
              m_shrinked_bytes += skip;
              eol -= skip;
              next_bol -= skip;
              m_end -= skip;
            }
            break;
          }

          case '\'':  // 'str'
            ++p;      // skip the opening '
            push(Context::kSQuoteString);
            break;

          case '"':  // "str"
            ++p;     // skip the opening "
            if (m_ansi_quotes) {
              push(Context::kDQuoteIdentifier);
            } else {
              push(Context::kDQuoteString);
            }
            break;

          case '`':  // `ident`
            ++p;
            push(Context::kBQuoteIdentifier);
            break;

          case '/':  // /* ... */
            if ((m_end - p) >= 3) {
              if (*(p + 1) == '*') {
                if (*(p + 2) == '+') {
                  push(Context::kCommentHint);
                  p += 3;
                } else if (*(p + 2) == '!') {
                  push(Context::kCommentConditional);
                  p += 3;
                } else {
                  push(Context::kComment);
                  p += 2;
                }
                break;
              }
            } else {
              // too few bytes left to tell whether a comment starts here
              if (!has_complete_line) return unfinished_stmt(bos, out_range);
            }
            goto other;

          case 'd':  // delimiter
          case 'D':
            // Possible start of the keyword DELIMITER. Must be the 1st keyword
            // of a statement.
            if (context() == Context::kNone &&
                shcore::str_ibeginswith(
                    {p, static_cast<std::size_t>(m_end - p)}, k_delimiter)) {
              if (has_complete_line) {
                // handle delimiter change directly here
                auto np = skip_blanks(p + k_delimiter.size(), eol);
                if (np == eol || np == p + k_delimiter.size()) {
                  // nothing after the keyword, or no blank separating the
                  // keyword from what follows
                  set_delimiter("");
                } else {
                  p = np;
                  char *end = skip_not_blanks(p, eol);
                  set_delimiter(std::string(p, end - p));
                }
                bos = p = next_bol;
                pop();
                last_line_was_delimiter = true;
                // delimiter is like a full statement, so increment line
                // and start next stmt from scratch
                m_current_line++;
                break;
              } else {
                // newline is missing
                return unfinished_stmt(bos, out_range);
              }
            }
            goto other;

          case '#':  // # ...
            if (has_complete_line) {
              // if the whole line is a comment return it
              if (context() == Context::kNone) {
                // number of trailing line-terminator bytes left out of the
                // returned range
                // NOTE(review): when the line ends in '\n', eol points one
                // past it, so *(eol - 1) is the '\n' itself and this '\r'
                // test can only match on a final line without newline --
                // confirm whether eol - 2 was intended.
                int nl = 1;
                if (*(eol - 1) == '\r') nl++;
                Range range{static_cast<size_t>(bos - m_begin),
                            static_cast<size_t>(eol - bos - nl),
                            m_current_line};
                m_ptr = next_bol;
                *out_range = range;
                *out_delim = "";
                line_count++;
                m_current_line += line_count;
                return true;
              }
              // comment at the end of a statement line: skip to the next line
              p = next_bol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
            break;

          case '-':  // -- ...
            if ((m_end - p) > 2 && *(p + 1) == '-' && is_any_blank(*(p + 2))) {
              if (has_complete_line) {
                // if the whole line is a comment return it
                if (context() == Context::kNone) {
                  // NOTE(review): same '\r' check as in the '#' case above,
                  // with the same open question about eol - 1 vs. eol - 2.
                  int nl = 1;
                  if (*(eol - 1) == '\r') nl++;
                  Range range{static_cast<size_t>(bos - m_begin),
                              static_cast<size_t>(eol - bos - nl),
                              m_current_line};
                  m_ptr = next_bol;
                  *out_range = range;
                  *out_delim = "";
                  line_count++;
                  m_current_line += line_count;
                  return true;
                }
                // comment at the end of a statement line: skip to the next
                // line
                p = next_bol;
              } else {
                return unfinished_stmt(bos, out_range);
              }
            } else {
              goto other;
            }
            break;

          default:
          other:
            if (context() == Context::kNone) {
              // First byte of a statement: look it up in the per-letter
              // table of command keywords. The cast to unsigned char is
              // required because passing a negative char (any byte >= 0x80
              // where char is signed) to tolower() is undefined behaviour.
              // Bytes other than letters produce an offset outside of the
              // table.
              size_t off = tolower(static_cast<unsigned char>(*p)) - 'a';
              if (off < m_commands_table.size() &&
                  !m_commands_table[off].empty()) {
                for (const auto &kwd : m_commands_table[off])
                  if (shcore::str_ibeginswith(
                          {p, static_cast<std::size_t>(m_end - p)}, kwd) &&
                      is_any_blank(*(p + kwd.length()))) {
                    command = true;
                    p += kwd.length() + 1;
                    push(Context::kStatement);
                    break;
                  }

                if (command) continue;
              }
            }

            if (!is_any_blank(*p)) {
              if (context() == Context::kNone) push(Context::kStatement);
            } else if (p == bos) {
              // blanks in front of the statement are not part of it
              bos++;
            }
            ++p;
            break;
        }
      }

      // If a string, identifier or comment is open (possibly since a previous
      // line), look for its end within the current line.
      switch (context()) {
        case Context::kNone:
        case Context::kStatement:
        case Context::kCommentConditional:
          break;

        case Context::kSQuoteString:
          p = span_string<internal::k_quoted_string_span_skips_sq, '\''>(
              p, eol, m_no_backslash_escapes);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              // the string continues on the next line
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;

        case Context::kDQuoteString:
          p = span_string<internal::k_quoted_string_span_skips_dq, '"'>(
              p, eol, m_no_backslash_escapes);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;

        case Context::kComment:
        case Context::kCommentHint:
          p = span_comment(p, eol);
          if (!p) {  // comment end missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            p += 2;
            pop();
          }
          break;

        case Context::kBQuoteIdentifier:
          p = span_quoted_identifier<'`'>(p, eol);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;

        case Context::kDQuoteIdentifier:
          p = span_quoted_identifier<'"'>(p, eol);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;
      }
    }

    // the whole line was consumed without finding the end of the statement
    if (p == eol) {
      if (command)
        run_command(false);
      else if (!last_line_was_delimiter)
        line_count++;
    }
  }

  // no more data will follow: whatever is left is the final statement
  if (m_last_chunk && bos < m_end) {
    Range range{static_cast<size_t>(bos - m_begin),
                static_cast<size_t>(m_end - bos), m_current_line};
    m_ptr = m_end;
    *out_range = range;
    *out_delim = "";
    m_current_line += line_count;
    return true;
  }

  if (m_last_chunk) m_eof = true;

  *out_range = Range{static_cast<size_t>(bos - m_begin),
                     static_cast<size_t>(m_end - bos), m_current_line};
  return false;
}