bool Sql_splitter::next_range()

in mysqlshdk/libs/utils/utils_mysql_parsing.cc [223:598]


/**
 * Scans the buffered input for the next statement.
 *
 * Scanning starts at m_ptr and proceeds one line at a time so that line
 * numbers can be tracked. The context stack (context()/push()/pop()) records
 * whether the scanner is currently inside a statement, a quoted string or
 * identifier, or a comment.
 *
 * A range is reported (return value true) when:
 *  - the current delimiter is found outside of strings and comments;
 *  - a backslash command is found for which the command callback reports
 *    that it terminates the statement;
 *  - a `#` or `-- ` comment is found while no statement is open (the text up
 *    to the end of that line is returned as its own range);
 *  - this is the last chunk and text is left over at the end of the buffer.
 *
 * DELIMITER lines, keywords listed in m_commands_table and non-terminating
 * backslash commands are consumed here; the latter two are physically removed
 * from the buffer (accounted for in m_shrinked_bytes).
 *
 * @param[out] out_range offset (relative to m_begin), length and first line
 *             of the statement. When false is returned it describes the
 *             unfinished remainder of the buffer instead.
 * @param[out] out_delim text that terminated the statement; empty for
 *             comment lines and for leftover text at the end of the last
 *             chunk. Only written when true is returned.
 * @return true if a range was extracted, false if more input is needed or
 *         the input is exhausted (m_eof is set in the latter case).
 */
bool Sql_splitter::next_range(Sql_splitter::Range *out_range,
                              std::string *out_delim) {
  // Reports everything from `bos` to the end of the buffer as an unfinished
  // statement and rewinds m_ptr to `bos`, so the next call starts over from
  // the beginning of that statement. Always returns false.
  auto unfinished_stmt = [this](char *bos, Range *result_range) {
    Range range{static_cast<size_t>(bos - m_begin),
                static_cast<size_t>(m_end - bos), m_current_line};
    m_ptr = bos;
    *result_range = range;
    return false;
  };

  char *bos;  // beginning of statement

  // skip leading blanks and empty lines, counting every newline skipped
  bos = m_ptr;
  for (;;) {
    bos = skip_blanks(bos, m_end);
    if (*bos == '\n') {
      m_current_line++;
      ++bos;
    } else {
      break;
    }
  }

  // lines consumed by the statement being scanned; added to m_current_line
  // once the statement is returned
  size_t line_count = 0;
  char *next_bol = bos;  // beginning of next line
  while (next_bol < m_end) {
    char *bol = next_bol;
    // true if the line ends in a newline or no more input will follow
    bool has_complete_line;
    // true while a keyword from m_commands_table is being collected
    bool command = false;
    bool last_line_was_delimiter = false;
    // process input per line so we can count line numbers
    char *eol = static_cast<char *>(memchr(bol, '\n', m_end - bol));
    if (!eol) {
      eol = m_end;
      has_complete_line = m_last_chunk;
    } else {
      ++eol;  // skip the newline
      has_complete_line = true;
    }
    next_bol = eol;

    // find the end of the statement
    char *p = bol;

    // Passes the text collected so far (bos..p) to the command callback. If
    // the callback consumes it (skip != 0), the consumed bytes - plus the
    // delimiter when `delimited` is set - are cut out of the buffer and all
    // pointers into the buffer are shifted accordingly.
    // Returns true if the text was consumed, false if it has to be handled as
    // a regular statement.
    const auto run_command = [&](bool delimited) {
      command = false;
      auto [skip, d] =
          m_cmd_callback(std::string_view{bos, static_cast<size_t>(p - bos)},
                         true, m_current_line);
      if (skip == 0) return false;
      if (delimited) skip += m_delimiter.size();
      p = bos + skip;
      // NOTE(review): the +1 also moves the byte at m_end (presumably a
      // terminator); the memmove in the backslash branch below does not.
      memmove(bos, p, m_end - p + 1);
      m_shrinked_bytes += skip;
      p = bos;
      eol -= skip;
      next_bol -= skip;
      m_end -= skip;
      pop();
      return true;
    };

    while (p && p < eol) {
      auto ctx = context();
      if (ctx == Context::kNone || ctx == Context::kStatement ||
          ctx == Context::kCommentConditional) {
        if (ctx == Context::kCommentConditional) {
          // only the closing */ is special in a conditional comment, anything
          // else in it is scanned like regular statement text below
          if ((eol - p) > 2 && *p == '*' && *(p + 1) == '/') {
            pop();
            p += 2;
            continue;
          }
        } else {
          // check for the current delimiter
          // The pointer + length overload of compare() is used on purpose:
          // the (str, pos, len) overload would first build a temporary
          // std::string from everything between p and the next NUL byte.
          if (*p == m_delimiter[0] &&
              (p + m_delimiter.size() <= eol &&
               m_delimiter.compare(0, m_delimiter.size(), p,
                                   m_delimiter.size()) == 0)) {
            if (command && run_command(true)) continue;
            pop();
            Range range{static_cast<size_t>(bos - m_begin),
                        static_cast<size_t>(p - bos), m_current_line};
            m_ptr = p + m_delimiter.size();
            *out_range = range;
            *out_delim = m_delimiter;
            m_current_line += line_count;
            return true;
          }
        }

        switch (*p) {
          case '\\':  // \x
          {
            if (m_no_backslash_escapes) goto other;

            // commands within commands are not supported
            if (command) goto other;

            // a lone backslash at the end of the line, need more data
            if (eol - p == 1) return unfinished_stmt(bos, out_range);

            // Callback returns number of chars to skip and whether the
            // statement should be terminated.
            auto [skip, delim] = m_cmd_callback(
                std::string_view{p, static_cast<size_t>(eol - p)}, p == bos,
                m_current_line);
            if (skip == 0 && (p == bos && !has_complete_line)) {
              // if there's a \command at the beginning of the line and
              // skip is 0, then it means we need the full line because it's a
              // standalone shell \command
              return unfinished_stmt(bos, out_range);
            }
            if (delim) {
              // if this cmd acts as a delimiter (like \g or \G, return the
              // statement)
              *out_delim = std::string(p, skip);
              m_ptr = p + skip;
              *out_range = Range{static_cast<size_t>(bos - m_begin),
                                 static_cast<size_t>(p - bos), m_current_line};
              pop();
              m_current_line += line_count;
              return true;
            } else {
              // pack together the rest of the buffer removing the command
              // NOTE(review): with skip == 0 nothing is removed and p is not
              // advanced by this branch - presumably the callback never
              // returns that for a complete line; confirm.
              memmove(p, p + skip, (m_end - p) - skip);
              m_shrinked_bytes += skip;
              eol -= skip;
              // the start of the next line moved along with the end of this
              // one; leaving it stale would make the scanner (and m_ptr in
              // the comment/DELIMITER paths) skip the first `skip` bytes of
              // the following line
              next_bol -= skip;
              m_end -= skip;
            }
            break;
          }

          case '\'':  // 'str'
            ++p;      // skip the opening '
            push(Context::kSQuoteString);
            break;

          case '"':  // "str"
            ++p;     // skip the opening "
            if (m_ansi_quotes) {
              push(Context::kDQuoteIdentifier);
            } else {
              push(Context::kDQuoteString);
            }
            break;

          case '`':  // `ident`
            ++p;
            push(Context::kBQuoteIdentifier);
            break;

          case '/':  // /* ... */
            if ((m_end - p) >= 3) {
              if (*(p + 1) == '*') {
                if (*(p + 2) == '+') {
                  push(Context::kCommentHint);
                  p += 3;
                } else if (*(p + 2) == '!') {
                  push(Context::kCommentConditional);
                  p += 3;
                } else {
                  push(Context::kComment);
                  p += 2;
                }
                break;
              }
            } else {
              // too few bytes left to tell whether a comment starts here
              if (!has_complete_line) return unfinished_stmt(bos, out_range);
            }
            goto other;

          case 'd':  // delimiter
          case 'D':
            // Possible start of the keyword DELIMITER. Must be the 1st keyword
            // of a statement.
            if (context() == Context::kNone &&
                shcore::str_ibeginswith(
                    {p, static_cast<std::size_t>(m_end - p)}, k_delimiter)) {
              if (has_complete_line) {
                // handle delimiter change directly here
                auto np = skip_blanks(p + k_delimiter.size(), eol);
                // NOTE(review): the second condition also matches a keyword
                // directly followed by a non-blank character; confirm that
                // resetting the delimiter is intended in that case.
                if (np == eol || np == p + k_delimiter.size()) {
                  set_delimiter("");
                } else {
                  p = np;
                  char *end = skip_not_blanks(p, eol);
                  set_delimiter(std::string(p, end - p));
                }
                // the rest of the line is dropped
                bos = p = next_bol;
                pop();
                last_line_was_delimiter = true;
                // delimiter is like a full statement, so increment line
                // and start next stmt from scratch
                m_current_line++;
                break;
              } else {
                // newline is missing
                return unfinished_stmt(bos, out_range);
              }
            }
            goto other;

          case '#':  // # ...
            if (has_complete_line) {
              // if the whole line is a comment return it
              if (context() == Context::kNone) {
                // number of trailing line-end characters left out of the
                // range
                // NOTE(review): for a newline-terminated line *(eol - 1) is
                // the '\n' itself, so the '\r' test only fires on a last line
                // without newline; confirm the intended CRLF handling.
                int nl = 1;
                if (*(eol - 1) == '\r') nl++;
                Range range{static_cast<size_t>(bos - m_begin),
                            static_cast<size_t>(eol - bos - nl),
                            m_current_line};
                m_ptr = next_bol;
                *out_range = range;
                *out_delim = "";
                line_count++;
                m_current_line += line_count;
                return true;
              }
              // comment inside a statement, skip to the next line
              p = next_bol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
            break;

          case '-':  // -- ...
            if ((m_end - p) > 2 && *(p + 1) == '-' && is_any_blank(*(p + 2))) {
              if (has_complete_line) {
                // if the whole line is a comment return it
                if (context() == Context::kNone) {
                  // NOTE(review): same line-end computation as for '#'
                  // comments above, with the same open question about CRLF.
                  int nl = 1;
                  if (*(eol - 1) == '\r') nl++;
                  Range range{static_cast<size_t>(bos - m_begin),
                              static_cast<size_t>(eol - bos - nl),
                              m_current_line};
                  m_ptr = next_bol;
                  *out_range = range;
                  *out_delim = "";
                  line_count++;
                  m_current_line += line_count;
                  return true;
                }
                // comment inside a statement, skip to the next line
                p = next_bol;
              } else {
                return unfinished_stmt(bos, out_range);
              }
            } else {
              goto other;
            }
            break;

          default:
          other:
            if (context() == Context::kNone) {
              // m_commands_table is indexed by the first letter of the
              // keyword. The cast is required because passing a negative
              // char (e.g. a UTF-8 byte) to tolower() is undefined behaviour.
              size_t off = tolower(static_cast<unsigned char>(*p)) - 'a';
              if (off < m_commands_table.size() &&
                  !m_commands_table[off].empty()) {
                for (const auto &kwd : m_commands_table[off])
                  if (shcore::str_ibeginswith(
                          {p, static_cast<std::size_t>(m_end - p)}, kwd) &&
                      is_any_blank(*(p + kwd.length()))) {
                    command = true;
                    p += kwd.length() + 1;
                    push(Context::kStatement);
                    break;
                  }
                if (command) continue;
              }
            }

            if (!is_any_blank(*p)) {
              // first non-blank character opens a statement
              if (context() == Context::kNone) push(Context::kStatement);
            } else if (p == bos) {
              // blanks in front of the statement are not part of it
              bos++;
            }
            ++p;
            break;
        }
      }

      // If a string, identifier or comment is open, jump to its end. When
      // the end is not on this line, continue with the next line if this
      // one is complete, otherwise wait for more data.
      switch (context()) {
        case Context::kNone:
        case Context::kStatement:
        case Context::kCommentConditional:
          break;

        case Context::kSQuoteString:
          p = span_string<internal::k_quoted_string_span_skips_sq, '\''>(
              p, eol, m_no_backslash_escapes);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;

        case Context::kDQuoteString:
          p = span_string<internal::k_quoted_string_span_skips_dq, '"'>(
              p, eol, m_no_backslash_escapes);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;

        case Context::kComment:
        case Context::kCommentHint:
          p = span_comment(p, eol);
          if (!p) {  // comment end missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            p += 2;
            pop();
          }
          break;

        case Context::kBQuoteIdentifier:
          p = span_quoted_identifier<'`'>(p, eol);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;

        case Context::kDQuoteIdentifier:
          p = span_quoted_identifier<'"'>(p, eol);
          if (!p) {  // closing quote missing
            if (has_complete_line) {
              p = eol;
            } else {
              return unfinished_stmt(bos, out_range);
            }
          } else {
            pop();
          }
          break;
      }
    }

    // the whole line was consumed without finding the end of the statement
    if (p == eol) {
      if (command)
        // a command does not need a delimiter, the end of the line ends it
        run_command(false);
      else if (!last_line_was_delimiter)
        // (a DELIMITER line has already been added to m_current_line)
        line_count++;
    }
  }
  // no more data will come, whatever is left is the last statement
  if (m_last_chunk && bos < m_end) {
    Range range{static_cast<size_t>(bos - m_begin),
                static_cast<size_t>(m_end - bos), m_current_line};
    m_ptr = m_end;
    *out_range = range;
    *out_delim = "";
    m_current_line += line_count;
    return true;
  }
  if (m_last_chunk) m_eof = true;
  // report the unfinished remainder so that the caller can keep it until
  // more data is available
  *out_range = Range{static_cast<size_t>(bos - m_begin),
                     static_cast<size_t>(m_end - bos), m_current_line};
  return false;
}