void MyxStatementParser::process()

in library/sql.parser/source/myx_statement_parser.cpp [140:451]


/**
 * Splits the SQL script read from `is` into individual statements and reports
 * each of them through `cb`.
 *
 * The text is scanned with a small state machine (see ParserState). Characters
 * are accumulated in a local statement buffer; quoted strings (`, ', ") and
 * comments (C-style, "--" and "#") are copied through without being searched
 * for the delimiter. When the current delimiter `delim` is found, the
 * delimiter is stripped from the buffer and the remaining text is passed to
 * `cb` unless is_empty_statement() reports it as empty. A "DELIMITER xxx" line
 * (keyword matched case-insensitively) replaces `delim` and is not reported.
 *
 * Scanning ends when buffer_eof(is) becomes true or parser_is_stopped is set.
 * Text left in the buffer at end of input is reported as a final statement
 * unless `mode` contains MYX_SPM_DELIMS_REQUIRED.
 *
 * Resets _stmt_boffset, _stmt_first_line_first_symbol_pos,
 * _symbols_since_newline and _total_lc on entry. _stmt_boffset is advanced by
 * the size of every consumed statement (plus its delimiter) and of every
 * DELIMITER line.
 *
 * @param is    stream the script is read from
 * @param cb    callback invoked as cb(this, statement_text, arg)
 * @param arg   opaque pointer handed to `cb` unchanged
 * @param mode  bit flags; only MYX_SPM_DELIMS_REQUIRED is examined here
 */
void MyxStatementParser::process(std::istream& is, process_sql_statement_callback cb, void *arg, int mode)
{
  static const char *kwd= "DELIMITER";
  
  int c;
  ParserState state = start, prevState = start;
  std::string stmt_buffer;   // text of the statement currently being collected
  std::string delim_buffer;  // text following a DELIMITER keyword
  char strchar = 0;          // quote character that closes the current string
  _stmt_boffset= 0;
  _stmt_first_line_first_symbol_pos= 0;
  _symbols_since_newline= 0;
  _total_lc= 0;

  int len;                   // length reported by get_next_char/peek_next_char for `c`
  bool m, can_be_kwd;
  int p;                     // previous character while scanning a /* */ comment

  while(!buffer_eof(is) && !parser_is_stopped) {
    switch(state) {
    case eos:
      break;
    
    // Beginning of a new statement: keep leading whitespace in the buffer and
    // pick the next state from the first significant character.
    case start:
      stmt_buffer.clear();
      c= get_next_char(is, &len);
      while(my_isspace(cs, c) || (c == '\n') || (c == '\r')) {
        add_char_to_buffer(stmt_buffer, c, len);
        c= get_next_char(is, &len);
      }
      add_char_to_buffer(stmt_buffer, c, len);
      if(kwd[0] == my_toupper(cs, c)) {
        state= delimkwd;
      } else if(c == '`') {
        strchar= '`';
        state= str;
      } else if(c == '\'') {
        strchar= '\'';
        state= str;
      } else if(c == '"') {
        strchar= '"';
        state= str;
      } else if((c == '/') && (peek_next_char(is, &len) == '*')) {
        prevState= start;
        state= mlcomment;
      } else if((c == '-') && (peek_next_char(is, &len) == '-')) {
        prevState= start;
        state= comment1;
      } else if(c == '#') {
        prevState= start;
        state= comment2;
      } else if(c == delim[0]) {
        state= delimtok;
      } else {
        state= stmt;
      }
      continue;
    
    // A 'D'/'d' was seen where the DELIMITER keyword may start; its first
    // letter is already in the buffer.
    case delimkwd:
      m= true;
      // Peek before consuming so that a non-matching character is left in the
      // stream for the ordinary statement scanner.
      for (int i= 1; kwd[i] != '\0'; i++) {
        c= peek_next_char(is, &len);
        if(my_toupper(cs, c) != kwd[i]) {
          m= false;
          break;
        }
        else
        {
          c= get_next_char(is, &len);
          add_char_to_buffer(stmt_buffer, c, len);        
        }
      }
      if(!m) {
        // Not the keyword: continue scanning as a regular statement. Note that
        // `state` is not changed here; the stmt loop assigns it when it leaves.
        //state= stmt;
        //continue;
        goto stmtlabel;
      }
      // The keyword must be followed by whitespace, otherwise it is just the
      // prefix of some other word.
      c= get_next_char(is, &len);
      add_char_to_buffer(stmt_buffer, c, len);
      if(!my_isspace(cs, c)) {
        state= stmt;
        continue;
      }
      // Skip the whitespace between the keyword and the delimiter text.
      c= peek_next_char(is, &len);
      while(my_isspace(cs, c)) {
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        c= peek_next_char(is, &len);
      }
      if((c == '\r') || (c == '\n')) {
        // No delimiter text on this line. The line break has only been peeked
        // so far, so consume it before storing it; appending the peeked value
        // would make the stmt state read and store the same character again.
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        state= stmt;
        continue;
      }
      delim_buffer.clear();
      // The "DELIMITER " prefix is not reported as a statement, so account for
      // its size in the running offset here.
      _stmt_boffset+= (int)stmt_buffer.size();
      // Collect the rest of the line as the candidate delimiter.
      while((c != '\r') && (c != '\n') /*&& !my_isspace(cs, c)*/ && !buffer_eof(is)) 
      {
        c= get_next_char(is, &len);
        _stmt_boffset+= len;
        delim_buffer+= (char)c;
        c= peek_next_char(is, &len);
      }
      //if(delim_buffer.length() > delim.length()) 
      //{
      //  if(delim_buffer.compare(delim_buffer.length() - delim.length(), delim.length(), delim) == 0)
      //  {
      //    delim_buffer.erase(delim_buffer.length() - delim.length());
      //  }
      //}

      // new delimiter
      if(!delim_buffer.empty())
      {
        stmt_buffer.clear();
        // Only the text up to the first whitespace character is the delimiter.
        for (size_t n= 0, count= delim_buffer.size(); n < count; ++n)
        {
          if (my_isspace(cs, delim_buffer[n]))
          {
            delim_buffer.resize(n);
            break;
          }
        }
        delim= delim_buffer;
        // Swallow the line break(s) that end the DELIMITER line.
        for(;;)
        {
          c= peek_next_char(is, &len);
          if((c != '\r') && (c != '\n'))
            break;
          c= get_next_char(is, &len);
          _stmt_boffset+= len;
        }
        _stmt_first_line_first_symbol_pos= _symbols_since_newline;
      }

      state= start;
      continue;

    // Inside a quoted string/identifier; the opening quote is already stored.
    case str:
      c= get_next_char(is, &len);
      while((c != strchar) && !buffer_eof(is)) 
      {
        add_char_to_buffer(stmt_buffer, c, len);
        if(c == '\\') 
        {
          // A backslash escapes the following character, so copy it without
          // testing it against the closing quote.
          c= get_next_char(is, &len);
          add_char_to_buffer(stmt_buffer, c, len);
        }
        c= get_next_char(is, &len);
      }
      add_char_to_buffer(stmt_buffer, c, len);
      if(!buffer_eof(is)) 
      {
        state= stmt;
      }
      continue;

    // A '/' was stored and a '*' was peeked after it.
    case mlcomment:
      c= get_next_char(is, &len);
      add_char_to_buffer(stmt_buffer, c, len);
      if(c != '*') {
        state= stmt;
        continue;
      }

      // `p` starts as a blank so that the '*' of the opening "/*" cannot also
      // serve as the '*' of the closing "*/" (i.e. "/*/" does not end the comment).
      p= ' ';
      while(!buffer_eof(is)) {
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        if((c == '/') && (p == '*')) {
          state= stmt;
          break;
        }
        if(buffer_eof(is))
        {
          break;
        }
        p= c;
      }
      continue;

    // '#' comment: copy through the end of the line.
    case comment2:
      c= get_next_char(is, &len);
      while((c != '\r') && (c != '\n') && !buffer_eof(is)) {
        add_char_to_buffer(stmt_buffer, c, len);
        c= get_next_char(is, &len);
      }
      add_char_to_buffer(stmt_buffer, c, len);
      state= stmt;
      
      // Also take any further line-break characters that directly follow.
      c= peek_next_char(is, &len);
      while((c == '\r') || (c == '\n'))
      {
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        c= peek_next_char(is, &len);
      }

      continue;

    // "--" comment: the first '-' is stored, the second one was only peeked.
    case comment1:
      c= get_next_char(is, &len);
      if(c != '-') {
        add_char_to_buffer(stmt_buffer, c, len);
        state= prevState;
        continue;
      }
      while((c != '\r') && (c != '\n') && !buffer_eof(is)) {
        add_char_to_buffer(stmt_buffer, c, len);
        c= get_next_char(is, &len);
      }
      add_char_to_buffer(stmt_buffer, c, len);
      state= stmt;

      // Also take any further line-break characters that directly follow.
      c= peek_next_char(is, &len);
      while((c == '\r') || (c == '\n'))
      {
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        c= peek_next_char(is, &len);
      }
      continue;

    // The first character of the delimiter was stored; try to match the rest.
    case delimtok:
      m= true;
      for(size_t i= 1; i < delim.size(); i++) {
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        // Compare the raw character: the delimiter is stored verbatim and its
        // first character is matched verbatim too, so upper-casing `c` here
        // would make delimiters containing lowercase letters unmatchable.
        if(c != delim[i]) {
          m= false;
          break;
        }
      }
      if(!m) {
        state= stmt;
        continue;
      }
      // new statement is read
      stmt_buffer.erase(stmt_buffer.length() - delim.length());
      {
        // Remember the consumed size before the callback runs; the offset is
        // advanced only after the statement has been reported.
        std::string::size_type stmt_boffset_inc = stmt_buffer.size() + delim.size();
        if(!is_empty_statement(stmt_buffer))
          cb(this, stmt_buffer.c_str(), arg);
        _stmt_boffset+= (int)stmt_boffset_inc;
      }
      stmt_buffer.clear();
      _stmt_first_line_first_symbol_pos= _symbols_since_newline;

      state= start;
      continue;

    // Ordinary statement text: copy characters until one of them starts a
    // construct handled by another state.
    case stmt:
stmtlabel:
      // DELIMITER is only recognised while nothing but blanks/control
      // characters has been seen since this state was (re-)entered.
      can_be_kwd= true;

      while(!buffer_eof(is)) {
        c= get_next_char(is, &len);
        add_char_to_buffer(stmt_buffer, c, len);
        if(can_be_kwd && (kwd[0] == my_toupper(cs, c))) {
          prevState= stmt;
          state= delimkwd;
          break;
        } else if(c == '`') {
          prevState= stmt;
          strchar= '`';
          state= str;
          break;
        } else if(c == '\'') {
          prevState= stmt;
          strchar= '\'';
          state= str;
          break;
        } else if(c == '"') {
          prevState= stmt;
          strchar= '"';
          state= str;
          break;
        } else if((c == '/') && (peek_next_char(is, &len) == '*')) {
          prevState= stmt;
          state= mlcomment;
          break;
        } else if((c == '-') && (peek_next_char(is, &len) == '-')) {
          prevState= stmt;
          state= comment1;
          break;
        } else if(c == '#') {
          prevState= stmt;
          state= comment2;
          break;
        } else if(c == delim[0]) {
          prevState= stmt;
          state= delimtok;
          break;
        }
        if(c > ' ')
          can_be_kwd= false;
      }
      continue;
    } // switch
  }

  if (parser_is_stopped)
    return;
  else if(!(mode & MYX_SPM_DELIMS_REQUIRED)
    && (stmt_buffer.length() > 0)
    && !is_empty_statement(stmt_buffer))
  {
    // Input ended without a closing delimiter: report what was collected.
    int stmt_boffset_inc= (int)stmt_buffer.size();
    cb(this, stmt_buffer.c_str(), arg);
    _stmt_boffset+= stmt_boffset_inc;
  }
}