cql_noexport uint32_t cg_statement_pieces()

in sources/cg_c.c [4228:4302]


// Returns nonzero when `ch` belongs to the "alpha-ish" class used for splitting
// statement text: a-z, the range '@' through 'Z' (so '@' and A-Z), or 0-9.
// Note that '_' is deliberately not part of this base class.
static int32_t cg_piece_is_wordish(char ch) {
  if (ch >= 'a' && ch <= 'z') {
    return 1;
  }
  if (ch >= '@' && ch <= 'Z') {
    return 1;
  }
  return ch >= '0' && ch <= '9';
}

// Splits the statement text `in` into pieces and hands each piece to
// cg_flush_piece, writing everything into `output`.
//
// What gets written, in order:
//   * an opening double quote
//   * cg_varinteger(strlen(in) + 1)
//   * whatever cg_flush_piece emits for each piece [start, end)
//   * a closing double quote
//
// `in` must be non-null and non-empty (both are enforced with Contract).
// The return value is the number of pieces that were flushed.
//
// The pieces are found by walking the text and tracking which kind of run we
// are in (whitespace, alpha-ish, or anything else).  A change of run kind is a
// candidate place to cut, subject to the adjustments described inline below.
cql_noexport uint32_t cg_statement_pieces(CSTR in, charbuf *output) {
  // The numeric values match the original 0/1/2 state encoding.
  enum {
    PIECE_WS = 0,    // run of spaces/newlines
    PIECE_WORD = 1,  // run of alpha-ish characters
    PIECE_MISC = 2   // run of anything else (operators and so forth)
  };

  Contract(in);
  int32_t len = (int32_t)strlen(in);
  Contract(len);

  uint32_t pieces = 0;
  CSTR piece_start = in;  // first character not yet flushed
  CSTR p = in;            // scan position

  int32_t prev_run = PIECE_WS;
  int32_t run = PIECE_WS;

  bputc(output, '"');
  cg_varinteger(len + 1, output);

  // The increment clause runs on every `continue` as well, so prev_run always
  // picks up the state we finished the previous iteration with.
  for (; *p; p++, prev_run = run) {
    char ch = *p;

    if (ch == ' ' || ch == '\n') {
      run = PIECE_WS;
    }
    else if (cg_piece_is_wordish(ch)) {
      run = PIECE_WORD;
    }
    else {
      run = PIECE_MISC;
    }

    // Still inside the same kind of run: nothing to decide yet.
    if (run == prev_run) {
      continue;
    }

    // We just hit whitespace but the pending piece is still short (the scan
    // position is at most 4 characters past its start): keep accumulating
    // rather than cutting here.
    if (run == PIECE_WS && p - piece_start <= 4) {
      continue;
    }

    // We are going to cut.  If the cut is at whitespace, pull that one
    // space/newline into the piece being flushed: emitting a lone space as its
    // own piece is costly.  The input is already normalized so we do not expect
    // several spaces in a row here, and space-then-newline would still work but
    // gen_sql does not produce it.
    if (run == PIECE_WS) {
      p++;  // absorb the whitespace character

      // Set the state for the character we are now sitting on so the next
      // iteration compares against the right thing.  Right after a break an
      // underscore is allowed to begin an alpha-ish run.  For any other
      // character the state is simply left as whitespace.
      ch = *p;
      if (ch == '_' || cg_piece_is_wordish(ch)) {
        run = PIECE_WORD;
      }
      // Careful: p may now be sitting on the terminating NUL.
    }

    // Nothing pending (only possible at the very start of the text).
    if (piece_start >= p) {
      continue;
    }

    // The run is over, emit it and begin the next piece at the scan position.
    cg_flush_piece(piece_start, p, output);
    piece_start = p;
    pieces++;

    // If absorbing the whitespace took us to the end of the text we must stop
    // now; letting the loop increment run would step past the terminator.
    if (*p == '\0') {
      break;
    }
  }

  // Emit whatever is still pending once the scan reaches the end of the text.
  if (piece_start < p) {
    cg_flush_piece(piece_start, p, output);
    pieces++;
  }

  bputc(output, '"');

  return pieces;
}