in sources/cg_c.c [4228:4302]
// Returns non-zero if `ch` counts as part of a "word-ish" run for the purposes
// of splitting a statement into pieces: lowercase letters, digits, and the
// range '@'..'Z' (i.e. '@' plus the uppercase letters in ASCII).
static int32_t cg_piece_is_wordish(char ch) {
  return (ch >= 'a' && ch <= 'z') ||
         (ch >= '@' && ch <= 'Z') ||
         (ch >= '0' && ch <= '9');
}

// Splits the statement text `in` into consecutive pieces and writes them to
// `output`, returning how many pieces were flushed.
//
// The emitted form is: an opening double quote, then `len + 1` written via
// cg_varinteger, then each piece handed to cg_flush_piece in order, then a
// closing double quote.  `in` must be non-null and non-empty (both enforced
// with Contract).
//
// Pieces are found by scanning for transitions between three kinds of runs
// (whitespace, word-ish characters, everything else).  The pieces are simply
// adjacent slices [start, cur) of the input, so together they cover the entire
// string; the state machine only decides where the cuts go.
cql_noexport uint32_t cg_statement_pieces(CSTR in, charbuf *output) {
  enum {
    run_space = 0,  // a run of ' ' or '\n'
    run_word  = 1,  // a run of word-ish characters
    run_other = 2   // a run of anything else: operators, punctuation, etc.
  };

  Contract(in);
  int32_t len = (int32_t)strlen(in);
  Contract(len);

  uint32_t count = 0;
  CSTR start = in;  // first character of the piece being accumulated
  CSTR cur = in;    // scan position
  int32_t prev_state = run_space;
  int32_t cur_state = run_space;

  bputc(output, '"');
  cg_varinteger(len + 1, output);

  while (*cur) {
    char ch = *cur;

    // classify the current character
    if (ch == ' ' || ch == '\n') {
      cur_state = run_space;
    }
    else if (cg_piece_is_wordish(ch)) {
      cur_state = run_word;
    }
    else {
      cur_state = run_other;
    }

    // Only a change in the kind of run can end a piece.  Even then, if the
    // change is to whitespace and the pending piece is still 4 characters or
    // fewer, we keep accumulating rather than cutting such a short piece.
    if (cur_state != prev_state && !(cur_state == run_space && cur - start <= 4)) {
      if (cur_state == run_space) {
        // Never emit a lone space/newline as its own piece, that's costly.
        // Instead the whitespace character is absorbed into the piece that is
        // about to be flushed.  The input is already normalized so there is
        // only ever one such character to absorb.
        cur++;

        // Peek at the character that will begin the next piece so that the
        // run state is right when scanning resumes.  Only here, directly after
        // a break, is '_' allowed to start a word-ish run.  If the peeked
        // character is not word-ish the state stays as whitespace.  Note that
        // the peeked character could be the null terminator.
        ch = *cur;
        if (cg_piece_is_wordish(ch) || ch == '_') {
          cur_state = run_word;
        }
      }

      // The run is over; if anything is pending, flush it.
      if (cur > start) {
        cg_flush_piece(start, cur, output);
        start = cur;
        count++;

        // Absorbing the whitespace may have put us on the terminator, in which
        // case we must stop here and not step past the end of the string.
        if (*cur == '\0') {
          break;
        }
      }
    }

    // Step to the next character.  After a flush that absorbed whitespace this
    // skips the character we peeked at, which is why its state was computed
    // above.
    cur++;
    prev_state = cur_state;
  }

  // Whatever is still pending at the end of the input is the last piece.
  if (cur > start) {
    cg_flush_piece(start, cur, output);
    count++;
  }

  bputc(output, '"');
  return count;
}