sources/printf.c (335 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#if defined(CQL_AMALGAM_LEAN) && !defined(CQL_AMALGAM_SEM)
// stubs to avoid link errors (none needed)
#else
#include "printf.h"
// Declares the single-character C string `name` given a character `c`.
#define CSTR_OF_CHAR(name, c) \
char name[2] = {0}; \
name[0] = c;
// There are seven options flags that can be associated with a substitution in a
// SQLite format string.
typedef enum {
PRINTF_FLAGS_NONE = 0,
PRINTF_FLAGS_MINUS = 1 << 0,
PRINTF_FLAGS_PLUS = 1 << 1,
PRINTF_FLAGS_SPACE = 1 << 2,
PRINTF_FLAGS_ZERO = 1 << 3,
PRINTF_FLAGS_HASH = 1 << 4,
PRINTF_FLAGS_COMMA = 1 << 5,
PRINTF_FLAGS_BANG = 1 << 6,
} printf_flags;
// Parsing a format string involves the use of a state machine with the
// following states. The name of each state indicates what we're looking for
// next, e.g., the start of a substitution, a percent sign following the start
// ('%%'), a flag, a width specification, et cetera.
typedef enum {
PRINTF_STATE_START,
PRINTF_STATE_PERCENT,
PRINTF_STATE_FLAG,
PRINTF_STATE_WIDTH,
PRINTF_STATE_WIDTH_NUMERIC,
PRINTF_STATE_DOT,
PRINTF_STATE_PRECISION,
PRINTF_STATE_LENGTH_LONG,
PRINTF_STATE_LENGTH_LONG_LONG,
PRINTF_STATE_TYPE,
} printf_state;
// A width in SQLite can be either numeric or '*'. In the latter case, two
// arguments are required for the substition, the first of which will be the
// width.
typedef enum {
PRINTF_WIDTH_NONE,
PRINTF_WIDTH_NUMERIC,
PRINTF_WIDTH_STAR,
} printf_width;
// A length specifier can be absent (indicating the default, 32-bit length), 'l'
// (which does nothing in SQLite and therefore also indicates 32 bits), or 'll'
// (indicating 64 bits).
typedef enum {
PRINTF_LENGTH_DEFAULT,
PRINTF_LENGTH_LONG,
PRINTF_LENGTH_LONG_LONG,
} printf_length;
struct printf_iterator {
// The AST to use for error reporting. This should be the string literal
// `ast_node` itself so that problems with the format string are reported with
// the correct location.
ast_node *ast;
// The flags associated with the current substitution.
printf_flags flags;
// A pointer into the null-terminated format string that is being parsed. This
// string must NOT simply be the string extracted from the `ast_node`, but
// rather a decoded version without the leading and trailing quotes.
CSTR format_string;
// The length associated with the current substitution.
printf_length length;
// The core type associated with the type specifier of the current
// substitution (e.g., `SEM_TYPE_INTEGER`), `SEM_TYPE_PENDING` if we're
// currently in the middle of parsing a substitution, `SEM_TYPE_OK` if we've
// finished parsing the format string successfully, or `SEM_TYPE_ERROR` if we
// encountered an error. In the lattermost two cases, the iterator is finished
// and `printf_iterator_next` must not be called again.
sem_t sem_type;
// The current state of parsing for the current substitution.
printf_state state;
// The width associated with the current substitution.
printf_width width;
};
// We need to provide `sizeof_printf_iterator` because `printf_iterator` is
// abstract in the header.
size_t sizeof_printf_iterator = sizeof(printf_iterator);
// Initializes a `printf_iterator`.
cql_noexport void printf_iterator_init(printf_iterator *iterator, ast_node *format_strlit, CSTR format_string) {
Contract(!format_strlit || is_strlit(format_strlit));
iterator->ast = format_strlit;
iterator->flags = PRINTF_FLAGS_NONE;
iterator->format_string = format_string;
iterator->length = PRINTF_LENGTH_DEFAULT;
iterator->sem_type = SEM_TYPE_PENDING;
iterator->state = PRINTF_STATE_START;
iterator->width = PRINTF_WIDTH_NONE;
}
// Given a character, returns the associated flag (or `PRINTF_FLAGS_NONE` if the
// character does not correspond to a flag).
static printf_flags printf_flag_of_char(char c) {
switch (c) {
case '-':
return PRINTF_FLAGS_MINUS;
case '+':
return PRINTF_FLAGS_PLUS;
case ' ':
return PRINTF_FLAGS_SPACE;
case '0':
return PRINTF_FLAGS_ZERO;
case '#':
return PRINTF_FLAGS_HASH;
case ',':
return PRINTF_FLAGS_COMMA;
case '!':
return PRINTF_FLAGS_BANG;
default:
return PRINTF_FLAGS_NONE;
}
}
// Indicates an error in the format string and sets `SEM_TYPE_ERROR`.
static void printf_iterator_error(printf_iterator *iterator, CSTR msg, CSTR subject) {
if (iterator->ast) {
report_error(iterator->ast, msg, subject);
record_error(iterator->ast);
}
iterator->sem_type = SEM_TYPE_ERROR;
}
// Returns `true` if the character corresponds to one of the seven possible
// flags, else `false`.
static bool_t printf_is_flag_char(char c) {
return printf_flag_of_char(c) != PRINTF_FLAGS_NONE;
}
// Records that a character corresponding to one of the seven possible flags is
// associated with the current substituion. This must not be called with a
// character that does not correspond to a flag. Sets `SEM_TYPE_ERROR` if the
// flag character is a duplicate or if there is an invalid combination of flags.
static void printf_iterator_add_flag_char(printf_iterator *iterator, char c) {
Contract(iterator);
Contract(iterator->sem_type == SEM_TYPE_PENDING);
Contract(iterator->state == PRINTF_STATE_FLAG);
printf_flags flag = printf_flag_of_char(c);
Invariant(flag != PRINTF_FLAGS_NONE);
if (iterator->flags & flag) {
CSTR_OF_CHAR(flag_string, c);
printf_iterator_error(iterator, "CQL0411: duplicate flag in substitution", flag_string);
return;
}
printf_flags plus_or_space = PRINTF_FLAGS_PLUS | PRINTF_FLAGS_SPACE;
if ((iterator->flags & plus_or_space) && (flag & plus_or_space)) {
// We already had a plus or space, and we just got a plus or space, and we
// know the one we just got is not a duplicate of what we already had
// because we just checked, so now we have both.
printf_iterator_error(iterator, "CQL0412: cannot combine '+' flag with space flag", NULL);
return;
}
iterator->flags |= flag;
}
// Records the width specifier for the current substition. Sets `SEM_TYPE_ERROR`
// if the substitution has no width but one is required for a previously
// recorded flag to make sense.
static void printf_set_width(printf_iterator *iterator, printf_width width) {
Contract(iterator);
Contract(iterator->sem_type == SEM_TYPE_PENDING);
Contract(iterator->state == PRINTF_STATE_WIDTH);
Contract(iterator->width == PRINTF_WIDTH_NONE);
switch (width) {
case PRINTF_WIDTH_NONE:
if ((iterator->flags & (PRINTF_FLAGS_MINUS | PRINTF_FLAGS_ZERO))) {
CSTR flag_string = (iterator->flags & PRINTF_FLAGS_MINUS) ? "-" : "0";
printf_iterator_error(iterator, "CQL0413: width required when using flag in substitution", flag_string);
return;
}
break;
case PRINTF_WIDTH_NUMERIC:
break;
case PRINTF_WIDTH_STAR:
break;
}
iterator->width = width;
}
// Sets the length specifier for the current substitution. Sets `SEM_TYPE_ERROR`
// if the specifier is `PRINTF_STATE_LENGTH_LONG` (as 'l' serves no purpose in
// SQLite) or if a length specifier has been combined with a flag that doesn't
// make sense with a length specifier.
static void printf_set_length(printf_iterator *iterator, printf_length length) {
Contract(iterator);
Contract(iterator->sem_type == SEM_TYPE_PENDING);
Contract(iterator->state == PRINTF_STATE_LENGTH_LONG || iterator->state == PRINTF_STATE_LENGTH_LONG_LONG);
switch (length) {
case PRINTF_LENGTH_DEFAULT:
break;
case PRINTF_LENGTH_LONG:
printf_iterator_error(iterator, "CQL0414: 'l' length specifier has no effect; consider 'll' instead", NULL);
return;
case PRINTF_LENGTH_LONG_LONG:
if ((iterator->flags & PRINTF_FLAGS_BANG)) {
printf_iterator_error(iterator, "CQL0415: length specifier cannot be combined with '!' flag", NULL);
return;
}
break;
}
iterator->length = length;
}
// Sets the type specifier associated with the character provided for the
// current substitution. Sets `SEM_TYPE_ERROR` if the type specifier is not
// compatible with the previously recorded flags or length specifier, or if the
// type specifier is not allowed in CQL, or if the character provided does not
// correspond to any type specifier.
static void printf_iterator_set_type_char(printf_iterator *iterator, char c) {
Contract(iterator);
Contract(iterator->sem_type == SEM_TYPE_PENDING);
Contract(iterator->state == PRINTF_STATE_TYPE);
CSTR_OF_CHAR(type_string, c);
// '-' works with all possible type specifications.
printf_flags valid_flags = PRINTF_FLAGS_MINUS;
bool_t allows_length_specifier;
switch (c) {
case 'd': case 'i': {
allows_length_specifier = true;
valid_flags |= PRINTF_FLAGS_PLUS;
valid_flags |= PRINTF_FLAGS_SPACE;
valid_flags |= PRINTF_FLAGS_ZERO;
valid_flags |= PRINTF_FLAGS_COMMA;
if (iterator->length == PRINTF_LENGTH_LONG_LONG) {
iterator->sem_type = SEM_TYPE_LONG_INTEGER;
} else {
iterator->sem_type = SEM_TYPE_INTEGER;
}
break;
}
case 'u':
allows_length_specifier = true;
valid_flags |= PRINTF_FLAGS_ZERO;
if (iterator->length == PRINTF_LENGTH_LONG_LONG) {
iterator->sem_type = SEM_TYPE_LONG_INTEGER;
} else {
iterator->sem_type = SEM_TYPE_INTEGER;
}
break;
case 'f': case 'e': case 'E': case 'g': case 'G':
allows_length_specifier = false;
valid_flags |= PRINTF_FLAGS_ZERO;
valid_flags |= PRINTF_FLAGS_BANG;
valid_flags |= PRINTF_FLAGS_HASH;
iterator->sem_type = SEM_TYPE_REAL;
break;
case 'x': case 'X': case 'o':
allows_length_specifier = true;
valid_flags |= PRINTF_FLAGS_ZERO;
valid_flags |= PRINTF_FLAGS_HASH;
if (iterator->length == PRINTF_LENGTH_LONG_LONG) {
iterator->sem_type = SEM_TYPE_LONG_INTEGER;
} else {
iterator->sem_type = SEM_TYPE_INTEGER;
}
break;
case 's':
allows_length_specifier = false;
valid_flags |= PRINTF_FLAGS_BANG;
iterator->sem_type = SEM_TYPE_TEXT;
break;
case 'c': case 'z': case 'p': case 'n': case 'q': case 'Q': case 'w': {
// NOTE: 'c' could be supported with codegen changes. It is presently
// disallowed because it requires a TEXT argument when used in an SQL
// context, yet it requires an integer argument when used via
// `sqlite3_mprintf`. The code generator currently cannot handle the
// latter case correctly.
printf_iterator_error(iterator, "CQL0416: type specifier not allowed in CQL", type_string);
return;
}
default:
printf_iterator_error(iterator, "CQL0417: unrecognized type specifier", type_string);
return;
}
if ((iterator->flags | valid_flags) != valid_flags) {
printf_iterator_error(iterator, "CQL0418: type specifier combined with inappropriate flags", type_string);
return;
}
if (iterator->length != PRINTF_LENGTH_DEFAULT && !allows_length_specifier) {
printf_iterator_error(iterator, "CQL0419: type specifier cannot be combined with length specifier", type_string);
return;
}
}
// Resets the iterator after successfully parsing one substitution to prepare
// for the next call to `printf_iterator_next`. This must not be called if in
// the middle of a substitution with a '*' width specifier;
// `printf_iterator_suspend_for_star` should be used instead.
static void printf_iterator_reset(printf_iterator *iterator) {
Contract(iterator);
Contract(iterator->state == PRINTF_STATE_TYPE);
Contract(iterator->sem_type != SEM_TYPE_ERROR);
Contract(iterator->sem_type != SEM_TYPE_OK);
Contract(iterator->sem_type != SEM_TYPE_PENDING);
iterator->flags = PRINTF_FLAGS_NONE;
iterator->format_string++;
iterator->length = PRINTF_LENGTH_DEFAULT;
iterator->sem_type = SEM_TYPE_PENDING;
iterator->state = PRINTF_STATE_START;
iterator->width = PRINTF_WIDTH_NONE;
}
// Prepares the iterator for the next call to `printf_iterator_next` after
// encountering a '*' width specifier.
static void printf_iterator_suspend_for_star(printf_iterator *iterator) {
Contract(iterator);
Contract(iterator->state == PRINTF_STATE_WIDTH);
Contract(iterator->width == PRINTF_WIDTH_NONE);
Contract(*iterator->format_string == '*');
// The '*' width requires two arguments for the substitution instead of one,
// the first of which will be the width. For example, the following two uses
// of printf are equivalent:
//
// printf("%10d\n", 42);
// printf("%*d\n, 10, 42);
//
// It therefore follows that we need to return the fact that we need an
// integer, and then be ready to resume parsing the rest of the current
// substitution later.
printf_set_width(iterator, PRINTF_WIDTH_STAR);
// Setting the width to `PRINTF_WIDTH_STAR` cannot fail.
Invariant(iterator->sem_type != SEM_TYPE_ERROR);
// We'll resume looking for a dot after the star.
iterator->state = PRINTF_STATE_DOT;
// Consume the '*' character.
iterator->format_string++;
}
// Returns the type of the next substitution, else `SEM_TYPE_OK` if no
// substitutions remain or `SEM_TYPE_ERROR` in the case of an error.
cql_noexport sem_t printf_iterator_next(printf_iterator *iterator) {
Contract(iterator);
Contract(iterator->sem_type == SEM_TYPE_PENDING);
// We should either be at the start of a substituion or resuming a
// substitution with a '*' width specifier.
Contract(iterator->state == PRINTF_STATE_START || iterator->width == PRINTF_WIDTH_STAR);
for (;;) {
// If we encountered an error or hit the end of the string, stop.
if (iterator->sem_type != SEM_TYPE_PENDING) {
return iterator->sem_type;
}
// Read the current character in the format string.
char c = *iterator->format_string;
// Check if we're at the end of the string. If so, stop.
if (c == '\0') {
if (iterator->state == PRINTF_STATE_START) {
// We hit the end while not within a substitution, so we're simply done.
iterator->sem_type = SEM_TYPE_OK;
} else {
// We hit the end in the middle of a substitution, so the substitution
// is incomplete and the format string is invalid.
printf_iterator_error(iterator, "CQL0420: incomplete substitution in format string", NULL);
}
return iterator->sem_type;
}
// Here, we dispatch appropriately based on the current state. If the
// current character should be consumed, we `break` to jump out of the
// switch and advance the string to the next character at the end of the for
// loop. If we want to go onto another step without consuming the current
// character, we `continue` instead to jump back to the top of the for loop
// without advancing the string.
switch (iterator->state) {
case PRINTF_STATE_START:
if (c == '%') {
iterator->state = PRINTF_STATE_PERCENT;
}
break;
case PRINTF_STATE_PERCENT:
if (c == '%') {
iterator->state = PRINTF_STATE_START;
break;
}
iterator->state = PRINTF_STATE_FLAG;
continue;
case PRINTF_STATE_FLAG:
if (printf_is_flag_char(c)) {
printf_iterator_add_flag_char(iterator, c);
break;
}
iterator->state = PRINTF_STATE_WIDTH;
continue;
case PRINTF_STATE_WIDTH:
if (c >= '0' && c <= '9') {
printf_set_width(iterator, PRINTF_WIDTH_NUMERIC);
iterator->state = PRINTF_STATE_WIDTH_NUMERIC;
break;
}
if (c == '*') {
// Return the fact that we need an integer and prepare to resume
// parsing the rest of the substitution later.
printf_iterator_suspend_for_star(iterator);
return SEM_TYPE_INTEGER;
}
printf_set_width(iterator, PRINTF_WIDTH_NONE);
iterator->state = PRINTF_STATE_DOT;
continue;
case PRINTF_STATE_WIDTH_NUMERIC:
if (c >= '0' && c <= '9') {
break;
}
iterator->state = PRINTF_STATE_DOT;
continue;
case PRINTF_STATE_DOT:
if (c == '.') {
iterator->state = PRINTF_STATE_PRECISION;
break;
}
iterator->state = PRINTF_STATE_LENGTH_LONG;
continue;
case PRINTF_STATE_PRECISION:
if (c >= '0' && c <= '9') {
break;
}
iterator->state = PRINTF_STATE_LENGTH_LONG;
continue;
case PRINTF_STATE_LENGTH_LONG:
if (c == 'l') {
iterator->state = PRINTF_STATE_LENGTH_LONG_LONG;
break;
}
printf_set_length(iterator, PRINTF_LENGTH_DEFAULT);
iterator->state = PRINTF_STATE_TYPE;
continue;
case PRINTF_STATE_LENGTH_LONG_LONG:
if (c == 'l') {
printf_set_length(iterator, PRINTF_LENGTH_LONG_LONG);
iterator->state = PRINTF_STATE_TYPE;
break;
}
printf_set_length(iterator, PRINTF_LENGTH_LONG);
iterator->state = PRINTF_STATE_TYPE;
continue;
case PRINTF_STATE_TYPE:
printf_iterator_set_type_char(iterator, c);
sem_t sem_type = iterator->sem_type;
if (sem_type != SEM_TYPE_ERROR) {
printf_iterator_reset(iterator);
}
return sem_type;
}
// Consume the current character and continue.
iterator->format_string++;
}
}
#endif