sources/rewrite.c (1,196 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#if defined(CQL_AMALGAM_LEAN) && !defined(CQL_AMALGAM_SEM)
// stubs to avoid link errors (none needed)
#else
// Most of the functions that rewrite the AST have been hoisted out of sem.c and are here
// Rewrites always happen during semantic analysis so this is really part of that phase.
#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include "cg_common.h"
#include "compat.h"
#include "cql.h"
#include "ast.h"
#include "cql.y.h"
#include "sem.h"
#include "charbuf.h"
#include "bytebuf.h"
#include "list.h"
#include "gen_sql.h"
#include "symtab.h"
#include "eval.h"
#include "rewrite.h"
#include "printf.h"
// Forward declarations of file-local (static) helpers so they can be
// referenced before their definitions.
static ast_node *rewrite_gen_arg_list(charbuf *format_buf, CSTR cursor_name, CSTR col_name, sem_t type);
static ast_node *rewrite_gen_printf_call(CSTR format, ast_node *arg_list);
static ast_node *rewrite_gen_cursor_printf(ast_node *variable);
static ast_node *rewrite_gen_iif_case_expr(ast_node *expr, ast_node *val1, ast_node *val2);
static ast_node *rewrite_gen_case_expr(ast_node *var1, ast_node *var2, bool_t report_column_name);
static bool_t rewrite_one_def(ast_node *head);
static void rewrite_one_typed_name(ast_node *typed_name, symtab *used_names);
static void rewrite_from_shape_args(ast_node *head);
// @PROC may stand in for an identifier in several places.  The string node
// is handed to process_proclit; when that returns a non-NULL name the node's
// value is overwritten with it, otherwise the node's value is left untouched.
cql_noexport void rewrite_proclit(ast_node *ast) {
  Contract(is_ast_str(ast));
  EXTRACT_STRING(name, ast);

  CSTR replacement = process_proclit(ast, name);
  if (!replacement) {
    return;
  }

  str_ast_node *str_node = (str_ast_node *)ast;
  str_node->value = replacement;
}
// Expands the FROM [shape] part of a columns_values node into an explicit
// insert list made of "shape.column" dot expressions.
//
// Arguments:
//  * ast        - the columns_values node; its right child is replaced
//  * from_shape - the from_shape node (left: column spec, right: the shape)
//  * count      - how many values are needed; must be > 0 (Contract)
//
// Checks made here:
//  * the shape must have storage (SEM_TYPE_HAS_SHAPE_STORAGE is set),
//    otherwise CQL0298 is reported and both shape and ast are marked in error
//  * the from_shape column list must have at least `count` names,
//    otherwise CQL0299 is reported and ast is marked in error
//
// When those hold, the first `count` names of the from_shape column list are
// each turned into a dot expression so that, for instance,
//    FETCH C(a,b) FROM D
// becomes
//    FETCH C(a,b) FROM VALUES(D.col1, D.col2)
// and it can then be type checked as usual.
cql_noexport void rewrite_insert_list_from_shape(ast_node *ast, ast_node *from_shape, uint32_t count) {
  Contract(is_ast_columns_values(ast));
  Contract(is_ast_from_shape(from_shape));
  Contract(count > 0);
  EXTRACT_ANY_NOTNULL(shape, from_shape->right);

  // from_shape must have the columns
  if (!(shape->sem->sem_type & SEM_TYPE_HAS_SHAPE_STORAGE)) {
    report_error(shape, "CQL0298: cannot read from a cursor without fields", shape->sem->name);
    record_error(shape);
    record_error(ast);
    return;
  }

  EXTRACT_ANY_NOTNULL(column_spec, from_shape->left);
  EXTRACT_ANY(name_list, column_spec->left);

  // count the names the shape side is offering
  uint32_t provided_count = 0;
  for (ast_node *item = name_list; item; item = item->right) {
    provided_count++;
  }

  if (provided_count < count) {
    report_error(ast, "CQL0299: [shape] has too few fields", shape->sem->name);
    record_error(ast);
    return;
  }

  AST_REWRITE_INFO_SET(shape->lineno, shape->filename);

  ast_node *insert_list = NULL;
  ast_node *insert_list_tail = NULL;

  // Only the first `count` names are consumed; provided_count >= count was
  // verified above so `item` cannot run off the end of the list.
  // The index is unsigned to match the type of `count`.
  ast_node *item = name_list;
  for (uint32_t i = 0; i < count; i++, item = item->right) {
    EXTRACT_STRING(item_name, item->left);
    ast_node *cname = new_ast_str(shape->sem->name);
    ast_node *col = new_ast_str(item_name);
    ast_node *dot = new_ast_dot(cname, col);

    // append the new dot expression to the insert list
    ast_node *new_tail = new_ast_insert_list(dot, NULL);
    if (insert_list) {
      ast_set_right(insert_list_tail, new_tail);
    }
    else {
      insert_list = new_tail;
    }
    insert_list_tail = new_tail;
  }

  AST_REWRITE_INFO_RESET();

  // the tree is rewritten, semantic analysis can proceed
  ast_set_right(ast, insert_list);

  // temporarily mark the ast ok, there is more checking to do
  record_ok(ast);
}
// A name list may be written as "LIKE x" instead of spelling out the names.
// The point is to take the column names from the type information of a shape
// rather than listing them by hand.  For instance:
//   INSERT INTO T(like C) values(C.x, C.y) is better than
//   INSERT INTO T(x,y) values(C.x, C.y), but better still
//   INSERT INTO T(like C) from C;
//
// This is pure sugar: the LIKE form is replaced here by a generated name list
// so later stages only ever see ordinary names, and the normal semantic checks
// then report any type incompatibility.
//
// The node is marked ok when no LIKE is present or when the rewrite succeeds;
// it is marked in error when the LIKE target cannot be found.
cql_noexport void rewrite_like_column_spec_if_needed(ast_node *columns_values) {
  Contract(is_ast_columns_values(columns_values) || is_ast_from_shape(columns_values));
  EXTRACT_NOTNULL(column_spec, columns_values->left);
  EXTRACT_ANY(like, column_spec->left);

  // nothing to expand, the column spec is already a plain list
  if (!is_ast_like(like)) {
    record_ok(columns_values);
    return;
  }

  ast_node *likeable = sem_find_likeable_ast(like, LIKEABLE_FOR_VALUES);
  if (likeable == NULL) {
    record_error(columns_values);
    return;
  }

  // swap the LIKE node for the full list of the shape's column names
  AST_REWRITE_INFO_SET(like->lineno, like->filename);
  sem_struct *shape_struct = likeable->sem->sptr;
  ast_node *generated_names = rewrite_gen_full_column_list(shape_struct);
  ast_set_left(column_spec, generated_names);
  AST_REWRITE_INFO_RESET();

  record_ok(columns_values);
}
// FROM [shape] is sugar; this is the entry point that triggers replacing it
// with ordinary values pulled from the shape.
//  * By the time we get here the column_spec of columns_values is never null;
//    an absent list has already been turned into an (possibly empty) name list.
//  * If the right side is not a from_shape node there is nothing to do.
//  * An empty target column list is an error (CQL0297).
// Everything else is delegated: we just line up the AST arguments for the
// worker functions and propagate any error onto ast_stmt.
cql_noexport void rewrite_from_shape_if_needed(ast_node *ast_stmt, ast_node *columns_values)
{
  Contract(ast_stmt); // we can record the error on any statement
  Contract(is_ast_columns_values(columns_values));
  EXTRACT_NOTNULL(column_spec, columns_values->left);

  ast_node *source = columns_values->right;
  if (!is_ast_from_shape(source)) {
    record_ok(ast_stmt);
    return;
  }

  // how many target columns were named?
  uint32_t count = 0;
  ast_node *walker = column_spec->left;
  while (walker) {
    count++;
    walker = walker->right;
  }

  if (!count) {
    report_error(source, "CQL0297: FROM [shape] is redundant if column list is empty", NULL);
    record_error(ast_stmt);
    return;
  }

  EXTRACT_NOTNULL(from_shape, columns_values->right);
  EXTRACT_ANY_NOTNULL(shape, from_shape->right);

  sem_any_shape(shape);
  if (is_error(shape)) {
    goto failed;
  }

  // The from [shape] clause carries its own column list, which may be absent
  // or may itself be a LIKE form; normalize that list before using it.
  rewrite_empty_column_list(from_shape, shape->sem->sptr);
  rewrite_like_column_spec_if_needed(from_shape);
  if (is_error(from_shape)) {
    goto failed;
  }

  rewrite_insert_list_from_shape(columns_values, from_shape, count);
  if (is_error(columns_values)) {
    goto failed;
  }

  // provisionally ok, more checking happens after the rewrite
  record_ok(ast_stmt);
  return;

failed:
  record_error(ast_stmt);
}
// Expands every FROM [shape] [LIKE type] entry found in an argument-style
// list into one "shape.column" expression per column.  No type validation
// happens here; that is done after expansion, so mismatched types are not
// our concern yet.  On failure the error is recorded on `head`.
static void rewrite_from_shape_args(ast_node *head) {
  Contract(is_ast_expr_list(head) || is_ast_arg_list(head) || is_ast_insert_list(head));

  // arg_list, insert_list and expr_list nodes are interchangeable in shape, so
  // new nodes are created as expr_list and then retagged with whatever type
  // the head of this list has.
  CSTR list_type = head->type;

  for (ast_node *item = head; item; item = item->right) {
    EXTRACT_ANY_NOTNULL(arg, item->left);
    if (!is_ast_from_shape(arg)) {
      continue;
    }

    EXTRACT_ANY_NOTNULL(shape, arg->left);

    // A shape without storage (e.g. a non automatic cursor) is not rejected
    // here; resolving the '.' expression later produces a good enough error.
    sem_any_shape(shape);
    if (is_error(shape)) {
      record_error(head);
      return;
    }

    // the optional LIKE clause selects which names to use
    ast_node *like_ast = arg->right;
    ast_node *name_source = NULL;
    if (like_ast) {
      name_source = sem_find_likeable_ast(like_ast, LIKEABLE_FOR_VALUES);
      if (!name_source) {
        record_error(head);
        return;
      }
    }

    AST_REWRITE_INFO_SET(shape->lineno, shape->filename);

    // names come from the LIKE target when present, else from the shape itself
    sem_struct *sptr = name_source ? name_source->sem->sptr : shape->sem->sptr;
    uint32_t count = sptr->count;

    for (uint32_t i = 0; i < count; i++) {
      ast_node *cname = new_ast_str(shape->sem->name);
      ast_node *col = new_ast_str(sptr->names[i]);
      ast_node *dot = new_ast_dot(cname, col);

      if (i == 0) {
        // the first column takes the place of the FROM shape node
        ast_set_left(item, dot);
        continue;
      }

      // later columns are spliced in right after the current position and
      // `item` advances so it ends up on the last node we inserted
      ast_node *spliced = new_ast_expr_list(dot, item->right);
      spliced->type = list_type;
      ast_set_right(item, spliced);
      item = spliced;
    }

    AST_REWRITE_INFO_RESET();
  }

  // at least provisionally ok
  record_ok(head);
}
// Scans a col_key_list for "LIKE table/proc/view" entries and expands each
// one into the columns of the named table/proc/view.  Returns false as soon
// as one expansion fails, true if the whole list was processed.
cql_noexport bool_t rewrite_col_key_list(ast_node *head) {
  ast_node *ast = head;

  while (ast) {
    Contract(is_ast_col_key_list(ast));

    if (is_ast_like(ast->left) && !rewrite_one_def(ast)) {
      return false;
    }

    ast = ast->right;
  }

  return true;
}
// There is a LIKE [table/view/proc] used to create a table so we
// - Look up the parameters to the table/view/proc
// - Create a col_def node for each field of the table/view/proc
// - Reconstruct the ast
cql_noexport bool_t rewrite_one_def(ast_node *head) {
Contract(is_ast_col_key_list(head));
Contract(is_ast_like(head->left));
EXTRACT_NOTNULL(like, head->left);
EXTRACT_STRING(like_name, like->left);
// it's ok to use the LIKE construct on old tables
ast_node *likeable_shape = sem_find_likeable_ast(like, LIKEABLE_FOR_VALUES);
if (!likeable_shape) {
record_error(head);
return false;
}
AST_REWRITE_INFO_SET(like->lineno, like->filename);
// Store the remaining nodes while we reconstruct the AST
EXTRACT_ANY(right_ast, head->right);
sem_struct *sptr = likeable_shape->sem->sptr;
uint32_t count = sptr->count;
for (int32_t i = 0; i < count; i++) {
sem_t sem_type = sptr->semtypes[i];
CSTR col_name = sptr->names[i];
// Construct a col_def using name and core semantic type
ast_node *data_type = rewrite_gen_data_type(core_type_of(sem_type), NULL);
ast_node *name_ast = new_ast_str(col_name);
ast_node *name_type = new_ast_col_def_name_type(name_ast, data_type);
// In the case of columns the ast has col attributes to represent
// not null and sensitive so we add those after we've already
// added the basic data type above
ast_node *attrs = NULL;
if (is_not_nullable(sem_type)) {
attrs = new_ast_col_attrs_not_null(NULL, NULL);
}
if (sensitive_flag(sem_type)) {
// link it in, in case not null was also in play
attrs = new_ast_sensitive_attr(NULL, attrs);
}
ast_node *col_def_type_attrs = new_ast_col_def_type_attrs(name_type, attrs);
ast_node *col_def = new_ast_col_def(col_def_type_attrs, NULL);
if (i) {
ast_node *new_head = new_ast_col_key_list(col_def, NULL);
ast_set_right(head, new_head);
head = new_head;
} else {
Invariant(is_ast_col_key_list(head));
Invariant(is_ast_like(head->left));
// replace the like entry with a col_def
// on the next iteration, we will insert to the right of ast
ast_set_right(head, NULL);
ast_set_left(head, col_def);
}
}
AST_REWRITE_INFO_RESET();
// Put the stored columns at the 'tail' of the linked list
ast_set_right(head, right_ast);
return true;
}
// Give the best name for the shape type given then AST
// there are many casese, the best data is on the struct type unless
// it's anonymous, in which case the item name is the best choice.
CSTR static best_shape_type_name(ast_node *shape) {
Contract(shape->sem);
Contract(shape->sem->sptr);
CSTR struct_name = shape->sem->sptr->struct_name;
CSTR obj_name = shape->sem->name;
// "select" is the generic name used for structs that are otherwise unnamed.
// e.g. "declare C cursor like select 1 x, 2 y"
if (struct_name && strcmp("select", struct_name)) {
return struct_name;
}
else {
// use "select" only as a last recourse, it means some anonymous shape
return obj_name ? obj_name : "select";
}
}
// Here we have found a "like T" parameter that needs to be rewritten with
// the various columns of T.  We do this by:
//  * looking up "T" (this is the only thing that can go wrong)
//  * replacing the "like T" slug with a param node for the first column of T
//  * for each additional column, creating a param node and linking it in
//  * emitting any given name only once (so you can do like T1, like T1 even
//    if both have the same pk)
//  * giving arg names a _ suffix so they don't conflict with column names
//
// Arguments:
//  * param       - the param node whose param_detail holds the LIKE node
//  * param_names - names already used in the parameter list; updated here
//  * args_info   - optional; when non-NULL three values are appended for each
//                  emitted parameter: original name, shape name, shape type
//
// Returns the last param node that was modified.  If the LIKE target is not
// found, the error is recorded on `param` and `param` itself is returned.
static ast_node *rewrite_one_param(ast_node *param, symtab *param_names, bytebuf *args_info) {
  Contract(is_ast_param(param));
  EXTRACT_NOTNULL(param_detail, param->right);
  EXTRACT_ANY(shape_name_ast, param_detail->left);
  EXTRACT_NOTNULL(like, param_detail->right);
  EXTRACT_STRING(like_name, like->left);

  ast_node *likeable_shape = sem_find_likeable_ast(like, LIKEABLE_FOR_ARGS);
  if (!likeable_shape) {
    record_error(param);
    return param;
  }

  AST_REWRITE_INFO_SET(like->lineno, like->filename);

  // Nothing can go wrong from here on
  record_ok(param);

  sem_struct *sptr = likeable_shape->sem->sptr;
  uint32_t count = sptr->count;
  bool_t first_rewrite = true;
  CSTR shape_name = "";
  CSTR shape_type = best_shape_type_name(likeable_shape);

  if (shape_name_ast) {
    // the form is "name LIKE T": register the named argument bundle
    EXTRACT_STRING(sname, shape_name_ast);
    shape_name = sname;
    ast_node *shape_ast = new_ast_str(shape_name);
    shape_ast->sem = likeable_shape->sem;
    sem_add_flags(shape_ast, SEM_TYPE_HAS_SHAPE_STORAGE); // the arg bundle has storage!
    shape_ast->sem->name = shape_name;
    add_arg_bundle(shape_ast, shape_name);
  }

  // the index is unsigned to match the type of `count`
  for (uint32_t i = 0; i < count; i++) {
    sem_t sem_type = sptr->semtypes[i];
    CSTR param_name = sptr->names[i];
    CSTR param_kind = sptr->kinds[i];
    CSTR original_name = param_name;

    if (shape_name[0]) {
      // the original name in this form has to be compound to disambiguate
      param_name = dup_printf("%s_%s", shape_name, param_name);

      // note we skip none of these, if the names conflict that is an error:
      // e.g. if you make an arg like x_y and you then have a shape named x
      // with a field y you'll get an error
      symtab_add(param_names, param_name, NULL);
    }
    else {
      // If the shape came from a procedure we keep the args unchanged
      // If the shape came from a data type or cursor then we add _
      // The idea here is that if it came from a procedure we want to keep the same signature
      // exactly and if any _ needed to be added to avoid conflict with a column name then it already was.
      if (!(sem_type & (SEM_TYPE_IN_PARAMETER | SEM_TYPE_OUT_PARAMETER))) {
        param_name = dup_printf("%s_", param_name);
      }

      // skip any that we have already added or that are manually present
      if (!symtab_add(param_names, param_name, NULL)) {
        continue;
      }
    }

    if (args_info) {
      // args info uses the cleanest version of the name, no trailing _
      bytebuf_append_var(args_info, original_name);
      bytebuf_append_var(args_info, shape_name);
      bytebuf_append_var(args_info, shape_type);
    }

    ast_node *type = rewrite_gen_data_type(sem_type, param_kind);
    ast_node *name_ast = new_ast_str(param_name);
    ast_node *param_detail_new = new_ast_param_detail(name_ast, type);

    ast_node *inout = NULL; // IN by default
    if (sem_type & SEM_TYPE_OUT_PARAMETER) {
      if (sem_type & SEM_TYPE_IN_PARAMETER) {
        inout = new_ast_inout();
      }
      else {
        inout = new_ast_out();
      }
    }

    if (!first_rewrite) {
      // for the 2nd and subsequent args make a new node and link it in
      // right after the params node that holds the current param
      ast_node *params = param->parent;
      ast_node *new_param = new_ast_param(inout, param_detail_new);
      ast_set_right(params, new_ast_params(new_param, params->right));
      param = new_param;
    }
    else {
      // for the first arg, just replace the param details
      // recall that we are on a param node and it is the like entry
      Invariant(is_ast_param(param));

      // replace the like entry with a real param detail
      // on the next iteration, we will insert to the right of this param
      ast_set_right(param, param_detail_new);
      ast_set_left(param, inout);
      first_rewrite = false;
    }

    record_ok(param);
  }

  // There's a chance we did nothing.  If that happens we still have to remove the like node.
  // If we did anything the like node is already gone.
  if (first_rewrite) {
    // since this can only happen if there is 100% duplication, that means there is always a previous parameter
    // if this were the first node we would have expanded ... something
    EXTRACT_NOTNULL(params, param->parent);
    EXTRACT_NAMED_NOTNULL(tail, params, params->parent);
    ast_set_right(tail, params->right);
  }

  AST_REWRITE_INFO_RESET();

  // this is the last param that we modified
  return param;
}
// "@proc" (matched case-insensitively) denotes the name of the procedure
// currently being processed.  It can show up either as a literal string or
// where an id is expected.
//  * any other name is returned unchanged and `ast` is marked ok
//  * "@proc" inside a procedure yields that procedure's name, `ast` marked ok
//  * "@proc" outside of a procedure reports CQL0252, marks `ast` in error
//    and returns NULL
cql_noexport CSTR process_proclit(ast_node *ast, CSTR name) {
  // not the special token, pass the name straight through
  if (Strcasecmp(name, "@proc")) {
    record_ok(ast);
    return name;
  }

  if (!current_proc) {
    report_error(ast, "CQL0252: @PROC literal can only appear inside of procedures", NULL);
    record_error(ast);
    return NULL;
  }

  ast_node *name_ast = get_proc_name(current_proc);
  EXTRACT_STRING(proc_name, name_ast);

  record_ok(ast);
  return proc_name;
}
// Builds the AST for a data type from a semantic type.
//  * the core type selects the type node; `kind` (may be NULL) becomes the
//    optional kind string attached to it
//  * a not-nullable type is wrapped in a notnull node
//  * a sensitive type is then wrapped in a sensitive attribute node
// The core type must be one of the cases handled below (Invariant).
cql_noexport ast_node *rewrite_gen_data_type(sem_t sem_type, CSTR kind) {
  ast_node *ast = NULL;

  ast_node *kind_ast = NULL;
  if (kind) {
    kind_ast = new_ast_str(kind);
  }

  switch (core_type_of(sem_type)) {
    case SEM_TYPE_INTEGER:
      ast = new_ast_type_int(kind_ast);
      break;

    case SEM_TYPE_TEXT:
      ast = new_ast_type_text(kind_ast);
      break;

    case SEM_TYPE_LONG_INTEGER:
      ast = new_ast_type_long(kind_ast);
      break;

    case SEM_TYPE_REAL:
      ast = new_ast_type_real(kind_ast);
      break;

    case SEM_TYPE_BOOL:
      ast = new_ast_type_bool(kind_ast);
      break;

    case SEM_TYPE_BLOB:
      ast = new_ast_type_blob(kind_ast);
      break;

    case SEM_TYPE_OBJECT:
      ast = new_ast_type_object(kind_ast);
      break;
  }

  Invariant(ast);

  // nullability is applied first, sensitivity wraps the result
  if (is_not_nullable(sem_type)) {
    ast = new_ast_notnull(ast);
  }

  if (sensitive_flag(sem_type)) {
    ast = new_ast_sensitive_attr(ast, NULL);
  }

  return ast;
}
// If no name list was given then fake one so that both paths are the same:
// no name list is the same as all the names.
//
// Builds a name_list with one entry per column of `sptr`, in order, skipping
// any column flagged SEM_TYPE_HIDDEN_COL.  Returns NULL if no column
// qualifies.  `sptr` must not be NULL (Contract).
cql_noexport ast_node *rewrite_gen_full_column_list(sem_struct *sptr) {
  Contract(sptr);
  ast_node *name_list = NULL;
  ast_node *name_list_tail = NULL;

  // the index is unsigned to match the type of the column count
  uint32_t count = sptr->count;
  for (uint32_t i = 0; i < count; i++) {
    if (sptr->semtypes[i] & SEM_TYPE_HIDDEN_COL) {
      continue;
    }

    ast_node *ast_col = new_ast_str(sptr->names[i]);

    // add name to the name list
    ast_node *new_tail = new_ast_name_list(ast_col, NULL);
    if (name_list) {
      ast_set_right(name_list_tail, new_tail);
    }
    else {
      name_list = new_tail;
    }

    name_list_tail = new_tail;
  }

  return name_list;
}
// Converts an expr_names node, in place, into a columns_values node.
// e.g: fetch C using 1 a, 2 b, 3 c; ==> fetch C (a,b,c) values (1, 2, 3);
// The aliases become the name list and the expressions become the insert list.
cql_noexport void rewrite_expr_names_to_columns_values(ast_node* columns_values) {
  Contract(is_ast_expr_names(columns_values));

  AST_REWRITE_INFO_SET(columns_values->lineno, columns_values->filename);

  EXTRACT(expr_names, columns_values);

  // move to the last entry of the list first
  while (expr_names->right) {
    expr_names = expr_names->right;
  }

  ast_node *name_list = NULL;
  ast_node *insert_list = NULL;

  // Then walk back toward the head using the parent links, prepending as we
  // go, so both lists come out in source order.  We stop once the parent is
  // no longer an expr_names node.
  for (;;) {
    EXTRACT(expr_name, expr_names->left);
    EXTRACT_ANY(expr, expr_name->left);
    EXTRACT_ANY(as_alias, expr_name->right);
    EXTRACT_ANY_NOTNULL(name, as_alias->left);

    name_list = new_ast_name_list(name, name_list);
    insert_list = new_ast_insert_list(expr, insert_list);

    expr_names = expr_names->parent;
    if (!is_ast_expr_names(expr_names)) {
      break;
    }
  }

  // build a throw-away columns_values node and let the original node take
  // over its type and children
  ast_node *opt_column_spec = new_ast_column_spec(name_list);
  ast_node *donor = new_ast_columns_values(opt_column_spec, insert_list);

  columns_values->type = donor->type;
  ast_set_left(columns_values, donor->left);
  ast_set_right(columns_values, donor->right);

  AST_REWRITE_INFO_RESET();
}
// Converts a select statement node, in place, into a columns_values node.
// e.g: insert into X using select 1 a, 2 b, 3 c; ==> insert into X (a,b,c) values (1, 2, 3);
// The select's result column names become the name list and a copy of the
// select statement becomes the right side of the new columns_values.
cql_noexport void rewrite_select_stmt_to_columns_values(ast_node* columns_values) {
  EXTRACT_ANY_NOTNULL(select_stmt, columns_values);
  Contract(is_select_stmt(select_stmt));

  AST_REWRITE_INFO_SET(columns_values->lineno, columns_values->filename);

  ast_node *name_list = NULL;

  Invariant(select_stmt->sem);
  Invariant(select_stmt->sem->sptr);
  sem_struct *sptr = select_stmt->sem->sptr;

  // go through the names last-to-first, prepending each one, so the finished
  // list is in column order
  for (int32_t i = (int32_t)sptr->count - 1; i >= 0; i--) {
    ast_node *name_ast = new_ast_str(sptr->names[i]);
    name_list = new_ast_name_list(name_ast, name_list);
  }

  // The node we were given is about to change type, so the select content
  // has to move into a fresh node that can be pushed down the tree.
  ast_node *select_copy = new_ast_select_stmt(select_stmt->left, select_stmt->right);
  select_copy->type = select_stmt->type;

  // a temporary columns_values node holding the names plus the copied select
  ast_node *opt_column_spec = new_ast_column_spec(name_list);
  ast_node *donor = new_ast_columns_values(opt_column_spec, select_copy);

  // The original node now becomes a true columns_values node by taking over
  // the type and children of the temporary one.
  columns_values->type = donor->type;
  ast_set_left(columns_values, donor->left);
  ast_set_right(columns_values, donor->right);

  AST_REWRITE_INFO_RESET();
}
// There are two reasons the columns might be missing.  A form like this:
//    INSERT C FROM VALUES(...);
// or
//    INSERT C() FROM VALUES() @dummy_seed(...)
//
// The first form is shorthand for saying that all of the columns are present.
// It is expanded into something like FETCH C(x,y,z) FROM VALUES(....)
//
// The second form says that NO values are specified at all.  That might be ok
// if every column has a default value, or if dummy data is used.  When dummy
// data is present, any necessary but missing columns are provided using the
// seed variable.  The same rules apply to the FETCH statement.
//
// So these kinds of cases:
//   FETCH C FROM VALUES(...)  // all values are specified
//   FETCH C() FROM VALUES() @dummy_seed(...) -- NO values are specified, all dummy
//
// If you add FROM ARGUMENTS to this situation, the arguments take the place of
// the values.  Each specified column causes an argument to be used as a value,
// in the declared order.  The usual type checking is done.
//
// So we have these kinds of cases:
//  FETCH C FROM ARGUMENTS  -- args are covering everything (dummy data not applicable as usual)
//  FETCH C() FROM ARGUMENTS @dummy_seed(...)  -- error, args can't possibly be used, no columns specified
//  FETCH C() FROM VALUES() @dummy_seed(...)  -- all values are dummy
//  FETCH C(x,y) FROM VALUES(1,2) @dummy_seed(...)  -- x, y from values, the rest are dummy
//  FETCH C(x,y) FROM ARGUMENTS @dummy_seed(...)  -- x,y from args, the rest are dummy
//
// This is harder to explain than it is to code: if the column spec is absent
// we install one that lists every column of `sptr`; otherwise nothing changes.
cql_noexport void rewrite_empty_column_list(ast_node *columns_values, sem_struct *sptr)
{
  Invariant(is_ast_columns_values(columns_values) || is_ast_from_shape(columns_values));
  EXTRACT(column_spec, columns_values->left);

  AST_REWRITE_INFO_SET(columns_values->lineno, columns_values->filename);

  if (column_spec == NULL) {
    // no list was specified, always make the full list
    ast_node *all_names = rewrite_gen_full_column_list(sptr);
    column_spec = new_ast_column_spec(all_names);
    ast_set_left(columns_values, column_spec);
  }

  AST_REWRITE_INFO_RESET();
}
// Expands any FROM [shape] forms in the given list, if there is a list.
// The result is reported with a boolean rather than through the tree because
// arg_list may legitimately be NULL, leaving no node to carry the error.
// Returns true for a NULL list or a successful rewrite, false on error.
cql_noexport bool_t rewrite_shape_forms_in_list_if_needed(ast_node *arg_list) {
  if (!arg_list) {
    return true;
  }

  // expand whatever cursor/shape forms are present in the list
  rewrite_from_shape_args(arg_list);

  return is_error(arg_list) ? false : true;
}
// Rewrites a call to cql_cursor_format(X) into a printf(format, arg ...) call.
// e.g: C cursor has column text x, y
//   cql_cursor_format(C); ===> printf("x:%s|y:%s", C.x, C.y);
// The call node is converted in place and then semantically analyzed.
cql_noexport void rewrite_cql_cursor_format(ast_node *ast) {
  Contract(is_ast_call(ast));
  EXTRACT_ANY_NOTNULL(name_ast, ast->left);
  EXTRACT_STRING(name, name_ast);
  EXTRACT_NOTNULL(call_arg_list, ast->right);
  EXTRACT(arg_list, call_arg_list->right);
  Contract(!Strcasecmp("cql_cursor_format", name));

  ast_node *cursor_arg = first_arg(arg_list);

  // generate the replacement tree, attributed to the original call site
  AST_REWRITE_INFO_SET(name_ast->lineno, name_ast->filename);
  ast_node *replacement = rewrite_gen_cursor_printf(cursor_arg);
  AST_REWRITE_INFO_RESET();

  // the existing call node takes on the type and children of the
  // generated node
  ast->type = replacement->type;
  ast_set_left(ast, replacement->left);
  ast_set_right(ast, replacement->right);

  // validate the rewritten AST; it is not expected to have any semantic error
  sem_expr(ast);
  Invariant(!is_error(ast));
}
// Rewrites a call to cql_cursor_diff_col(X,Y) or cql_cursor_diff_val(X,Y)
// into a case expression.
// e.g: C1 and C2 are two cursor variables with the same shape
//   cql_cursor_diff_xxx(C1, C2); ===> CASE WHEN C1.x IS NOT C2.x THEN 'x' WHEN C1.y IS NOT C2.y THEN 'y'
// report_column_name is passed through to the case expression generator.
// The call node is converted in place and then semantically analyzed.
cql_noexport void rewrite_cql_cursor_diff(ast_node *ast, bool_t report_column_name) {
  Contract(is_ast_call(ast));
  EXTRACT_ANY_NOTNULL(name_ast, ast->left);
  EXTRACT_STRING(name, name_ast);
  EXTRACT_NOTNULL(call_arg_list, ast->right);
  EXTRACT(arg_list, call_arg_list->right);
  Contract(
    !Strcasecmp("cql_cursor_diff_col", name) ||
    !Strcasecmp("cql_cursor_diff_val", name));

  ast_node *left_cursor = first_arg(arg_list);
  ast_node *right_cursor = second_arg(arg_list);

  // generate the replacement tree, attributed to the original call site
  AST_REWRITE_INFO_SET(name_ast->lineno, name_ast->filename);
  ast_node *replacement = rewrite_gen_case_expr(left_cursor, right_cursor, report_column_name);
  AST_REWRITE_INFO_RESET();

  // the existing call node becomes the case expression
  ast->type = replacement->type;
  ast_set_left(ast, replacement->left);
  ast_set_right(ast, replacement->right);

  // validate the rewritten AST; it is not expected to have any semantic error
  sem_expr(ast);
  Invariant(!is_error(ast));
}
// Rewrites an iif call node, in place, into a case expression node, e.g.:
//
//   iif(X, Y, Z) => CASE WHEN X THEN Y ELSE Z END;
//
// Checking that the call really has the three required arguments is the
// caller's job.  No semantic analysis is done here at all: other rewrite
// functions call `sem_expr` to validate their output, but here the rewritten
// expression may well be semantically invalid because of an error in the
// input program, so that is left for the caller to deal with.
cql_noexport void rewrite_iif(ast_node *ast) {
  Contract(is_ast_call(ast));
  EXTRACT_ANY_NOTNULL(name_ast, ast->left);
  EXTRACT_STRING(name, name_ast);
  EXTRACT_NOTNULL(call_arg_list, ast->right);
  EXTRACT(arg_list, call_arg_list->right);

  ast_node *condition = first_arg(arg_list);
  ast_node *when_true = second_arg(arg_list);
  ast_node *when_false = third_arg(arg_list);

  // generate the replacement tree, attributed to the original call site
  AST_REWRITE_INFO_SET(name_ast->lineno, name_ast->filename);
  ast_node *replacement = rewrite_gen_iif_case_expr(condition, when_true, when_false);
  AST_REWRITE_INFO_RESET();

  // the existing call node becomes the case expression
  ast->type = replacement->type;
  ast_set_left(ast, replacement->left);
  ast_set_right(ast, replacement->right);
}
// The form being rewritten here is
//   with cte(*) as (select 1 a, 2 b) select * from cte;
// If every column in the select's projection already has a name ("a, b" in
// this case) there is no reason to name them all again in the cte definition.
// That is, with cte(a,b) as (select 1 a, 2 b) is redundant.
// With dozens of names it becomes a real problem to keep the two lists
// matching and in the right order; the (*) form avoids all of that.  Even if
// the columns are selected in the wrong order it doesn't matter because they
// are fetched by name from the CTE anyway.  With a union, the additional
// enforcement that names match on each branch locks in the correct columns.
// All we have to do is:
//  * make sure all the columns have a name and a reasonable type
//  * make a name list for the column names
//  * swap it in
cql_noexport void rewrite_cte_name_list_from_columns(ast_node *ast, ast_node *select_core) {
  Contract(is_ast_cte_decl(ast));
  EXTRACT_NOTNULL(star, ast->right);

  sem_verify_no_anon_no_null_columns(select_core);
  if (is_error(select_core)) {
    record_error(ast);
    return;
  }

  // replace the star with the names of the select's columns
  AST_REWRITE_INFO_SET(star->lineno, star->filename);
  ast_node *generated_names = rewrite_gen_full_column_list(select_core->sem->sptr);
  ast_set_right(ast, generated_names);
  AST_REWRITE_INFO_RESET();

  record_ok(ast);
}
// Here we have found a "like T" typed name that needs to be rewritten with
// the various columns of T.  We do this by:
//  * looking up "T" (this is the only thing that can go wrong)
//  * replacing the "like T" slug with the first column of T
//  * for each additional column, creating a typed name node and linking it in
//  * emitting any given name only once (so you can do like T1, like T1 even
//    if both have the same pk)
//
// Arguments:
//  * typed_name - the typed_name node whose right child is the LIKE node; its
//                 left child, if present, is a shape name used as a prefix
//  * used_names - names already emitted in this list; updated here
//
// The result is recorded on `typed_name`: error if the LIKE target is not
// found, ok otherwise.
static void rewrite_one_typed_name(ast_node *typed_name, symtab *used_names) {
  Contract(is_ast_typed_name(typed_name));
  EXTRACT_ANY(shape_name_ast, typed_name->left);
  EXTRACT_NOTNULL(like, typed_name->right);
  EXTRACT_STRING(like_name, like->left);

  ast_node *found_shape = sem_find_likeable_ast(like, LIKEABLE_FOR_VALUES);
  if (!found_shape) {
    record_error(typed_name);
    return;
  }

  AST_REWRITE_INFO_SET(like->lineno, like->filename);

  // Nothing can go wrong from here on
  record_ok(typed_name);

  sem_struct *sptr = found_shape->sem->sptr;
  uint32_t count = sptr->count;
  bool_t first_rewrite = true;
  CSTR shape_name = "";

  // the node after which (or in place of which) the next name is inserted
  ast_node *insertion = typed_name;

  if (shape_name_ast) {
    EXTRACT_STRING(sname, shape_name_ast);
    shape_name = sname;

    // note that typed names are part of a procedure return type in a declaration
    // they don't create a proc or a proc body and so we don't add to arg_bundles,
    // indeed arg_bundles is null at this point
  }

  // the index is unsigned to match the type of `count`
  for (uint32_t i = 0; i < count; i++) {
    sem_t sem_type = sptr->semtypes[i];
    CSTR name = sptr->names[i];
    CSTR kind = sptr->kinds[i];
    CSTR combined_name = name;

    if (shape_name[0]) {
      // with a shape name present the emitted name is shapename_column
      combined_name = dup_printf("%s_%s", shape_name, name);
    }

    // skip any that we have already added or that are manually present
    if (!symtab_add(used_names, combined_name, NULL)) {
      continue;
    }

    ast_node *name_ast = new_ast_str(combined_name);
    ast_node *type = rewrite_gen_data_type(sem_type, kind);
    ast_node *new_typed_name = new_ast_typed_name(name_ast, type);
    ast_node *typed_names = insertion->parent;

    if (!first_rewrite) {
      // later names get a new list node right after the last one we touched
      ast_set_right(typed_names, new_ast_typed_names(new_typed_name, typed_names->right));
    }
    else {
      // the first name takes the place of the LIKE entry
      ast_set_left(typed_names, new_typed_name);
      first_rewrite = false;
    }

    insertion = new_typed_name;
  }

  // There's a chance we did nothing.  If that happens we still have to remove the like node.
  // If we did anything the like node is already gone.
  if (first_rewrite) {
    // since this can only happen if there is 100% duplication, that means there is always a previous typed name
    // if this were the first node we would have expanded ... something
    EXTRACT_NOTNULL(typed_names, typed_name->parent);
    EXTRACT_NAMED_NOTNULL(tail, typed_names, typed_names->parent);
    ast_set_right(tail, typed_names->right);
  }

  AST_REWRITE_INFO_RESET();
}
// Expand every "LIKE T" entry found in a typed_names list.
//
// The list is visited node by node.  An entry whose type position holds a LIKE
// form is handed to rewrite_one_typed_name together with the set of names seen
// so far; an ordinary entry simply has its name added to that set.  If any
// expansion reports an error the error is recorded on `head` and the walk
// stops, otherwise `head` is marked ok.
cql_noexport void rewrite_typed_names(ast_node *head) {
  symtab *used_names = symtab_new();
  bool_t failed = false;
  ast_node *item = head;

  while (item) {
    Contract(is_ast_typed_names(item));
    EXTRACT_NOTNULL(typed_name, item->left);

    if (!is_ast_like(typed_name->right)) {
      // Ordinary entry: no rewrite, just remember that the name is taken.
      EXTRACT_STRING(name, typed_name->left);
      symtab_add(used_names, name, NULL);
    }
    else {
      rewrite_one_typed_name(typed_name, used_names);
      if (is_error(typed_name)) {
        failed = true;
        break;
      }
    }

    // Advance only after any rewrite so that we follow the current links.
    item = item->right;
  }

  if (failed) {
    record_error(head);
  }
  else {
    record_ok(head);
  }

  // The name set is only needed while walking the list.
  symtab_delete(used_names);
}
// Expand every "LIKE T" parameter found in a params list.
//
// head:      first node of the params list
// args_info: optional buffer; when present, each ordinary parameter appends
//            three strings to it: its name followed by an empty shape name
//            and an empty shape type.  LIKE parameters pass the buffer on to
//            rewrite_one_param.
//
// On any failed expansion the error is recorded on `head` and the walk stops,
// otherwise `head` is marked ok.
cql_noexport void rewrite_params(ast_node *head, bytebuf *args_info) {
  symtab *param_names = symtab_new();
  bool_t failed = false;

  for (ast_node *item = head; item; item = item->right) {
    Contract(is_ast_params(item));
    EXTRACT_NOTNULL(param, item->left);
    EXTRACT_NOTNULL(param_detail, param->right);

    if (!is_ast_like(param_detail->right)) {
      // Ordinary parameter: nothing to rewrite, just record the name.
      EXTRACT_STRING(param_name, param_detail->left);
      CSTR shape_name = "";
      CSTR shape_type = "";

      if (args_info) {
        bytebuf_append_var(args_info, param_name);
        bytebuf_append_var(args_info, shape_name);
        bytebuf_append_var(args_info, shape_type);
      }

      symtab_add(param_names, param_name, NULL);
      continue;
    }

    param = rewrite_one_param(param, param_names, args_info);
    if (is_error(param)) {
      failed = true;
      break;
    }

    // Resume the walk from the params node that owns the returned param.
    item = param->parent;
    Invariant(is_ast_params(item));
  }

  if (failed) {
    record_error(head);
  }
  else {
    record_ok(head);
  }

  symtab_delete(param_names);
}
// Returns the printf conversion used to print a value of the given type.
// Only the core type is considered.  Integer and bool use "%d", long integer
// and blob use "%lld", real uses "%f" and text uses "%s".  Any other core type
// trips the invariant.
static CSTR coretype_format(sem_t sem_type) {
  sem_t core = core_type_of(sem_type);
  CSTR fmt = NULL;

  if (core == SEM_TYPE_INTEGER || core == SEM_TYPE_BOOL) {
    fmt = "%d";
  }
  else if (core == SEM_TYPE_BLOB || core == SEM_TYPE_LONG_INTEGER) {
    fmt = "%lld";
  }
  else if (core == SEM_TYPE_REAL) {
    fmt = "%f";
  }
  else if (core == SEM_TYPE_TEXT) {
    fmt = "%s";
  }

  Invariant(fmt);
  return fmt;
}
// Builds the single-element arg_list for a printf(...) call that prints one
// cursor column, and appends the matching conversion to `format_buf`.
//
// A blob column is not printed directly: the argument becomes
// cql_get_blob_size(<cursor>.<col>) and the format text is "length <fmt> blob".
// For every other type the argument is <cursor>.<col> and the format text is
// just the conversion from coretype_format.
static ast_node* rewrite_gen_arg_list(charbuf* format_buf, CSTR cusor_name, CSTR col_name, sem_t type) {
  bool_t blob = is_blob(type);
  ast_node *value = new_ast_dot(new_ast_str(cusor_name), new_ast_str(col_name));

  if (blob) {
    // wrap the column reference: cql_get_blob_size(C.x)
    ast_node *size_args = new_ast_arg_list(value, NULL);
    ast_node *size_call_args =
      new_ast_call_arg_list(new_ast_call_filter_clause(NULL, NULL), size_args);
    value = new_ast_call(new_ast_str("cql_get_blob_size"), size_call_args);
  }

  CSTR conversion = coretype_format(type);
  if (blob) {
    bprintf(format_buf, "length %s blob", conversion);
  }
  else {
    bprintf(format_buf, "%s", conversion);
  }

  return new_ast_arg_list(value, NULL);
}
// Builds the AST for printf('<format>', <arg_list>...).
//
// The format text is wrapped in single quotes and becomes the first argument;
// `arg_list` (which may be NULL) supplies the remaining arguments.  Callers in
// this file use it with the output of rewrite_gen_arg_list, e.g.
//   text column C.x  ==>  printf('%s', C.x)
//   blob column C.x  ==>  printf('length %lld blob', cql_get_blob_size(C.x))
static ast_node* rewrite_gen_printf_call(CSTR format, ast_node *arg_list) {
  // the quoted format string is the leading argument
  ast_node *format_lit = new_ast_str(dup_printf("'%s'", format));
  ast_node *all_args = new_ast_arg_list(format_lit, arg_list);

  // a call node carries an (empty) filter clause alongside its arguments
  ast_node *filter = new_ast_call_filter_clause(NULL, NULL);
  ast_node *call_args = new_ast_call_arg_list(filter, all_args);

  return new_ast_call(new_ast_str("printf"), call_args);
}
// Wraps `ast` in a call: nullable(<ast>).  `ast` must not be NULL.
static ast_node *rewrite_gen_nullable(ast_node *ast) {
  Contract(ast);

  ast_node *filter = new_ast_call_filter_clause(NULL, NULL);
  ast_node *args = new_ast_arg_list(ast, NULL);
  ast_node *call_args = new_ast_call_arg_list(filter, args);

  return new_ast_call(new_ast_str("nullable"), call_args);
}
// Builds the printf(...) call that replaces cql_cursor_format(X) when it is
// used in a sql context.
//
// The outer format is "<col>:%s" for each column, joined with "|".  Each
// argument is a CASE expression that yields 'null' when the column is null and
// otherwise a nested printf(...) that formats the column by its type:
//
//   select cql_cursor_format(C) as p;
//   ===>
//   select printf('x:%s|y:%s',
//            CASE WHEN C.x IS NULL THEN 'null' ELSE printf('%d', C.x) END,
//            CASE WHEN C.y IS NULL THEN 'null' ELSE printf('%s', C.y) END) as p;
static ast_node *rewrite_gen_cursor_printf(ast_node *variable) {
  Contract(is_variable(variable->sem->sem_type));

  sem_struct *sptr = variable->sem->sptr;
  CSTR cursor_name = variable->sem->name;
  int32_t count = (int32_t) sptr->count;
  Invariant(count > 0);

  // Walk the columns backwards so that each new arg_list node can simply be
  // pushed onto the front of the list built so far.
  ast_node *arg_list = NULL;
  int32_t i = count;
  while (i-- > 0) {
    CSTR col_name = sptr->names[i];
    sem_t col_type = sptr->semtypes[i];
    Invariant(col_name);

    // The tested expression is C.col.  A nonnull column is wrapped in
    // nullable(...) so that the IS NULL check does not produce a type error;
    // the check cannot be dropped because even a nonnull column could be null
    // if no row was fetched.
    ast_node *tested = new_ast_dot(new_ast_str(cursor_name), new_ast_str(col_name));
    if (is_not_nullable(col_type)) {
      tested = rewrite_gen_nullable(tested);
    }

    // WHEN <tested> IS NULL THEN 'null'
    ast_node *is_null = new_ast_is(tested, new_ast_null());
    ast_node *when = new_ast_when(is_null, new_ast_str("'null'"));
    ast_node *case_list = new_ast_case_list(when, NULL);

    // ELSE printf('<type format>', C.col)
    CHARBUF_OPEN(value_format);
    ast_node *value_args = rewrite_gen_arg_list(&value_format, cursor_name, col_name, col_type);
    ast_node *value_printf = rewrite_gen_printf_call(value_format.ptr, value_args);
    CHARBUF_CLOSE(value_format);

    // CASE WHEN expr THEN result form; not CASE expr WHEN val THEN result
    ast_node *connector = new_ast_connector(case_list, value_printf);
    ast_node *case_expr = new_ast_case_expr(NULL, connector);

    arg_list = new_ast_arg_list(case_expr, arg_list);
  }

  // The outer format string: name:%s|name:%s|...
  CHARBUF_OPEN(format);
  for (int32_t j = 0; j < count; j++) {
    if (j > 0) {
      bprintf(&format, "|");
    }
    bprintf(&format, "%s:%s", sptr->names[j], "%s");
  }
  CSTR format_lit = dup_printf("'%s'", format.ptr);  // quoted, this becomes the string literal
  CHARBUF_CLOSE(format);

  ast_node *all_args = new_ast_arg_list(new_ast_str(format_lit), arg_list);
  ast_node *call_args = new_ast_call_arg_list(new_ast_call_filter_clause(NULL, NULL), all_args);

  return new_ast_call(new_ast_str("printf"), call_args);
}
// Builds a two-way CASE expression:
//   (expr, val1, val2) => CASE WHEN expr THEN val1 ELSE val2 END
// This is the "CASE WHEN expr THEN result" form, not the
// "CASE expr WHEN val THEN result" form, so the case_expr has no left operand.
static ast_node *rewrite_gen_iif_case_expr(ast_node *expr, ast_node *val1, ast_node *val2) {
  // the single WHEN expr THEN val1 entry
  ast_node *case_list = new_ast_case_list(new_ast_when(expr, val1), NULL);

  // the connector pairs the WHEN list with the ELSE value (val2)
  ast_node *connector = new_ast_connector(case_list, val2);

  return new_ast_case_expr(NULL, connector);
}
// This helper generates a 'case_expr' node from two cursor variables. It is used to rewrite
// the cql_cursor_diff_col(X,Y) and cql_cursor_diff_val(X,Y) functions into a case expression.
// One WHEN clause is generated per column, comparing that column in the two cursors with
// IS NOT; there is no ELSE clause (so we get ELSE NULL by default).
//
// var1, var2: the two cursor variables; they must have the same number of columns
// report_column_name: if true each THEN value is the column name as a string literal,
//   otherwise it is a printf(...) call reporting the column name and both values
//
// e.g:
// cql_cursor_diff_col(C1, C2); ===> CASE WHEN C1.x IS NOT C2.x THEN 'x' WHEN C1.y IS NOT C2.y THEN 'y'
// cql_cursor_diff_val(C1, C2); ===> CASE WHEN C1.x IS NOT C2.x THEN printf('column:%s C1:%s C2:%s', 'x', <C1.x>, <C2.x>)
// WHEN C1.y IS NOT C2.y THEN printf('column:%s C1:%s C2:%s', 'y', <C1.y>, <C2.y>)
// where each <C.col> is: CASE WHEN C.col IS NULL THEN 'null' ELSE printf(<format for the column type>, C.col)
static ast_node *rewrite_gen_case_expr(ast_node *var1, ast_node *var2, bool_t report_column_name) {
Contract(is_variable(var1->sem->sem_type));
Contract(is_variable(var2->sem->sem_type));
CSTR c1_name = var1->sem->name;
CSTR c2_name = var2->sem->name;
sem_struct *sptr1 = var1->sem->sptr;
sem_struct *sptr2 = var2->sem->sptr;
Invariant(sptr1->count == sptr2->count);
// We don't need to verify that both cursors have the same shape because that has been done
// already. Therefore we just assume both cursors have identical shape.
int32_t count = (int32_t) sptr1->count;
ast_node *case_list = NULL;
// walk the columns backwards so that each new case_list node can be pushed on the front
for (int32_t i = count - 1; i >= 0; i--) {
Invariant(sptr1->names[i]);
// left side of IS NOT
ast_node *dot1 = new_ast_dot(new_ast_str(c1_name), new_ast_str(sptr1->names[i]));
// right side of IS NOT
ast_node *dot2 = new_ast_dot(new_ast_str(c2_name), new_ast_str(sptr2->names[i]));
ast_node *is_not = new_ast_is_not(dot1, dot2);
// the THEN part of WHEN THEN
ast_node *val = NULL;
if (report_column_name) {
CSTR name_lit = dup_printf("'%s'", sptr1->names[i]); // this turns into literal name
val = new_ast_str(name_lit);
} else {
// the outer printf arguments are built last-to-first: second cursor's value,
// then first cursor's value, then the column name
ast_node *arg_list = NULL;
CHARBUF_OPEN(format_output);
// fourth argument to the outer printf: the second cursor's value, formatted by a nested printf(...)
ast_node* printf_arg_list3 = rewrite_gen_arg_list(
&format_output, c2_name, sptr2->names[i], sptr2->semtypes[i]);
// nested printf call for the fourth argument
ast_node *call_printf3 = rewrite_gen_printf_call(format_output.ptr, printf_arg_list3);
// left of IS node
ast_node *dot = new_ast_dot(new_ast_str(c2_name), new_ast_str(sptr2->names[i]));
// We wrap the dot in a call to nullable if it is of a nonnull type so
// that the IS NULL check will not result in a type error. Eliding the
// check is not possible in the nonnull case because even a dot of a
// nonnull type could, unfortunately, be null if a row was not fetched.
if (is_not_nullable(sptr2->semtypes[i])) {
dot = rewrite_gen_nullable(dot);
}
// left of WHEN expr
ast_node* is_node = new_ast_is(dot, new_ast_null());
// case_expr node: CASE WHEN C.x IS NULL THEN 'null' ELSE printf("%s", C.x)
ast_node *check_call_printf3 = rewrite_gen_iif_case_expr(
is_node,
new_ast_str("'null'"),
call_printf3);
arg_list = new_ast_arg_list(check_call_printf3, NULL);
// reuse the format buffer for the next nested printf
bclear(&format_output);
// third argument to the outer printf: the first cursor's value, formatted by a nested printf(...)
ast_node* printf_arg_list2 = rewrite_gen_arg_list(
&format_output, c1_name, sptr1->names[i], sptr1->semtypes[i]);
// nested printf call for the third argument
ast_node *call_printf2 = rewrite_gen_printf_call(format_output.ptr, printf_arg_list2);
// left of IS node (wrapped in nullable(...) for the same reason as above)
dot = new_ast_dot(new_ast_str(c1_name), new_ast_str(sptr1->names[i]));
if (is_not_nullable(sptr1->semtypes[i])) {
dot = rewrite_gen_nullable(dot);
}
// left of WHEN expr
is_node = new_ast_is(dot, new_ast_null());
// case_expr node: CASE WHEN C.x IS NULL THEN 'null' ELSE printf("%s", C.x)
ast_node *check_call_printf2 = rewrite_gen_iif_case_expr(
is_node,
new_ast_str("'null'"),
call_printf2);
arg_list = new_ast_arg_list(check_call_printf2, arg_list);
bclear(&format_output);
// second argument to the outer printf: the column name as a string literal
ast_node * printf_arg_list1 = new_ast_str(dup_printf("'%s'", sptr1->names[i]));
arg_list = new_ast_arg_list(printf_arg_list1, arg_list);
// the outer printf call; its format embeds the two cursor names as labels
CHARBUF_OPEN(tmp);
bprintf(&tmp, "column:%%s %s:%%s %s:%%s", c1_name, c2_name);
val = rewrite_gen_printf_call(tmp.ptr, arg_list);
CHARBUF_CLOSE(tmp);
CHARBUF_CLOSE(format_output);
}
// The WHEN node and the CASE LIST that holds it
ast_node *when = new_ast_when(is_not, val);
ast_node *new_case_list = new_ast_case_list(when, case_list);
case_list = new_case_list;
}
// case list with no ELSE (we get ELSE NULL by default)
ast_node *connector = new_ast_connector(case_list, NULL);
// CASE WHEN expr THEN result form; not CASE expr WHEN val THEN result
ast_node *case_expr = new_ast_case_expr(NULL, connector);
return case_expr;
}
// Adds the not-null and sensitive column attributes implied by a named column
// type in a create table statement.
//
// This only applies when the column's data type is given by name (a string
// node), e.g. an enum type or a declared named type.  The named type is looked
// up; if it is not nullable a not-null attribute is prepended to the column's
// attribute list, and if it is sensitive a sensitive attribute is prepended.
// An unknown type name reports CQL0360 and records an error on `ast`;
// otherwise `ast` is marked ok.
cql_noexport void rewrite_right_col_def_type_attrs_if_needed(ast_node *ast) {
  Contract(is_ast_col_def_type_attrs(ast));
  EXTRACT_NOTNULL(col_def_name_type, ast->left);
  EXTRACT_ANY_NOTNULL(data_type, col_def_name_type->right);
  EXTRACT_ANY(col_attrs, ast->right);

  // Anything other than a type name needs no help from us.
  if (!is_ast_str(data_type)) {
    record_ok(ast);
    return;
  }

  EXTRACT_STRING(name, data_type);
  ast_node *named_type = find_named_type(name);
  if (!named_type) {
    report_error(ast, "CQL0360: unknown type", name);
    record_error(ast);
    return;
  }

  AST_REWRITE_INFO_SET(ast->lineno, ast->filename);

  sem_t found_sem_type = named_type->sem->sem_type;

  // Each new attribute goes on the front of the existing attribute chain.
  if (!is_nullable(found_sem_type)) {
    col_attrs = new_ast_col_attrs_not_null(NULL, col_attrs);
  }

  if (sensitive_flag(found_sem_type)) {
    col_attrs = new_ast_sensitive_attr(NULL, col_attrs);
  }

  ast_set_right(ast, col_attrs);

  AST_REWRITE_INFO_RESET();

  record_ok(ast);
}
// Replaces a data type that is given by name (a string node) with the actual
// type that the name was declared to be.
//
// `ast` is either a create_data_type node, in which case the type is its left
// child, or the data type node itself.  If the type is not a string node there
// is nothing to do.  An unknown type name reports CQL0360 and records an error
// on `ast`; otherwise the string node is converted in place into the generated
// type node and `ast` is marked ok.
cql_noexport void rewrite_data_type_if_needed(ast_node *ast) {
  ast_node *data_type = is_ast_create_data_type(ast) ? ast->left : ast;

  if (!is_ast_str(data_type)) {
    record_ok(ast);
    return;
  }

  EXTRACT_STRING(name, data_type);
  ast_node *named_type = find_named_type(name);
  if (!named_type) {
    report_error(ast, "CQL0360: unknown type", name);
    record_error(ast);
    return;
  }

  sem_t sem_type = named_type->sem->sem_type;

  // In two contexts only the core type is generated:
  //
  // * A cast_expr doesn't need attributes, it only casts to the target type.
  //   When casting, both nullability and sensitivity are preserved, so the
  //   extra attributes are not expected/required in the rewrite.
  //
  // * Columns encode nullability and sensitivity differently than variables
  //   do, so we only produce the base type here and the caller does the
  //   rest.  That work is done in rewrite_right_col_def_type_attrs_if_needed.
  ast_node *parent = ast->parent;
  if (parent && (is_ast_col_def_name_type(parent) || is_ast_cast_expr(parent))) {
    sem_type = core_type_of(sem_type);
  }

  AST_REWRITE_INFO_SET(data_type->lineno, data_type->filename);
  ast_node *node = rewrite_gen_data_type(sem_type, named_type->sem->kind);
  AST_REWRITE_INFO_RESET();

  // Morph the existing string node into the generated node so that whatever
  // refers to it stays valid.
  ast_set_left(data_type, node->left);
  ast_set_right(data_type, node->right);
  data_type->sem = node->sem;
  data_type->type = node->type;  // note this is the ast type, not the semantic type

  record_ok(ast);
}
// Converts an id or dot node, in place, into cql_inferred_notnull(<id or dot>).
//
// A fresh copy of the original id/dot becomes the call's only argument and the
// original node itself is turned into the call node.  The result is then
// analyzed; the rewrite is never expected to produce a semantic error.
cql_noexport void rewrite_nullable_to_notnull(ast_node *_Nonnull ast) {
  Contract(is_id_or_dot(ast));

  AST_REWRITE_INFO_SET(ast->lineno, ast->filename);

  // Copy the name (or scope.name) since `ast` is about to be repurposed.
  ast_node *arg = NULL;
  if (!is_id(ast)) {
    Invariant(is_ast_dot(ast));
    EXTRACT_NAME_AND_SCOPE(ast);
    arg = new_ast_dot(new_ast_str(scope), new_ast_str(name));
  }
  else {
    EXTRACT_STRING(name, ast);
    arg = new_ast_str(name);
  }

  ast_node *func_name = new_ast_str("cql_inferred_notnull");
  ast_node *filter = new_ast_call_filter_clause(NULL, NULL);
  ast_node *call_args = new_ast_call_arg_list(filter, new_ast_arg_list(arg, NULL));

  // Turn the original node into the call.
  ast->type = k_ast_call;
  ast_set_left(ast, func_name);
  ast_set_right(ast, call_args);

  AST_REWRITE_INFO_RESET();

  // Analyze the AST to validate the rewrite; it must not fail.
  sem_expr(ast);
  Invariant(!is_error(ast));
}
// Converts a guard statement `IF expr stmt` into the equivalent ordinary if
// statement `IF expr THEN stmt END IF`.  The node is changed in place (it
// gets an empty if_alt for the missing ELSE parts) and is then analyzed as a
// normal statement.
cql_noexport void rewrite_guard_stmt_to_if_stmt(ast_node *_Nonnull ast) {
  Contract(is_ast_guard_stmt(ast));

  AST_REWRITE_INFO_SET(ast->lineno, ast->filename);

  EXTRACT_ANY_NOTNULL(expr, ast->left);
  EXTRACT_ANY_NOTNULL(stmt, ast->right);

  // the guarded statement becomes a one-element statement list
  ast_node *body = new_ast_stmt_list(stmt, NULL);
  ast_node *cond_action = new_ast_cond_action(expr, body);

  ast->type = k_ast_if_stmt;
  ast_set_left(ast, cond_action);
  ast_set_right(ast, new_ast_if_alt(NULL, NULL));

  AST_REWRITE_INFO_RESET();

  sem_one_stmt(ast);
}
// Rewrites an already analyzed printf call so that every argument's core type
// matches the type its format specifier calls for exactly, inserting casts
// where it does not.  This lets programmers enjoy the usual subtyping
// semantics of `sem_verify_assignment` while making sure that all types match
// up exactly for calls to `sqlite3_mprintf` in the C output.
//
// ast:           the (error free) call node for printf
// format_string: the format string used to drive the printf iterator
//
// The format string and the argument count are known to be valid here; that
// is enforced with contracts.  The call is re-analyzed after the rewrite.
cql_noexport void rewrite_printf_inserting_casts_as_needed(ast_node *ast, CSTR format_string) {
  Contract(is_ast_call(ast));
  Contract(!is_error(ast));
  EXTRACT_NOTNULL(call_arg_list, ast->right);
  EXTRACT_NOTNULL(arg_list, call_arg_list->right);

  printf_iterator *iterator = minipool_alloc(ast_pool, (uint32_t)sizeof_printf_iterator);
  printf_iterator_init(iterator, NULL, format_string);

  // Skip the first argument; the values to format follow it.
  ast_node *arg_item = arg_list->right;

  while (arg_item) {
    sem_t wanted = printf_iterator_next(iterator);

    // We know the format string cannot have an error.
    Contract(wanted != SEM_TYPE_ERROR);
    // We know that we do not have too many arguments.
    Contract(wanted != SEM_TYPE_OK);

    ast_node *arg = arg_item->left;
    sem_t actual = core_type_of(arg->sem->sem_type);

    AST_REWRITE_INFO_SET(arg->lineno, arg->filename);

    if (actual == SEM_TYPE_NULL) {
      // We cannot cast NULL outside of an SQL context, so we just insert the
      // correct zero-valued literal instead, if needed.  Reference types do
      // not need to be casted, so they are left alone.
      ast_node *zero = NULL;
      if (wanted == SEM_TYPE_INTEGER) {
        zero = new_ast_num(NUM_INT, "0");
      }
      else if (wanted == SEM_TYPE_LONG_INTEGER) {
        zero = new_ast_num(NUM_LONG, "0");
      }
      else if (wanted == SEM_TYPE_REAL) {
        zero = new_ast_num(NUM_REAL, "0.0");
      }

      if (zero) {
        ast_set_left(arg_item, zero);
      }
    }
    else if (actual != wanted) {
      // The format string specifies a larger type than what was provided, so
      // we must insert a cast to make the types match exactly.
      Invariant(is_numeric(wanted));

      ast_node *type_ast;
      if (wanted == SEM_TYPE_INTEGER) {
        type_ast = new_ast_type_int(NULL);
      }
      else if (wanted == SEM_TYPE_LONG_INTEGER) {
        type_ast = new_ast_type_long(NULL);
      }
      else {
        Invariant(wanted == SEM_TYPE_REAL);
        type_ast = new_ast_type_real(NULL);
      }

      ast_set_left(arg_item, new_ast_cast_expr(arg, type_ast));
    }

    AST_REWRITE_INFO_RESET();

    arg_item = arg_item->right;
  }

  // We know that we do not have too few arguments.
  Contract(printf_iterator_next(iterator) == SEM_TYPE_OK);

  // Validate the rewrite.
  sem_expr(ast);
}
// Appends `node` to the list described by *head / *tail, using the usual
// convention that ->right is the "next" pointer.
//
// head: in/out, first node of the list; set to `node` if the list was empty
// tail: in/out, last node of the list; always updated to `node`
// node: the node to append
//
// The link is made with ast_set_right, as is done everywhere else in this
// file, rather than by assigning ->right directly, so that the appended node
// gets the same link bookkeeping (its parent) as any other child.  A raw
// assignment would leave every node after the second one in a spliced list
// without a correct parent.
static void add_tail(ast_node **head, ast_node **tail, ast_node *node) {
  if (*head) {
    ast_set_right(*tail, node);
  }
  else {
    *head = node;
  }
  *tail = node;
}
// Appends a select_expr_list node for one column to the list being built in
// *head / *tail.  The expression is `scope.name` when a scope is given and
// just `name` when `scope` is NULL.  The select expression has no alias.
static void append_scoped_name(ast_node **head, ast_node **tail, CSTR scope, CSTR name) {
  ast_node *expr = scope
    ? new_ast_dot(new_ast_str(scope), new_ast_str(name))
    : new_ast_str(name);

  ast_node *item = new_ast_select_expr_list(new_ast_select_expr(expr, NULL), NULL);
  add_tail(head, tail, item);
}
// This is our helper struct with the computed symbol tables for disambiguation;
// we flow this around when we need to do the searches.  It is filled in by
// jfind_init and released by jfind_cleanup.
typedef struct jfind_t {
// the join (from the FROM clause) that the tables below were computed from
sem_join *jptr;
// maps a column name to the name of the first table that has that column
symtab *location;
// holds the column names that appear in more than one table
symtab *dups;
// maps a table name to its sem_struct
symtab *tables;
} jfind_t;
// Looks up the sem_struct recorded for the table called `name`.
// Returns NULL if there is no such table in the join.
static sem_struct *jfind_table(jfind_t *jfind, CSTR name) {
  symtab_entry *entry = symtab_find(jfind->tables, name);
  if (!entry) {
    return NULL;
  }
  return (sem_struct *)(entry->val);
}
// Returns the index of the column called `name` in `sptr`, or -1 if there is
// no such column.  Names are compared without regard to case and the first
// match wins.  We need this because in X like Y the column order of X might
// be, and probably is, different than that of Y.
static int32_t find_col_in_sptr(sem_struct *sptr, CSTR name) {
  int32_t found = -1;

  for (int32_t i = 0; found < 0 && i < sptr->count; i++) {
    if (Strcasecmp(sptr->names[i], name) == 0) {
      found = i;
    }
  }

  return found;
}
// Builds the fast lookup tables for the join of a FROM clause so that we do
// not have to re-search every table for every column we need (which would be
// a cubic algorithm).  After this call:
//   * jfind->tables   maps each table name in the join to its sem_struct
//   * jfind->location maps each column name to the first table that has it
//   * jfind->dups     holds every column name found in more than one table
// The tables must be released with jfind_cleanup.
static void jfind_init(jfind_t *jfind, sem_join *jptr) {
  jfind->jptr = jptr;
  jfind->location = symtab_new();
  jfind->dups = symtab_new();
  jfind->tables = symtab_new();

  // one pass over the join now saves a lot of searching later
  for (int32_t t = 0; t < jptr->count; t++) {
    CSTR table_name = jptr->names[t];
    sem_struct *table = jptr->tables[t];

    symtab_add(jfind->tables, table_name, (void *)table);

    for (int32_t c = 0; c < table->count; c++) {
      CSTR col = table->names[c];

      // the first table to offer a column owns it; any later sighting of the
      // same name means the column needs disambiguation
      bool_t first_seen = symtab_add(jfind->location, col, (void*)table_name);
      if (!first_seen) {
        symtab_add(jfind->dups, col, NULL);
      }
    }
  }
}
// Releases whichever of the helper tables were created so that we don't leak
// in the amalgam.  Tables that were never created (still NULL) are skipped,
// so this is safe to call on a zero-initialized jfind_t.
static void jfind_cleanup(jfind_t *jfind) {
  symtab *owned[] = { jfind->location, jfind->dups, jfind->tables };

  for (uint32_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
    if (owned[i]) {
      symtab_delete(owned[i]);
    }
  }
}
// Checks that column `i_reqd` of the required shape has a compatible
// counterpart, found by name (so possibly at a different index), in the
// actual table.  We have to do this because when you say COLUMNS(X like foo)
// the foo columns of X must be the same type as those in foo.
//
// ast:         the node any error is charged to
// sptr_reqd:   the shape whose column is required
// i_reqd:      index of the required column within sptr_reqd
// sptr_actual: the table that must supply the column
// scope:       the table's name, used only to make diagnostics clearer
//
// Returns true if the column is acceptable.  A missing column reports CQL0069;
// a type mismatch is reported by sem_verify_assignment.
static bool_t verify_matched_column(
  ast_node *ast,
  sem_struct *sptr_reqd,
  int32_t i_reqd,
  sem_struct *sptr_actual,
  CSTR scope)
{
  // If we're emitting from the same structure there's nothing to check,
  // this is not the LIKE case.
  if (sptr_reqd == sptr_actual) {
    return true;
  }

  CSTR col = sptr_reqd->names[i_reqd];
  bool_t ok = false;

  // for better diagnostics we give the scoped name
  CHARBUF_OPEN(err);
  bprintf(&err, "%s.%s", scope, col);

  int32_t i_actual = find_col_in_sptr(sptr_actual, col);
  if (i_actual < 0) {
    report_error(ast, "CQL0069: name not found", err.ptr);
  }
  else {
    // Here the ast is only where we charge the error; the callers pass the
    // node they just added, which carries the file/line of the columns node.
    if (sem_verify_assignment(ast, sptr_reqd->semtypes[i_reqd], sptr_actual->semtypes[i_actual], err.ptr)) {
      ok = true;
    }
  }

  CHARBUF_CLOSE(err);
  return ok;
}
// Here we've found one column_calculation node, this corresponds to a single
// instance of COLUMNS(...) in the select list. When we process this, we
// will replace it with its expansion. Note that each one is independent
// so often you really only need one (distinct is less powerful if you have two or more).
//
// column_calculation: the COLUMNS(...) node; a non-null right child means DISTINCT was specified
// jfind: the lookup tables for the FROM clause (see jfind_init)
//
// Each col_calc item is one of three forms:
//   * scope.name  -- an explicit column, emitted as is
//   * scope [like shape] -- all columns of the table `scope`, or only those named by the shape
//   * like shape  -- the shape's columns, each located in the first table that has it
// On success the select list node that held the COLUMNS(...) node is replaced by the
// expanded list and column_calculation is marked ok.  On failure an error is recorded
// on column_calculation and the select list is left alone.
static void rewrite_column_calculation(ast_node *column_calculation, jfind_t *jfind) {
Contract(is_ast_column_calculation(column_calculation));
bool_t distinct = !!column_calculation->right;
// the set of column names already emitted, only tracked in the DISTINCT case
symtab *used_names = distinct ? symtab_new() : NULL;
// head/tail of the replacement select_expr_list we are building
ast_node *tail = NULL;
ast_node *head = NULL;
for (ast_node *item = column_calculation->left; item; item = item->right) {
Contract(is_ast_col_calcs(item));
EXTRACT(col_calc, item->left);
if (is_ast_dot(col_calc->left)) {
// If a column is explicitly mentioned, we simply emit it
// we won't duplicate the column later but neither will we
// filter it out if distinct is mentioned, this is to prevent
// bogus manual columns from staying in select lists. If it's
// not distinct, either hoist it to the front or else remove it.
EXTRACT_NOTNULL(dot, col_calc->left);
EXTRACT_STRING(left, dot->left);
EXTRACT_STRING(right, dot->right);
// no type check is needed here, we just emit the name whatever it is
append_scoped_name(&head, &tail, left, right);
if (used_names) {
symtab_add(used_names, right, NULL);
}
}
else if (col_calc->left) {
// this is the case with a table name and an optional like expression
EXTRACT_STRING(scope, col_calc->left);
sem_struct *sptr_table = jfind_table(jfind, scope);
if (!sptr_table) {
report_error(col_calc->left, "CQL0069: name not found", scope);
record_error(column_calculation);
goto cleanup;
}
EXTRACT(like, col_calc->right);
sem_struct *sptr;
if (like) {
ast_node *found_shape = sem_find_likeable_ast(like, LIKEABLE_FOR_VALUES);
if (!found_shape) {
record_error(column_calculation);
goto cleanup;
}
// get just the shape columns (or try anyway)
sptr = found_shape->sem->sptr;
}
else {
// get all the columns from this table
sptr = sptr_table;
}
for (int32_t j = 0; j < sptr->count; j++) {
CSTR col = sptr->names[j];
// in the DISTINCT case skip any column name that has already been emitted
if (used_names && !symtab_add(used_names, col, NULL)) {
continue;
}
append_scoped_name(&head, &tail, scope, col);
// the node just appended (tail) is where any error is charged
if (!verify_matched_column(tail, sptr, j, sptr_table, scope)) {
record_error(column_calculation);
goto cleanup;
}
}
}
else {
// the other case has just a like expression
EXTRACT_NOTNULL(like, col_calc->right);
ast_node *found_shape = sem_find_likeable_ast(like, LIKEABLE_FOR_VALUES);
if (!found_shape) {
record_error(column_calculation);
goto cleanup;
}
// get just the shape columns (or try anyway)
sem_struct *sptr = found_shape->sem->sptr;
// now we can use our found structure from the like
// we will find the table that has the given column
// we generate a disambiguation scope if it is needed
for (int32_t i = 0; i < sptr->count; i++) {
CSTR col = sptr->names[i];
// emit the column unless DISTINCT was specified and the name was already emitted
if (!used_names || symtab_add(used_names, col, NULL)) {
// find the first table in the join that has this column
symtab_entry *entry = symtab_find(jfind->location, col);
if (!entry) {
report_error(like, "CQL0069: name not found", col);
record_error(column_calculation);
goto cleanup;
}
CSTR scope = (CSTR)entry->val;
sem_struct *sptr_table = jfind_table(jfind, scope);
Invariant(sptr_table); // this is our lookup of a scope that is known, it cant fail
// We only use the scope in the output if it's needed and if distinct was specified
// if distinct wasn't specified then ambiguity is an error and it will be. The later
// stages will check for an unambiguous name.
CSTR used_scope = (used_names && symtab_find(jfind->dups, col)) ? scope : NULL;
append_scoped_name(&head, &tail, used_scope, col);
// We check the type of the first match of the name, this is the only column that
// can match legally. If there are other columns ambiguity errors will be emitted.
if (!verify_matched_column(tail, sptr, i, sptr_table, scope)) {
record_error(column_calculation);
goto cleanup;
}
}
}
}
}
// replace the calc node with the head payload
// NOTE(review): this assumes at least one column was emitted above (head is not NULL) -- confirm
ast_node *splice = column_calculation->parent;
ast_set_left(splice, head->left);
ast_set_right(tail, splice->right); // this could be mutating the head
ast_set_right(splice, head->right); // works even if head is an alias for tail
record_ok(column_calculation);
cleanup:
if (used_names) {
symtab_delete(used_names);
}
}
// Walks the select expression list looking for the construct COLUMNS(...) in
// its various forms and expands each one in place.
//
// COLUMNS(...) is a generalization of the T.* syntax that lets you pull
// slices of the tables and get distinct columns where there are duplicates
// due to joins.  It is just sugar, but there could be dozens of such columns
// and typing them all by hand is very easy to get wrong.  SQLite has no
// support for this sort of thing so it, and indeed the rest of the
// compilation chain, will only ever see the result of the expansion.
//
// ast:       the select_expr_list_con node
// jptr_from: the join of the FROM clause, or NULL if there is no FROM clause
//
// Using COLUMNS(...) with no FROM clause reports CQL0053.  On any error the
// error is recorded on `ast`, otherwise `ast` is marked ok.
cql_noexport void rewrite_select_expr_list(ast_node *ast, sem_join *jptr_from) {
  Contract(is_ast_select_expr_list_con(ast));
  EXTRACT_NOTNULL(select_expr_list, ast->left);

  // The lookup tables are only built if some COLUMNS(...) actually needs them.
  jfind_t jfind = {0};
  bool_t failed = false;

  for (ast_node *item = select_expr_list; item; item = item->right) {
    Contract(is_ast_select_expr_list(item));

    if (!is_ast_column_calculation(item->left)) {
      continue;
    }

    EXTRACT_NOTNULL(column_calculation, item->left);

    if (!jptr_from) {
      // With no FROM clause the lookup tables were never built, so there is
      // nothing to clean up here.
      report_error(ast, "CQL0053: select columns(...) cannot be used with no FROM clause", NULL);
      record_error(ast);
      return;
    }

    if (!jfind.jptr) {
      jfind_init(&jfind, jptr_from);
    }

    AST_REWRITE_INFO_SET(column_calculation->lineno, column_calculation->filename);
    rewrite_column_calculation(column_calculation, &jfind);
    AST_REWRITE_INFO_RESET();

    if (is_error(column_calculation)) {
      failed = true;
      break;
    }
  }

  if (failed) {
    record_error(ast);
  }
  else {
    record_ok(ast);
  }

  jfind_cleanup(&jfind);
}
// Here we convert one of the normal fetch_values forms
//
// FETCH C from B -- C is a cursor B is a blob
// SET B from CURSOR C -- load the blob from the cursor
//
// into the blob serializing or deserializing forms.
// We rewrite the AST here to the blob form so that it's easier
// for the later passes to see the difference. The parser
// can't do this because you have to know the type of the arguments
// to know that this is the blob case.
//
// The relevant statements are fetch_cursor_from_blob_stmt and
// set_blob_from_cursor_stmt. These only have very simple forms,
// the idea is that if you need any slicing, extraction,
// or whatever, you do it with the cursors not blobs.
//
// Returns false if the statement is not one of the blob forms, in which case
// the AST is untouched and the caller should proceed as usual.  Returns true
// if the blob position holds a blob variable: the statement has then either
// been rewritten to its blob form or has had an error recorded on it by the
// cursor/blob compatibility check.
cql_noexport bool_t try_rewrite_blob_fetch_forms(ast_node *ast) {
Contract(is_ast_fetch_values_stmt(ast) || is_ast_set_from_cursor(ast));
// cursor/blob identify the two operands by kind; dest/src identify them by role
ast_node *cursor = NULL;
ast_node *blob = NULL;
ast_node *dest = NULL;
ast_node *src = NULL;
if (is_ast_fetch_values_stmt(ast)) {
EXTRACT(insert_dummy_spec, ast->left);
EXTRACT_NOTNULL(name_columns_values, ast->right);
EXTRACT_ANY_NOTNULL(target, name_columns_values->left)
EXTRACT_ANY_NOTNULL(columns_values, name_columns_values->right);
// check for the simple case of fetching to or from a blob
// a dummy spec is never part of the blob form
if (insert_dummy_spec) {
return false;
}
if (!is_ast_from_shape(columns_values->right)) {
return false;
}
EXTRACT_NOTNULL(from_shape, columns_values->right);
// any shape with a like clause is not the blob case
if (from_shape->left) {
return false;
}
EXTRACT_ANY_NOTNULL(source, from_shape->right);
EXTRACT_STRING(source_name, source);
EXTRACT_STRING(target_name, target);
ast_node *variable = find_local_or_global_variable(source_name);
if (variable && is_blob(variable->sem->sem_type)) {
// we're committed now, there's a blob on the right
// one way or another we are resolving the type
dest = cursor = target;
src = blob = source;
goto rewrite_or_fail;
}
}
else {
// the SET ... FROM CURSOR form: here the blob, if any, is the target
EXTRACT_ANY_NOTNULL(source, ast->right);
EXTRACT_ANY_NOTNULL(target, ast->left);
EXTRACT_STRING(target_name, target);
ast_node *variable = find_local_or_global_variable(target_name);
if (variable && is_blob(variable->sem->sem_type)) {
// we're committed now, the target is a blob
// one way or another we are resolving the type
src = cursor = source;
dest = blob = target;
goto rewrite_or_fail;
}
}
// the blob position is not a blob, proceed as usual
return false;
rewrite_or_fail:
Invariant(cursor);
sem_validate_cursor_blob_compat(ast, cursor, blob, dest, src);
if (is_error(ast)) {
// this was the blob form but it is not valid, the error is already recorded
return true;
}
// the direction of the transfer decides which blob statement this becomes
if (dest == blob) {
ast->type = k_ast_set_blob_from_cursor_stmt;
}
else {
ast->type = k_ast_fetch_cursor_from_blob_stmt;
}
// both blob statements have the simple shape (dest, src)
ast_set_left(ast, dest);
ast_set_right(ast, src);
return true;
}
#endif