sources/sem.h (262 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
typedef uint64_t sem_t;
#if defined(CQL_AMALGAM_LEAN) && !defined(CQL_AMALGAM_SEM)
// minimal stuff goes here
cql_noexport void sem_main(ast_node *node);
cql_noexport void sem_cleanup(void);
cql_noexport void print_sem_type(struct sem_node *sem);
#else
#include "cql.h"
#include "ast.h"
#include "bytebuf.h"
#include "symtab.h"
#include "charbuf.h"
#include "list.h"
//
// The key semantic type information
//
// The rules:
// * if sem_type is STRUCT then sptr is not null
// * if sem_type is JOIN then jptr is not null
// * if node is on a table or a view element, which is a STRUCT
// then jptr is also not null as an optimization
// * in all other cases neither is populated
//
// Tables and Views have both their sptr and jptr filled out
// because the first thing you're going to do with a table/view is
// join it to something and so the base case of a 1 table join happens
// all the time. To make this easier that jptr is pre-populated as
// an optimization.
//
// @lint-ignore-every LINEWRAP
#define sem_not(x) u64_not(x)
typedef struct sem_node {
sem_t sem_type; // core type plus flags
CSTR name; // for named expressions in select columns etc.
CSTR kind; // the Foo in object<Foo>, not a variable or column name
CSTR error; // error text for test output, not used otherwise
struct sem_struct *sptr; // encoded struct if any
struct sem_join *jptr; // encoded join if any
int32_t create_version; // create version if any (really only for tables and columns)
int32_t delete_version; // delete version if any (really only for tables and columns)
int32_t unsub_version; // unsub version if any (for tables only)
int32_t resub_version; // resub version if any (for tables only)
bool_t recreate; // for tables only, true if marked @recreate
CSTR recreate_group_name; // for tables only, the name of the recreate group if they are in one
CSTR region; // the schema region, if applicable; null means unscoped (default)
symtab *used_symbols; // for select statements, we need to know which of the ids in the select list was used, if any
list_item *index_list; // for tables we need the list of indices that use this table (so we can recreate them together if needed)
struct eval_node *value; // for enum values we have to store the evaluated constant value of each member of the enum
} sem_node;
// for tables and views and the result of a select
typedef struct sem_struct {
CSTR struct_name; // struct name
uint32_t count; // count of fields
CSTR *names; // field names
CSTR *kinds; // the "kind" text of each column, if any, e.g. integer<foo> foo is the kind
sem_t *semtypes; // typecode for each field
} sem_struct;
// for the data type of (parts of) the FROM clause
// sometimes I refer to as a "joinscope"
typedef struct sem_join {
uint32_t count; // count of table/views in the join
CSTR *names; // names of the table/view
struct sem_struct **tables; // struct type of each table/view
} sem_join;
typedef struct recreate_annotation {
CSTR target_name; // the name of the target
CSTR group_name; // group name or "" if no group (not null, safe to sort)
ast_node *target_ast; // top level target (table, view, or index)
ast_node *annotation_ast; // the actual annotation
int32_t ordinal; // when sorting we want to use the original order (reversed actually) within a group
} recreate_annotation;
typedef struct schema_annotation {
int32_t ordinal; // this will be the original annotation order
int32_t version; // the version number (always > 0)
ast_node *target_ast; // top level target (table, view, or index)
CSTR target_name; // the name of the target
uint32_t annotation_type; // one of the codes below for the type of annotation
ast_node *annotation_ast; // the actual annotation
int32_t column_ordinal; // -1 if not a column
ast_node *column_ast; // a particular column if column annotation
} schema_annotation;
// Note: schema annotations are processed in the indicated order: the numbers matter!
#define SCHEMA_ANNOTATION_INVALID 0
#define SCHEMA_ANNOTATION_FIRST 1
#define SCHEMA_ANNOTATION_UNSUB 1
#define SCHEMA_ANNOTATION_CREATE_TABLE 2
#define SCHEMA_ANNOTATION_CREATE_COLUMN 3
#define SCHEMA_ANNOTATION_DELETE_TRIGGER 4
#define SCHEMA_ANNOTATION_DELETE_VIEW 5
#define SCHEMA_ANNOTATION_DELETE_INDEX 6
#define SCHEMA_ANNOTATION_DELETE_COLUMN 7
#define SCHEMA_ANNOTATION_DELETE_TABLE 8
#define SCHEMA_ANNOTATION_AD_HOC 9
#define SCHEMA_ANNOTATION_RESUB 10
#define SCHEMA_ANNOTATION_LAST 10
#define SEM_TYPE_NULL 0 // the subtree is a null literal (not just nullable)
#define SEM_TYPE_BOOL 1 // the subtree is a bool
#define SEM_TYPE_INTEGER 2 // the subtree is an integer
#define SEM_TYPE_LONG_INTEGER 3 // the subtree is a long_integer
#define SEM_TYPE_REAL 4 // the subtree is a real
#define SEM_TYPE_TEXT 5 // the subtree is a text type
#define SEM_TYPE_BLOB 6 // the subtree is a blob type
#define SEM_TYPE_OBJECT 7 // the subtree is any object type
#define SEM_TYPE_STRUCT 8 // the subtree is a table/view
#define SEM_TYPE_JOIN 9 // the subtree is a join
#define SEM_TYPE_ERROR 10 // marks the subtree as having a problem
#define SEM_TYPE_OK 11 // sentinel for ok but no type info
#define SEM_TYPE_PENDING 12 // sentinel for type calculation in flight
#define SEM_TYPE_REGION 13 // the ast is a schema region
#define SEM_TYPE_CORE 0xff // bit mask for the core types
#define SEM_TYPE_MAX_UNITARY (SEM_TYPE_OBJECT+1) // the last unitary type
#define SEM_TYPE_NOTNULL _64(0x0100) // set if and only if null is not possible
#define SEM_TYPE_HAS_DEFAULT _64(0x0200) // set for table columns with a default
#define SEM_TYPE_AUTOINCREMENT _64(0x0400) // set for table columns with autoinc
#define SEM_TYPE_VARIABLE _64(0x0800) // set for variables and parameters
#define SEM_TYPE_IN_PARAMETER _64(0x1000) // set for in parameters (can mix with below)
#define SEM_TYPE_OUT_PARAMETER _64(0x2000) // set for out parameters (can mix with above)
#define SEM_TYPE_DML_PROC _64(0x4000) // set for stored procs that have DML/DDL
#define SEM_TYPE_HAS_SHAPE_STORAGE _64(0x8000) // set for a cursor with simplified fetch syntax
#define SEM_TYPE_CREATE_FUNC _64(0x10000) // set for a function that returns a created object +1 ref
#define SEM_TYPE_SELECT_FUNC _64(0x20000) // set for a sqlite UDF function declaration
#define SEM_TYPE_DELETED _64(0x40000) // set for columns that are not visible in the current schema version
#define SEM_TYPE_VALIDATED _64(0x80000) // set if item has already been validated against previous schema
#define SEM_TYPE_USES_OUT _64(0x100000) // set if proc has a one rowresult using the OUT statement
#define SEM_TYPE_USES_OUT_UNION _64(0x200000) // set if proc uses the OUT UNION form for multi row result
#define SEM_TYPE_PK _64(0x400000) // set if column is a primary key
#define SEM_TYPE_FK _64(0x800000) // set if column is a foreign key
#define SEM_TYPE_UK _64(0x1000000) // set if column is a unique key
#define SEM_TYPE_VALUE_CURSOR _64(0x2000000) // set only if SEM_TYPE_HAS_SHAPE_STORAGE is set and the cursor has no statement
#define SEM_TYPE_SENSITIVE _64(0x4000000) // set if the object is privacy sensitive
#define SEM_TYPE_DEPLOYABLE _64(0x8000000) // set if the object is a deployable region
#define SEM_TYPE_BOXED _64(0x10000000) // set if a cursor's lifetime is managed by a box object
#define SEM_TYPE_HAS_CHECK _64(0x20000000) // set for table column with a "check" clause
#define SEM_TYPE_HAS_COLLATE _64(0x40000000) // set for table column with a "collate" clause
#define SEM_TYPE_INFERRED_NOTNULL _64(0x80000000) // set if inferred to not be nonnull (but was originally nullable)
#define SEM_TYPE_VIRTUAL _64(0x100000000) // set if and only if this is a virtual table
#define SEM_TYPE_HIDDEN_COL _64(0x200000000) // set if and only if hidden column on a virtual table
#define SEM_TYPE_TVF _64(0x400000000) // set if and only table node is a table valued function
#define SEM_TYPE_IMPLICIT _64(0x800000000) // set if and only the variable was declare implicitly (via declare out)
#define SEM_TYPE_CALLS_OUT_UNION _64(0x1000000000) // set if proc calls an out union proc for a result
#define SEM_TYPE_ALIAS _64(0x2000000000) // set only for aliases of a select when analyzing its where clause
#define SEM_TYPE_INIT_REQUIRED _64(0x4000000000) // set for variables that require initialization before use
#define SEM_TYPE_INIT_COMPLETE _64(0x8000000000) // set when SEM_TYPE_INIT_REQUIRED is present to indicate initialization
#define SEM_TYPE_INLINE_CALL _64(0x10000000000) // set when a proc_as_func call in SQL can be executed safely by inlining the SQL
#define SEM_TYPE_SERIALIZE _64(0x20000000000) // set when a cursor will need serialization features
#define SEM_TYPE_HAS_ROW _64(0x40000000000) // set on auto cursors to indicate that they are known to have a row
#define SEM_TYPE_FETCH_INTO _64(0x80000000000) // set if the cursor is used with fetch into
#define SEM_TYPE_FLAGS _64(0xFFFFFFFFF00) // all the flag bits we have so far
#define SEM_EXPR_CONTEXT_NONE 0x0001
#define SEM_EXPR_CONTEXT_SELECT_LIST 0x0002
#define SEM_EXPR_CONTEXT_WHERE 0x0004
#define SEM_EXPR_CONTEXT_ON 0x0008
#define SEM_EXPR_CONTEXT_HAVING 0x0010
#define SEM_EXPR_CONTEXT_ORDER_BY 0x0020
#define SEM_EXPR_CONTEXT_GROUP_BY 0x0040
#define SEM_EXPR_CONTEXT_LIMIT 0x0080
#define SEM_EXPR_CONTEXT_OFFSET 0x0100
#define SEM_EXPR_CONTEXT_TABLE_FUNC 0x0200
#define SEM_EXPR_CONTEXT_WINDOW 0x0400
#define SEM_EXPR_CONTEXT_WINDOW_FILTER 0x0800
#define SEM_EXPR_CONTEXT_CONSTRAINT 0x1000
#define SEM_EXPR_CONTEXT_FLAGS 0x1FFF // all the flag bits
#define CURRENT_EXPR_CONTEXT_IS(x) (!!(current_expr_context & (x)))
#define CURRENT_EXPR_CONTEXT_IS_NOT(x) (!(current_expr_context & (x)))
cql_noexport sem_t core_type_of(sem_t sem_type);
cql_noexport sem_t sensitive_flag(sem_t sem_type);
cql_noexport CSTR coretype_string(sem_t sem_type);
cql_noexport bool_t is_virtual_ast(ast_node *ast);
cql_noexport bool_t is_deleted(ast_node *ast);
cql_noexport bool_t is_single_flag(sem_t sem_type);
cql_noexport bool_t is_bool(sem_t sem_type);
cql_noexport bool_t is_string_compat(sem_t sem_type);
cql_noexport bool_t is_blob_compat(sem_t sem_type);
cql_noexport bool_t is_object_compat(sem_t sem_type);
cql_noexport bool_t is_create_func(sem_t sem_type);
cql_noexport bool_t is_integer(sem_t sem_type);
cql_noexport bool_t is_numeric(sem_t sem_type);
cql_noexport bool_t is_numeric_compat(sem_t sem_type);
cql_noexport bool_t is_numeric_expr(ast_node *expr);
cql_noexport bool_t is_unitary(sem_t sem_type);
cql_noexport bool_t is_struct(sem_t sem_type);
cql_noexport bool_t is_cursor(sem_t sem_type);
cql_noexport bool_t is_auto_cursor(sem_t sem_type);
cql_noexport bool_t is_primary_key(sem_t sem_type);
cql_noexport bool_t is_foreign_key(sem_t sem_type);
cql_noexport bool_t is_sem_error(sem_node *sem);
cql_noexport bool_t is_error(ast_node *ast);
cql_noexport bool_t is_not_nullable(sem_t sem_type);
cql_noexport bool_t is_variable(sem_t sem_type);
cql_noexport bool_t is_in_parameter(sem_t sem_type);
cql_noexport bool_t is_out_parameter(sem_t sem_type);
cql_noexport bool_t is_inout_parameter(sem_t sem_type);
cql_noexport bool_t is_dml_proc(sem_t sem_type);
cql_noexport bool_t is_text(sem_t sem_type);
cql_noexport bool_t sem_is_str_name(ast_node *ast);
cql_noexport bool_t is_blob(sem_t sem_type);
cql_noexport bool_t is_object(sem_t sem_type);
cql_noexport bool_t is_ref_type(sem_t sem_type);
cql_noexport bool_t is_nullable(sem_t sem_type);
cql_noexport bool_t is_null_type(sem_t sem_type);
cql_noexport bool_t has_result_set(ast_node *ast);
cql_noexport bool_t has_out_stmt_result(ast_node *ast);
cql_noexport bool_t has_out_union_call(ast_node *ast);
cql_noexport bool_t has_out_union_stmt_result(ast_node *ast);
cql_noexport bool_t is_autotest_dummy_table(CSTR name);
cql_noexport bool_t is_autotest_dummy_insert(CSTR name);
cql_noexport bool_t is_autotest_dummy_select(CSTR name);
cql_noexport bool_t is_autotest_dummy_result_set(CSTR name);
cql_noexport bool_t is_autotest_dummy_test(CSTR name);
cql_noexport bool_t is_referenceable_by_foreign_key(ast_node *ref_table, CSTR column_name);
// Exit if schema validation directive was seen
cql_noexport void exit_on_validating_schema(void);
cql_noexport void sem_main(ast_node *node);
cql_noexport void sem_cleanup(void);
cql_noexport void print_sem_type(struct sem_node *sem);
cql_noexport int32_t sem_column_index(sem_struct *sptr, CSTR name);
cql_noexport ast_node *find_proc(CSTR name);
cql_noexport bytebuf *find_proc_arg_info(CSTR name);
cql_noexport ast_node *find_local_or_global_variable(CSTR name);
cql_noexport ast_node *find_region(CSTR name);
cql_noexport ast_node *find_func(CSTR name);
cql_noexport ast_node *find_table_or_view_even_deleted(CSTR name);
cql_noexport ast_node *find_usable_and_not_deleted_table_or_view(CSTR name, ast_node *err_target, CSTR msg);
cql_noexport void sem_resolve_id(ast_node *ast, CSTR name, CSTR scope);
cql_noexport ast_node *find_enum(CSTR name);
cql_noexport ast_node *find_base_fragment(CSTR name);
cql_noexport ast_node *find_recreate_migrator(CSTR name);
cql_noexport ast_node *find_constant_group(CSTR name);
cql_noexport ast_node *find_variable_group(CSTR name);
cql_noexport ast_node *find_constant(CSTR name);
cql_noexport ast_node *sem_get_col_default_value(ast_node *attrs);
cql_noexport void sem_accumulate_full_region_image(symtab *regions, CSTR name);
cql_noexport void sem_accumulate_public_region_image(symtab *regions, CSTR name);
cql_noexport sem_t find_column_type(CSTR table_name, CSTR column_name);
cql_noexport void init_encode_info(ast_node *misc_attrs, bool_t *use_encode_arg, CSTR *encode_context_column_arg, symtab *encode_columns_arg);
cql_noexport bool_t should_encode_col(CSTR col, sem_t sem_type, bool_t use_encode_arg, symtab *encode_columns_arg);
#define LIKEABLE_FOR_ARGS 1
#define LIKEABLE_FOR_VALUES 2
cql_noexport ast_node *sem_find_likeable_ast(ast_node *like_ast, int32_t likeable_for);
cql_noexport ast_node *sem_find_likeable_from_var_type(ast_node *var);
cql_noexport ast_node *find_named_type(CSTR name);
cql_noexport void record_error(ast_node *ast);
cql_noexport void record_ok(ast_node *ast);
cql_noexport void report_error(ast_node *ast, CSTR msg, CSTR subject);
cql_noexport void sem_one_stmt(ast_node *ast);
cql_noexport void sem_root_expr(ast_node *node, uint32_t expr_context);
cql_noexport void sem_expr(ast_node *node);
cql_noexport void sem_cursor(ast_node *ast);
cql_noexport ast_node *find_arg_bundle(CSTR name);
cql_noexport bool_t add_arg_bundle(ast_node *ast, CSTR name);
cql_noexport void sem_add_flags(ast_node *ast, sem_t flags);
cql_noexport ast_node *first_arg(ast_node *arg_list);
cql_noexport ast_node *second_arg(ast_node *arg_list);
cql_noexport ast_node *third_arg(ast_node *arg_list);
cql_noexport void sem_verify_no_anon_no_null_columns(ast_node *ast);
cql_noexport void sem_verify_identical_columns(ast_node *expected, ast_node *actual, CSTR target);
cql_noexport void sem_validate_cursor_blob_compat(ast_node *ast_error, ast_node *cursor, ast_node *blob, ast_node *dest, ast_node *src);
cql_noexport void sem_any_shape(ast_node *ast);
cql_noexport sem_node *new_sem(sem_t sem_type);
cql_noexport bool_t sem_verify_assignment(ast_node *ast, sem_t sem_type_needed, sem_t sem_type_found, CSTR var_name);
#endif
cql_data_decl( bytebuf *schema_annotations );
cql_data_decl( bytebuf *recreate_annotations );
cql_data_decl( struct list_item *all_tables_list );
cql_data_decl( struct list_item *all_subscriptions_list );
cql_data_decl( struct list_item *all_functions_list );
cql_data_decl( struct list_item *all_views_list );
cql_data_decl( struct list_item *all_indices_list );
cql_data_decl( struct list_item *all_triggers_list );
cql_data_decl( struct list_item *all_regions_list );
cql_data_decl( struct list_item *all_ad_hoc_list );
cql_data_decl( struct list_item *all_select_functions_list );
cql_data_decl( struct list_item *all_enums_list );
cql_data_decl( struct list_item *all_constant_groups_list );
cql_data_decl( symtab *schema_regions );
cql_data_decl( ast_node *current_proc );
cql_data_decl( charbuf *error_capture );
// True if we are presently emitting a vault stored proc.
// A stored proc with attribution vault_sensitive is a vault stored proc
cql_data_decl( bool_t use_encode );
cql_data_decl( CSTR encode_context_column );
// List of column names reference in a stored proc that we should vault
cql_data_decl( symtab *encode_columns );
// These are the symbol tables with the accumulated included/excluded regions
cql_data_decl( symtab *included_regions );
cql_data_decl( symtab *excluded_regions );
cql_data_decl( sem_t global_proc_flags );
// This is the table for all the migration procs for any recreate procs or groups
// that might need them, these are the second form of ad hoc schema migration
cql_data_decl( symtab *ad_hoc_recreate_actions );