sources/flow.c (308 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "ast.h"
#include "flow.h"
// Indicates whether a history item resulted from setting a flag or unsetting a
// flag. The values associated with each enum case allow a series of deltas to
// be totalled up to determine an overall effect when popping a branch group.
typedef enum {
FLOW_HISTORY_DELTA_UNSET = -1,
FLOW_HISTORY_DELTA_SET = 1
} flow_history_delta;
// Records a call to `flow_set_flag_for_type` or `flow_unset_flag_for_type`.
// These form a linked list referred to as a "history".
typedef struct history_item {
struct history_item *next;
sem_t *type;
sem_t flag;
flow_history_delta delta;
} flow_history_item;
// A list of `history_item` values.
typedef flow_history_item *flow_history;
// There are different types of flow contexts, each of which is indicated by its
// `flow_context_kind`.
typedef enum {
// Normal flow contexts simply revert all improvements set within them when
// they are popped.
FLOW_CONTEXT_KIND_NORMAL,
// Branch group contexts contain zero or more branch contexts. Branch group
// contexts perform extra steps to merge the effects of their branches.
FLOW_CONTEXT_KIND_BRANCH_GROUP,
// Branch contexts are contexts that are only entered into if some condition
// is true. They must only be created within a branch group context. They are
// special in that they completely reverse all effects within them when they
// are popped so that subsequent branches are unaffected.
FLOW_CONTEXT_KIND_BRANCH,
// Jump contexts are pessimistic contexts that assume, after the context has
// ended, that all possible un-sets within it happened and all sets within it
// did not happen. They can be used to ensure safety for TRY blocks, loops,
// and other blocks containing statement lists that may not be executed in
// full due to one statement within them jumping to the end of the context (or
// to the end of an enclosing jump context).
//
// NOTE: Jump contexts do not take the locations of control statements within
// them, if any, into account, and thus are presently more conservative than
// is necessary. Experience with previous versions of CQL that treated *all*
// contexts this pessimistically strongly suggests that this is not a problem
// in practice.
FLOW_CONTEXT_KIND_JUMP
} flow_context_kind;
// A flow context is used to encapsulate a region of a program so that effects
// within it (e.g., nullability improvements, variable initialization
// improvements, row check improvements, et cetera) can be managed
// appropriately.
typedef struct flow_context {
// The parent of the context, if any.
struct flow_context *parent;
// The kind of the context. The value of `kind` indicates which of the
// anonymous structs within the union below may be accessed, if any.
flow_context_kind kind;
// The history of sets and un-sets made within the context. This does not
// necessarily include *all* sets and un-sets as branch group contexts merge
// the effects of their branches.
flow_history history;
union {
// Only used when `kind` is `FLOW_CONTEXT_KIND_BRANCH`.
struct {
// `true` when the branch always jumps to the end of an enclosing jump
// context or some statement beyond it.
bool_t always_jumps;
} branch;
// Only used when `kind` is `FLOW_CONTEXT_KIND_BRANCH_GROUP`.
struct {
// The concatenated histories of all of the branches created directly
// within the branch group.
flow_history branch_histories;
// The number of branches directly within the branch group.
uint32_t branch_count;
// `true` if the branch group includes (or will include) a catch-all
// branch (e.g., ELSE) or otherwise covers all possible cases.
bool_t covers_all_cases;
} branch_group;
// Only used when `kind` is `FLOW_CONTEXT_KIND_JUMP`.
struct {
// The nearest enclosing context with kind `FLOW_CONTEXT_KIND_JUMP`, if
// any. This is used to update `top_jump_context` when a jump context is
// popped.
struct flow_context *nearest_jump_context;
// The histories of *all* un-sets made within the jump context's
// subcontexts.
flow_history unset_histories;
} jump;
};
} flow_context;
// The global that holds all control flow information managed within this file.
static flow_context *current_context;
// The topmost jump context, if any. This exists merely to avoid the need to
// search upwards from the current context for this every time an improvement is
// unset: It provides no additional information beyond what is already available
// in `current_context`.
static flow_context *top_jump_context;
// Given a pointer to `history`, sets its tail to `history_to_append`.
static void append_history(flow_history *history, flow_history history_to_append) {
flow_history *tail = history;
while (*tail) {
tail = &(*tail)->next;
}
*tail = history_to_append;
}
// Given `history`, returns an array that points to each item within it and sets
// `*count` to the total number of items.
static flow_history_item **array_from_history(flow_history history, uint32_t *count) {
Contract(count);
uint32_t length = 0;
for (flow_history_item *item = history; item; item = item->next) {
length++;
}
flow_history_item **array = _ast_pool_new_array(flow_history_item *, length);
uint32_t i = 0;
for (flow_history_item *item = history; item; item = item->next, i++) {
array[i] = item;
}
*count = length;
return array;
}
// Compares two history items by their type addresses (which, in this instance,
// constitute a unique identifier), followed by their flags. This is used to
// allow `qsort` to group together effects across a group of branches in order
// to be able to produce the total set of effects for the enclosing branch
// group.
static int history_item_comparator(const void *a, const void *b) {
flow_history_item *item_a = *(flow_history_item **)a;
flow_history_item *item_b = *(flow_history_item **)b;
if (item_a->type < item_b->type) {
return -1;
}
if (item_a->type > item_b->type) {
return 1;
}
#if 0
// We'll never enter either of these branches because, at the moment, there
// are only two types of improvements, and no type can have both its
// nullability improved and its initialization improved: Only nonnull types
// may require initialization, and only nullable types may have improved
// nullability. This is, therefore, disabled for the sake of code coverage.
if (item_a->flag < item_b->flag) {
return -1;
}
if (item_a->flag > item_b->flag) {
return 1;
}
#else
// Until additional improvement types are added, this will always be true.
Invariant(item_a->flag == item_b->flag);
#endif
return 0;
}
// Returns a new history item given initial values.
static flow_history_item *history_item_new(sem_t *type, sem_t flag, flow_history_delta delta) {
Contract(type);
Contract(is_single_flag(flag));
flow_history_item *item = _ast_pool_new(flow_history_item);
item->next = NULL;
item->type = type;
item->flag = flag;
item->delta = delta;
return item;
}
// Reverses a history in place.
static void reverse_history(flow_history *history) {
flow_history_item *current = *history;
flow_history_item *previous = NULL;
flow_history_item *next = NULL;
while (current) {
next = current->next;
current->next = previous;
previous = current;
current = next;
}
*history = previous;
}
// Clones a history and returns the new copy.
static flow_history clone_history(flow_history history) {
flow_history new_history = NULL;
for (flow_history_item *item = history; item; item = item->next) {
flow_history_item *new_item = history_item_new(item->type, item->flag, item->delta);
new_item->next = new_history;
new_history = new_item;
}
reverse_history(&new_history);
return new_history;
}
// Given a pointer to a history, adds a new history item to it with the initial
// values provided.
static void record_in_history(flow_history *history, sem_t *type, sem_t flag, flow_history_delta delta) {
Contract(history);
Contract(type);
Contract(is_single_flag(flag));
flow_history_item *item = history_item_new(type, flag, delta);
item->next = *history;
*history = item;
}
// Sets `flag` on `*type` and records it in the history of the the current
// context. This must never be called if the flag is already set.
cql_noexport void flow_set_flag_for_type(sem_t flag, sem_t *type) {
Contract(current_context);
Contract(is_single_flag(flag));
Contract(type);
Contract(!(*type & flag));
*type |= flag;
record_in_history(¤t_context->history, type, flag, FLOW_HISTORY_DELTA_SET);
}
// Un-sets `flag` on `*type` and records it in the history of the the current
// context. This must never be called unless the flag is already set.
cql_noexport void flow_unset_flag_for_type(sem_t flag, sem_t *type) {
Contract(current_context);
Contract(is_single_flag(flag));
Contract(type);
Contract(*type & flag);
*type &= sem_not(flag);
record_in_history(¤t_context->history, type, flag, FLOW_HISTORY_DELTA_UNSET);
// If we're within a jump context, record the unset there too so we can
// re-unset it later. We can skip this if the current context is a jump
// context as the unset was just recorded in its history above directly.
if (top_jump_context && current_context != top_jump_context) {
record_in_history(&top_jump_context->jump.unset_histories, type, flag, FLOW_HISTORY_DELTA_UNSET);
}
}
// Creates a new context with the kind provided and adds it to the current
// context. This does NOT initialize any kind-specific members.
static void push_context_with_kind(flow_context_kind kind) {
flow_context *context = _ast_pool_new(flow_context);
context->parent = current_context;
context->kind = kind;
context->history = NULL;
current_context = context;
}
// Given a history, iterates over it and calls `func` with the delta sum of each
// type/flag combination.
static void with_delta_sums_of_history(flow_history history, void func(sem_t *type, sem_t flag, int32_t delta_sum)) {
Contract(func);
uint32_t item_count;
flow_history_item **item_array = array_from_history(history, &item_count);
if (item_count == 0) {
return;
}
// Sort the total history by type, then by flag.
qsort(item_array, item_count, sizeof(flow_history_item *), history_item_comparator);
// Iterate over the branch histories, calling `func` the `delta_sum` of each
// type/flag combination.
int32_t delta_sum = item_array[0]->delta;
for (uint32_t i = 1; i < item_count; i++) {
flow_history_item *previous_item = item_array[i - 1];
flow_history_item *current_item = item_array[i];
if (previous_item->type != current_item->type || previous_item->flag != current_item->flag) {
func(previous_item->type, previous_item->flag, delta_sum);
delta_sum = 0;
}
delta_sum += current_item->delta;
}
func(item_array[item_count - 1]->type, item_array[item_count - 1]->flag, delta_sum);
}
// Asserts that the `delta_sum` calculated for a particular type/flag
// combination present in some context's history is within [-1, 1] as is
// required for `merge_effects` to work properly.
static void invariant_delta_sum(sem_t *type, sem_t flag, int32_t delta_sum) {
Invariant(delta_sum >= -1 && delta_sum <= 1);
}
// Moves the history of the current context to the start of the history of the
// parent context (if any) so that it can manage it however it chooses.
static void move_history_to_parent() {
Contract(current_context);
if (current_context->parent) {
append_history(¤t_context->history, current_context->parent->history);
current_context->parent->history = current_context->history;
}
// In order for `merge_effects` to work properly, the history field of every
// context must contain a history that, for every type/flag combination, has a
// delta sum of -1, 0, or 1. We enforce that invariant here.
with_delta_sums_of_history(
// If we have a parent context, we check that (purely for the sake of
// checking more things rather than fewer) as it now also contains the
// history of the current context.
(current_context->parent ? current_context->parent : current_context)->history,
invariant_delta_sum
);
// This is no longer valid.
current_context->history = NULL;
}
// Pushes a normal context. Unless one is pushing a context for a group of
// branches (e.g., within an IF or CASE) or pushing a branch itself, one should
// simply push a normal context.
cql_noexport void _flow_push_context_normal() {
push_context_with_kind(FLOW_CONTEXT_KIND_NORMAL);
}
// Un-sets all of the improvements in the current history while leaving the
// history itself unchanged.
static void unset_all_improvements() {
for (flow_history_item *item = current_context->history; item; item = item->next) {
switch (item->delta) {
case FLOW_HISTORY_DELTA_UNSET:
break;
case FLOW_HISTORY_DELTA_SET:
if (*item->type & item->flag) {
flow_unset_flag_for_type(item->flag, item->type);
}
}
}
}
// Pops a normal context, unsetting all improvements made within it.
cql_noexport void _flow_pop_context_normal() {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_NORMAL);
unset_all_improvements();
move_history_to_parent();
current_context = current_context->parent;
}
// Pushes a new branch group context. Only branch contexts may be created within
// it.
cql_noexport void _flow_push_context_branch_group() {
push_context_with_kind(FLOW_CONTEXT_KIND_BRANCH_GROUP);
current_context->branch_group.branch_histories = NULL;
current_context->branch_group.branch_count = 0;
current_context->branch_group.covers_all_cases = false;
}
// Merges the effects of the branches of the current branch group context for a
// particular type and flag to produce the overall effect.
//
// For a given type and flag combination, each branch could have ultimately
// unset a flag that was initially set, set a flag that was initially unset, or
// been neutral with respect to the flag (e.g., by setting it, then unsetting it
// when it was initially unset, or by simply not affecting it at all). Each
// branch, therefore, can be assigned a value of -1, 1, or 0 respectively. These
// are totalled up and represent the `delta_sum`.
static void merge_effects(sem_t *type, sem_t flag, int32_t delta_sum) {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_BRANCH_GROUP);
Contract(current_context->branch_group.branch_count > 0);
Contract(type);
Contract(is_single_flag(flag));
// If the delta sum is positive, it must have been the case that the flag
// began unset otherwise no branches could have set it.
Invariant(delta_sum <= 0 || !(*type & flag));
// If the delta sum is negative, it must have been the case that the flag
// began set otherwise no branches could have unset it.
Invariant(delta_sum >= 0 || *type & flag);
uint32_t branch_count = current_context->branch_group.branch_count;
// If all branches set a flag, `delta_sum` will equal `branch_count`.
// Likewise, if all branches unset it, `abs(delta_sum)` will equal
// `branch_count`.
Invariant(abs(delta_sum) <= branch_count);
// Indicates whether or not the branches cover all possible cases.
bool_t covers_all_cases = current_context->branch_group.covers_all_cases;
if (covers_all_cases && delta_sum == branch_count) {
// The branches cover all possible cases and `delta_sum` is equal to the
// number of branches (which means every branch made the same improvement),
// so we can consider the entire branch group to have made the improvement.
flow_set_flag_for_type(flag, type);
} else if (delta_sum < 0) {
// The delta sum is negative, so at least one of the branches unset the
// flag. We must, therefore, consider the entire branch group as having
// unset the flag.
flow_unset_flag_for_type(flag, type);
} else {
// If `delta_sum` is 0, that means all branches were neutral with respect to
// the flag. If `delta_sum` is positive, but less than `branch_count`, that
// means some branches improved it and the rest were netural. Since all
// branches were at least neutral, it's safe to do nothing here and allow
// things to remain as they are.
//
// The reason we know this is that we only set something when it is unset
// and only unset something when it is set, and so all branches must have
// the same overall effect if they are to have any effect at all. For
// example, if a variable is nullable before an IF, branches can either
// leave it as it is or improve it. Likewise, if a variable is already
// inferred to be nonnull before an IF, branches can either leave it as it
// is or un-improve it. Since it is not possible for one branch to improve
// something if another un-improved it and vice versa, we know any
// non-negative delta sum implies all branches were at least neutral, and so
// no unset is required.
}
}
// Records whether or not the current branch group context has a catch-all
// branch or otherwise covers all possible cases.
cql_noexport void flow_set_context_branch_group_covers_all_cases(bool_t covers_all_cases) {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_BRANCH_GROUP);
current_context->branch_group.covers_all_cases = covers_all_cases;
}
// Records the presence of an empty branch within the branch group. This is
// equivalent to calling `_flow_push_context_branch` immediately followed by
// `_flow_pop_context_branch` and exists only for the sake of convenience.
cql_noexport void flow_context_branch_group_add_empty_branch() {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_BRANCH_GROUP);
current_context->branch_group.branch_count++;
}
// Pops the current branch group, calculating the total effect of all of its
// branches in the process.
cql_noexport void _flow_pop_context_branch_group() {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_BRANCH_GROUP);
// Unset all of the negative improvements made within the branch group itself.
// Negative improvements are those made because the condition of a previous
// branch must have been false if a later branch was taken.
unset_all_improvements();
// Zero out the current history. We will rebuild it based on what occurred in
// the branches via `merge_effects`.
current_context->history = NULL;
// Merge the effects of all of the branches.
with_delta_sums_of_history(current_context->branch_group.branch_histories, merge_effects);
// The history we move to the parent contains only the result of the
// `unset_all_improvements` call above and the result of merging the effects:
// The full histories of the branches are completely discarded. This is
// critical for the delta sum approach that `merge_effects` uses: If N
// branches within a branch group all improve the same type/flag combination,
// we need that to count as a single improvement, not N+1 improvements, for
// any further effect merging that might take place in enclosing branch groups
// to work correctly.
move_history_to_parent();
current_context = current_context->parent;
}
// Pushes a new branch context. This must only be done within a branch group
// context. It is critical that this be done for ALL branches of a given
// conditional, including any "else" or catch-all branch, otherwise
// `_flow_pop_context_branch_group` may improve something too optimistically due
// to a lack of sufficient information.
cql_noexport void _flow_push_context_branch() {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_BRANCH_GROUP);
push_context_with_kind(FLOW_CONTEXT_KIND_BRANCH);
current_context->branch.always_jumps = false;
// Clone the current history of (typically negative) improvements within the
// branch group itself and add it to the accumulated history of its branches.
// The reason we do this is so that we can calculate the correct delta sum
// later on.
//
// Consider the following code:
//
// IF x IS NULL THEN
// SET x := 42;
// ELSE IF y THEN
// -- do nothing
// ELSE
// -- do nothing
// END IF;
//
// Here, even though the second and third branches have not improved the type
// of `x` to be nonnull themselves, it is still the case that x is nonnull
// at the end of the second and third branches due to the condition of the
// first branch. It is therefore safe and appropriate to treat the branches as
// though they each made the improvement themselves so that x can be treated
// as nonnull after END IF.
//
// To put it more plainly, we want a delta sum of 3 for the improvement to x.
// If we didn't perform this step, it would only be 1.
flow_history branch_group_history = clone_history(current_context->parent->history);
append_history(&branch_group_history, current_context->parent->branch_group.branch_histories);
current_context->parent->branch_group.branch_histories = branch_group_history;
// Increment the branch count of the parent branch group so that it can be
// later compared against delta sums.
current_context->parent->branch_group.branch_count++;
}
// Sets whether or not the current context always jumps to the end of the
// nearest enclosing jump context or some statement beyond it.
cql_noexport void flow_set_context_always_jumps(bool_t always_jumps) {
Contract(current_context);
if (current_context->kind != FLOW_CONTEXT_KIND_BRANCH) {
// Knowledge that the current context always jumps is only useful if the
// current context is a branch context.
return;
}
current_context->branch.always_jumps = always_jumps;
}
// Pop a branch context, reverting the history within it such that it will be as
// though nothing ever happened.
cql_noexport void _flow_pop_context_branch() {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_BRANCH);
Invariant(current_context->parent);
Invariant(current_context->parent->kind == FLOW_CONTEXT_KIND_BRANCH_GROUP);
// Revert the history by interating over it, starting with the most recent
// item, and doing the opposite of what was originally done. We only adjust
// the flags and do not record the changes: The parent branch group will
// handle the recording when it merges the effects of all of its branches.
for (flow_history_item *item = current_context->history; item; item = item->next) {
switch (item->delta) {
case FLOW_HISTORY_DELTA_UNSET:
*item->type |= item->flag;
break;
case FLOW_HISTORY_DELTA_SET:
*item->type &= sem_not(item->flag);
break;
}
}
if (current_context->branch.always_jumps) {
// Since the current context always jumps, we actually do not want to count
// it as a branch at all when merging effects. By simply eliding the branch,
// an improvement that is merely set in all branches that do not always jump
// is sufficient for the improvement to persist after the branch group ends
// (assuming the branches cover all possible cases).
//
// By discarding the branch, we technically do a bit worse in one case: If
// *all* branches always jump and the branches cover all possibilities, no
// improvements will persist after the branch group ends because all
// branches will have been elided. This is not a problem in practice,
// however, since if all branches jump and the branches cover all cases, no
// code could execute until after the end of the nearest enclosing jump
// context, and no improvements set within a jump context are allowed to
// persist beyond it anyway.
//
// NOTE: Discarding the branch now is not the same as the branch never
// having existed. The reason for this is that any improvements unset within
// the branch have already been added to the nearest enclosing jump context
// and will remain there to be re-unset later. This must be the case
// because, even though we know this context always jumps, we do not know at
// what point it might jump, and so we must remain sufficiently pessimistic.
//
// NOTE: We could do better by keeping a separate count of the number of
// branches that always jump. If we did that, when popping a branch group,
// if the number of branches that always jumped equaled the total number of
// branches, then the branch group could itself be considered to always jump
// and we could propagate that knowledge upwards. This would allow us to
// handle cases like the following:
//
// DECLARE x INT;
// IF 0 THEN
// IF 0 THEN
// THROW;
// ELSE
// RETURN;
// END IF;
// -- the fact that this always jumps is presently not understood
// ELSE
// SET x := 1;
// END IF;
// -- x could be safely considered nonnull here
current_context->parent->branch_group.branch_count--;
} else {
// Add the history of the branch to the total set of branch histories for
// the current branch group.
append_history(¤t_context->history, current_context->parent->branch_group.branch_histories);
current_context->parent->branch_group.branch_histories = current_context->history;
}
current_context = current_context->parent;
}
// Pushes a jump context such that `current_context` will be identical to
// `top_jump_context`.
cql_noexport void _flow_push_context_jump() {
push_context_with_kind(FLOW_CONTEXT_KIND_JUMP);
current_context->jump.nearest_jump_context = top_jump_context;
current_context->jump.unset_histories = NULL;
top_jump_context = current_context;
}
// Pops the current jump context and updates `top_jump_context` accordingly.
cql_noexport void _flow_pop_context_jump() {
Contract(current_context);
Contract(current_context->kind == FLOW_CONTEXT_KIND_JUMP);
// Re-unset all un-sets made anywhere within the jump context's subcontexts.
// This is done to remain safe in the presence of code like the following:
//
// DECLARE x INT;
// -- improve x
// SET x := 42;
// WHILE some_condition
// BEGIN
// -- use an IF/ELSE to trigger effect merging in the branch group; this
// -- is critical to this example as we'd be safe otherwise!
// IF another_condition THEN
// -- un-improve x
// SET x := NULL;
// IF yet_another_condition THEN
// -- jump before re-improving x
// LEAVE;
// END IF;
// -- re-improve x
// SET x := 100;
// ELSE
// -- neutral; do nothing
// END IF;
// -- the branch group context considers the one un-improvement of 'x' and
// -- the one improvement of 'x' in the first branch to be neutral and
// -- thus cancels out their effects since the ELSE is also netural
// END;
// -- if the body of the WHILE were analyzed within a normal flow context,
// -- x would be unsafely treated as nonnull here due to the cancelling
// -- mentioned above
//
// NOTE: The approach taken here is more conservative than is necessary and
// may be revisited in the future.
for (flow_history_item *item = current_context->jump.unset_histories; item; item = item->next) {
Invariant(item->delta == FLOW_HISTORY_DELTA_UNSET);
if (*item->type & item->flag) {
flow_unset_flag_for_type(item->flag, item->type);
}
}
// This un-sets improvements initially made within the jump context itself, of
// course, but it also propagates any un-sets made in the previous step to the
// nearest enclosing jump context, if any. This is important because all of
// the unsetting we're doing now may be reverted by a branch context in
// between this context and the nearest enclosing jump context.
unset_all_improvements();
move_history_to_parent();
top_jump_context = current_context->jump.nearest_jump_context;
current_context = current_context->parent;
}