in sql/sql_resolver.cc [5500:6341]
bool Query_block::transform_table_subquery_to_join_with_derived(
THD *thd, Item_exists_subselect *subq_pred) {
Query_expression *const inner_qe = subq_pred->query_expr();
Query_block *const inner_qb = inner_qe->first_query_block();
assert(inner_qe->query_term()->term_type() == QT_QUERY_BLOCK);
subq_pred->strategy = Subquery_strategy::DERIVED_TABLE;
const int hidden_fields = CountHiddenFields(inner_qb->fields);
const bool no_aggregates =
!inner_qb->is_grouped() && !inner_qb->with_sum_func &&
inner_qb->having_cond() == nullptr && !inner_qb->has_windows();
const bool decorrelate =
no_aggregates && (inner_qe->uncacheable & UNCACHEABLE_DEPENDENT) &&
inner_qb->where_cond() != nullptr &&
inner_qb->where_cond()->is_outer_reference() &&
// decorrelation adds to the SELECT list, and hidden fields make it
// impossible (search for "hidden" in this function). Hidden fields
// usually come from aggregation, which we disallowed just above, but also
// if a SELECT list element is a subquery which contains an outer
// reference to subs_query_block.
hidden_fields == 0;
// Ensure that all lists are consistent. all_fields should have an optional
// prefix and then be fields_list. If no aggregates, base_ref_items should
// start with fields_list.
assert(hidden_fields >= 0);
// We're going to build the lists of outer and inner semijoin
// expressions:
// - they start empty
// - first (build_sj_exprs()), if this is IN, we add the left and right
// expressions of IN; if this is EXISTS, we do nothing
// - second (decorrelate_condition()), we decorrelate comparison operators
// in the subquery, and add the resulting left and right expressions.
mem_root_deque<Item *> sj_outer_exprs(thd->mem_root);
mem_root_deque<Item *> sj_inner_exprs(thd->mem_root);
Mem_root_array<Item_func::Functype> op_types(thd->mem_root);
// If non-NULL, the subquery predicate is a quantified comparison predicate
Item_allany_subselect *allany_pred = nullptr;
// Whether a quantified comparison predicate is ALL or ANY, and special cases:
bool is_all_pred = false;
bool is_any_pred = false;
bool is_eq_all = false;
bool is_ne_any = false;
/*
When transforming quantified comparison predicates to derived tables, there
are three ways to generate the derived table from the subquery:
deduplicated, aggregated and grouped.
A deduplicated derived table means that a DISTINCT flag is added to the
subquery, thus we simply eliminate duplicates. This kind of derived table is
used to process IN/NOT IN/EXISTS/NOT EXISTS predicates placed in the
WHERE clause and SELECT list, and with conformant truth values.
An aggregated derived table is a derived table aggregated into one row.
Typically, it includes a MAX or MIN value, number of rows in the subquery,
and possibly number of NULL values, if applicable. An aggregated derived
table is used to implement <op>ALL and <op>ANY quantified comparison
predicates where <op> is >, >=, < or <=, as well as =ALL and <>ANY.
A grouped derived table is a derived table that is grouped on the
inner expressions from a correlated WHERE clause with equality predicates
in the subquery. It also contains a count of the number of rows in
the group. The count is required to check for existence of a particular
value. Grouped derived tables are used to handle quantified comparison
predicates with correlation in the WHERE clause that would be handled with
an aggregated derived table without the correlation.
An aggregated derived table always returns one row, however there is no
such guarantee for grouped derived tables. Because of this, in order to
properly evaluate the count of a particular group, a query that would
otherwise be processed as an inner join must be processed as a left outer
join, and a zero count must be checked by looking for a null-extended
row from the subquery.
A derived table is assumed to be deduplicated if it is neither specified
as aggregated nor grouped.
*/
// If true, quantified comparison predicate uses aggregated subquery
bool aggregated_subquery = false;
// If true, quantified comparison predicate uses grouped subquery
bool grouped_subquery = false;
// Track optional aggregate fields for quantified comparison predicates;
int count_field_no = -1;
int nulls_field_no = -1;
int distinct_field_no = -1;
// If true, use anti-join algorithm, otherwise use semi-join algorithm
bool use_anti_join = subq_pred->use_anti_join_transform();
// Shorthand for the left expression:
Item *left_expr = subq_pred->left_expr;
// Locate place of this subquery (SELECT list or WHERE clause):
Item **root = nullptr;
size_t root_field_no = 0;
for (size_t i = 0; i < fields.size(); i++) {
root = &fields[i];
if (!(*root)->hidden && (*root)->has_subquery() &&
(*root)->walk(&Item::contains_item, enum_walk::PREFIX,
pointer_cast<uchar *>(&subq_pred))) {
root_field_no = i;
break;
}
root = nullptr;
}
if (root == nullptr) {
root = &m_where_cond;
if (*root == nullptr || !(*root)->has_subquery() ||
!(*root)->walk(&Item::contains_item, enum_walk::PREFIX,
pointer_cast<uchar *>(&subq_pred))) {
root = nullptr;
}
}
assert(root != nullptr);
if (root != &m_where_cond) {
for (size_t i = 0; i < fields.size(); i++) {
if (i > root_field_no && fields[i]->has_wf() &&
fields[i]->has_subquery()) {
/*
There is an anomaly in fields which make it impossible to transform
quantified comparison subqueries that are used in subsequent
window functions. Skip this marginal corner case.
*/
my_error(ER_SUBQUERY_TRANSFORM_REJECTED, MYF(0));
return true;
}
}
}
if (subq_pred->subquery_type() == Item_subselect::IN_SUBQUERY) {
build_sj_exprs(thd, &sj_outer_exprs, &sj_inner_exprs, subq_pred, inner_qb);
// All these expressions are compared with '=':
if (op_types.resize(sj_outer_exprs.size(), Item_func::EQ_FUNC)) {
return true;
}
} else if (subq_pred->subquery_type() == Item_subselect::EXISTS_SUBQUERY) {
if (inner_qb->is_table_value_constructor) {
if ((inner_qb->select_limit != nullptr &&
!inner_qb->select_limit->const_item()) ||
(inner_qb->offset_limit != nullptr &&
!inner_qb->offset_limit->const_item())) {
subq_pred->strategy = Subquery_strategy::SUBQ_MATERIALIZATION;
// We can't determine until materialization time whether we have
// an empty or non-empty result set, skip transform
return false;
}
}
// We must replace of all EXISTS' initial SELECT list with
// constants, otherwise they will interfere in DISTINCT, indeed if we didn't
// replace,
// SELECT ... FROM ot WHERE EXISTS(SELECT c1 FROM it)
// would become
// SELECT ... FROM ot JOIN (SELECT DISTINCT c1 FROM it) AS dt
// and we may get duplicate copies of a row of 'ot', wrongly.
// Note that in setup_wild() we already do that, but only for "SELECT *",
// not for an explicit list "SELECT expr1, expr2", so we still have to do
// that here.
// We cannot do that if the query is aggregated, consider:
// EXISTS(SELECT SUM(a) AS x, b as y FROM t GROUP BY y HAVING x>2)
// if we replace we get
// EXISTS(SELECT 1, 1 FROM t GROUP BY y HAVING x>2)
// And as 'x' points to 1, HAVING is "always false".
// Resolving ensures that this assertion holds.
assert(no_aggregates);
if (inner_qb->is_table_value_constructor) {
// This transformation effectively converts a table value constructor
// query block to a scalar subquery with zero or one constant rows.
inner_qb->is_table_value_constructor = false;
// We checked above that we can evaluate LIMIT/OFFSET, so use that to
// compute here whether result set is empty or not
const ulonglong limit = (inner_qb->select_limit != nullptr)
? inner_qb->select_limit->val_uint()
: std::numeric_limits<ulonglong>::max();
const ulonglong offset = (inner_qb->offset_limit != nullptr)
? inner_qb->offset_limit->val_uint()
: 0;
const ulonglong actual_rows = inner_qb->row_value_list->size();
const bool empty_result = limit == 0 || offset >= actual_rows;
auto limes = new (thd->mem_root) Item_int(empty_result ? 0 : 1);
if (limes == nullptr) return true;
inner_qb->select_limit = limes;
inner_qb->offset_limit = nullptr;
}
Item::Cleanup_after_removal_context ctx(this);
int i = 0;
for (auto it = inner_qb->visible_fields().begin();
it != inner_qb->visible_fields().end(); ++it, ++i) {
Item *inner = *it;
if (inner->basic_const_item()) continue; // no need to replace it
auto constant = new (thd->mem_root)
Item_int(NAME_STRING("Not_used"), 1LL, MY_INT64_NUM_DECIMAL_DIGITS);
*it = constant;
inner_qb->base_ref_items[i] = constant;
// Expressions from the SELECT list will not be used; unlike in the case
// of IN, they are not part of sj_inner_exprs.
inner->walk(&Item::clean_up_after_removal, walk_options,
pointer_cast<uchar *>(&ctx));
}
inner_qb->select_list_tables = 0;
} else if (subq_pred->subquery_type() == Item_subselect::ALL_SUBQUERY ||
subq_pred->subquery_type() == Item_subselect::ANY_SUBQUERY) {
allany_pred = down_cast<Item_allany_subselect *>(subq_pred);
is_all_pred = subq_pred->subquery_type() == Item_subselect::ALL_SUBQUERY;
is_any_pred = !is_all_pred;
assert(!inner_qb->is_grouped());
// Selected field is first in base_ref_items, ahead of any hidden fields.
size_t field_no = 0;
// ALL and ANY will always work on an aggregated or grouped inner query:
if (!decorrelate) {
aggregated_subquery = true;
} else {
grouped_subquery = true;
}
// An aggregated subquery in WHERE clause can be processed as a regular join
if (root == &m_where_cond && aggregated_subquery) {
use_anti_join = false;
}
Item *expr = inner_qb->base_ref_items[field_no];
if (allany_pred->eqne_op()) { // =ALL or <>ANY
is_eq_all = is_all_pred;
is_ne_any = is_any_pred;
}
// Convert subquery to aggregated query block:
thd->lex->set_current_query_block(inner_qb);
const auto save_allow_sum_func = thd->lex->allow_sum_func;
thd->lex->allow_sum_func |= (nesting_map)1 << inner_qb->nest_level;
// Select MIN/MAX of the selected expression
Item *aggregate =
is_any_pred ^ (allany_pred->compare_func()->l_op())
? implicit_cast<Item *>(new (thd->mem_root) Item_sum_min(expr))
: implicit_cast<Item *>(new (thd->mem_root) Item_sum_max(expr));
if (aggregate == nullptr) return true;
if (aggregate->fix_fields(thd, &aggregate)) return true;
inner_qb->fields[field_no + hidden_fields] = aggregate;
inner_qb->base_ref_items[field_no] = aggregate;
/*
Other generated fields are placed behind existing fields.
"field_no" counts the field number in the generated derived table,
ignoring any hidden fields in the fields list. Thus, "hidden_fields"
must be accounted for when adding to fields and base_ref_items.
*/
field_no++;
// Aggregate COUNT from subquery (actually, we only need empty indicator)
Item_int *number_0 = new (thd->mem_root) Item_int(int32{0}, 1);
if (number_0 == nullptr) return true;
Item *counter = new (thd->mem_root) Item_sum_count(number_0);
if (counter == nullptr) return true;
if (counter->fix_fields(thd, &counter)) return true;
if (inner_qb->fields.push_back(counter)) return true;
count_field_no = field_no++;
inner_qb->base_ref_items[count_field_no + hidden_fields] = counter;
// If selected expression is nullable, return whether there are NULL values
if (allany_pred->process_nulls() && expr->is_nullable()) {
Item *isnull = new (thd->mem_root) Item_func_isnull(expr);
if (isnull == nullptr) return true;
Item *has_nulls = new (thd->mem_root) Item_sum_max(isnull);
if (has_nulls == nullptr) return true;
if (has_nulls->fix_fields(thd, &has_nulls)) return true;
inner_qb->fields.push_back(has_nulls);
nulls_field_no = field_no++;
inner_qb->base_ref_items[nulls_field_no + hidden_fields] = has_nulls;
}
// =ALL and <>ANY also needs number of distinct values
if (is_eq_all || is_ne_any) {
Item *distinct_count = new (thd->mem_root) Item_sum_count(expr, true);
if (distinct_count == nullptr) return true;
if (distinct_count->fix_fields(thd, &distinct_count)) return true;
inner_qb->fields.push_back(distinct_count);
distinct_field_no = field_no++;
inner_qb->base_ref_items[distinct_field_no + hidden_fields] =
distinct_count;
}
thd->lex->allow_sum_func = save_allow_sum_func;
thd->lex->set_current_query_block(this);
} else {
assert(false);
}
Semijoin_decorrelation sj_decor(
&sj_outer_exprs, &sj_inner_exprs,
// If antijoin, we can decorrelate '<>', '>=', etc, too (but not '<=>'):
// multiple inner rows may match '<>', but they will fail the IS NULL
// condition, and if this condition is top-level in WHERE it will
// eliminate the rows.
(use_anti_join && allany_pred == nullptr &&
subq_pred->outer_condition_context == enum_condition_context::ANDS)
? &op_types
: nullptr);
if (decorrelate) {
// We try to decorrelate it, by looking at equalities in its WHERE.
// This helps for this common pattern:
// EXISTS(SELECT FROM it WHERE it.c=ot.c AND <condition on 'it' only>)
const int initial_sj_inner_exprs_count = sj_inner_exprs.size();
if (inner_qb->decorrelate_condition(sj_decor, nullptr)) return true;
// Append inner expressions of decorrelated equalities to the SELECT
// list. Correct context info of outer expressions.
auto it_outer = sj_outer_exprs.begin() + initial_sj_inner_exprs_count;
auto it_inner = sj_inner_exprs.begin() + initial_sj_inner_exprs_count;
for (int i = 0; it_outer != sj_outer_exprs.end();
++it_outer, ++it_inner, ++i) {
Item *inner = *it_inner;
Item *outer = *it_outer;
// In setup_base_ref_items() we allocated space for appending this
// element.
// If there were a hidden element (there is none, see the setting of
// 'decorrelate'), we would be appending a *non*-hidden element
// (participating in DISTINCT) *after* the hidden element, which would
// break the usual layout of base_ref_items which is: "non-hidden then
// hidden" (see Query_block::add_hidden_item()). While this layout is not
// documented (?), it is safer to not break it.
size_t field_no = inner_qb->fields.size();
inner_qb->base_ref_items[field_no] = inner;
inner_qb->fields.push_back(inner);
// Add as grouping expression, if required:
if (grouped_subquery && inner_qb->add_grouping_expr(thd, inner)) {
return true;
}
// Needed for fix_after_pullout:
update_context_to_derived(outer, this);
// Decorrelated outer expression will move to ON, so fix it.
outer->fix_after_pullout(this, inner_qb);
}
// Decorrelation identified new outer/inner expression pairs.
// Recalculate used_tables() after that (the subquery may have become
// uncorrelated). Because there is no aggregation, window functions, ORDER
// BY, we only have to collect used_tables bits from the SELECT list, FROM
// clause (outer-correlated derived tables and join conditions) and WHERE
// clause.
for (Item *inner : inner_qb->visible_fields()) {
inner_qb->select_list_tables |= inner->used_tables();
}
table_map new_used_tables = inner_qb->select_list_tables;
if (inner_qb->where_cond() != nullptr) {
inner_qb->where_cond()->update_used_tables();
new_used_tables |= inner_qb->where_cond()->used_tables();
}
// Walk the FROM clause to gather any outer-correlated derived table or join
// condition.
walk_join_list(inner_qb->m_table_nest, [&](Table_ref *tr) -> bool {
if (tr->join_cond()) new_used_tables |= tr->join_cond()->used_tables();
if (tr->is_derived() && tr->uses_materialization())
new_used_tables |= tr->derived_query_expression()->m_lateral_deps;
return false;
});
if (!(new_used_tables & OUTER_REF_TABLE_BIT)) {
// there is no outer reference anymore
inner_qb->uncacheable &= ~UNCACHEABLE_DEPENDENT;
inner_qe->uncacheable &= ~UNCACHEABLE_DEPENDENT;
// this must be called only after the change to 'uncacheable' above
subq_pred->update_used_tables();
}
}
// If the subquery is (still) correlated, we would need to create a LATERAL
// derived table, but a certain secondary engine doesn't support it. Error:
if ((subq_pred->subquery_used_tables() & ~PSEUDO_TABLE_BITS) != 0) {
my_error(ER_SUBQUERY_TRANSFORM_REJECTED, MYF(0));
return true;
}
if (!aggregated_subquery && !grouped_subquery &&
!inner_qb->can_skip_distinct()) {
inner_qb->add_base_options(SELECT_DISTINCT);
}
// As the synthesised ON and WHERE will reference columns of the derived
// table, we must have unique names.
// A derived table must have unique column names, while a quantified
// subquery needn't; so names may not currently be unique and we have to
// make them so.
{
int i = 1;
for (Item *inner : inner_qb->visible_fields()) {
if (baptize_item(thd, inner, &i)) return true;
}
}
/*
If subquery is top-level in WHERE, and not negated, use INNER JOIN, else
use LEFT JOIN. A correlated subquery (which is transformed to a grouped
subquery) must also be processed as a LEFT JOIN.
We could use LEFT JOIN unconditionally and let simplify_joins() convert it
to INNER JOIN, but the conversion is not perfect, as not all effects of
propagate_nullability() are undone.
*/
const bool use_inner_join =
root == &m_where_cond &&
subq_pred->outer_condition_context == enum_condition_context::ANDS &&
!use_anti_join && !grouped_subquery;
Table_ref *tr;
if (transform_subquery_to_derived(thd, &tr, inner_qe, subq_pred,
use_inner_join, false, nullptr, nullptr))
return true;
assert(CountVisibleFields(sj_inner_exprs) == sj_inner_exprs.size());
const int first_sj_inner_expr_of_subquery =
CountVisibleFields(inner_qb->fields) - sj_inner_exprs.size();
/**
This function will generate two conditions to be attached in the
synthesized query: a JOIN condition that is used in the join between
the outer tables and the generated table, and a general condition that is
applied after the join. The JOIN condition is required especially for
outer join and anti-join operations, but is less important for inner join
(It could be moved to the WHERE clause).
The general condition is applied as a filter condition (WHERE clause) when
the subquery predicate is placed in the WHERE clause, or is used to
substitute the original subquery predicate when placed in the SELECT list.
Note that we sometimes generate a full condition in "condition" but later
transfer it to become the JOIN condition.
*/
Item_bool_func *condition = nullptr;
Item_bool_func *join_cond = nullptr;
// Start at first SJ inner expression in SELECT list:
int i = first_sj_inner_expr_of_subquery;
int j = 0; // counter of processed SJ inner expressions
for (auto it_outer = sj_outer_exprs.begin(); it_outer != sj_outer_exprs.end();
++i, ++j, ++it_outer) {
Item *outer = *it_outer;
assert(i < (int)tr->table->s->fields);
// Using this constructor, instead of the alternative which only takes a
// Field pointer, gives a persistent name to the item (sets orig_table_name
// etc) which is necessary for prepared statements.
Item_field *const derived_field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[i]);
if (derived_field == nullptr) return true;
Item_bool_func *predicate = nullptr;
const Item_func::Functype op_type = sj_decor.op_type_at(j);
switch (op_type) {
case Item_func::EQ_FUNC:
predicate = new (thd->mem_root) Item_func_eq(outer, derived_field);
break;
case Item_func::NE_FUNC:
predicate = new (thd->mem_root) Item_func_ne(outer, derived_field);
break;
case Item_func::LT_FUNC:
predicate = new (thd->mem_root) Item_func_lt(outer, derived_field);
break;
case Item_func::LE_FUNC:
predicate = new (thd->mem_root) Item_func_le(outer, derived_field);
break;
case Item_func::GT_FUNC:
predicate = new (thd->mem_root) Item_func_gt(outer, derived_field);
break;
case Item_func::GE_FUNC:
predicate = new (thd->mem_root) Item_func_ge(outer, derived_field);
break;
default:
assert(false);
}
if (predicate == nullptr) return true;
condition = and_items(condition, predicate);
if (condition == nullptr) return true;
}
/*
If the predicate is transformed using a grouped subquery, use just generated
condition as the join condition for the outer join.
*/
if (grouped_subquery) {
assert(join_cond == nullptr);
join_cond = condition;
condition = nullptr;
}
// If this is a quantified comparison predicate, add the generated comparison
if (is_eq_all || is_ne_any) {
Item_field *const field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[0]);
if (field == nullptr) return true;
Item_bool_func *compare;
if (is_eq_all) {
compare = new (thd->mem_root) Item_func_eq(left_expr, field);
} else {
compare = new (thd->mem_root) Item_func_ne(left_expr, field);
}
if (compare == nullptr) return true;
Item *count_field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[count_field_no]);
if (count_field == nullptr) return true;
Item_int *const number_0 = new (thd->mem_root) Item_int(int32{0}, 1);
if (number_0 == nullptr) return true;
if (grouped_subquery) {
count_field =
new (thd->mem_root) Item_func_coalesce(count_field, number_0);
if (count_field == nullptr) return true;
}
Item *count_check;
if (is_eq_all) {
count_check = new (thd->mem_root) Item_func_eq(count_field, number_0);
} else {
count_check = new (thd->mem_root) Item_func_ne(count_field, number_0);
}
if (count_check == nullptr) return true;
Item_field *const distinct_field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[distinct_field_no]);
if (distinct_field == nullptr) return true;
Item *inner_nulls_check = nullptr;
Item *outer_nulls_check = nullptr;
Item *any_non_nulls_check = nullptr;
if (nulls_field_no >= 0) {
Item *nulls_field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[nulls_field_no]);
if (nulls_field == nullptr) return true;
inner_nulls_check =
new (thd->mem_root) Item_func_eq(nulls_field, number_0);
if (inner_nulls_check == nullptr) return true;
}
if ((is_ne_any || !allany_pred->ignore_unknown()) &&
left_expr->is_nullable()) {
outer_nulls_check = new (thd->mem_root) Item_func_isnotnull(left_expr);
if (outer_nulls_check == nullptr) return true;
}
if (is_eq_all && nulls_field_no >= 0 && !allany_pred->ignore_unknown()) {
any_non_nulls_check =
new (thd->mem_root) Item_func_eq(distinct_field, number_0);
if (any_non_nulls_check == nullptr) return true;
Item *const true_result = new (thd->mem_root) Item_null();
if (true_result == nullptr) return true;
Item *const false_result = new (thd->mem_root) Item_func_false();
if (false_result == nullptr) return true;
any_non_nulls_check = new (thd->mem_root)
Item_bool_if(any_non_nulls_check, true_result, false_result);
if (any_non_nulls_check == nullptr) return true;
}
if (inner_nulls_check != nullptr) {
Item *true_result;
if (is_ne_any) {
true_result = new (thd->mem_root) Item_func_false();
if (true_result == nullptr) return true;
} else {
true_result = new (thd->mem_root) Item_func_true();
if (true_result == nullptr) return true;
}
Item *const false_result = new (thd->mem_root) Item_null();
if (false_result == nullptr) return true;
inner_nulls_check = new (thd->mem_root)
Item_bool_if(inner_nulls_check, true_result, false_result);
if (inner_nulls_check == nullptr) return true;
}
if (outer_nulls_check != nullptr && !allany_pred->ignore_unknown()) {
Item *true_result;
if (is_ne_any) {
true_result = new (thd->mem_root) Item_func_true();
if (true_result == nullptr) return true;
} else {
true_result = new (thd->mem_root) Item_func_false();
if (true_result == nullptr) return true;
}
Item *const false_result = new (thd->mem_root) Item_null();
if (false_result == nullptr) return true;
outer_nulls_check = new (thd->mem_root)
Item_bool_if(outer_nulls_check, true_result, false_result);
if (outer_nulls_check == nullptr) return true;
}
Item_int *const number_1 = new (thd->mem_root) Item_int(int32{1}, 1);
if (number_1 == nullptr) return true;
Item *distinct_check;
if (is_ne_any) {
distinct_check =
new (thd->mem_root) Item_func_gt(distinct_field, number_1);
} else {
distinct_check =
new (thd->mem_root) Item_func_eq(distinct_field, number_1);
}
if (distinct_check == nullptr) return true;
if (is_ne_any) {
// If table is empty, result is FALSE.
// If searched value is NULL, result is UNKNOWN.
// If there is more than one distinct, non-NULL value, result is TRUE.
// If value in subquery is different from searched value, result is TRUE.
// If subquery contains at least one NULL, result is UNKNOWN,
// otherwise FALSE.
condition = new (thd->mem_root) Item_cond_or(compare, distinct_check);
if (condition == nullptr) return true;
if (inner_nulls_check != nullptr) {
condition =
new (thd->mem_root) Item_cond_or(condition, inner_nulls_check);
if (condition == nullptr) return true;
}
condition = new (thd->mem_root) Item_cond_and(condition, count_check);
if (condition == nullptr) return true;
if (outer_nulls_check != nullptr) {
condition =
new (thd->mem_root) Item_cond_and(condition, outer_nulls_check);
if (condition == nullptr) return true;
}
} else if (is_eq_all) {
// If table is empty, result is TRUE.
// If searched value is NULL, result is UNKNOWN.
// If there is more than one distinct, non-NULL value, or the distinct
// value is different from the searched value, result is FALSE.
// If subquery contains at least one NULL, result is UNKNOWN,
// otherwise result is FALSE.
condition = new (thd->mem_root) Item_cond_and(distinct_check, compare);
if (condition == nullptr) return true;
if (inner_nulls_check != nullptr) {
condition =
new (thd->mem_root) Item_cond_and(condition, inner_nulls_check);
if (condition == nullptr) return true;
}
condition = new (thd->mem_root) Item_cond_or(count_check, condition);
if (condition == nullptr) return true;
if (outer_nulls_check != nullptr) {
condition =
new (thd->mem_root) Item_cond_or(condition, outer_nulls_check);
if (condition == nullptr) return true;
}
if (any_non_nulls_check != nullptr) {
condition =
new (thd->mem_root) Item_cond_or(condition, any_non_nulls_check);
if (condition == nullptr) return true;
}
}
} else if (allany_pred != nullptr) {
// Handles quantified comparison predicates not handled above.
// Replace the aggregate with a field from the aggregated table:
Item *field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[0]);
if (field == nullptr) return true;
Item_bool_func *const compare =
allany_pred->compare_func()->create(left_expr, field);
if (compare == nullptr) return true;
condition = compare;
if (nulls_field_no > 0) {
field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[nulls_field_no]);
if (field == nullptr) return true;
Item_int *const number_0 = new (thd->mem_root) Item_int(int32{0}, 1);
if (number_0 == nullptr) return true;
Item *null_check;
if (is_any_pred) {
null_check = new (thd->mem_root) Item_func_ne(field, number_0);
if (null_check == nullptr) return true;
Item *const true_result = new (thd->mem_root) Item_null();
if (true_result == nullptr) return true;
Item *const false_result = new (thd->mem_root) Item_func_false();
if (false_result == nullptr) return true;
null_check = new (thd->mem_root)
Item_bool_if(null_check, true_result, false_result);
if (null_check == nullptr) return true;
condition = new (thd->mem_root) Item_cond_or(condition, null_check);
if (condition == nullptr) return true;
} else {
null_check = new (thd->mem_root) Item_func_eq(field, number_0);
if (null_check == nullptr) return true;
Item *const true_result = new (thd->mem_root) Item_func_true();
if (true_result == nullptr) return true;
Item *false_result;
if (allany_pred->ignore_unknown()) {
false_result = new (thd->mem_root) Item_func_false();
} else {
false_result = new (thd->mem_root) Item_null();
}
if (false_result == nullptr) return true;
null_check = new (thd->mem_root)
Item_bool_if(null_check, true_result, false_result);
if (null_check == nullptr) return true;
condition = new (thd->mem_root) Item_cond_and(condition, null_check);
if (condition == nullptr) return true;
}
}
field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[count_field_no]);
if (field == nullptr) return true;
Item_int *const number_0 = new (thd->mem_root) Item_int(int32{0}, 1);
if (number_0 == nullptr) return true;
if (grouped_subquery) {
field = new (thd->mem_root) Item_func_coalesce(field, number_0);
if (field == nullptr) return true;
}
if (is_any_pred) {
Item *const count_check =
new (thd->mem_root) Item_func_ne(field, number_0);
if (count_check == nullptr) return true;
condition = new (thd->mem_root) Item_cond_and(condition, count_check);
if (condition == nullptr) return true;
} else {
Item *const count_check =
new (thd->mem_root) Item_func_eq(field, number_0);
if (count_check == nullptr) return true;
condition = new (thd->mem_root) Item_cond_or(condition, count_check);
if (condition == nullptr) return true;
}
}
if (!grouped_subquery) {
if (root == &m_where_cond) {
assert(join_cond == nullptr);
join_cond = condition;
condition = nullptr;
// Make the IS [NOT] NULL condition:
Item_field *const derived_field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[0]);
if (derived_field == nullptr) return true;
assert(condition == nullptr);
if (!tr->outer_join) {
condition = new (thd->mem_root) Item_func_true();
} else if (use_anti_join) {
condition = new (thd->mem_root) Item_func_isnull(derived_field);
} else {
condition = new (thd->mem_root) Item_func_isnotnull(derived_field);
}
if (condition == nullptr) return true;
// We only need to test the first column for null-ness:
// if the NOT NULL test eliminates it, i.e. if it's NULL:
// - if it's not NULL-complemented: it's a NULL in the right member of the
// LEFT JOIN, thus in the subquery, thus it wouldn't pass the IN
// condition,
// - if it is NULL-complemented: then one IN sub-equality failed, thus it
// wouldn't pass the IN condition.
// Reciprocically: if the NOT NULL does not eliminate it: it's not
// NULL-complemented, so all IN sub-equalities passed, it would pass
// the IN condition.
// If the subquery was rather with EXISTS, the SELECT list's first
// expression is 1, so if it's NULL it's surely NULL-complemented;
// if there were decorrelated equalities one of them failed, or
// the inner table was empty.
} else if (!aggregated_subquery) {
/*
Handle EXISTS, NOT EXISTS, and IN and NOT IN with non-nullable
expressions where the subquery predicate is placed in the SELECT list.
The derived table is deduplicated. Perform an outer join between the
outer table(s) and the derived table using the join condition.
The selected expression is replaced with the condition:
IN, EXISTS: (it.iv IS NOT NULL)
NOT IN, NOT EXISTS: (it.iv IS NULL)
Because of the outer join, a non-existing value from the subquery is
represented by a NULL value, thus the expression is reduced to a simple
IS [NOT] NULL check.
*/
Item_field *const derived_field = new (thd->mem_root)
Item_field(thd, &this->context, tr->table->field[0]);
if (derived_field == nullptr) return true;
assert(join_cond == nullptr);
join_cond = condition;
condition = nullptr;
switch (subq_pred->value_transform) {
case Item::BOOL_IDENTITY:
case Item::BOOL_IS_TRUE:
condition = new (thd->mem_root) Item_func_isnotnull(derived_field);
if (condition == nullptr) return true;
break;
case Item::BOOL_NEGATED:
case Item::BOOL_IS_FALSE:
condition = new (thd->mem_root) Item_func_isnull(derived_field);
if (condition == nullptr) return true;
break;
default:
assert(false);
}
}
}
if (join_cond == nullptr) {
join_cond = new (thd->mem_root) Item_func_true();
if (join_cond == nullptr) return true;
}
join_cond->apply_is_true();
if (!join_cond->fixed && join_cond->fix_fields(thd, nullptr)) return true;
tr->set_join_cond(join_cond);
if (condition == nullptr) {
condition = new (thd->mem_root) Item_func_true();
if (condition == nullptr) return true;
}
// Synthesized columns need name copied from original expression
if (!condition->item_name.is_set()) {
condition->item_name.set(subq_pred->item_name.ptr());
}
if (root == &m_where_cond) {
condition->apply_is_true();
} else {
condition->increment_ref_count();
}
if (!condition->fixed && condition->fix_fields(thd, nullptr)) return true;
Item *old_expr = unwrap_rollup_group(*root);
if (replace_subcondition(thd, root, subq_pred, condition, false)) return true;
if (root != &m_where_cond) {
if (base_ref_items[root_field_no] != *root) {
base_ref_items[root_field_no] = *root;
}
if (is_grouped()) {
replace_order_item(group_list.first, old_expr,
unwrap_rollup_group(*root));
}
if (is_ordered()) {
replace_order_item(order_list.first, old_expr,
unwrap_rollup_group(*root));
}
}
return false;
}