in sql/sql_select.cc [4454:5041]
bool JOIN::make_tmp_tables_info() {
assert(!join_tab);
mem_root_deque<Item *> *curr_fields = fields;
bool materialize_join = false;
uint curr_tmp_table = const_tables;
TABLE *exec_tmp_table = nullptr;
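// Scope guard: if we return with an error below, close and free any
// temporary tables this function has created so far, so that a failed
// plan does not leak tmp table handles.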
auto cleanup_tmp_tables_on_error =
create_scope_guard([this, &curr_tmp_table] {
if (qep_tab == nullptr) {
return;
}
for (unsigned table_idx = primary_tables; table_idx <= curr_tmp_table;
++table_idx) {
TABLE *table = qep_tab[table_idx].table();
if (table != nullptr) {
close_tmp_table(table);
free_tmp_table(table);
qep_tab[table_idx].set_table(nullptr);
}
}
});
/*
If the plan is constant, we will not do window tmp table processing
cf. special code path for handling const plans.
*/
m_windowing_steps = m_windows.elements > 0 && !plan_is_const() &&
!implicit_grouping && !group_optimized_away;
const bool may_trace = // just to avoid an empty trace block
need_tmp_before_win || implicit_grouping || m_windowing_steps ||
!group_list.empty() || !order.empty();
Opt_trace_context *const trace = &thd->opt_trace;
const Opt_trace_disable_I_S trace_disabled(trace, !may_trace);
const Opt_trace_object wrapper(trace);
const Opt_trace_array trace_tmp(trace, "considering_tmp_tables");
DBUG_TRACE;
/*
In this function, we may change having_cond into a condition on a
temporary sort/group table, so we have to assign having_for_explain now:
*/
having_for_explain = having_cond;
const bool has_group_by = this->grouped;
/*
The loose index scan access method guarantees that all grouping or
duplicate row elimination (for distinct) is already performed
during data retrieval, and that all MIN/MAX functions are already
computed for each group. Thus all MIN/MAX functions should be
treated as regular functions, and there is no need to perform
grouping in the main execution loop.
Currently, loose index scan is only applicable for single-table queries.
The only exception is when a single-table query becomes a multi-table
query because of a semijoin transformation. We check the first join_tab
element of the plan for its access method here, which holds true even for
the multi-table query, but only when the optimizer has picked nested loop
joins. Skip scan is enabled only for the original table in the query,
which is the first table in the join order for a nested loop join.
However, this does not hold for hash joins: we see an additional
de-duplication step when a hash join is picked, as the hash join is not
aware that de-duplication is already taken care of by the chosen access
method.
TODO: Make optimize_distinct_group_order() understand that de-duplication
is taken care of by the chosen access method, so that we avoid the
additional de-duplication step.
*/
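/*
Illustration: for a query like
SELECT a, MIN(b) FROM t GROUP BY a;
served by a loose index scan over INDEX(a, b), each retrieved row already
represents one complete group with MIN(b) computed, so the main execution
loop can treat MIN(b) as a plain value and skip grouping.
*/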
if (qep_tab && qep_tab[0].range_scan() &&
is_loose_index_scan(qep_tab[0].range_scan()))
tmp_table_param.precomputed_group_by =
!is_agg_loose_index_scan(qep_tab[0].range_scan());
/*
Create the first temporary table if distinct elimination is requested or
if the sort is too complicated to be evaluated as a filesort.
*/
if (need_tmp_before_win) {
curr_tmp_table = primary_tables;
Opt_trace_object trace_this_outer(trace);
trace_this_outer.add("adding_tmp_table_in_plan_at_position",
curr_tmp_table);
tmp_tables++;
/*
Make a copy of the base slice in the save slice.
This is needed because later steps will overwrite the base slice with
another slice (1-3).
After this slice has been used, overwrite the base slice again with
the copy in the save slice.
*/
if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) return true;
copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE);
current_ref_item_slice = REF_SLICE_SAVED_BASE;
/*
Create temporary table for use in a single execution.
(Will be reused if this is a subquery that is executed several times
for one execution of the statement)
Don't use tmp table grouping for JSON aggregate funcs as it's
very inefficient.
*/
ORDER_with_src tmp_group;
if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg)
tmp_group = group_list;
tmp_table_param.hidden_field_count = CountHiddenFields(*fields);
if (create_intermediate_table(&qep_tab[curr_tmp_table], *fields, tmp_group,
!group_list.empty() && simple_group))
return true;
exec_tmp_table = qep_tab[curr_tmp_table].table();
if (exec_tmp_table->s->is_distinct) optimize_distinct();
/*
If there is no sorting or grouping, 'use_order'
index result should not have been requested.
Exception: LooseScan strategy for semijoin requires
sorted access even if final result is not to be sorted.
*/
assert(
!(m_ordered_index_usage == ORDERED_INDEX_VOID && !plan_is_const() &&
qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN &&
qep_tab[const_tables].use_order()));
/*
Allocate a slice of ref items that describe the items to be copied
from the first temporary table.
*/
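/*
A ref item slice is an array of Item pointers. Making a slice active
(set_ref_item_slice) installs it as the array that Item_ref objects in
the query read through, which redirects expression evaluation from the
original base table columns to the corresponding tmp table fields.
*/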
if (alloc_ref_item_slice(thd, REF_SLICE_TMP1)) return true;
// Change sum_fields reference to calculated fields in tmp_table
if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group ||
tmp_table_param.precomputed_group_by) {
if (change_to_use_tmp_fields(fields, thd, ref_items[REF_SLICE_TMP1],
&tmp_fields[REF_SLICE_TMP1],
query_block->m_added_non_hidden_fields))
return true;
} else {
if (change_to_use_tmp_fields_except_sums(
fields, thd, query_block, ref_items[REF_SLICE_TMP1],
&tmp_fields[REF_SLICE_TMP1],
query_block->m_added_non_hidden_fields))
return true;
}
curr_fields = &tmp_fields[REF_SLICE_TMP1];
// Need to set them now for correct group_fields setup, reset at the end.
set_ref_item_slice(REF_SLICE_TMP1);
qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP1;
setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_outer);
/*
If having is not handled here, it will be checked before the row is sent
to the client.
*/
if (having_cond &&
(streaming_aggregation ||
(exec_tmp_table->s->is_distinct && group_list.empty()))) {
/*
If there is no select distinct or rollup, then move the having to table
conds of tmp table.
NOTE : We cannot apply having after distinct. If columns of having are
not part of select distinct, then distinct may remove rows
which can satisfy having.
As this condition will read the tmp table, it is appropriate that
REF_SLICE_TMP1 is in effect when we create it below.
*/
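/*
Illustration (hypothetical query): with
SELECT DISTINCT a FROM t HAVING <condition on b>;
rows (a=1, b=1) and (a=1, b=2) collapse into one under DISTINCT. If the
kept row fails the condition while the removed one would have satisfied
it, the result wrongly loses a=1. Hence HAVING must be evaluated before
rows are written to the distinct tmp table.
*/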
if ((!select_distinct && rollup_state == RollupState::NONE) &&
add_having_as_tmp_table_cond(curr_tmp_table))
return true;
/*
Having condition which we are not able to add as tmp table conds are
kept as before. And, this will be applied before storing the rows in
tmp table.
*/
qep_tab[curr_tmp_table].having = having_cond;
having_cond = nullptr; // Already done
}
tmp_table_param.func_count = 0;
if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group) {
tmp_table_param.func_count += tmp_table_param.sum_func_count;
tmp_table_param.sum_func_count = 0;
}
if (exec_tmp_table->group) { // Already grouped
/*
Check if group by has to respect ordering. If true, move group by to
order by.
*/
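/*
E.g. a GROUP BY element carrying an explicit direction (the legacy
"GROUP BY a DESC" syntax) implies an ordering the result must keep, so
the grouping order is promoted to ORDER BY now that the tmp table has
performed the grouping itself.
*/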
if (order.empty() && !skip_sort_order) {
for (ORDER *group = group_list.order; group; group = group->next) {
if (group->direction != ORDER_NOT_RELEVANT) {
order = group_list; /* order by group */
break;
}
}
}
group_list.clean();
}
/*
If the sort and group orders differ, we must sort the data by group
and copy it to a second temporary table.
This code is also used if we are using DISTINCT on something we have
not been able to store in the temporary table yet, like
SEC_TO_TIME(SUM(...)), or when DISTINCT is used together with rollup.
*/
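/*
E.g. SELECT a, SUM(b) FROM t GROUP BY a ORDER BY SUM(b);
here the ordering is not a subpart of the grouping, so rows are grouped
into this second tmp table first and the final sort happens afterwards.
*/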
if ((!group_list.empty() &&
(!test_if_subpart(group_list.order, order.order) || select_distinct ||
m_windowing_steps || rollup_state != RollupState::NONE)) ||
(select_distinct && (tmp_table_param.using_outer_summary_function ||
rollup_state != RollupState::NONE))) {
DBUG_PRINT("info", ("Creating group table"));
calc_group_buffer(this, group_list.order);
count_field_types(query_block, &tmp_table_param,
tmp_fields[REF_SLICE_TMP1],
select_distinct && group_list.empty(), false);
tmp_table_param.hidden_field_count =
CountHiddenFields(tmp_fields[REF_SLICE_TMP1]);
streaming_aggregation = false;
if (!exec_tmp_table->group && !exec_tmp_table->s->is_distinct) {
// The 1st tmp table was materializing the join result
materialize_join = true;
explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
}
curr_tmp_table++;
tmp_tables++;
Opt_trace_object trace_this_tbl(trace);
trace_this_tbl.add("adding_tmp_table_in_plan_at_position", curr_tmp_table)
.add_alnum("cause", "sorting_to_make_groups");
/* group data to new table */
/*
If the access method is loose index scan then all MIN/MAX
functions are precomputed, and should be treated as regular
functions. See extended comment above.
*/
if (qep_tab[0].range_scan() &&
is_loose_index_scan(qep_tab[0].range_scan()))
tmp_table_param.precomputed_group_by = true;
ORDER_with_src dummy; // TODO can use table->group here also
if (create_intermediate_table(&qep_tab[curr_tmp_table], *curr_fields,
dummy, true))
return true;
if (!group_list.empty()) {
explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
if (!plan_is_const()) // No need to sort a single row
{
if (add_sorting_to_table(curr_tmp_table - 1, &group_list,
/*sort_before_group=*/true))
return true;
}
if (make_group_fields(this, this)) return true;
}
// Set up sum funcs only when necessary; otherwise we might break info
// for the first table
if (!group_list.empty() || tmp_table_param.sum_func_count) {
if (make_sum_func_list(*curr_fields, true, true)) return true;
const bool need_distinct =
!(qep_tab[0].range_scan() &&
is_agg_loose_index_scan(qep_tab[0].range_scan()));
if (prepare_sum_aggregators(sum_funcs, need_distinct)) return true;
group_list.clean();
if (setup_sum_funcs(thd, sum_funcs)) return true;
}
/*
Allocate a slice of ref items that describe the items to be copied
from the second temporary table.
*/
if (alloc_ref_item_slice(thd, REF_SLICE_TMP2)) return true;
// No sum funcs anymore
if (change_to_use_tmp_fields(&tmp_fields[REF_SLICE_TMP1], thd,
ref_items[REF_SLICE_TMP2],
&tmp_fields[REF_SLICE_TMP2],
query_block->m_added_non_hidden_fields))
return true;
curr_fields = &tmp_fields[REF_SLICE_TMP2];
set_ref_item_slice(REF_SLICE_TMP2);
qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP2;
setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_tbl);
}
if (qep_tab[curr_tmp_table].table()->s->is_distinct)
select_distinct = false; /* Each row is unique */
if (select_distinct && group_list.empty() && !m_windowing_steps) {
if (having_cond) {
qep_tab[curr_tmp_table].having = having_cond;
having_cond->update_used_tables();
having_cond = nullptr;
}
qep_tab[curr_tmp_table].needs_duplicate_removal = true;
trace_this_outer.add("reading_from_table_eliminates_duplicates", true);
explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
select_distinct = false;
}
/* Clean tmp_table_param for the next tmp table. */
tmp_table_param.sum_func_count = tmp_table_param.func_count = 0;
tmp_table_param.cleanup();
streaming_aggregation = false;
if (!group_optimized_away) {
grouped = false;
} else {
/*
If grouping has been optimized away, a temporary table is
normally not needed unless we are explicitly requested to create
one (e.g. due to a SQL_BUFFER_RESULT hint, INSERT ... SELECT, or
a windowing function that needs sorting).
In this case (grouping was optimized away), temp_table was
created without a grouping expression, and JOIN::exec() will not
perform the necessary grouping (by the use of end_send_group()
or end_write_group()) if JOIN::group is set to false.
*/
/*
The temporary table was explicitly requested or there is a window
function which needs sorting (check need_tmp_before_win in
JOIN::optimize).
*/
assert(query_block->active_options() & OPTION_BUFFER_RESULT ||
m_windowing_steps);
// the temporary table does not have a grouping expression
assert(!qep_tab[curr_tmp_table].table()->group);
}
calc_group_buffer(this, group_list.order);
count_field_types(query_block, &tmp_table_param, *curr_fields, false,
false);
}
/*
Set up structures for a temporary table, but do not actually create
the temporary table, if one of these conditions is true:
- The query is implicitly grouped.
- The query is explicitly grouped and
+ implemented as a simple grouping, or
+ LIMIT 1 is specified, or
+ ROLLUP is specified, or
+ <some unknown condition>.
*/
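/*
E.g. SELECT COUNT(*) FROM t; is implicitly grouped: there is exactly
one result row, so aggregation can be finalized while reading rows,
without materializing a grouping tmp table.
*/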
if ((grouped || implicit_grouping) && !m_windowing_steps) {
if (make_group_fields(this, this)) return true;
if (make_sum_func_list(*curr_fields, true, true)) return true;
const bool need_distinct =
!(qep_tab && qep_tab[0].range_scan() &&
is_agg_loose_index_scan(qep_tab[0].range_scan()));
if (prepare_sum_aggregators(sum_funcs, need_distinct)) return true;
if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error()) return true;
}
if (qep_tab && (!group_list.empty() ||
(!order.empty() && !m_windowing_steps /* [1] */))) {
/*
[1] above: too early to do query ORDER BY if we have windowing; must
wait till after window processing.
*/
ASSERT_BEST_REF_IN_JOIN_ORDER(this);
DBUG_PRINT("info", ("Sorting for send_result_set_metadata"));
/*
If we have already done the grouping, add HAVING to the sorted table,
except when rollup is present.
*/
if (having_cond && group_list.empty() && !streaming_aggregation &&
rollup_state == RollupState::NONE) {
if (add_having_as_tmp_table_cond(curr_tmp_table)) return true;
}
if (grouped)
m_select_limit = HA_POS_ERROR;
else if (!need_tmp_before_win) {
/*
We can abort sorting after thd->select_limit rows if there are no
filter conditions for any tables after the sorted one.
Filter conditions come in several forms:
1. as a condition item attached to the join_tab, or
2. as a keyuse attached to the join_tab (ref access).
*/
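/*
If any such filter exists, a row that sorts among the first LIMIT rows
may still be rejected by a later table, so the first LIMIT sorted rows
are not necessarily the first LIMIT result rows and we must sort all of
them.
*/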
for (uint i = const_tables + 1; i < primary_tables; i++) {
QEP_TAB *const tab = qep_tab + i;
if (tab->condition() || // 1
(best_ref[tab->idx()]->keyuse() &&
tab->first_inner() == NO_PLAN_IDX)) // 2
{
/* We have to sort all rows */
m_select_limit = HA_POS_ERROR;
break;
}
}
}
/*
Here we add a sorting stage for the ORDER BY/GROUP BY clause, if the
optimizer chose filesort to be faster than an index scan, or if there
is no suitable index present.
OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
*/
DBUG_PRINT("info", ("Sorting for order by/group by"));
ORDER_with_src order_arg = group_list.empty() ? order : group_list;
if (qep_tab &&
m_ordered_index_usage != (group_list.empty()
? ORDERED_INDEX_ORDER_BY
: ORDERED_INDEX_GROUP_BY) &&
// Windowing will change order, so it's too early to sort here
!m_windowing_steps) {
// Sort either first non-const table or the last tmp table
QEP_TAB *const sort_tab = &qep_tab[curr_tmp_table];
if (need_tmp_before_win && !materialize_join && !exec_tmp_table->group)
explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);
if (add_sorting_to_table(curr_tmp_table, &order_arg,
/*sort_before_group=*/false))
return true;
/*
filesort_limit: Return only this many rows from filesort().
We can use select_limit_cnt only if we have no GROUP BY and one table.
This allows us to use Bounded_queue for queries like
"select * from t1 order by b desc limit 1;"
In that example, m_select_limit == HA_POS_ERROR (we need a full table
scan) while query_expression->select_limit_cnt == 1 (we only need one
row in the result set).
*/
if (sort_tab->filesort)
sort_tab->filesort->limit =
(has_group_by || (primary_tables > curr_tmp_table + 1) ||
calc_found_rows)
? m_select_limit
: query_expression()->select_limit_cnt;
}
}
if (qep_tab && m_windowing_steps) {
for (uint wno = 0; wno < m_windows.elements; wno++) {
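// Each window gets its own output tmp table and ref item slice
// (REF_SLICE_WIN_1 + wno below); the previous step's rows are streamed
// or buffered through it so the window functions can be evaluated.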
tmp_table_param.m_window = m_windows[wno];
if (!tmp_tables) {
curr_tmp_table = primary_tables;
tmp_tables++;
if (ref_items[REF_SLICE_SAVED_BASE].is_null()) {
/*
Make a copy of the base slice in the save slice.
This is needed because later steps will overwrite the base slice with
another slice (1-3 or window slice).
After this slice has been used, overwrite the base slice again with
the copy in the save slice.
*/
if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) return true;
copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE);
current_ref_item_slice = REF_SLICE_SAVED_BASE;
}
} else {
curr_tmp_table++;
tmp_tables++;
}
ORDER_with_src dummy;
tmp_table_param.hidden_field_count = CountHiddenFields(*curr_fields);
/*
Allocate a slice of ref items that describe the items to be copied
from the next temporary table.
*/
const uint widx = REF_SLICE_WIN_1 + wno;
QEP_TAB *tab = &qep_tab[curr_tmp_table];
mem_root_deque<Item *> *orig_fields = curr_fields;
{
Opt_trace_object trace_this_tbl(trace);
trace_this_tbl
.add("adding_tmp_table_in_plan_at_position", curr_tmp_table)
.add_alnum("cause", "output_for_window_functions")
.add("with_buffer", m_windows[wno]->needs_buffering());
if (create_intermediate_table(tab, *curr_fields, dummy, false,
"window output buffer"))
return true;
if (alloc_ref_item_slice(thd, widx)) return true;
if (change_to_use_tmp_fields(curr_fields, thd, ref_items[widx],
&tmp_fields[widx],
query_block->m_added_non_hidden_fields,
/*windowing*/ true))
return true;
curr_fields = &tmp_fields[widx];
set_ref_item_slice(widx);
tab->ref_item_slice = widx;
setup_tmptable_write_func(tab, &trace_this_tbl);
}
if (m_windows[wno]->needs_buffering()) {
if (CreateFramebufferTable(
thd, tmp_table_param, *query_block, *orig_fields, *curr_fields,
tab->tmp_table_param->items_to_copy, m_windows[wno])) {
return true;
}
}
if (m_windows[wno]->make_special_rows_cache(thd, tab->table()))
return true;
ORDER_with_src w_partition(m_windows[wno]->sorting_order(),
ESC_WINDOWING);
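/*
Rows must reach the window sorted on its PARTITION BY/ORDER BY clauses;
if the window requires such an ordering, sort the preceding step's
output here.
*/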
if (w_partition.order != nullptr) {
Opt_trace_object trace_pre_sort(trace, "adding_sort_to_previous_table");
if (add_sorting_to_table(curr_tmp_table - 1, &w_partition,
/*sort_before_group=*/false))
return true;
}
if (m_windows[wno]->is_last()) {
if (!order.empty() && m_ordered_index_usage != ORDERED_INDEX_ORDER_BY) {
if (add_sorting_to_table(curr_tmp_table, &order,
/*sort_before_group=*/false))
return true;
}
if (!tab->filesort && !tab->table()->s->keys &&
(!(query_block->active_options() & OPTION_BUFFER_RESULT) ||
need_tmp_before_win || wno >= 1)) {
/*
Last tmp table of execution; no sort, no duplicate elimination, no
buffering imposed by the user (or it has already been implemented by
a previous tmp table): hence rows need not be written to the tmp
table's storage; send them out to the query's result instead:
*/
m_windows[wno]->set_short_circuit(true);
}
}
if (having_cond != nullptr) {
tab->having = having_cond;
having_cond = nullptr;
}
}
}
// In the case of rollup (only): After the base slice list was made, we may
// have modified the field list to add rollup group items and sum switchers.
// Since there may be HAVING filters with refs that refer to the base slice,
// we need to refresh that slice (and its copy, REF_SLICE_SAVED_BASE) so
// that it includes the updated items.
//
// Note that we do this after we've made the TMP1 and TMP2 slices, since
// there's a lot of logic that looks through the GROUP BY list, which refers
// to the base slice and expects _not_ to find rollup items there.
refresh_base_slice();
fields = curr_fields;
// Reset before execution
set_ref_item_slice(REF_SLICE_SAVED_BASE);
if (qep_tab) {
qep_tab[primary_tables + tmp_tables].op_type = get_end_select_func();
}
grouped = has_group_by;
unplug_join_tabs();
/*
Tmp tables are a layer between the nested loop and the derived table's
result; WITH RECURSIVE cannot work with them. This should not happen,
as a recursive query cannot have clauses which use a tmp table (GROUP
BY, etc.).
*/
assert(!query_block->is_recursive() || !tmp_tables);
cleanup_tmp_tables_on_error.release();
return false;
}