in sql/sql_select.cc [4177:4899]
/**
  Plan and set up all intermediate (temporary) tables, ref_item slices,
  aggregation setup and sorting steps required to execute this JOIN.

  Stages handled here, in order:
  1. An optional first tmp table (REF_SLICE_TMP1) when duplicate
     elimination / grouping / buffering is needed before windowing
     (need_tmp_before_win).
  2. An optional second tmp table (REF_SLICE_TMP2) when grouping order
     differs from the requested ORDER BY, or DISTINCT cannot be handled
     by the first table.
  3. Group-field / sum-function setup (REF_SLICE_TMP3) for grouped or
     implicitly grouped queries when there is no windowing.
  4. Attaching filesort for GROUP BY / ORDER BY when no ordered index
     can provide the order.
  5. One output tmp table per window function (REF_SLICE_WIN_1 + wno),
     plus an optional frame buffer table per window.

  Side effects: mutates many JOIN members (group_list, order,
  having_cond, tmp_table_param, select_distinct, sort_and_group,
  grouped, fields, ref_item slices, qep_tab[] tmp-table entries).

  @returns true on error (OOM / tmp table creation failure), false on
           success.
*/
bool JOIN::make_tmp_tables_info()
{
  // join_tab must already have been converted to qep_tab by this point.
  DBUG_ASSERT(!join_tab);
  /*
    curr_all_fields/curr_fields_list track the field lists produced by
    the latest pipeline stage; each new tmp table reads from them and
    then redirects them to its own slice.
  */
  List<Item> *curr_all_fields= &all_fields;
  List<Item> *curr_fields_list= &fields_list;
  bool materialize_join= false;
  // Index in qep_tab[] where the next tmp table will be placed.
  uint curr_tmp_table= const_tables;
  TABLE *exec_tmp_table= NULL;
  /*
    If the plan is constant, we will not do window tmp table processing
    cf. special code path in do_select.
  */
  m_windowing_steps= m_windows.elements > 0 && !plan_is_const() &&
    !implicit_grouping && !group_optimized_away;
  const bool may_trace=  // just to avoid an empty trace block
    need_tmp_before_win || implicit_grouping || m_windowing_steps ||
    group_list || order;
  Opt_trace_context * const trace= &thd->opt_trace;
  Opt_trace_disable_I_S trace_disabled(trace, !may_trace);
  Opt_trace_object wrapper(trace);
  Opt_trace_array trace_tmp(trace, "considering_tmp_tables");
  DBUG_ENTER("JOIN::make_tmp_tables_info");
  /*
    In this function, we may change having_cond into a condition on a
    temporary sort/group table, so we have to assign having_for_explain now:
  */
  having_for_explain= having_cond;
  // 'grouped' is cleared and restored below; remember the original value.
  const bool has_group_by= this->grouped;
  /*
    Setup last table to provide fields and all_fields lists to the next
    node in the plan.
  */
  if (qep_tab)
  {
    qep_tab[primary_tables - 1].fields= &fields_list;
    qep_tab[primary_tables - 1].all_fields= &all_fields;
  }
  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Notice that currently loose index scan is applicable only for
    single table queries, thus it is sufficient to test only the first
    join_tab element of the plan for its access method.
  */
  if (qep_tab && qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan())
    tmp_table_param.precomputed_group_by=
      !qep_tab[0].quick()->is_agg_loose_index_scan();
  /*
    Tracks which slice feeds the (first) windowing step; REF_SLICE_BASE
    means no tmp table has been interposed before windowing yet.
  */
  uint last_slice_before_windowing= REF_SLICE_BASE;
  reset_wf_result_fields(curr_all_fields);
  /*
    Create the first temporary table if distinct elimination is requested or
    if the sort is too complicated to be evaluated as a filesort.
  */
  if (need_tmp_before_win)
  {
    curr_tmp_table= primary_tables;
    Opt_trace_object trace_this_tbl(trace);
    trace_this_tbl.
      add("adding_tmp_table_in_plan_at_position", curr_tmp_table);
    tmp_tables++;
    if (plan_is_const())
      first_select= sub_select_op;
    /*
      Make a copy of the base slice in the save slice.
      This is needed because later steps will overwrite the base slice with
      another slice (1-3).
      After this slice has been used, overwrite the base slice again with
      the copy in the save slice.
    */
    if (alloc_ref_item_slice(thd, REF_SLICE_SAVE))
      DBUG_RETURN(true);
    copy_ref_item_slice(REF_SLICE_SAVE, REF_SLICE_BASE);
    current_ref_item_slice= REF_SLICE_SAVE;
    /*
      Create temporary table for use in a single execution.
      (Will be reused if this is a subquery that is executed several times
      for one execution of the statement)
      Don't use tmp table grouping for json aggregate funcs as it's
      very ineffective.
    */
    ORDER_with_src tmp_group;
    if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg)
      tmp_group= group_list;
    // Hidden fields are the trailing part of all_fields not in fields_list.
    tmp_table_param.hidden_field_count=
      all_fields.elements - fields_list.elements;
    if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                  &all_fields, tmp_group,
                                  group_list && simple_group,
                                  TMP_WIN_NONE, false))
      DBUG_RETURN(true);
    exec_tmp_table= qep_tab[curr_tmp_table].table();
    if (exec_tmp_table->distinct)
      optimize_distinct();
    /*
      If there is no sorting or grouping, 'use_order'
      index result should not have been requested.
      Exception: LooseScan strategy for semijoin requires
      sorted access even if final result is not to be sorted.
    */
    DBUG_ASSERT(
      !(ordered_index_usage == ordered_index_void &&
        !plan_is_const() &&
        qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN &&
        qep_tab[const_tables].use_order()));
    /*
      Allocate a slice of ref items that describe the items to be copied
      from the first temporary table.
    */
    if (alloc_ref_item_slice(thd, REF_SLICE_TMP1))
      DBUG_RETURN(true);
    // Change sum_fields reference to calculated fields in tmp_table
    if (sort_and_group || qep_tab[curr_tmp_table].table()->group ||
        tmp_table_param.precomputed_group_by)
    {
      /*
        Grouping done by this table: downstream stages read materialized
        aggregate results, so replace items with tmp-table field refs.
      */
      if (change_to_use_tmp_fields(thd,
                                   ref_items[REF_SLICE_TMP1],
                                   tmp_fields_list[REF_SLICE_TMP1],
                                   tmp_all_fields[REF_SLICE_TMP1],
                                   fields_list.elements,
                                   all_fields))
        DBUG_RETURN(true);
    }
    else
    {
      /*
        No grouping here: sum functions stay live; only their arguments
        are redirected to the tmp table's columns.
      */
      if (change_refs_to_tmp_fields(thd,
                                    ref_items[REF_SLICE_TMP1],
                                    tmp_fields_list[REF_SLICE_TMP1],
                                    tmp_all_fields[REF_SLICE_TMP1],
                                    fields_list.elements,
                                    all_fields))
        DBUG_RETURN(true);
    }
    curr_all_fields= &tmp_all_fields[REF_SLICE_TMP1];
    curr_fields_list= &tmp_fields_list[REF_SLICE_TMP1];
    // Need to set them now for correct group_fields setup, reset at the end.
    set_ref_item_slice(REF_SLICE_TMP1);
    qep_tab[curr_tmp_table].ref_item_slice= REF_SLICE_TMP1;
    qep_tab[curr_tmp_table].all_fields= &tmp_all_fields[REF_SLICE_TMP1];
    qep_tab[curr_tmp_table].fields= &tmp_fields_list[REF_SLICE_TMP1];
    setup_tmptable_write_func(&qep_tab[curr_tmp_table], REF_SLICE_TMP1,
                              &trace_this_tbl);
    last_slice_before_windowing= REF_SLICE_TMP1;
    /*
      If having is not handled here, it will be checked before the row is sent
      to the client.
    */
    if (having_cond &&
        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
    {
      /*
        If there is no select distinct then move the having to table conds of
        tmp table.
        NOTE : We cannot apply having after distinct. If columns of having are
               not part of select distinct, then distinct may remove rows
               which can satisfy having.
      */
      if (!select_distinct && add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);
      /*
        Having condition which we are not able to add as tmp table conds are
        kept as before. And, this will be applied before storing the rows in
        tmp table.
      */
      qep_tab[curr_tmp_table].having= having_cond;
      having_cond= NULL; // Already done
    }
    // Functions were evaluated while writing this table; reset counters.
    tmp_table_param.func_count= 0;
    if (sort_and_group || qep_tab[curr_tmp_table].table()->group)
    {
      // Aggregates are now plain columns of the tmp table.
      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }
    if (exec_tmp_table->group)
    { // Already grouped
      if (!order && !no_order && !skip_sort_order)
      {
        if (!group_list.can_ignore_order())
          order= group_list; /* order by group */
        else
        {
          /*
            Check whether an order was explicitly specified on a GROUP BY
            column. If so, we have to use filesort.
          */
          for (ORDER *ord= group_list; ord; ord= ord->next)
            if (ord->is_explicit)
            {
              order= group_list; /* order by group */
              break;
            }
        }
      }
      // Grouping has been fully consumed by this table.
      group_list= NULL;
    }
    /*
      If we have different sort & group then we must sort the data by group
      and copy it to a second temporary table.
      This code is also used if we are using distinct something
      we haven't been able to store in the temporary table yet
      like SEC_TO_TIME(SUM(...)).
    */
    if ((group_list &&
         (!test_if_subpart(group_list, order) ||
          select_distinct || m_windowing_steps)) ||
        (select_distinct && tmp_table_param.using_outer_summary_function))
    {
      DBUG_PRINT("info",("Creating group table"));
      calc_group_buffer(this, group_list);
      count_field_types(select_lex, &tmp_table_param,
                        tmp_all_fields[REF_SLICE_TMP1],
                        select_distinct && !group_list, false);
      tmp_table_param.hidden_field_count=
        tmp_all_fields[REF_SLICE_TMP1].elements -
        tmp_fields_list[REF_SLICE_TMP1].elements;
      sort_and_group= false;
      if (!exec_tmp_table->group && !exec_tmp_table->distinct)
      {
        // 1st tmp table were materializing join result
        materialize_join= true;
        explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
      }
      curr_tmp_table++;
      tmp_tables++;
      Opt_trace_object trace_this_tbl(trace);
      trace_this_tbl.
        add("adding_tmp_table_in_plan_at_position", curr_tmp_table).
        add_alnum("cause", "sorting_to_make_groups");
      /* group data to new table */
      /*
        If the access method is loose index scan then all MIN/MAX
        functions are precomputed, and should be treated as regular
        functions. See extended comment above.
      */
      if (qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan())
        tmp_table_param.precomputed_group_by= TRUE;
      ORDER_with_src dummy= NULL; //TODO can use table->group here also
      if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                    curr_all_fields, dummy, true,
                                    TMP_WIN_NONE, false))
        DBUG_RETURN(true);
      if (group_list)
      {
        explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
        if (!plan_is_const())        // No need to sort a single row
        {
          // Sort the *previous* table's output by the grouping columns.
          if (add_sorting_to_table(curr_tmp_table - 1, &group_list))
            DBUG_RETURN(true);
        }
        if (make_group_fields(this, this))
          DBUG_RETURN(true);
      }
      // Setup sum funcs only when necessary, otherwise we might break info
      // for the first table
      if (group_list || tmp_table_param.sum_func_count)
      {
        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
          DBUG_RETURN(true);
        const bool need_distinct=
          !(qep_tab[0].quick() && qep_tab[0].quick()->is_agg_loose_index_scan());
        if (prepare_sum_aggregators(sum_funcs, need_distinct))
          DBUG_RETURN(true);
        group_list= NULL;
        if (setup_sum_funcs(thd, sum_funcs))
          DBUG_RETURN(true);
      }
      /*
        Allocate a slice of ref items that describe the items to be copied
        from the second temporary table.
      */
      if (alloc_ref_item_slice(thd, REF_SLICE_TMP2))
        DBUG_RETURN(true);
      // No sum funcs anymore
      if (change_to_use_tmp_fields(thd,
                                   ref_items[REF_SLICE_TMP2],
                                   tmp_fields_list[REF_SLICE_TMP2],
                                   tmp_all_fields[REF_SLICE_TMP2],
                                   fields_list.elements,
                                   tmp_all_fields[REF_SLICE_TMP1]))
        DBUG_RETURN(true);
      curr_fields_list= &tmp_fields_list[REF_SLICE_TMP2];
      curr_all_fields= &tmp_all_fields[REF_SLICE_TMP2];
      set_ref_item_slice(REF_SLICE_TMP2);
      qep_tab[curr_tmp_table].ref_item_slice= REF_SLICE_TMP2;
      qep_tab[curr_tmp_table].all_fields= &tmp_all_fields[REF_SLICE_TMP2];
      qep_tab[curr_tmp_table].fields= &tmp_fields_list[REF_SLICE_TMP2];
      setup_tmptable_write_func(&qep_tab[curr_tmp_table], REF_SLICE_TMP2,
                                &trace_this_tbl);
      last_slice_before_windowing= REF_SLICE_TMP2;
    }
    if (qep_tab[curr_tmp_table].table()->distinct)
      select_distinct= false;               /* Each row is unique */
    if (select_distinct && !group_list && !m_windowing_steps)
    {
      // DISTINCT will be enforced when reading back from this tmp table.
      if (having_cond)
      {
        qep_tab[curr_tmp_table].having= having_cond;
        having_cond->update_used_tables();
        having_cond= NULL;
      }
      qep_tab[curr_tmp_table].distinct= true;
      trace_this_tbl.add("reading_from_table_eliminates_duplicates", true);
      explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
      select_distinct= false;
    }
    /* Clean tmp_table_param for the next tmp table. */
    tmp_table_param.field_count= tmp_table_param.sum_func_count=
      tmp_table_param.func_count= 0;
    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
    first_record= sort_and_group=0;
    if (!group_optimized_away)
    {
      grouped= false;
    }
    else
    {
      /*
        If grouping has been optimized away, a temporary table is
        normally not needed unless we're explicitly requested to create
        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT or
        there is a windowing function that needs sorting).
        In this case (grouping was optimized away), temp_table was
        created without a grouping expression and JOIN::exec() will not
        perform the necessary grouping (by the use of end_send_group()
        or end_write_group()) if JOIN::group is set to false.
      */
      /*
        The temporary table was explicitly requested or there is a window
        function which needs sorting (check need_tmp_before_win in
        JOIN::optimize).
      */
      DBUG_ASSERT(select_lex->active_options() & OPTION_BUFFER_RESULT ||
                  m_windowing_steps);
      // the temporary table does not have a grouping expression
      DBUG_ASSERT(!qep_tab[curr_tmp_table].table()->group);
    }
    calc_group_buffer(this, group_list);
    count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false,
                      false);
  }
  /*
    Set up structures for a temporary table but do not actually create
    the temporary table if one of these conditions are true:
    - The query is implicitly grouped.
    - The query is explicitly grouped and
        + implemented as a simple grouping, or
        + LIMIT 1 is specified, or
        + ROLLUP is specified, or
        + <some unknown condition>.
  */
  if ( (grouped || implicit_grouping) && !m_windowing_steps)
  {
    if (make_group_fields(this, this))
      DBUG_RETURN(true);
    // "save" slice of ref_items array is needed due to overwriting strategy.
    if (ref_items[REF_SLICE_SAVE].is_null())
    {
      if (alloc_ref_item_slice(thd, REF_SLICE_SAVE))
        DBUG_RETURN(true);
      copy_ref_item_slice(REF_SLICE_SAVE, REF_SLICE_BASE);
      current_ref_item_slice= REF_SLICE_SAVE;
    }
    /*
      Allocate a slice of ref items that describe the items to be copied
      from the record buffer for this temporary table.
    */
    if (alloc_ref_item_slice(thd, REF_SLICE_TMP3))
      DBUG_RETURN(true);
    // NOTE(review): return value of setup_copy_fields is not checked here;
    // fatal errors are presumably caught via thd->is_fatal_error below.
    setup_copy_fields(thd,
                      &tmp_table_param,
                      ref_items[REF_SLICE_TMP3],
                      tmp_fields_list[REF_SLICE_TMP3],
                      tmp_all_fields[REF_SLICE_TMP3],
                      curr_fields_list->elements,
                      *curr_all_fields);
    curr_fields_list= &tmp_fields_list[REF_SLICE_TMP3];
    curr_all_fields= &tmp_all_fields[REF_SLICE_TMP3];
    set_ref_item_slice(REF_SLICE_TMP3);
    last_slice_before_windowing= REF_SLICE_TMP3;
    if (qep_tab)
    {
      // Set grouped fields on the last table
      qep_tab[primary_tables + tmp_tables - 1].ref_item_slice= REF_SLICE_TMP3;
      qep_tab[primary_tables + tmp_tables - 1].all_fields=
        &tmp_all_fields[REF_SLICE_TMP3];
      qep_tab[primary_tables + tmp_tables - 1].fields=
        &tmp_fields_list[REF_SLICE_TMP3];
    }
    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
      DBUG_RETURN(true);
    const bool need_distinct=
      !(qep_tab && qep_tab[0].quick() &&
        qep_tab[0].quick()->is_agg_loose_index_scan());
    if (prepare_sum_aggregators(sum_funcs, need_distinct))
      DBUG_RETURN(true);
    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
      DBUG_RETURN(true);
  }
  if (qep_tab && (group_list || (order && !m_windowing_steps /* [1] */)))
  {
    /*
      [1] above: too early to do query ORDER BY if we have windowing; must
      wait till after window processing.
      Moreover, with window processing we can have several temporary tables;
      if, below, we add HAVING as a condition attached to the current table
      and evaluated by filesort()/find_all_keys(), such condition will contain
      Item_ref whose val_*() looks at result_field, which is a column of the
      next table, whereas correct evaluation should look at the value of *ref,
      which is a column of the current table. To avoid the problem, we must
      not use add_having_as_tmp_table_cond() when there are windows.
    */
    ASSERT_BEST_REF_IN_JOIN_ORDER(this);
    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
    THD_STAGE_INFO(thd, stage_sorting_result);
    /* If we have already done the group, add HAVING to sorted table */
    if (having_cond && !group_list && !sort_and_group)
    {
      if (add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);
    }
    if (grouped)
      m_select_limit= HA_POS_ERROR;
    else if (!need_tmp_before_win)
    {
      /*
        We can abort sorting after thd->select_limit rows if there are no
        filter conditions for any tables after the sorted one.
        Filter conditions come in several forms:
         1. as a condition item attached to the join_tab, or
         2. as a keyuse attached to the join_tab (ref access).
      */
      for (uint i= const_tables + 1; i < primary_tables; i++)
      {
        QEP_TAB *const tab= qep_tab + i;
        if (tab->condition() ||                                // 1
            (best_ref[tab->idx()]->keyuse() &&
             tab->first_inner() == NO_PLAN_IDX))               // 2
        {
          /* We have to sort all rows */
          m_select_limit= HA_POS_ERROR;
          break;
        }
      }
    }
    /*
      Here we add sorting stage for ORDER BY/GROUP BY clause, if the
      optimiser chose FILESORT to be faster than INDEX SCAN or there is
      no suitable index present.
      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
    */
    DBUG_PRINT("info",("Sorting for order by/group by"));
    // GROUP BY ordering takes precedence over ORDER BY when both remain.
    ORDER_with_src order_arg= group_list ?  group_list : order;
    if (qep_tab &&
        ordered_index_usage !=
        (group_list ? ordered_index_group_by : ordered_index_order_by) &&
        // Windowing will change order, so it's too early to sort here
        !m_windowing_steps)
    {
      // Sort either first non-const table or the last tmp table
      QEP_TAB *const sort_tab= &qep_tab[curr_tmp_table];
      if (need_tmp_before_win && !materialize_join && !exec_tmp_table->group)
        explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);
      if (add_sorting_to_table(curr_tmp_table, &order_arg))
        DBUG_RETURN(true);
      /*
        filesort_limit:	 Return only this many rows from filesort().
        We can use select_limit_cnt only if we have no group_by and 1 table.
        This allows us to use Bounded_queue for queries like:
          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
        m_select_limit == HA_POS_ERROR (we need a full table scan)
        unit->select_limit_cnt == 1 (we only need one row in the result set)
      */
      if (sort_tab->filesort)
        sort_tab->filesort->limit=
          (has_group_by || (primary_tables > curr_tmp_table + 1)) ?
           m_select_limit : unit->select_limit_cnt;
    }
    if (!plan_is_const() &&
        !qep_tab[const_tables].table()->sort.io_cache)
    {
      /*
        If no IO cache exists for the first table then we are using an
        INDEX SCAN and no filesort. Thus we should not remove the sorted
        attribute on the INDEX SCAN.
      */
      skip_sort_order= true;
    }
  }
  /*
    Windowing pipeline: one output tmp table per window, each reading the
    previous stage's slice, optionally preceded by a frame buffer table
    and a partition sort.
  */
  if (qep_tab && m_windowing_steps)
  {
    for (uint wno=0; wno < m_windows.elements; wno++)
    {
      tmp_table_param.m_window= m_windows[wno];
      if (!tmp_tables)
      {
        // First tmp table in the plan: place it right after primary tables.
        curr_tmp_table= primary_tables;
        tmp_tables++;
        if (plan_is_const())
          first_select= sub_select_op;
        if (ref_items[REF_SLICE_SAVE].is_null())
        {
          /*
            Make a copy of the base slice in the save slice.
            This is needed because later steps will overwrite the base slice with
            another slice (1-3 or window slice).
            After this slice has been used, overwrite the base slice again with
            the copy in the save slice.
          */
          if (alloc_ref_item_slice(thd, REF_SLICE_SAVE))
            DBUG_RETURN(true);
          copy_ref_item_slice(REF_SLICE_SAVE, REF_SLICE_BASE);
          current_ref_item_slice= REF_SLICE_SAVE;
        }
      }
      else
      {
        curr_tmp_table++;
        tmp_tables++;
      }
      ORDER_with_src dummy= NULL;
      if (last_slice_before_windowing == REF_SLICE_BASE)
      {
        tmp_table_param.hidden_field_count=
          all_fields.elements - fields_list.elements;
      }
      else
      {
        DBUG_ASSERT(tmp_tables >=1 &&
                    last_slice_before_windowing > REF_SLICE_BASE);
        tmp_table_param.hidden_field_count=
          tmp_all_fields[last_slice_before_windowing].elements -
          tmp_fields_list[last_slice_before_windowing].elements;
      }
      /*
        Allocate a slice of ref items that describe the items to be copied
        from the next temporary table.
      */
      const uint widx= REF_SLICE_WIN_1 + wno;
      const int fbidx= widx + m_windows.elements; // use far area
      if (m_windows[wno]->needs_buffering())
      {
        /*
          Create the window frame buffer tmp table.  We create a
          temporary table with same contents as the output tmp table
          in the windowing pipeline (columns defined by
          curr_all_fields), but used for intermediate storage, saving
          the window's frame buffer now that we know the window needs
          buffering.
        */
        Temp_table_param *par=
          new (thd->mem_root) Temp_table_param(tmp_table_param);
        List<Item> tmplist(*curr_all_fields, thd->mem_root);
        TABLE* table= create_tmp_table(thd, par, tmplist,
                                       nullptr, false,
                                       false, select_lex->active_options(),
                                       HA_POS_ERROR, "", TMP_WIN_FRAME_BUFFER);
        if (table == nullptr)
          DBUG_RETURN(true);
        if (alloc_ref_item_slice(thd, fbidx))
          DBUG_RETURN(true);
        if (change_to_use_tmp_fields(thd,
                                     ref_items[fbidx],
                                     tmp_fields_list[fbidx],
                                     tmp_all_fields[fbidx],
                                     curr_fields_list->elements,
                                     *curr_all_fields))
          DBUG_RETURN(true);
        m_windows[wno]->set_frame_buffer_param(par);
        m_windows[wno]->set_frame_buffer(table);
      }
      Opt_trace_object trace_this_tbl(trace);
      trace_this_tbl.
        add("adding_tmp_table_in_plan_at_position", curr_tmp_table).
        add_alnum("cause", "output_for_window_functions").
        add("with_buffer", m_windows[wno]->needs_buffering());
      if (create_intermediate_table(&qep_tab[curr_tmp_table],
                                    curr_all_fields, dummy,
                                    false, TMP_WIN_CONDITIONAL,
                                    m_windows[wno]->is_last()))
        DBUG_RETURN(true);
      m_windows[wno]->set_outtable_param(qep_tab[curr_tmp_table].tmp_table_param);
      if (m_windows[wno]->make_special_rows_cache(thd,
                                                  qep_tab[curr_tmp_table].table()))
        DBUG_RETURN(true);
      // Sort the previous stage by this window's PARTITION BY/ORDER BY,
      // unless an earlier window already produced a compatible order.
      ORDER_with_src w_partition(m_windows[wno]->sorting_order(thd),
                                 ESC_WINDOWING);
      if (w_partition.order != nullptr && !m_windows[wno]->sort_redundant())
      {
        Opt_trace_object trace_pre_sort(trace, "adding_sort_to_previous_table");
        if (add_sorting_to_table(curr_tmp_table - 1, &w_partition, true))
          DBUG_RETURN(true);
      }
      // Query-level ORDER BY is applied only after the last window.
      if (order != nullptr &&
          ordered_index_usage != ordered_index_order_by &&
          m_windows[wno]->is_last())
      {
        if (add_sorting_to_table(curr_tmp_table, &order))
          DBUG_RETURN(true);
      }
      if (alloc_ref_item_slice(thd, widx))
        DBUG_RETURN(true);
      if (change_to_use_tmp_fields(thd,
                                   ref_items[widx],
                                   tmp_fields_list[widx],
                                   tmp_all_fields[widx],
                                   fields_list.elements,
                                   (last_slice_before_windowing == REF_SLICE_BASE ?
                                    all_fields :
                                    tmp_all_fields[last_slice_before_windowing])))
        DBUG_RETURN(true);
      curr_fields_list= &tmp_fields_list[widx];
      curr_all_fields= &tmp_all_fields[widx];
      set_ref_item_slice(widx);
      qep_tab[curr_tmp_table].ref_item_slice= widx;
      qep_tab[curr_tmp_table].all_fields= &tmp_all_fields[widx];
      qep_tab[curr_tmp_table].fields= &tmp_fields_list[widx];
      setup_tmptable_write_func(&qep_tab[curr_tmp_table], widx,
                                &trace_this_tbl);
      // Any leftover HAVING is evaluated when writing the window output.
      if (having_cond != nullptr)
      {
        qep_tab[curr_tmp_table].having= having_cond;
        having_cond= nullptr;
      }
      last_slice_before_windowing= widx;
    }
  }
  // The final stage's fields are what the query sends to the client.
  fields= curr_fields_list;
  // Reset before execution
  set_ref_item_slice(REF_SLICE_SAVE);
  if (qep_tab)
  {
    // The last plan node hands rows to the end-of-query sink.
    qep_tab[primary_tables + tmp_tables - 1].next_select=
      get_end_select_func();
  }
  // Restore the 'grouped' flag cleared during tmp-table setup above.
  grouped= has_group_by;
  unplug_join_tabs();
  /*
    Tmp tables are a layer between the nested loop and the derived table's
    result, WITH RECURSIVE cannot work with them. This should not happen, as a
    recursive query cannot have clauses which use a tmp table (GROUP BY,
    etc).
  */
  DBUG_ASSERT(!select_lex->is_recursive() || !tmp_tables);
  DBUG_RETURN(false);
}