in sql/sql_planner.cc [399:1083]
void Optimize_table_order::best_access_path(
JOIN_TAB *s,
table_map remaining_tables,
uint idx,
bool disable_jbuf,
double record_count,
POSITION *pos,
POSITION *loose_scan_pos)
{
Key_use *best_key= NULL;
uint best_max_key_part= 0;
bool found_constraint= false;
double best= DBL_MAX;
double best_time= DBL_MAX;
double records= DBL_MAX;
table_map best_ref_depends_map= 0;
double tmp;
bool best_uses_jbuf= false;
Opt_trace_context * const trace= &thd->opt_trace;
status_var_increment(thd->status_var.last_query_partial_plans);
/*
Cannot use join buffering if either
1. This is the first table in the join sequence, or
2. Join buffering is not enabled
(Only Block Nested Loop is considered in this context)
*/
disable_jbuf= disable_jbuf ||
idx == join->const_tables || // 1
!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_BNL); // 2
Loose_scan_opt loose_scan_opt;
DBUG_ENTER("Optimize_table_order::best_access_path");
Opt_trace_object trace_wrapper(trace, "best_access_path");
Opt_trace_array trace_paths(trace, "considered_access_paths");
{
/*
Loose-scan specific logic:
- we must decide whether this is within the dups_producing range.
- if 'pos' is within the JOIN::positions array, then decide this
by using the pos[-1] entry.
- if 'pos' is not in the JOIN::positions array then
in_dups_producing_range must be false (this case may occur in
semijoin_*_access_paths() which calls best_access_path() with 'pos'
allocated on the stack).
@todo One day Loose-scan will be considered in advance_sj_state() only,
outside best_access_path(), so this complicated logic will not be
needed.
*/
const bool in_dups_producing_range=
(idx == join->const_tables) ?
false :
(pos == (join->positions + idx) ?
(pos[-1].dups_producing_tables != 0) :
false);
loose_scan_opt.init(s, remaining_tables, in_dups_producing_range,
emb_sjm_nest != NULL);
}
/*
This isn't unlikely at all, but unlikely() cuts 6% CPU time on a 20-table
search when s->keyuse==0, and has no cost when s->keyuse!=0.
*/
if (unlikely(s->keyuse != NULL))
{ /* Use key if possible */
TABLE *const table= s->table;
double best_records= DBL_MAX;
/* Test how we can use keys */
ha_rows rec=
s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
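/*
MATCHING_ROWS_IN_OTHER_TABLE is a fixed fan-out constant (10 in the
MySQL source), so e.g. a 1000-row table starts with rec = 100. The
loop below lowers rec further when actual ref_table_rows are known.
*/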
for (Key_use *keyuse=s->keyuse; keyuse->table == table; )
{
key_part_map found_part= 0;
table_map found_ref= 0;
const uint key= keyuse->key;
uint max_key_part= 0;
KEY *const keyinfo= table->key_info+key;
const bool ft_key= (keyuse->keypart == FT_KEYPART);
/* Bitmap of keyparts where the ref access is over 'keypart=const': */
key_part_map const_part= 0;
/* The or-null keypart in ref-or-null access: */
key_part_map ref_or_null_part= 0;
/// Set dodgy_ref_cost only if that index is chosen for ref access.
bool is_dodgy= false;
/* Calculate how many key segments of the current key we can use */
Key_use *const start_key= keyuse;
loose_scan_opt.next_ref_key();
DBUG_PRINT("info", ("Considering ref access on key %s",
keyuse->table->key_info[keyuse->key].name));
Opt_trace_object trace_access_idx(trace);
trace_access_idx.add_alnum("access_type", "ref").
add_utf8("index", keyinfo->name);
// For each keypart
while (keyuse->table == table && keyuse->key == key)
{
const uint keypart= keyuse->keypart;
table_map best_part_found_ref= 0;
double best_prev_record_reads= DBL_MAX;
// For each way to access the keypart
for ( ; keyuse->table == table && keyuse->key == key &&
keyuse->keypart == keypart ; ++keyuse)
{
/*
When calculating a plan for a materialized semijoin nest,
we must not consider key references between tables inside the
semijoin nest and those outside of it. The same applies to a
materialized subquery.
*/
if ((excluded_tables & keyuse->used_tables))
continue;
/*
Use this keyuse if
1. the expression doesn't refer to forward tables, and
2. we won't get two ref-or-null keyparts.
*/
if (!(remaining_tables & keyuse->used_tables) &&
!(ref_or_null_part && (keyuse->optimize &
KEY_OPTIMIZE_REF_OR_NULL)))
{
found_part|= keyuse->keypart_map;
if (!(keyuse->used_tables & ~join->const_table_map))
const_part|= keyuse->keypart_map;
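/*
Among the usable expressions for this keypart, prefer the one whose
referenced tables give the lowest prev_record_reads() estimate,
i.e. the fewest distinct lookups on this key so far.
*/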
double tmp2= prev_record_reads(join, idx, (found_ref |
keyuse->used_tables));
if (tmp2 < best_prev_record_reads)
{
best_part_found_ref= keyuse->used_tables & ~join->const_table_map;
best_prev_record_reads= tmp2;
}
if (rec > keyuse->ref_table_rows)
rec= keyuse->ref_table_rows;
/*
If there is a 'key_column IS NULL' expression, we can
use the ref_or_null optimisation on this field
*/
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
ref_or_null_part |= keyuse->keypart_map;
}
loose_scan_opt.add_keyuse(remaining_tables, keyuse);
}
found_ref|= best_part_found_ref;
}
/*
Assume that each key matches a proportional part of the table.
*/
if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
{
trace_access_idx.add("usable", false);
goto done_with_index; // Nothing usable found
}
if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
/*
ft-keys require special treatment
*/
if (ft_key)
{
/*
Really, records should be 0.0 here (yes!),
but 1.0 is probably safer
*/
tmp= prev_record_reads(join, idx, found_ref);
records= 1.0;
}
else
{
found_constraint= MY_TEST(found_part);
loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
/* Check if we found full key */
if (found_part == LOWER_BITS(key_part_map, actual_key_parts(keyinfo)) &&
!ref_or_null_part)
{ /* use eq key */
max_key_part= (uint) ~0;
if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
{
tmp = prev_record_reads(join, idx, found_ref);
records=1.0;
}
else
{
if (!found_ref)
{ /* We found a const key */
/*
ReuseRangeEstimateForRef-1:
We get here if we've found a ref(const) (c_i are constants):
"(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond]
If the range optimizer was able to construct a "range"
access on this index, then its condition "quick_cond" was
equivalent to ref_const_cond (*), and we can re-use E(#rows)
from the range optimizer.
Proof of (*): By properties of the range and ref optimizers,
quick_cond will be equal to or tighter than ref_const_cond.
ref_const_cond already covers the "smallest" possible interval -
a single-point interval over all keyparts. Therefore,
quick_cond is equivalent to ref_const_cond (if it had been an
empty interval we wouldn't have got here).
*/
if (table->quick_keys.is_set(key))
records= (double) table->quick_rows[key];
else
{
/* quick_range couldn't use key! */
records= (double) s->records/rec;
}
}
else
{
if (!(records= keyinfo->rec_per_key[actual_key_parts(keyinfo)-1]))
{ /* Prefer longer keys */
records=
((double) s->records / (double) rec *
(1.0 +
((double) (table->s->max_key_length-keyinfo->key_length) /
(double) table->s->max_key_length)));
if (records < 2.0)
records=2.0; /* Can't be as good as a unique */
}
/*
ReuseRangeEstimateForRef-2: We get here if we could not reuse
E(#rows) from the range optimizer. Make another try:
If the range optimizer produced E(#rows) for a prefix of the ref
access we're considering, and that E(#rows) is lower than our
current estimate, make an adjustment. The criterion for when we
can make an adjustment is a special case of the criterion used
in ReuseRangeEstimateForRef-3.
*/
if (table->quick_keys.is_set(key) &&
(const_part &
(((key_part_map)1 << table->quick_key_parts[key])-1)) ==
(((key_part_map)1 << table->quick_key_parts[key])-1) &&
table->quick_n_ranges[key] == 1 &&
records > (double) table->quick_rows[key])
{
records= (double) table->quick_rows[key];
}
}
/* Limit the number of matched rows */
tmp= records;
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
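/*
The max_seeks_for_key cap keeps an inflated row estimate from
making ref access look arbitrarily expensive compared to a scan.
*/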
if (table->covering_keys.is_set(key))
{
/* we can use only index tree */
tmp= record_count * table->file->index_only_read_time(key, tmp);
}
else
tmp= record_count*min(tmp,s->worst_seeks);
}
}
else
{
/*
Use as many key parts as possible, and prefer a unique key
over a non-unique key.
Set tmp to (previous record count) * (records / combination)
*/
if ((found_part & 1) &&
(!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
found_part == LOWER_BITS(key_part_map,
actual_key_parts(keyinfo))))
{
max_key_part= max_part_bit(found_part);
/*
ReuseRangeEstimateForRef-3:
We're now considering a ref[or_null] access via
(t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR
(same-as-above but with one cond replaced
with "t.keypart_i IS NULL")] (**)
Try re-using E(#rows) from the "range" optimizer:
We can do so if the "range" optimizer used the same intervals as
in (**). The intervals used by the range optimizer may not be
available at this point (as "range" access might have chosen to
create a quick select over another index), so we can't compare
them to (**). We'll make indirect judgements instead.
The sufficient conditions for re-use are:
(C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
this is not satisfied we have no way to know which ranges
will actually be scanned by 'ref' until we execute the
join)
(C2) max #key parts in 'range' access == K == max_key_part (this
is apparently a necessary requirement)
We also have a property that "the range optimizer produces an
equal or tighter set of scan intervals than the ref(const)
optimizer". Each of the intervals in (**) is the "tightest
possible" interval when one limits oneself to using keyparts
1..K (which we do per (C2)). From here it follows that range
access used either one, or both, of the (I1) and (I2) intervals:
(t.keypart1=e1 AND ... AND t.keypartK=eK) (I1)
(same-as-above but with one cond replaced
with "t.keypart_i IS NULL") (I2)
The remaining part is to exclude the situation where the range
optimizer used one interval while we're considering
ref-or-null and looking for an estimate for two intervals. This
is done by the last limitation:
(C3) "the range optimizer used (have ref_or_null?2:1) intervals"
*/
if (table->quick_keys.is_set(key) && !found_ref && //(C1)
table->quick_key_parts[key] == max_key_part && //(C2)
table->quick_n_ranges[key] == 1+MY_TEST(ref_or_null_part)) //(C3)
{
tmp= records= (double) table->quick_rows[key];
}
else
{
/* Check if we have statistics about the distribution */
if ((records= keyinfo->rec_per_key[max_key_part-1]))
{
/*
Fix for the case where the index statistics are too
optimistic: If
(1) We're considering ref(const) and there is a quick select
on the same index,
(2) and that quick select uses more keyparts (i.e. it will
scan an equal or smaller interval than this ref(const)),
(3) and E(#rows) for the quick select is higher than our
estimate,
Then
we'll use E(#rows) from the quick select.
One observation is that when there are multiple
indexes with a common prefix (e.g. (b) and (b, c)) we
are not always selecting (b, c) even when this can
use more keyparts. Inaccuracies in statistics from
the storage engines can cause the record estimate
for the quick object for (b) to be lower than the
record estimate for the quick object for (b, c).
Q: Why do we choose to use 'ref'? Won't the quick select be
cheaper in some cases?
TODO: figure this out and adjust the plan choice if needed.
*/
if (!found_ref && table->quick_keys.is_set(key) && // (1)
table->quick_key_parts[key] > max_key_part && // (2)
records < (double)table->quick_rows[key]) // (3)
{
records= (double)table->quick_rows[key];
is_dodgy= true;
}
tmp= records;
}
else
{
/*
Assume that the first key part matches 1% of the file
and that the whole key matches 10 (duplicates) or 1
(unique) records.
Assume also that using more key parts matches
proportionally more records.
This gives the formula:
records = (x * (b-a) + a*c - b)/(c-1)
b = records matched by whole key
a = records matched by first key part (1% of all records?)
c = number of key parts in key
x = used key parts (1 <= x <= c)
*/
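/*
Hypothetical example: s->records=10000 gives a=100 (1%); a 3-part
key (c=3) with b=10 rows per full key, used with x=2 key parts,
yields records = (2*(10-100) + 100*3 - 10)/(3-1) = 55, i.e. a
linear interpolation between a (at x=1) and b (at x=c).
*/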
double rec_per_key;
if (!(rec_per_key=(double)
keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1]))
rec_per_key=(double) s->records/rec+1;
if (!s->records)
tmp = 0;
else if (rec_per_key/(double) s->records >= 0.01)
tmp = rec_per_key;
else
{
double a=s->records*0.01;
if (keyinfo->user_defined_key_parts > 1)
tmp= (max_key_part * (rec_per_key - a) +
a * keyinfo->user_defined_key_parts - rec_per_key) /
(keyinfo->user_defined_key_parts - 1);
else
tmp= a;
set_if_bigger(tmp,1.0);
}
records = (ulong) tmp;
}
if (ref_or_null_part)
{
/* We need two key searches (value and NULL) to find the rows */
tmp *= 2.0;
records *= 2.0;
}
/*
ReuseRangeEstimateForRef-4: We get here if we could not reuse
E(#rows) from the range optimizer. Make another try:
If the range optimizer produced E(#rows) for a prefix of the ref
access we're considering, and that E(#rows) is lower than our
current estimate, make the adjustment.
The decision whether we can re-use the estimate from the range
optimizer is the same as in ReuseRangeEstimateForRef-3,
applied to the first table->quick_key_parts[key] key parts.
*/
if (table->quick_keys.is_set(key) &&
table->quick_key_parts[key] <= max_key_part &&
const_part &
((key_part_map)1 << table->quick_key_parts[key]) &&
table->quick_n_ranges[key] == 1 + MY_TEST(ref_or_null_part &
const_part) &&
records > (double) table->quick_rows[key])
{
tmp= records= (double) table->quick_rows[key];
}
}
/* Limit the number of matched rows */
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
if (table->covering_keys.is_set(key))
{
/* we can use only index tree */
tmp= record_count * table->file->index_only_read_time(key, tmp);
}
else
tmp= record_count * min(tmp,s->worst_seeks);
}
else
tmp= best_time; // Do nothing
}
// {semijoin LooseScan + ref} is disabled
#if 0
loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
#endif
} /* not ft_key */
{
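/*
Total cost of this ref access: lookup cost (tmp) plus the CPU cost
of evaluating each fetched row once.
*/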
const double idx_time= tmp + records * ROW_EVALUATE_COST;
trace_access_idx.add("rows", records).add("cost", idx_time);
if (idx_time < best_time)
{
best_time= idx_time;
best= tmp;
best_records= records;
best_key= start_key;
best_max_key_part= max_key_part;
best_ref_depends_map= found_ref;
}
}
done_with_index:
bool chosen= (best_key == start_key);
trace_access_idx.add("chosen", chosen);
if (chosen)
s->dodgy_ref_cost= is_dodgy;
} /* for each key */
records= best_records;
}
Opt_trace_object trace_access_scan(trace);
/*
Don't test a table scan if it can't be better.
Prefer key lookup if we would use the same key for scanning.
Don't do a table scan on InnoDB tables if we can read the used
parts of the row from any of the used indexes.
This is because table scans use an index, and we would not win
anything by using a table scan. The only exception is the INDEX_MERGE
quick select. We cannot say for sure that an INDEX_MERGE quick select
is always faster than ref access. So it is necessary to check if
ref access is more expensive.
A word-for-word translation of the if-statements below, in sergefp's
understanding: we check if we should use a table scan, based on:
(1) The found 'ref' access produces more records than a table scan
(or index scan, or quick select), or 'ref' is more expensive than
any of them.
(2) The best way to perform a table or index scan is to use 'range'
access via index IDX. If it is a 'tight range' scan (i.e. not a
'loose index scan' or 'index merge'), then ref access on the same
index will perform equally well or better if ref access can use
the same or a larger number of key parts.
(3) See the note about InnoDB above.
(4) NOT ("FORCE INDEX(...)" is used for the table and there is a
'ref' access path, but there is no quick select.)
If the condition in the above brackets holds, then the only possible
"table scan" access method is ALL/index (there is no quick select).
Since we have a 'ref' access path, and FORCE INDEX instructs us to
choose it over ALL/index, there is no need to consider a full table
scan.
*/
if (!(records >= s->found_records || best > s->read_time)) // (1)
{
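/*
The chosen 'ref' both produces fewer rows than the scan would
examine and costs no more than the scan, so a table/index scan
cannot be better; trace the decision and skip the scan costing.
*/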
// "scan" means (full) index scan or (full) table scan.
trace_access_scan.add_alnum("access_type", s->quick ? "range" : "scan").
add("cost", s->read_time + s->found_records * ROW_EVALUATE_COST).
add("rows", s->found_records).
add_alnum("cause", "cost");
goto skip_table_scan;
}
if ((s->quick && best_key && s->quick->index == best_key->key && // (2)
best_max_key_part >= s->table->quick_key_parts[best_key->key]) && // (2)
(s->quick->get_type() !=
QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) && // (2)
(s->quick->get_type() !=
QUICK_SELECT_I::QS_TYPE_SKIP_SCAN)) // (2)
{
trace_access_scan.add_alnum("access_type", "range").
add_alnum("cause", "heuristic_index_cheaper");
goto skip_table_scan;
}
if ((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && //(3)
best_key && //(3)
(!s->table->covering_keys.is_clear_all() ||
(s->table->file->primary_key_is_clustered() &&
s->table->s->primary_key == best_key->key)) && //(3)
(!s->quick || //(3)
(s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&//(3)
best < s->quick->read_time))) //(3)
{
trace_access_scan.add_alnum("access_type", s->quick ? "range" : "scan").
add_alnum("cause", "covering_index_better_than_full_scan");
goto skip_table_scan;
}
if ((s->table->force_index && best_key && !s->quick)) // (4)
{
trace_access_scan.add_alnum("access_type", "scan").
add_alnum("cause", "force_index");
goto skip_table_scan;
}
{ // Check full join
ha_rows rnd_records= s->found_records;
/*
If there is a filtering condition on the table (i.e. ref analyzer found
at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
preceding this table in the join order we're now considering), then
assume that 25% of the rows will be filtered out by this condition.
This heuristic is supposed to force tables used in exprZ to be before
this table in join order.
*/
if (found_constraint)
rnd_records-= rnd_records/4;
/*
If applicable, get a more accurate estimate. Don't use the two
heuristics at once.
*/
if (s->table->quick_condition_rows != s->found_records)
rnd_records= s->table->quick_condition_rows;
/*
The range optimizer never proposes RANGE if it isn't better
than FULL; so if RANGE is present, it is always preferred to FULL.
Here we estimate its cost.
*/
*/
if (s->quick)
{
trace_access_scan.add_alnum("access_type", "range");
/*
For each record we:
- read the record range through 'quick'
- skip rows which do not satisfy WHERE constraints
TODO:
We take into account possible use of the join cache for ALL/index
access (see the join-buffering else-branch below), but we don't take
it into account here for range/index_merge access. Find out why
this is so.
*/
tmp= record_count *
(s->quick->read_time +
(s->found_records - rnd_records) * ROW_EVALUATE_COST);
loose_scan_opt.check_range_access(join, idx, s->quick);
}
else
{
trace_access_scan.add_alnum("access_type", "scan");
/* Estimate cost of reading table. */
if (s->table->force_index && !best_key) // index scan
tmp= s->table->file->read_time(s->ref.key, 1, s->records);
else // table scan
tmp= s->table->file->scan_time();
if (disable_jbuf)
{
/*
For each record we have to:
- read the whole table record
- skip rows which do not satisfy the join condition
*/
tmp= record_count *
(tmp + (s->records - rnd_records) * ROW_EVALUATE_COST);
}
else
{
trace_access_scan.add("using_join_cache", true);
/*
We read the table as many times as the join buffer becomes full.
It would be more exact to round the result of the division with
floor(), but that takes 5% of the time in a 20-table query plan
search.
*/
tmp*= (1.0 + ((double) cache_record_length(join,idx) *
record_count /
(double) thd->variables.join_buff_size));
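/*
Illustrative numbers: with 100-byte cached records,
record_count=1000 and join_buff_size=256KB, the factor is
1 + 100000/262144, i.e. roughly 1.38 full scans of this table.
*/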
/*
We don't make a full Cartesian product between rows in the scanned
table and the existing records, because we skip all rows from the
scanned table that do not satisfy the join condition as we read
the table (see flush_cached_records for details). Here we take
into account the cost to read and skip these records.
*/
tmp+= (s->records - rnd_records) * ROW_EVALUATE_COST;
}
}
const double scan_cost=
tmp + (record_count * ROW_EVALUATE_COST * rnd_records);
trace_access_scan.add("rows", rows2double(rnd_records)).
add("cost", scan_cost);
/*
We estimate the cost of evaluating the WHERE clause for the found
records as record_count * rnd_records * ROW_EVALUATE_COST. This
cost plus tmp gives us the total cost of using a table scan.
*/
if (best == DBL_MAX ||
(scan_cost < best + (record_count * ROW_EVALUATE_COST * records)))
{
/*
If the table has a range (s->quick is set), make_join_select()
will ensure that it is used.
*/
best= tmp;
records= rows2double(rnd_records);
best_key= 0;
/* range/index_merge/ALL/index access method are "independent", so: */
best_ref_depends_map= 0;
best_uses_jbuf= MY_TEST(!disable_jbuf);
}
}
skip_table_scan:
trace_access_scan.add("chosen", best_key == NULL);
/* Update the cost information for the current partial plan */
pos->records_read= records;
pos->read_time= best;
pos->key= best_key;
pos->table= s;
pos->ref_depend_map= best_ref_depends_map;
pos->loosescan_key= MAX_KEY;
pos->use_join_buffer= best_uses_jbuf;
loose_scan_opt.save_to_position(s, loose_scan_pos);
if (!best_key &&
idx == join->const_tables &&
s->table == join->sort_by_table &&
join->unit->select_limit_cnt >= records)
{
trace_access_scan.add("use_tmp_table", true);
join->sort_by_table= (TABLE*) 1; // Must use temporary table
}
DBUG_VOID_RETURN;
}