in sql/sql_select.cc [4454:5041]
bool JOIN::make_tmp_tables_info() {
assert(!join_tab);
mem_root_deque<Item *> *curr_fields = fields;
bool materialize_join = false;
uint curr_tmp_table = const_tables;
TABLE *exec_tmp_table = nullptr;
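// Scope guard: if we return with an error below, close and free any
// temporary tables this function has created so far, so that a failed
// plan does not leak tmp table handles.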
auto cleanup_tmp_tables_on_error =
create_scope_guard([this, &curr_tmp_table] {
if (qep_tab == nullptr) {
return;
}
for (unsigned table_idx = primary_tables; table_idx <= curr_tmp_table;
++table_idx) {
TABLE *table = qep_tab[table_idx].table();
if (table != nullptr) {
close_tmp_table(table);
free_tmp_table(table);
qep_tab[table_idx].set_table(nullptr);
}
}
});
/*
If the plan is constant, we will not do window tmp table processing
cf. special code path for handling const plans.
*/
m_windowing_steps = m_windows.elements > 0 && !plan_is_const() &&
!implicit_grouping && !group_optimized_away;
const bool may_trace = // just to avoid an empty trace block
need_tmp_before_win || implicit_grouping || m_windowing_steps ||
!group_list.empty() || !order.empty();
Opt_trace_context *const trace = &thd->opt_trace;
const Opt_trace_disable_I_S trace_disabled(trace, !may_trace);
const Opt_trace_object wrapper(trace);
const Opt_trace_array trace_tmp(trace, "considering_tmp_tables");
DBUG_TRACE;
/*
In this function, we may change having_cond into a condition on a
temporary sort/group table, so we have to assign having_for_explain now:
*/
having_for_explain = having_cond;
const bool has_group_by = this->grouped;
/*
The loose index scan access method guarantees that all grouping or
duplicate row elimination (for distinct) is already performed
during data retrieval, and that all MIN/MAX functions are already
computed for each group. Thus all MIN/MAX functions should be
treated as regular functions, and there is no need to perform
grouping in the main execution loop.
Currently, loose index scan is only applicable for single-table queries.
The only exception is when a single-table query becomes a multi-table
query because of a semijoin transformation. We check the first join_tab
element of the plan for its access method here, which holds true even for
the multi-table query, but only when the optimizer has picked nested loop
joins. Skip scan is enabled only for the original table in the query,
which is the first table in the join order for a nested loop join.
However, this does not hold for hash joins: we see an additional
de-duplication step when a hash join is picked, as the hash join is not
aware that de-duplication is already taken care of by the chosen access
method.
TODO: Make optimize_distinct_group_order() understand that de-duplication
is taken care of by the chosen access method, so that we avoid the
additional de-duplication step.
*/
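/*
Illustration: for a query like
SELECT a, MIN(b) FROM t GROUP BY a;
served by a loose index scan over INDEX(a, b), each retrieved row already
represents one complete group with MIN(b) computed, so the main execution
loop can treat MIN(b) as a plain value and skip grouping.
*/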
if (qep_tab && qep_tab[0].range_scan() &&
is_loose_index_scan(qep_tab[0].range_scan()))
tmp_table_param.precomputed_group_by =
!is_agg_loose_index_scan(qep_tab[0].range_scan());
/*
Create the first temporary table if distinct elimination is requested or
if the sort is too complicated to be evaluated as a filesort.
*/
if (need_tmp_before_win) {
curr_tmp_table = primary_tables;
Opt_trace_object trace_this_outer(trace);
trace_this_outer.add("adding_tmp_table_in_plan_at_position",
curr_tmp_table);
tmp_tables++;
/*
Make a copy of the base slice in the save slice.
This is needed because later steps will overwrite the base slice with
another slice (1-3).
After this slice has been used, overwrite the base slice again with
the copy in the save slice.
*/
if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) return true;
copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE);
current_ref_item_slice = REF_SLICE_SAVED_BASE;
/*
Create temporary table for use in a single execution.
(Will be reused if this is a subquery that is executed several times
for one execution of the statement)
Don't use tmp table grouping for JSON aggregate funcs as it's
very inefficient.
*/
ORDER_with_src tmp_group;
if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg)
tmp_group = group_list;
tmp_table_param.hidden_field_count = CountHiddenFields(*fields);
if (create_intermediate_table(&qep_tab[curr_tmp_table], *fields, tmp_group,
!group_list.empty() && simple_group))
return true;
exec_tmp_table = qep_tab[curr_tmp_table].table();
if (exec_tmp_table->s->is_distinct) optimize_distinct();
/*
If there is no sorting or grouping, 'use_order'
index result should not have been requested.
Exception: LooseScan strategy for semijoin requires
sorted access even if final result is not to be sorted.
*/
assert(
!(m_ordered_index_usage == ORDERED_INDEX_VOID && !plan_is_const() &&
qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN &&
qep_tab[const_tables].use_order()));
/*
Allocate a slice of ref items that describe the items to be copied
from the first temporary table.
*/
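/*
A ref item slice is an array of Item pointers. Making a slice active
(set_ref_item_slice) installs it as the array that Item_ref objects in
the query read through, which redirects expression evaluation from the
original base table columns to the corresponding tmp table fields.
*/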
if (alloc_ref_item_slice(thd, REF_SLICE_TMP1)) return true;
// Change sum_fields reference to calculated fields in tmp_table
if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group ||
tmp_table_param.precomputed_group_by) {
if (change_to_use_tmp_fields(fields, thd, ref_items[REF_SLICE_TMP1],
&tmp_fields[REF_SLICE_TMP1],
query_block->m_added_non_hidden_fields))
return true;
} else {
if (change_to_use_tmp_fields_except_sums(
fields, thd, query_block, ref_items[REF_SLICE_TMP1],
&tmp_fields[REF_SLICE_TMP1],
query_block->m_added_non_hidden_fields))
return true;
}
curr_fields = &tmp_fields[REF_SLICE_TMP1];
// Need to set them now for correct group_fields setup, reset at the end.
set_ref_item_slice(REF_SLICE_TMP1);
qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP1;
setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_outer);
/*
If having is not handled here, it will be checked before the row is sent
to the client.
*/
if (having_cond &&
(streaming_aggregation ||
(exec_tmp_table->s->is_distinct && group_list.empty()))) {
/*
If there is no select distinct or rollup, then move the having to table
conds of tmp table.
NOTE : We cannot apply having after distinct. If columns of having are
not part of select distinct, then distinct may remove rows
which can satisfy having.
As this condition will read the tmp table, it is appropriate that
REF_SLICE_TMP1 is in effect when we create it below.
*/
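/*
Illustration (hypothetical query): with
SELECT DISTINCT a FROM t HAVING <condition on b>;
rows (a=1, b=1) and (a=1, b=2) collapse into one under DISTINCT. If the
kept row fails the condition while the removed one would have satisfied
it, the result wrongly loses a=1. Hence HAVING must be evaluated before
rows are written to the distinct tmp table.
*/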
if ((!select_distinct && rollup_state == RollupState::NONE) &&
add_having_as_tmp_table_cond(curr_tmp_table))
return true;
/*
Having condition which we are not able to add as tmp table conds are
kept as before. And, this will be applied before storing the rows in
tmp table.
*/
qep_tab[curr_tmp_table].having = having_cond;
having_cond = nullptr; // Already done
}
tmp_table_param.func_count = 0;
if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group) {
tmp_table_param.func_count += tmp_table_param.sum_func_count;
tmp_table_param.sum_func_count = 0;
}
if (exec_tmp_table->group) { // Already grouped
/*
Check if group by has to respect ordering. If true, move group by to
order by.
*/
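/*
E.g. a GROUP BY element carrying an explicit direction (the legacy
"GROUP BY a DESC" syntax) implies an ordering the result must keep, so
the grouping order is promoted to ORDER BY now that the tmp table has
performed the grouping itself.
*/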
if (order.empty() && !skip_sort_order) {
for (ORDER *group = group_list.order; group; group = group->next) {
if (group->direction != ORDER_NOT_RELEVANT) {
order = group_list; /* order by group */
break;
}
}
}
group_list.clean();
}
/*
If the sort and group orders differ, we must sort the data by group
and copy it to a second temporary table.
This code is also used if we are using DISTINCT on something we have
not been able to store in the temporary table yet, like
SEC_TO_TIME(SUM(...)), or when DISTINCT is used together with rollup.
*/
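/*
E.g. SELECT a, SUM(b) FROM t GROUP BY a ORDER BY SUM(b);
here the ordering is not a subpart of the grouping, so rows are grouped
into this second tmp table first and the final sort happens afterwards.
*/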
if ((!group_list.empty() &&
(!test_if_subpart(group_list.order, order.order) || select_distinct ||
m_windowing_steps || rollup_state != RollupState::NONE)) ||
(select_distinct && (tmp_table_param.using_outer_summary_function ||
rollup_state != RollupState::NONE))) {
DBUG_PRINT("info", ("Creating group table"));
calc_group_buffer(this, group_list.order);
count_field_types(query_block, &tmp_table_param,
tmp_fields[REF_SLICE_TMP1],
select_distinct && group_list.empty(), false);
tmp_table_param.hidden_field_count =
CountHiddenFields(tmp_fields[REF_SLICE_TMP1]);
streaming_aggregation = false;
if (!exec_tmp_table->group && !exec_tmp_table->s->is_distinct) {
// The 1st tmp table was materializing the join result
materialize_join = true;
explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
}
curr_tmp_table++;
tmp_tables++;
Opt_trace_object trace_this_tbl(trace);
trace_this_tbl.add("adding_tmp_table_in_plan_at_position", curr_tmp_table)
.add_alnum("cause", "sorting_to_make_groups");
/* group data to new table */
/*
If the access method is loose index scan then all MIN/MAX
functions are precomputed, and should be treated as regular
functions. See extended comment above.
*/
if (qep_tab[0].range_scan() &&
is_loose_index_scan(qep_tab[0].range_scan()))
tmp_table_param.precomputed_group_by = true;
ORDER_with_src dummy; // TODO can use table->group here also
if (create_intermediate_table(&qep_tab[curr_tmp_table], *curr_fields,
dummy, true))
return true;
if (!group_list.empty()) {
explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
if (!plan_is_const()) // No need to sort a single row
{
if (add_sorting_to_table(curr_tmp_table - 1, &group_list,
/*sort_before_group=*/true))
return true;
}
if (make_group_fields(this, this)) return true;
}
// Set up sum funcs only when necessary; otherwise we might break info
// for the first table
if (!group_list.empty() || tmp_table_param.sum_func_count) {
if (make_sum_func_list(*curr_fields, true, true)) return true;
const bool need_distinct =
!(qep_tab[0].range_scan() &&
is_agg_loose_index_scan(qep_tab[0].range_scan()));
if (prepare_sum_aggregators(sum_funcs, need_distinct)) return true;
group_list.clean();
if (setup_sum_funcs(thd, sum_funcs)) return true;
}
/*
Allocate a slice of ref items that describe the items to be copied
from the second temporary table.
*/
if (alloc_ref_item_slice(thd, REF_SLICE_TMP2)) return true;
// No sum funcs anymore
if (change_to_use_tmp_fields(&tmp_fields[REF_SLICE_TMP1], thd,
ref_items[REF_SLICE_TMP2],
&tmp_fields[REF_SLICE_TMP2],
query_block->m_added_non_hidden_fields))
return true;
curr_fields = &tmp_fields[REF_SLICE_TMP2];
set_ref_item_slice(REF_SLICE_TMP2);
qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP2;
setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_tbl);
}
if (qep_tab[curr_tmp_table].table()->s->is_distinct)
select_distinct = false; /* Each row is unique */
if (select_distinct && group_list.empty() && !m_windowing_steps) {
if (having_cond) {
qep_tab[curr_tmp_table].having = having_cond;
having_cond->update_used_tables();
having_cond = nullptr;
}
qep_tab[curr_tmp_table].needs_duplicate_removal = true;
trace_this_outer.add("reading_from_table_eliminates_duplicates", true);
explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
select_distinct = false;
}
/* Clean tmp_table_param for the next tmp table. */
tmp_table_param.sum_func_count = tmp_table_param.func_count = 0;
tmp_table_param.cleanup();
streaming_aggregation = false;
if (!group_optimized_away) {
grouped = false;
} else {
/*
If grouping has been optimized away, a temporary table is
normally not needed unless we are explicitly requested to create
one (e.g. due to a SQL_BUFFER_RESULT hint, INSERT ... SELECT, or
a windowing function that needs sorting).
In this case (grouping was optimized away), temp_table was
created without a grouping expression, and JOIN::exec() will not
perform the necessary grouping (by the use of end_send_group()
or end_write_group()) if JOIN::group is set to false.
*/
/*
The temporary table was explicitly requested or there is a window
function which needs sorting (check need_tmp_before_win in
JOIN::optimize).
*/
assert(query_block->active_options() & OPTION_BUFFER_RESULT ||
m_windowing_steps);
// the temporary table does not have a grouping expression
assert(!qep_tab[curr_tmp_table].table()->group);
}
calc_group_buffer(this, group_list.order);
count_field_types(query_block, &tmp_table_param, *curr_fields, false,
false);
}
/*
Set up structures for a temporary table, but do not actually create
the temporary table, if one of these conditions is true:
- The query is implicitly grouped.
- The query is explicitly grouped and
+ implemented as a simple grouping, or
+ LIMIT 1 is specified, or
+ ROLLUP is specified, or
+ <some unknown condition>.
*/
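/*
E.g. SELECT COUNT(*) FROM t; is implicitly grouped: there is exactly
one result row, so aggregation can be finalized while reading rows,
without materializing a grouping tmp table.
*/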
if ((grouped || implicit_grouping) && !m_windowing_steps) {
if (make_group_fields(this, this)) return true;
if (make_sum_func_list(*curr_fields, true, true)) return true;
const bool need_distinct =
!(qep_tab && qep_tab[0].range_scan() &&
is_agg_loose_index_scan(qep_tab[0].range_scan()));
if (prepare_sum_aggregators(sum_funcs, need_distinct)) return true;
if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error()) return true;
}
if (qep_tab && (!group_list.empty() ||
(!order.empty() && !m_windowing_steps /* [1] */))) {
/*
[1] above: too early to do query ORDER BY if we have windowing; must
wait till after window processing.
*/
ASSERT_BEST_REF_IN_JOIN_ORDER(this);
DBUG_PRINT("info", ("Sorting for send_result_set_metadata"));
/*
If we have already done the grouping, add HAVING to the sorted table,
except when rollup is present.
*/
if (having_cond && group_list.empty() && !streaming_aggregation &&
rollup_state == RollupState::NONE) {
if (add_having_as_tmp_table_cond(curr_tmp_table)) return true;
}
if (grouped)
m_select_limit = HA_POS_ERROR;
else if (!need_tmp_before_win) {
/*
We can abort sorting after thd->select_limit rows if there are no
filter conditions for any tables after the sorted one.
Filter conditions come in several forms:
1. as a condition item attached to the join_tab, or
2. as a keyuse attached to the join_tab (ref access).
*/
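/*
If any such filter exists, a row that sorts among the first LIMIT rows
may still be rejected by a later table, so the first LIMIT sorted rows
are not necessarily the first LIMIT result rows and we must sort all of
them.
*/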
for (uint i = const_tables + 1; i < primary_tables; i++) {
QEP_TAB *const tab = qep_tab + i;
if (tab->condition() || // 1
(best_ref[tab->idx()]->keyuse() &&
tab->first_inner() == NO_PLAN_IDX)) // 2
{
/* We have to sort all rows */
m_select_limit = HA_POS_ERROR;
break;
}
}
}
/*
Here we add a sorting stage for the ORDER BY/GROUP BY clause, if the
optimizer chose filesort to be faster than an index scan, or if there
is no suitable index present.
OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
*/
DBUG_PRINT("info", ("Sorting for order by/group by"));
ORDER_with_src order_arg = group_list.empty() ? order : group_list;
if (qep_tab &&
m_ordered_index_usage != (group_list.empty()
? ORDERED_INDEX_ORDER_BY
: ORDERED_INDEX_GROUP_BY) &&
// Windowing will change order, so it's too early to sort here
!m_windowing_steps) {
// Sort either first non-const table or the last tmp table
QEP_TAB *const sort_tab = &qep_tab[curr_tmp_table];
if (need_tmp_before_win && !materialize_join && !exec_tmp_table->group)
explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);
if (add_sorting_to_table(curr_tmp_table, &order_arg,
/*sort_before_group=*/false))
return true;
/*
filesort_limit: Return only this many rows from filesort().
We can use select_limit_cnt only if we have no GROUP BY and one table.
This allows us to use Bounded_queue for queries like
"select * from t1 order by b desc limit 1;"
In that example, m_select_limit == HA_POS_ERROR (we need a full table
scan) while query_expression->select_limit_cnt == 1 (we only need one
row in the result set).
*/
if (sort_tab->filesort)
sort_tab->filesort->limit =
(has_group_by || (primary_tables > curr_tmp_table + 1) ||
calc_found_rows)
? m_select_limit
: query_expression()->select_limit_cnt;
}
}
if (qep_tab && m_windowing_steps) {
for (uint wno = 0; wno < m_windows.elements; wno++) {
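// Each window gets its own output tmp table and ref item slice
// (REF_SLICE_WIN_1 + wno below); the previous step's rows are streamed
// or buffered through it so the window functions can be evaluated.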
tmp_table_param.m_window = m_windows[wno];
if (!tmp_tables) {
curr_tmp_table = primary_tables;
tmp_tables++;
if (ref_items[REF_SLICE_SAVED_BASE].is_null()) {
/*
Make a copy of the base slice in the save slice.
This is needed because later steps will overwrite the base slice with
another slice (1-3 or window slice).
After this slice has been used, overwrite the base slice again with
the copy in the save slice.
*/
if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) return true;
copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE);
current_ref_item_slice = REF_SLICE_SAVED_BASE;
}
} else {
curr_tmp_table++;
tmp_tables++;
}
ORDER_with_src dummy;
tmp_table_param.hidden_field_count = CountHiddenFields(*curr_fields);
/*
Allocate a slice of ref items that describe the items to be copied
from the next temporary table.
*/
const uint widx = REF_SLICE_WIN_1 + wno;
QEP_TAB *tab = &qep_tab[curr_tmp_table];
mem_root_deque<Item *> *orig_fields = curr_fields;
{
Opt_trace_object trace_this_tbl(trace);
trace_this_tbl
.add("adding_tmp_table_in_plan_at_position", curr_tmp_table)
.add_alnum("cause", "output_for_window_functions")
.add("with_buffer", m_windows[wno]->needs_buffering());
if (create_intermediate_table(tab, *curr_fields, dummy, false,
"window output buffer"))
return true;
if (alloc_ref_item_slice(thd, widx)) return true;
if (change_to_use_tmp_fields(curr_fields, thd, ref_items[widx],
&tmp_fields[widx],
query_block->m_added_non_hidden_fields,
/*windowing*/ true))
return true;
curr_fields = &tmp_fields[widx];
set_ref_item_slice(widx);
tab->ref_item_slice = widx;
setup_tmptable_write_func(tab, &trace_this_tbl);
}
if (m_windows[wno]->needs_buffering()) {
if (CreateFramebufferTable(
thd, tmp_table_param, *query_block, *orig_fields, *curr_fields,
tab->tmp_table_param->items_to_copy, m_windows[wno])) {
return true;
}
}
if (m_windows[wno]->make_special_rows_cache(thd, tab->table()))
return true;
ORDER_with_src w_partition(m_windows[wno]->sorting_order(),
ESC_WINDOWING);
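/*
Rows must reach the window sorted on its PARTITION BY/ORDER BY clauses;
if the window requires such an ordering, sort the preceding step's
output here.
*/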
if (w_partition.order != nullptr) {
Opt_trace_object trace_pre_sort(trace, "adding_sort_to_previous_table");
if (add_sorting_to_table(curr_tmp_table - 1, &w_partition,
/*sort_before_group=*/false))
return true;
}
if (m_windows[wno]->is_last()) {
if (!order.empty() && m_ordered_index_usage != ORDERED_INDEX_ORDER_BY) {
if (add_sorting_to_table(curr_tmp_table, &order,
/*sort_before_group=*/false))
return true;
}
if (!tab->filesort && !tab->table()->s->keys &&
(!(query_block->active_options() & OPTION_BUFFER_RESULT) ||
need_tmp_before_win || wno >= 1)) {
/*
Last tmp table of execution; no sort, no duplicate elimination, no
buffering imposed by the user (or it has already been implemented by
a previous tmp table): hence rows need not be written to the tmp
table's storage; send them out to the query's result instead:
*/
m_windows[wno]->set_short_circuit(true);
}
}
if (having_cond != nullptr) {
tab->having = having_cond;
having_cond = nullptr;
}
}
}
// In the case of rollup (only): After the base slice list was made, we may
// have modified the field list to add rollup group items and sum switchers.
// Since there may be HAVING filters with refs that refer to the base slice,
// we need to refresh that slice (and its copy, REF_SLICE_SAVED_BASE) so
// that it includes the updated items.
//
// Note that we do this after we've made the TMP1 and TMP2 slices, since
// there's a lot of logic that looks through the GROUP BY list, which refers
// to the base slice and expects _not_ to find rollup items there.
refresh_base_slice();
fields = curr_fields;
// Reset before execution
set_ref_item_slice(REF_SLICE_SAVED_BASE);
if (qep_tab) {
qep_tab[primary_tables + tmp_tables].op_type = get_end_select_func();
}
grouped = has_group_by;
unplug_join_tabs();
/*
Tmp tables are a layer between the nested loop and the derived table's
result; WITH RECURSIVE cannot work with them. This should not happen,
as a recursive query cannot have clauses which use a tmp table (GROUP
BY, etc.).
*/
assert(!query_block->is_recursive() || !tmp_tables);
cleanup_tmp_tables_on_error.release();
return false;
}