bool JOIN::make_tmp_tables_info()

in sql/sql_select.cc [5060:5472]


bool JOIN::make_tmp_tables_info()
{
  List<Item> *curr_all_fields= &all_fields;
  List<Item> *curr_fields_list= &fields_list;
  bool materialize_join= false;
  uint curr_tmp_table= const_tables;
  TABLE *exec_tmp_table= NULL;
  DBUG_ENTER("JOIN::make_tmp_tables_info");
  having_for_explain= having;

  const bool has_group_by= this->group;
  /*
    Set up the last table to provide the fields and all_fields lists to the
    next node in the plan.
  */
  if (join_tab)
  {
    join_tab[primary_tables - 1].fields= &fields_list;
    join_tab[primary_tables - 1].all_fields= &all_fields;
  }
  /*
    The loose index scan access method guarantees that all grouping or
    duplicate row elimination (for distinct) is already performed
    during data retrieval, and that all MIN/MAX functions are already
    computed for each group. Thus all MIN/MAX functions should be
    treated as regular functions, and there is no need to perform
    grouping in the main execution loop.
    Notice that currently loose index scan is applicable only for
    single table queries, thus it is sufficient to test only the first
    join_tab element of the plan for its access method.
  */
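  /*
    Illustration (editor's note, hypothetical schema): a query such as
      SELECT a, MIN(b) FROM t1 GROUP BY a;
    with an index on (a, b) is a typical loose index scan candidate; for
    such plans the branch below can mark grouping/MIN-MAX work as already
    precomputed.
  */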
  if (join_tab && join_tab->is_using_loose_index_scan())
    tmp_table_param.precomputed_group_by=
      !join_tab->is_using_agg_loose_index_scan();

  /* Create a tmp table if distinct or if the sort is too complicated */
  if (need_tmp)
  {
    curr_tmp_table= primary_tables;
    tmp_tables++;
    if (plan_is_const())
      first_select= sub_select_op;

    /*
      Create temporary table on first execution of this join.
      (Will be reused if this is a subquery that is executed several times.)
    */
    init_items_ref_array();

    ORDER_with_src tmp_group;
    if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP))
      tmp_group= group_list;

    tmp_table_param.hidden_field_count=
      all_fields.elements - fields_list.elements;

    if (create_intermediate_table(&join_tab[curr_tmp_table],
                                  &all_fields, tmp_group,
                                  group_list && simple_group))
      DBUG_RETURN(true);
    exec_tmp_table= join_tab[curr_tmp_table].table;

    if (exec_tmp_table->distinct)
      optimize_distinct();

    /*
      If there is no sorting or grouping, an ordered index result
      ('use_order') should not have been requested.
      Exception: the LooseScan strategy for semijoin requires
      sorted access even if the final result is not to be sorted.
    */
    DBUG_ASSERT(
      !(ordered_index_usage == ordered_index_void &&
        !plan_is_const() &&
        join_tab[const_tables].position->sj_strategy != SJ_OPT_LOOSE_SCAN &&
        join_tab[const_tables].use_order()));

    /* Change sum_fields reference to calculated fields in tmp_table */
    DBUG_ASSERT(items1.is_null());
    items1= ref_ptr_array_slice(2);
    if (sort_and_group || join_tab[curr_tmp_table].table->group ||
        tmp_table_param.precomputed_group_by)
    {
      if (change_to_use_tmp_fields(thd, items1,
                                   tmp_fields_list1, tmp_all_fields1,
                                   fields_list.elements, all_fields))
        DBUG_RETURN(true);
    }
    else
    {
      if (change_refs_to_tmp_fields(thd, items1,
                                    tmp_fields_list1, tmp_all_fields1,
                                    fields_list.elements, all_fields))
        DBUG_RETURN(true);
    }
    curr_all_fields= &tmp_all_fields1;
    curr_fields_list= &tmp_fields_list1;
    // Need to set them now for correct group_fields setup, reset at the end.
    set_items_ref_array(items1);
    join_tab[curr_tmp_table].ref_array= &items1;
    join_tab[curr_tmp_table].all_fields= &tmp_all_fields1;
    join_tab[curr_tmp_table].fields= &tmp_fields_list1;
    setup_tmptable_write_func(&join_tab[curr_tmp_table]);

    /*
      If having is not handled here, it will be checked before the row is sent
      to the client.
    */
    if (having &&
        (sort_and_group || (exec_tmp_table->distinct && !group_list)))
    {
      /*
        If there is no SELECT DISTINCT, move the HAVING condition into the
        table conditions of the tmp table.
        NOTE: We cannot apply HAVING after DISTINCT. If columns referenced by
              HAVING are not part of the SELECT DISTINCT list, DISTINCT may
              remove rows that would satisfy HAVING.
      */
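      /*
        Illustration (editor's note, hypothetical table, assuming the
        sql_mode permits referencing 'b' here): for
          SELECT DISTINCT a FROM t1 HAVING b > 0;
        DISTINCT could keep a row whose b <= 0 and drop a duplicate of the
        same 'a' whose b > 0, so evaluating HAVING after DISTINCT would
        change the result.
      */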
      if (!select_distinct && add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);

      /*
        Any HAVING condition that we are not able to add as a tmp table
        condition is kept as before, and will be applied before rows are
        stored in the tmp table.
      */
      join_tab[curr_tmp_table].having= having;
      having= NULL; // Already done
    }

    tmp_table_param.func_count= 0;
    tmp_table_param.field_count+= tmp_table_param.func_count;
    if (sort_and_group || join_tab[curr_tmp_table].table->group)
    {
      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }

    if (exec_tmp_table->group)
    {						// Already grouped
      if (!order && !no_order && !skip_sort_order)
        order= group_list;  /* order by group */
      group_list= NULL;
    }
    /*
      If sorting and grouping differ, we must sort the data by group
      and copy it to another tmp table.
      This code is also used when applying DISTINCT to something we have
      not been able to store in the temporary table yet, such as
      SEC_TO_TIME(SUM(...)).
    */
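    /*
      Illustration (editor's note, hypothetical queries): e.g.
        SELECT a, COUNT(*) FROM t1 GROUP BY a ORDER BY b;
      (ORDER BY is not a subpart of GROUP BY), or
        SELECT DISTINCT SEC_TO_TIME(SUM(t)) FROM t1 GROUP BY a;
      would fall into the branch below and need a second tmp table.
    */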

    if ((group_list &&
         (!test_if_subpart(group_list, order) || select_distinct)) ||
        (select_distinct && tmp_table_param.using_outer_summary_function))
    {					/* Must copy to another table */
      DBUG_PRINT("info",("Creating group table"));

      calc_group_buffer(this, group_list);
      count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
                        select_distinct && !group_list);
      tmp_table_param.hidden_field_count=
        tmp_all_fields1.elements - tmp_fields_list1.elements;

      if (!exec_tmp_table->group && !exec_tmp_table->distinct)
      {
        // The 1st tmp table materializes the join result
        materialize_join= true;
        explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE);
      }
      curr_tmp_table++;
      tmp_tables++;

      /* group data to new table */
      /*
        If the access method is loose index scan then all MIN/MAX
        functions are precomputed, and should be treated as regular
        functions. See extended comment above.
      */
      if (join_tab->is_using_loose_index_scan())
        tmp_table_param.precomputed_group_by= TRUE;

      tmp_table_param.hidden_field_count=
        curr_all_fields->elements - curr_fields_list->elements;
      ORDER_with_src dummy= NULL; //TODO can use table->group here also

      if (create_intermediate_table(&join_tab[curr_tmp_table],
                                    curr_all_fields, dummy, true))
        DBUG_RETURN(true);

      if (group_list)
      {
        explain_flags.set(group_list.src, ESP_USING_TMPTABLE);
        if (!plan_is_const())        // No need to sort a single row
        {
          JOIN_TAB *sort_tab= &join_tab[curr_tmp_table - 1];
          if (add_sorting_to_table(sort_tab, &group_list))
            DBUG_RETURN(true);
        }

        if (make_group_fields(this, this))
          DBUG_RETURN(true);
      }

      /*
        If there is no sorting or grouping, an ordered index result
        ('use_order') should not have been requested.
      */
      DBUG_ASSERT(!(ordered_index_usage == ordered_index_void &&
                    !plan_is_const() &&
                    join_tab[const_tables].use_order()));

      // Set up sum funcs only when necessary; otherwise we might break
      // info for the first table.
      if (group_list || tmp_table_param.sum_func_count)
      {
        if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
          DBUG_RETURN(true);
        if (prepare_sum_aggregators(sum_funcs,
                                    !join_tab->is_using_agg_loose_index_scan()))
          DBUG_RETURN(true);
        group_list= NULL;
        if (setup_sum_funcs(thd, sum_funcs))
          DBUG_RETURN(true);
      }
      // No sum funcs anymore
      DBUG_ASSERT(items2.is_null());

      items2= ref_ptr_array_slice(3);
      if (change_to_use_tmp_fields(thd, items2,
                                   tmp_fields_list2, tmp_all_fields2,
                                   fields_list.elements, tmp_all_fields1))
        DBUG_RETURN(true);

      curr_fields_list= &tmp_fields_list2;
      curr_all_fields= &tmp_all_fields2;
      set_items_ref_array(items2);
      join_tab[curr_tmp_table].ref_array= &items2;
      join_tab[curr_tmp_table].all_fields= &tmp_all_fields2;
      join_tab[curr_tmp_table].fields= &tmp_fields_list2;
      setup_tmptable_write_func(&join_tab[curr_tmp_table]);

      tmp_table_param.field_count+= tmp_table_param.sum_func_count;
      tmp_table_param.sum_func_count= 0;
    }
    if (join_tab[curr_tmp_table].table->distinct)
      select_distinct= false;               /* Each row is unique */

    if (select_distinct && !group_list)
    {
      if (having)
      {
        join_tab[curr_tmp_table].having= having;
        having->update_used_tables();
      }
      join_tab[curr_tmp_table].distinct= true;
      explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL);
      having= NULL;
      select_distinct= false;
    }
    /* Clean tmp_table_param for the next tmp table. */
    tmp_table_param.field_count= tmp_table_param.sum_func_count=
      tmp_table_param.func_count= 0;

    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
    first_record= sort_and_group=0;

    if (!group_optimized_away)
    {
      group= false;
    }
    else
    {
      /*
        If grouping has been optimized away, a temporary table is
        normally not needed unless we're explicitly requested to create
        one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).

        In this case (grouping was optimized away), temp_table was
        created without a grouping expression and JOIN::exec() will not
        perform the necessary grouping (by the use of end_send_group()
        or end_write_group()) if JOIN::group is set to false.
      */
      // the temporary table was explicitly requested
      DBUG_ASSERT(MY_TEST(select_options & OPTION_BUFFER_RESULT));
      // the temporary table does not have a grouping expression
      DBUG_ASSERT(!join_tab[curr_tmp_table].table->group);
    }
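    /*
      Illustration (editor's note, hypothetical query): for
        SELECT SQL_BUFFER_RESULT a, COUNT(*) FROM t1 WHERE a = 1 GROUP BY a;
      the GROUP BY can be optimized away ('a' is constant), yet a tmp table
      was still explicitly requested; this is the case the assertions above
      describe.
    */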
    calc_group_buffer(this, group_list);
    count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
  }

  if (group || implicit_grouping || tmp_table_param.sum_func_count)
  {
    if (make_group_fields(this, this))
      DBUG_RETURN(true);

    DBUG_ASSERT(items3.is_null());

    if (items0.is_null())
      init_items_ref_array();
    items3= ref_ptr_array_slice(4);
    setup_copy_fields(thd, &tmp_table_param,
                      items3, tmp_fields_list3, tmp_all_fields3,
                      curr_fields_list->elements, *curr_all_fields);

    curr_fields_list= &tmp_fields_list3;
    curr_all_fields= &tmp_all_fields3;
    set_items_ref_array(items3);
    if (join_tab)
    {
      // Set grouped fields on the last table
      join_tab[primary_tables + tmp_tables - 1].ref_array= &items3;
      join_tab[primary_tables + tmp_tables - 1].all_fields= &tmp_all_fields3;
      join_tab[primary_tables + tmp_tables - 1].fields= &tmp_fields_list3;
    }
    if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
      DBUG_RETURN(true);
    if (prepare_sum_aggregators(sum_funcs,
                                !join_tab ||
                                !join_tab->is_using_agg_loose_index_scan()))
      DBUG_RETURN(true);
    if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error)
      DBUG_RETURN(true);
  }
  if (group_list || order)
  {
    DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
    THD_STAGE_INFO(thd, stage_sorting_result);
    /* If we have already done the group, add HAVING to sorted table */
    if (having && !group_list && !sort_and_group)
    {
      if (add_having_as_tmp_table_cond(curr_tmp_table))
        DBUG_RETURN(true);
    }

    if (group)
      m_select_limit= HA_POS_ERROR;
    else if (!need_tmp)
    {
      /*
        We can abort sorting after thd->select_limit rows if there are no
        filter conditions for any tables after the sorted one.
        Filter conditions come in several forms:
         1. as a condition item attached to the join_tab, or
         2. as a keyuse attached to the join_tab (ref access).
      */
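      /*
        Illustration (editor's note, hypothetical query): in
          SELECT * FROM t1 JOIN t2 ON t2.a = t1.a ORDER BY t1.b LIMIT 10;
        the ref access to t2 (a keyuse on a later table) may filter out
        rows of t1, so the sort cannot stop after 10 rows and
        m_select_limit is set to HA_POS_ERROR below.
      */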
      for (uint i= const_tables + 1; i < primary_tables; i++)
      {
        JOIN_TAB *const tab= join_tab + i;
        if (tab->condition() ||                                // 1
            (tab->keyuse && !tab->first_inner))                // 2
        {
          /* We have to sort all rows */
          m_select_limit= HA_POS_ERROR;
          break;
        }
      }
    }
    /*
      Here we add a sorting stage for the ORDER BY/GROUP BY clause, if the
      optimiser chose FILESORT to be faster than an INDEX SCAN or there is
      no suitable index present.
      OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
    */
    DBUG_PRINT("info",("Sorting for order by/group by"));
    ORDER_with_src order_arg= group_list ? group_list : order;
    if (join_tab &&
        ordered_index_usage !=
        (group_list ? ordered_index_group_by : ordered_index_order_by) &&
        join_tab[curr_tmp_table].type != JT_CONST &&
        join_tab[curr_tmp_table].type != JT_EQ_REF) // Don't sort 1 row
    {
      // Sort either first non-const table or the last tmp table
      JOIN_TAB *sort_tab= &join_tab[curr_tmp_table];
      if (need_tmp && !materialize_join && !exec_tmp_table->group)
        explain_flags.set(order_arg.src, ESP_USING_TMPTABLE);

      if (add_sorting_to_table(sort_tab, &order_arg))
        DBUG_RETURN(true);
      /*
        filesort_limit: Return only this many rows from filesort().
        We can use select_limit_cnt only if we have no group_by and 1 table.
        This allows us to use Bounded_queue for queries like:
          "select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
        m_select_limit == HA_POS_ERROR (we need a full table scan)
        unit->select_limit_cnt == 1 (we only need one row in the result set)
      */
      sort_tab->filesort->limit=
        (has_group_by || (primary_tables > curr_tmp_table + 1)) ?
         m_select_limit : unit->select_limit_cnt;
    }
    if (!plan_is_const() &&
        !join_tab[const_tables].table->sort.io_cache)
    {
      /*
        If no IO cache exists for the first table then we are using an
        INDEX SCAN and no filesort. Thus we should not remove the sorted
        attribute on the INDEX SCAN.
      */
      skip_sort_order= true;
    }
  }
  fields= curr_fields_list;
  // Reset before execution
  set_items_ref_array(items0);
  if (join_tab)
    join_tab[primary_tables + tmp_tables - 1].next_select=
      setup_end_select_func(this, NULL);
  group= has_group_by;

  DBUG_RETURN(false);
}