static void generate_base_implied_equalities

static void generate_base_implied_equalities_const()

in src/backend/optimizer/path/equivclass.c [39:469]
259 lines of code
34 McCabe index (conditional complexity)

static void generate_base_implied_equalities_const(PlannerInfo *root,
												   EquivalenceClass *ec);
static void generate_base_implied_equalities_no_const(PlannerInfo *root,
													  EquivalenceClass *ec);
static void generate_base_implied_equalities_broken(PlannerInfo *root,
													EquivalenceClass *ec);
static List *generate_join_implied_equalities_normal(PlannerInfo *root,
													 EquivalenceClass *ec,
													 Relids join_relids,
													 Relids outer_relids,
													 Relids inner_relids);
static List *generate_join_implied_equalities_broken(PlannerInfo *root,
													 EquivalenceClass *ec,
													 Relids nominal_join_relids,
													 Relids outer_relids,
													 Relids nominal_inner_relids,
													 RelOptInfo *inner_rel);
static Oid	select_equality_operator(EquivalenceClass *ec,
									 Oid lefttype, Oid righttype);
static RestrictInfo *create_join_clause(PlannerInfo *root,
										EquivalenceClass *ec, Oid opno,
										EquivalenceMember *leftem,
										EquivalenceMember *rightem,
										EquivalenceClass *parent_ec);
static bool reconsider_outer_join_clause(PlannerInfo *root,
										 RestrictInfo *rinfo,
										 bool outer_on_left);
static bool reconsider_full_join_clause(PlannerInfo *root,
										RestrictInfo *rinfo);
static Bitmapset *get_eclass_indexes_for_relids(PlannerInfo *root,
												Relids relids);
static Bitmapset *get_common_eclass_indexes(PlannerInfo *root, Relids relids1,
											Relids relids2);


/*
 * process_equivalence
 *	  The given clause has a mergejoinable operator and can be applied without
 *	  any delay by an outer join, so its two sides can be considered equal
 *	  anywhere they are both computable; moreover that equality can be
 *	  extended transitively.  Record this knowledge in the EquivalenceClass
 *	  data structure, if applicable.  Returns true if successful, false if not
 *	  (in which case caller should treat the clause as ordinary, not an
 *	  equivalence).
 *
 * In some cases, although we cannot convert a clause into EquivalenceClass
 * knowledge, we can still modify it to a more useful form than the original.
 * Then, *p_restrictinfo will be replaced by a new RestrictInfo, which is what
 * the caller should use for further processing.
 *
 * If below_outer_join is true, then the clause was found below the nullable
 * side of an outer join, so its sides might validly be both NULL rather than
 * strictly equal.  We can still deduce equalities in such cases, but we take
 * care to mark an EquivalenceClass if it came from any such clauses.  Also,
 * we have to check that both sides are either pseudo-constants or strict
 * functions of Vars, else they might not both go to NULL above the outer
 * join.  (This is the main reason why we need a failure return.  It's more
 * convenient to check this case here than at the call sites...)
 *
 * We also reject proposed equivalence clauses if they contain leaky functions
 * and have security_level above zero.  The EC evaluation rules require us to
 * apply certain tests at certain joining levels, and we can't tolerate
 * delaying any test on security_level grounds.  By rejecting candidate clauses
 * that might require security delays, we ensure it's safe to apply an EC
 * clause as soon as it's supposed to be applied.
 *
 * On success return, we have also initialized the clause's left_ec/right_ec
 * fields to point to the EquivalenceClass representing it.  This saves lookup
 * effort later.
 *
 * Note: constructing merged EquivalenceClasses is a standard UNION-FIND
 * problem, for which there exist better data structures than simple lists.
 * If this code ever proves to be a bottleneck then it could be sped up ---
 * but for now, simple is beautiful.
 *
 * Note: this is only called during planner startup, not during GEQO
 * exploration, so we need not worry about whether we're in the right
 * memory context.
 */
bool
process_equivalence(PlannerInfo *root,
					RestrictInfo **p_restrictinfo,
					bool below_outer_join)
{
	RestrictInfo *restrictinfo = *p_restrictinfo;
	Expr	   *clause = restrictinfo->clause;
	Oid			opno,
				collation,
				item1_type,
				item2_type;
	Expr	   *item1;
	Expr	   *item2;
	Relids		item1_relids,
				item2_relids,
				item1_nullable_relids,
				item2_nullable_relids;
	List	   *opfamilies;
	EquivalenceClass *ec1,
			   *ec2;
	EquivalenceMember *em1,
			   *em2;
	ListCell   *lc1;
	int			ec2_idx;

	/* Should not already be marked as having generated an eclass */
	Assert(restrictinfo->left_ec == NULL);
	Assert(restrictinfo->right_ec == NULL);

	/* Reject if it is potentially postponable by security considerations */
	if (restrictinfo->security_level > 0 && !restrictinfo->leakproof)
		return false;

	/* Extract info from given clause */
	Assert(is_opclause(clause));
	opno = ((OpExpr *) clause)->opno;
	collation = ((OpExpr *) clause)->inputcollid;
	item1 = (Expr *) get_leftop(clause);
	item2 = (Expr *) get_rightop(clause);
	item1_relids = restrictinfo->left_relids;
	item2_relids = restrictinfo->right_relids;

	/*
	 * Ensure both input expressions expose the desired collation (their types
	 * should be OK already); see comments for canonicalize_ec_expression.
	 */
	item1 = canonicalize_ec_expression(item1,
									   exprType((Node *) item1),
									   collation);
	item2 = canonicalize_ec_expression(item2,
									   exprType((Node *) item2),
									   collation);

	/*
	 * Clauses of the form X=X cannot be translated into EquivalenceClasses.
	 * We'd either end up with a single-entry EC, losing the knowledge that
	 * the clause was present at all, or else make an EC with duplicate
	 * entries, causing other issues.
	 */
	if (equal(item1, item2))
	{
		/*
		 * If the operator is strict, then the clause can be treated as just
		 * "X IS NOT NULL".  (Since we know we are considering a top-level
		 * qual, we can ignore the difference between FALSE and NULL results.)
		 * It's worth making the conversion because we'll typically get a much
		 * better selectivity estimate than we would for X=X.
		 *
		 * If the operator is not strict, we can't be sure what it will do
		 * with NULLs, so don't attempt to optimize it.
		 */
		set_opfuncid((OpExpr *) clause);
		if (func_strict(((OpExpr *) clause)->opfuncid))
		{
			NullTest   *ntest = makeNode(NullTest);

			ntest->arg = item1;
			ntest->nulltesttype = IS_NOT_NULL;
			ntest->argisrow = false;	/* correct even if composite arg */
			ntest->location = -1;

			*p_restrictinfo =
				make_restrictinfo(root,
								  (Expr *) ntest,
								  restrictinfo->is_pushed_down,
								  restrictinfo->outerjoin_delayed,
								  restrictinfo->pseudoconstant,
								  restrictinfo->security_level,
								  NULL,
								  restrictinfo->outer_relids,
								  restrictinfo->nullable_relids);
		}
		return false;
	}

	/*
	 * If below outer join, check for strictness, else reject.
	 */
	if (below_outer_join)
	{
		if (!bms_is_empty(item1_relids) &&
			contain_nonstrict_functions((Node *) item1))
			return false;		/* LHS is non-strict but not constant */
		if (!bms_is_empty(item2_relids) &&
			contain_nonstrict_functions((Node *) item2))
			return false;		/* RHS is non-strict but not constant */
	}

	/* Calculate nullable-relid sets for each side of the clause */
	item1_nullable_relids = bms_intersect(item1_relids,
										  restrictinfo->nullable_relids);
	item2_nullable_relids = bms_intersect(item2_relids,
										  restrictinfo->nullable_relids);

	/*
	 * We use the declared input types of the operator, not exprType() of the
	 * inputs, as the nominal datatypes for opfamily lookup.  This presumes
	 * that btree operators are always registered with amoplefttype and
	 * amoprighttype equal to their declared input types.  We will need this
	 * info anyway to build EquivalenceMember nodes, and by extracting it now
	 * we can use type comparisons to short-circuit some equal() tests.
	 */
	op_input_types(opno, &item1_type, &item2_type);

	opfamilies = restrictinfo->mergeopfamilies;

	/*
	 * Sweep through the existing EquivalenceClasses looking for matches to
	 * item1 and item2.  These are the possible outcomes:
	 *
	 * 1. We find both in the same EC.  The equivalence is already known, so
	 * there's nothing to do.
	 *
	 * 2. We find both in different ECs.  Merge the two ECs together.
	 *
	 * 3. We find just one.  Add the other to its EC.
	 *
	 * 4. We find neither.  Make a new, two-entry EC.
	 *
	 * Note: since all ECs are built through this process or the similar
	 * search in get_eclass_for_sort_expr(), it's impossible that we'd match
	 * an item in more than one existing nonvolatile EC.  So it's okay to stop
	 * at the first match.
	 */
	ec1 = ec2 = NULL;
	em1 = em2 = NULL;
	ec2_idx = -1;
	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		ListCell   *lc2;

		/* Never match to a volatile EC */
		if (cur_ec->ec_has_volatile)
			continue;

		/*
		 * The collation has to match; check this first since it's cheaper
		 * than the opfamily comparison.
		 */
		if (collation != cur_ec->ec_collation)
			continue;

		/*
		 * A "match" requires matching sets of btree opfamilies.  Use of
		 * equal() for this test has implications discussed in the comments
		 * for get_mergejoin_opfamilies().
		 */
		if (!equal(opfamilies, cur_ec->ec_opfamilies))
			continue;

		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);

			Assert(!cur_em->em_is_child);	/* no children yet */

			/*
			 * If below an outer join, don't match constants: they're not as
			 * constant as they look.
			 */
			if ((below_outer_join || cur_ec->ec_below_outer_join) &&
				cur_em->em_is_const)
				continue;

			if (!ec1 &&
				item1_type == cur_em->em_datatype &&
				equal(item1, cur_em->em_expr))
			{
				ec1 = cur_ec;
				em1 = cur_em;
				if (ec2)
					break;
			}

			if (!ec2 &&
				item2_type == cur_em->em_datatype &&
				equal(item2, cur_em->em_expr))
			{
				ec2 = cur_ec;
				ec2_idx = foreach_current_index(lc1);
				em2 = cur_em;
				if (ec1)
					break;
			}
		}

		if (ec1 && ec2)
			break;
	}

	/* Sweep finished, what did we find? */

	if (ec1 && ec2)
	{
		/* If case 1, nothing to do, except add to sources */
		if (ec1 == ec2)
		{
			ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
			ec1->ec_below_outer_join |= below_outer_join;
			ec1->ec_min_security = Min(ec1->ec_min_security,
									   restrictinfo->security_level);
			ec1->ec_max_security = Max(ec1->ec_max_security,
									   restrictinfo->security_level);
			/* mark the RI as associated with this eclass */
			restrictinfo->left_ec = ec1;
			restrictinfo->right_ec = ec1;
			/* mark the RI as usable with this pair of EMs */
			restrictinfo->left_em = em1;
			restrictinfo->right_em = em2;
			return true;
		}

		/*
		 * Case 2: need to merge ec1 and ec2.  This should never happen after
		 * the ECs have reached canonical state; otherwise, pathkeys could be
		 * rendered non-canonical by the merge, and relation eclass indexes
		 * would get broken by removal of an eq_classes list entry.
		 */
		if (root->ec_merging_done)
			elog(ERROR, "too late to merge equivalence classes");

		/*
		 * We add ec2's items to ec1, then set ec2's ec_merged link to point
		 * to ec1 and remove ec2 from the eq_classes list.  We cannot simply
		 * delete ec2 because that could leave dangling pointers in existing
		 * PathKeys.  We leave it behind with a link so that the merged EC can
		 * be found.
		 */
		ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
		ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
		ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives);
		ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
		ec1->ec_has_const |= ec2->ec_has_const;
		/* can't need to set has_volatile */
		ec1->ec_below_outer_join |= ec2->ec_below_outer_join;
		ec1->ec_min_security = Min(ec1->ec_min_security,
								   ec2->ec_min_security);
		ec1->ec_max_security = Max(ec1->ec_max_security,
								   ec2->ec_max_security);
		ec2->ec_merged = ec1;
		root->eq_classes = list_delete_nth_cell(root->eq_classes, ec2_idx);
		/* just to avoid debugging confusion w/ dangling pointers: */
		ec2->ec_members = NIL;
		ec2->ec_sources = NIL;
		ec2->ec_derives = NIL;
		ec2->ec_relids = NULL;
		ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
		ec1->ec_below_outer_join |= below_outer_join;
		ec1->ec_min_security = Min(ec1->ec_min_security,
								   restrictinfo->security_level);
		ec1->ec_max_security = Max(ec1->ec_max_security,
								   restrictinfo->security_level);
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec1;
		restrictinfo->right_ec = ec1;
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
	}
	else if (ec1)
	{
		/* Case 3: add item2 to ec1 */
		em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids,
							false, item2_type);
		ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
		ec1->ec_below_outer_join |= below_outer_join;
		ec1->ec_min_security = Min(ec1->ec_min_security,
								   restrictinfo->security_level);
		ec1->ec_max_security = Max(ec1->ec_max_security,
								   restrictinfo->security_level);
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec1;
		restrictinfo->right_ec = ec1;
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
	}
	else if (ec2)
	{
		/* Case 3: add item1 to ec2 */
		em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids,
							false, item1_type);
		ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
		ec2->ec_below_outer_join |= below_outer_join;
		ec2->ec_min_security = Min(ec2->ec_min_security,
								   restrictinfo->security_level);
		ec2->ec_max_security = Max(ec2->ec_max_security,
								   restrictinfo->security_level);
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec2;
		restrictinfo->right_ec = ec2;
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
	}
	else
	{
		/* Case 4: make a new, two-entry EC */
		EquivalenceClass *ec = makeNode(EquivalenceClass);

		ec->ec_opfamilies = opfamilies;
		ec->ec_collation = collation;
		ec->ec_members = NIL;
		ec->ec_sources = list_make1(restrictinfo);
		ec->ec_derives = NIL;
		ec->ec_relids = NULL;
		ec->ec_has_const = false;
		ec->ec_has_volatile = false;
		ec->ec_below_outer_join = below_outer_join;
		ec->ec_broken = false;
		ec->ec_sortref = 0;
		ec->ec_min_security = restrictinfo->security_level;
		ec->ec_max_security = restrictinfo->security_level;
		ec->ec_merged = NULL;
		em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
							false, item1_type);
		em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
							false, item2_type);

		root->eq_classes = lappend(root->eq_classes, ec);

		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec;
		restrictinfo->right_ec = ec;
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
	}

	return true;
}