diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/catalog/pg_constraint.c | 94 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 159 |
2 files changed, 253 insertions, 0 deletions
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index d85484976a9..74007856f35 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -17,6 +17,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/sysattr.h" #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" @@ -872,6 +873,99 @@ get_domain_constraint_oid(Oid typid, const char *conname, bool missing_ok) } /* + * get_primary_key_attnos + * Identify the columns in a relation's primary key, if any. + * + * Returns a Bitmapset of the column attnos of the primary key's columns, + * with attnos being offset by FirstLowInvalidHeapAttributeNumber so that + * system columns can be represented. + * + * If there is no primary key, return NULL. We also return NULL if the pkey + * constraint is deferrable and deferrableOk is false. + * + * *constraintOid is set to the OID of the pkey constraint, or InvalidOid + * on failure. + */ +Bitmapset * +get_primary_key_attnos(Oid relid, bool deferrableOk, Oid *constraintOid) +{ + Bitmapset *pkattnos = NULL; + Relation pg_constraint; + HeapTuple tuple; + SysScanDesc scan; + ScanKeyData skey[1]; + + /* Set *constraintOid, to avoid complaints about uninitialized vars */ + *constraintOid = InvalidOid; + + /* Scan pg_constraint for constraints of the target rel */ + pg_constraint = heap_open(ConstraintRelationId, AccessShareLock); + + ScanKeyInit(&skey[0], + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true, + NULL, 1, skey); + + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); + Datum adatum; + bool isNull; + ArrayType *arr; + int16 *attnums; + int numkeys; + int i; + + /* Skip constraints that are not PRIMARY KEYs */ + if (con->contype != CONSTRAINT_PRIMARY) + continue; + + /* + * If the primary key is deferrable, but we've been instructed to + * ignore deferrable constraints, then we might as well give up + * searching, since there can only be a single primary key on a table. + */ + if (con->condeferrable && !deferrableOk) + break; + + /* Extract the conkey array, ie, attnums of PK's columns */ + adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, + RelationGetDescr(pg_constraint), &isNull); + if (isNull) + elog(ERROR, "null conkey for constraint %u", + HeapTupleGetOid(tuple)); + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + numkeys = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + numkeys < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "conkey is not a 1-D smallint array"); + attnums = (int16 *) ARR_DATA_PTR(arr); + + /* Construct the result value */ + for (i = 0; i < numkeys; i++) + { + pkattnos = bms_add_member(pkattnos, + attnums[i] - FirstLowInvalidHeapAttributeNumber); + } + *constraintOid = HeapTupleGetOid(tuple); + + /* No need to search further */ + break; + } + + systable_endscan(scan); + + heap_close(pg_constraint, AccessShareLock); + + return pkattnos; +} + +/* * Determine whether a relation can be proven functionally dependent on * a set of grouping columns. If so, return TRUE and add the pg_constraint * OIDs of the constraints needed for the proof to the *constraintDeps list. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index af4a2198465..f77c804b702 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -20,7 +20,9 @@ #include "access/htup_details.h" #include "access/parallel.h" +#include "access/sysattr.h" #include "access/xact.h" +#include "catalog/pg_constraint_fn.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "foreign/fdwapi.h" @@ -89,6 +91,7 @@ static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); static bool limit_needed(Query *parse); +static void remove_useless_groupby_columns(PlannerInfo *root); static List *preprocess_groupclause(PlannerInfo *root, List *force); static List *extract_rollup_sets(List *groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); @@ -710,6 +713,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, } parse->havingQual = (Node *) newHaving; + /* Remove any redundant GROUP BY columns */ + remove_useless_groupby_columns(root); + /* * If we have any outer joins, try to reduce them to plain inner joins. * This step is most easily done after we've done expression @@ -3208,6 +3214,159 @@ limit_needed(Query *parse) /* + * remove_useless_groupby_columns + * Remove any columns in the GROUP BY clause that are redundant due to + * being functionally dependent on other GROUP BY columns. + * + * Since some other DBMSes do not allow references to ungrouped columns, it's + * not unusual to find all columns listed in GROUP BY even though listing the + * primary-key columns would be sufficient. Deleting such excess columns + * avoids redundant sorting work, so it's worth doing. When we do this, we + * must mark the plan as dependent on the pkey constraint (compare the + * parser's check_ungrouped_columns() and check_functional_grouping()). + * + * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE + * index as the determining columns. But as with check_functional_grouping(), + * there's currently no way to represent dependency on a NOT NULL constraint, + * so we consider only the pkey for now. + */ +static void +remove_useless_groupby_columns(PlannerInfo *root) +{ + Query *parse = root->parse; + Bitmapset **groupbyattnos; + Bitmapset **surplusvars; + ListCell *lc; + int relid; + + /* No chance to do anything if there are less than two GROUP BY items */ + if (list_length(parse->groupClause) < 2) + return; + + /* Don't fiddle with the GROUP BY clause if the query has grouping sets */ + if (parse->groupingSets) + return; + + /* + * Scan the GROUP BY clause to find GROUP BY items that are simple Vars. + * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k + * that are GROUP BY items. + */ + groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) * + (list_length(parse->rtable) + 1)); + foreach(lc, parse->groupClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); + Var *var = (Var *) tle->expr; + + /* + * Ignore non-Vars and Vars from other query levels. + * + * XXX in principle, stable expressions containing Vars could also be + * removed, if all the Vars are functionally dependent on other GROUP + * BY items. But it's not clear that such cases occur often enough to + * be worth troubling over. + */ + if (!IsA(var, Var) || + var->varlevelsup > 0) + continue; + + /* OK, remember we have this Var */ + relid = var->varno; + Assert(relid <= list_length(parse->rtable)); + groupbyattnos[relid] = bms_add_member(groupbyattnos[relid], + var->varattno - FirstLowInvalidHeapAttributeNumber); + } + + /* + * Consider each relation and see if it is possible to remove some of its + * Vars from GROUP BY. For simplicity and speed, we do the actual removal + * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset + * of the column attnos of RTE k that are removable GROUP BY items. + */ + surplusvars = NULL; /* don't allocate array unless required */ + relid = 0; + foreach(lc, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + Bitmapset *relattnos; + Bitmapset *pkattnos; + Oid constraintOid; + + relid++; + + /* Only plain relations could have primary-key constraints */ + if (rte->rtekind != RTE_RELATION) + continue; + + /* Nothing to do unless this rel has multiple Vars in GROUP BY */ + relattnos = groupbyattnos[relid]; + if (bms_membership(relattnos) != BMS_MULTIPLE) + continue; + + /* + * Can't remove any columns for this rel if there is no suitable + * (i.e., nondeferrable) primary key constraint. + */ + pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid); + if (pkattnos == NULL) + continue; + + /* + * If the primary key is a proper subset of relattnos then we have + * some items in the GROUP BY that can be removed. + */ + if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1) + { + /* + * To easily remember whether we've found anything to do, we don't + * allocate the surplusvars[] array until we find something. + */ + if (surplusvars == NULL) + surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) * + (list_length(parse->rtable) + 1)); + + /* Remember the attnos of the removable columns */ + surplusvars[relid] = bms_difference(relattnos, pkattnos); + + /* Also, mark the resulting plan as dependent on this constraint */ + parse->constraintDeps = lappend_oid(parse->constraintDeps, + constraintOid); + } + } + + /* + * If we found any surplus Vars, build a new GROUP BY clause without them. + * (Note: this may leave some TLEs with unreferenced ressortgroupref + * markings, but that's harmless.) + */ + if (surplusvars != NULL) + { + List *new_groupby = NIL; + + foreach(lc, parse->groupClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); + Var *var = (Var *) tle->expr; + + /* + * New list must include non-Vars, outer Vars, and anything not + * marked as surplus. + */ + if (!IsA(var, Var) || + var->varlevelsup > 0 || + !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, + surplusvars[var->varno])) + new_groupby = lappend(new_groupby, sgc); + } + + parse->groupClause = new_groupby; + } +} + +/* * preprocess_groupclause - do preparatory work on GROUP BY clause * * The idea here is to adjust the ordering of the GROUP BY elements |