From c676e659b246f94d571b57b559f80cb2dc03e73b Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 28 Nov 2019 22:20:28 +0100 Subject: Fix choose_best_statistics to check clauses individually When picking the best extended statistics object for a list of clauses, it's not enough to look at attnums extracted from the clause list as a whole. Consider for example this query with OR clauses: SELECT * FROM t WHERE (t.a = 1) OR (t.b = 1) OR (t.c = 1) with a statistics defined on columns (a,b). Relying on attnums extracted from the whole OR clause, we'd consider the statistics usable. That does not work, as we see the conditions as a single OR-clause, referencing an attribute not covered by the statistic, leading to empty list of clauses to be estimated using the statistics and an assert failure. This changes choose_best_statistics to check which clauses are actually covered, and only using attributes from the fully covered ones. For the previous example this means the statistics object will not be considered as compatible with the OR-clause. Backpatch to 12, where MCVs were introduced. The issue does not affect older versions because functional dependencies don't handle OR clauses. Author: Tomas Vondra Reviewed-by: Dean Rasheed Reported-By: Manuel Rigger Discussion: https://postgr.es/m/CA+u7OA7H5rcE2=8f263w4NZD6ipO_XOrYB816nuLXbmSTH9pQQ@mail.gmail.com Backpatch-through: 12 --- src/backend/statistics/dependencies.c | 26 ++++++++++++--------- src/backend/statistics/extended_stats.c | 40 ++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 22 deletions(-) (limited to 'src/backend') diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 9493b2d5397..ce31c959a9e 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -951,14 +951,14 @@ dependencies_clauselist_selectivity(PlannerInfo *root, Bitmapset *clauses_attnums = NULL; StatisticExtInfo *stat; MVDependencies *dependencies; - AttrNumber *list_attnums; + Bitmapset **list_attnums; int listidx; /* check if there's any stats that might be useful for us. */ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) return 1.0; - list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * + list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) * list_length(clauses)); /* @@ -981,11 +981,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root, if (!bms_is_member(listidx, *estimatedclauses) && dependency_is_compatible_clause(clause, rel->relid, &attnum)) { - list_attnums[listidx] = attnum; + list_attnums[listidx] = bms_make_singleton(attnum); clauses_attnums = bms_add_member(clauses_attnums, attnum); } else - list_attnums[listidx] = InvalidAttrNumber; + list_attnums[listidx] = NULL; listidx++; } @@ -1002,8 +1002,8 @@ dependencies_clauselist_selectivity(PlannerInfo *root, } /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, clauses_attnums, - STATS_EXT_DEPENDENCIES); + stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES, + list_attnums, list_length(clauses)); /* if no matching stats could be found then we've nothing to do */ if (!stat) @@ -1043,15 +1043,21 @@ dependencies_clauselist_selectivity(PlannerInfo *root, foreach(l, clauses) { Node *clause; + AttrNumber attnum; listidx++; /* * Skip incompatible clauses, and ones we've already estimated on. */ - if (list_attnums[listidx] == InvalidAttrNumber) + if (!list_attnums[listidx]) continue; + /* + * We expect the bitmaps ton contain a single attribute number. + */ + attnum = bms_singleton_member(list_attnums[listidx]); + /* * Technically we could find more than one clause for a given * attnum. Since these clauses must be equality clauses, we choose @@ -1061,8 +1067,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, * anyway. If it happens to be compared to the same Const, then * ignoring the additional clause is just the thing to do. */ - if (dependency_implies_attribute(dependency, - list_attnums[listidx])) + if (dependency_implies_attribute(dependency, attnum)) { clause = (Node *) lfirst(l); @@ -1077,8 +1082,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, * We'll want to ignore this when looking for the next * strongest dependency above. */ - clauses_attnums = bms_del_member(clauses_attnums, - list_attnums[listidx]); + clauses_attnums = bms_del_member(clauses_attnums, attnum); } } diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 207ee3160ef..6299011ca66 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -844,15 +844,20 @@ has_stats_of_kind(List *stats, char requiredkind) * there's no match. * * The current selection criteria is very simple - we choose the statistics - * object referencing the most of the requested attributes, breaking ties - * in favor of objects with fewer keys overall. + * object referencing the most attributes in covered (and still unestimated + * clauses), breaking ties in favor of objects with fewer keys overall. + * + * The clause_attnums is an array of bitmaps, storing attnums for individual + * clauses. A NULL element means the clause is either incompatible or already + * estimated. * * XXX If multiple statistics objects tie on both criteria, then which object * is chosen depends on the order that they appear in the stats list. Perhaps * further tiebreakers are needed. */ StatisticExtInfo * -choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind) +choose_best_statistics(List *stats, char requiredkind, + Bitmapset **clause_attnums, int nclauses) { ListCell *lc; StatisticExtInfo *best_match = NULL; @@ -861,17 +866,33 @@ choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind) foreach(lc, stats) { + int i; StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + Bitmapset *matched = NULL; int num_matched; int numkeys; - Bitmapset *matched; /* skip statistics that are not of the correct type */ if (info->kind != requiredkind) continue; - /* determine how many attributes of these stats can be matched to */ - matched = bms_intersect(attnums, info->keys); + /* + * Collect attributes in remaining (unestimated) clauses fully covered + * by this statistic object. + */ + for (i = 0; i < nclauses; i++) + { + /* ignore incompatible/estimated clauses */ + if (!clause_attnums[i]) + continue; + + /* ignore clauses that are not covered by this object */ + if (!bms_is_subset(clause_attnums[i], info->keys)) + continue; + + matched = bms_add_members(matched, clause_attnums[i]); + } + num_matched = bms_num_members(matched); bms_free(matched); @@ -1233,12 +1254,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli listidx++; } - /* We need at least two attributes for multivariate statistics. */ - if (bms_membership(clauses_attnums) != BMS_MULTIPLE) - return 1.0; - /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, clauses_attnums, STATS_EXT_MCV); + stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, + list_attnums, list_length(clauses)); /* if no matching stats could be found then we've nothing to do */ if (!stat) -- cgit v1.2.3