Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 36c4bc6

Browse files
committed
Ignore extended statistics for inheritance trees
Since commit 859b300 we only build extended statistics for individual relations, ignoring the child relations. This resolved the issue with updating catalog tuple twice, but we still tried to use the statistics when calculating estimates for the whole inheritance tree. When the relations contain very distinct data, it may produce bogus estimates. This is roughly the same issue 427c6b5 addressed ~15 years ago, and we fix it the same way - by ignoring extended statistics when calculating estimates for the inheritance tree as a whole. We still consider extended statistics when calculating estimates for individual child relations, of course. This may result in plan changes due to different estimates, but if the old statistics were not describing the inheritance tree particularly well it's quite likely the new plans is actually better. Report and patch by Justin Pryzby, minor fixes and cleanup by me. Backpatch all the way back to PostgreSQL 10, where extended statistics were introduced (same as 859b300). Author: Justin Pryzby Reported-by: Justin Pryzby Backpatch-through: 10 Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
1 parent 49c9d9f commit 36c4bc6

File tree

5 files changed

+98
-0
lines changed

5 files changed

+98
-0
lines changed

src/backend/statistics/dependencies.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "nodes/pathnodes.h"
2525
#include "optimizer/clauses.h"
2626
#include "optimizer/optimizer.h"
27+
#include "parser/parsetree.h"
2728
#include "statistics/extended_stats_internal.h"
2829
#include "statistics/statistics.h"
2930
#include "utils/bytea.h"
@@ -1410,11 +1411,19 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
14101411
int ndependencies;
14111412
int i;
14121413
AttrNumber attnum_offset;
1414+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
14131415

14141416
/* unique expressions */
14151417
Node **unique_exprs;
14161418
int unique_exprs_cnt;
14171419

1420+
/*
1421+
* When dealing with inheritance trees, ignore extended stats (which were
1422+
* built without data from child rels, and thus do not represent them).
1423+
*/
1424+
if (rte->inh)
1425+
return 1.0;
1426+
14181427
/* check if there's any stats that might be useful for us. */
14191428
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
14201429
return 1.0;

src/backend/statistics/extended_stats.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "nodes/nodeFuncs.h"
3131
#include "optimizer/clauses.h"
3232
#include "optimizer/optimizer.h"
33+
#include "parser/parsetree.h"
3334
#include "pgstat.h"
3435
#include "postmaster/autovacuum.h"
3536
#include "statistics/extended_stats_internal.h"
@@ -1694,6 +1695,14 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
16941695
List **list_exprs; /* expressions matched to any statistic */
16951696
int listidx;
16961697
Selectivity sel = (is_or) ? 0.0 : 1.0;
1698+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1699+
1700+
/*
1701+
* When dealing with inheritance trees, ignore extended stats (which were
1702+
* built without data from child rels, and thus do not represent them).
1703+
*/
1704+
if (rte->inh)
1705+
return sel;
16971706

16981707
/* check if there's any stats that might be useful for us. */
16991708
if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))

src/backend/utils/adt/selfuncs.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3913,6 +3913,14 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
39133913
Oid statOid = InvalidOid;
39143914
MVNDistinct *stats;
39153915
StatisticExtInfo *matched_info = NULL;
3916+
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3917+
3918+
/*
3919+
* When dealing with inheritance trees, ignore extended stats (which were
3920+
* built without data from child rels, and thus do not represent them).
3921+
*/
3922+
if (rte->inh)
3923+
return false;
39163924

39173925
/* bail out immediately if the table has no extended statistics */
39183926
if (!rel->statlist)
@@ -5222,6 +5230,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
52225230
foreach(slist, onerel->statlist)
52235231
{
52245232
StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
5233+
RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
52255234
ListCell *expr_item;
52265235
int pos;
52275236

@@ -5232,6 +5241,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
52325241
if (vardata->statsTuple)
52335242
break;
52345243

5244+
/*
5245+
* When dealing with inheritance trees, ignore extended stats (which
5246+
* were built without data from child rels, and so do not represent
5247+
* them).
5248+
*/
5249+
if (rte->inh)
5250+
break;
5251+
52355252
/* skip stats without per-expression stats */
52365253
if (info->kind != STATS_EXT_EXPRESSIONS)
52375254
continue;

src/test/regress/expected/stats_ext.out

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,47 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
176176
ANALYZE ab1;
177177
DROP TABLE ab1 CASCADE;
178178
NOTICE: drop cascades to table ab1c
179+
-- Tests for stats with inheritance
180+
CREATE TABLE stxdinh(a int, b int);
181+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
182+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
183+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
184+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
185+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
186+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
187+
-- Ensure non-inherited stats are not applied to inherited query
188+
-- Without stats object, it looks like this
189+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
190+
estimated | actual
191+
-----------+--------
192+
400 | 150
193+
(1 row)
194+
195+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
196+
estimated | actual
197+
-----------+--------
198+
3 | 40
199+
(1 row)
200+
201+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
202+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
203+
-- Since the stats object does not include inherited stats, it should not
204+
-- affect the estimates
205+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
206+
estimated | actual
207+
-----------+--------
208+
400 | 150
209+
(1 row)
210+
211+
-- Dependencies are applied at individual relations (within append), so
212+
-- this estimate changes a bit because we improve estimates for the parent
213+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
214+
estimated | actual
215+
-----------+--------
216+
22 | 40
217+
(1 row)
218+
219+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
179220
-- basic test for statistics on expressions
180221
CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
181222
-- expression stats may be built on a single expression column

src/test/regress/sql/stats_ext.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,28 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
112112
ANALYZE ab1;
113113
DROP TABLE ab1 CASCADE;
114114

115+
-- Tests for stats with inheritance
116+
CREATE TABLE stxdinh(a int, b int);
117+
CREATE TABLE stxdinh1() INHERITS(stxdinh);
118+
CREATE TABLE stxdinh2() INHERITS(stxdinh);
119+
INSERT INTO stxdinh SELECT mod(a,50), mod(a,100) FROM generate_series(0, 1999) a;
120+
INSERT INTO stxdinh1 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
121+
INSERT INTO stxdinh2 SELECT mod(a,100), mod(a,100) FROM generate_series(0, 999) a;
122+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
123+
-- Ensure non-inherited stats are not applied to inherited query
124+
-- Without stats object, it looks like this
125+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
126+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
127+
CREATE STATISTICS stxdinh ON a, b FROM stxdinh;
128+
VACUUM ANALYZE stxdinh, stxdinh1, stxdinh2;
129+
-- Since the stats object does not include inherited stats, it should not
130+
-- affect the estimates
131+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* GROUP BY 1, 2');
132+
-- Dependencies are applied at individual relations (within append), so
133+
-- this estimate changes a bit because we improve estimates for the parent
134+
SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinh* WHERE a = 0 AND b = 0');
135+
DROP TABLE stxdinh, stxdinh1, stxdinh2;
136+
115137
-- basic test for statistics on expressions
116138
CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
117139

0 commit comments

Comments
 (0)