Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 297b280

Browse files
author
Richard Guo
committed
Ignore nullingrels when looking up statistics
When looking up statistical data about an expression, we do not need to concern ourselves with the outer joins that could null the Vars/PHVs contained in the expression. Accounting for nullingrels in the expression could cause estimate_num_groups to count the same Var multiple times if it's marked with different nullingrels. This is incorrect, and could lead to "ERROR: corrupt MVNDistinct entry" when searching for multivariate n-distinct. Furthermore, the nullingrels could prevent us from matching an expression to expressional index columns or to the expressions in extended statistics, leading to inaccurate estimates. To fix, strip out all the nullingrels from the expression before we look up statistical data about it. There is one ensuing plan change in the regression tests, but it looks reasonable and does not compromise its original purpose. This patch could result in plan changes, but it fixes an actual bug, so back-patch to v16 where the outer-join-aware-Var infrastructure was introduced. Author: Richard Guo Discussion: https://postgr.es/m/CAMbWs4-2Z4k+nFTiZe0Qbu5n8juUWenDAtMzi98bAZQtwHx0-w@mail.gmail.com
1 parent 7596207 commit 297b280

File tree

3 files changed

+63
-5
lines changed

3 files changed

+63
-5
lines changed

src/backend/utils/adt/selfuncs.c

+26-3
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
#include "parser/parse_clause.h"
122122
#include "parser/parse_relation.h"
123123
#include "parser/parsetree.h"
124+
#include "rewrite/rewriteManip.h"
124125
#include "statistics/statistics.h"
125126
#include "storage/bufmgr.h"
126127
#include "utils/acl.h"
@@ -3306,6 +3307,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
33063307

33073308
ndistinct = get_variable_numdistinct(vardata, &isdefault);
33083309

3310+
/*
3311+
* The nullingrels bits within the var could cause the same var to be
3312+
* counted multiple times if it's marked with different nullingrels. They
3313+
* could also prevent us from matching the var to the expressions in
3314+
* extended statistics (see estimate_multivariate_ndistinct). So strip
3315+
* them out first.
3316+
*/
3317+
var = remove_nulling_relids(var, root->outer_join_rels, NULL);
3318+
33093319
foreach(lc, varinfos)
33103320
{
33113321
varinfo = (GroupVarInfo *) lfirst(lc);
@@ -5017,6 +5027,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
50175027
{
50185028
Node *basenode;
50195029
Relids varnos;
5030+
Relids basevarnos;
50205031
RelOptInfo *onerel;
50215032

50225033
/* Make sure we don't return dangling pointers in vardata */
@@ -5058,18 +5069,20 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
50585069
* relation are considered "real" vars.
50595070
*/
50605071
varnos = pull_varnos(root, basenode);
5072+
basevarnos = bms_difference(varnos, root->outer_join_rels);
50615073

50625074
onerel = NULL;
50635075

5064-
if (bms_is_empty(varnos))
5076+
if (bms_is_empty(basevarnos))
50655077
{
50665078
/* No Vars at all ... must be pseudo-constant clause */
50675079
}
50685080
else
50695081
{
50705082
int relid;
50715083

5072-
if (bms_get_singleton_member(varnos, &relid))
5084+
/* Check if the expression is in vars of a single base relation */
5085+
if (bms_get_singleton_member(basevarnos, &relid))
50735086
{
50745087
if (varRelid == 0 || varRelid == relid)
50755088
{
@@ -5099,7 +5112,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
50995112
}
51005113
}
51015114

5102-
bms_free(varnos);
5115+
bms_free(basevarnos);
51035116

51045117
vardata->var = node;
51055118
vardata->atttype = exprType(node);
@@ -5124,6 +5137,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
51245137
ListCell *slist;
51255138
Oid userid;
51265139

5140+
/*
5141+
* The nullingrels bits within the expression could prevent us from
5142+
* matching it to expressional index columns or to the expressions in
5143+
* extended statistics. So strip them out first.
5144+
*/
5145+
if (bms_overlap(varnos, root->outer_join_rels))
5146+
node = remove_nulling_relids(node, root->outer_join_rels, NULL);
5147+
51275148
/*
51285149
* Determine the user ID to use for privilege checks: either
51295150
* onerel->userid if it's set (e.g., in case we're accessing the table
@@ -5394,6 +5415,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
53945415
}
53955416
}
53965417
}
5418+
5419+
bms_free(varnos);
53975420
}
53985421

53995422
/*

src/test/regress/expected/join.out

+24-2
Original file line numberDiff line numberDiff line change
@@ -2517,10 +2517,11 @@ where t1.f1 = coalesce(t2.f1, 1);
25172517
-> Materialize
25182518
-> Seq Scan on int4_tbl t2
25192519
Filter: (f1 > 1)
2520-
-> Seq Scan on int4_tbl t3
2520+
-> Materialize
2521+
-> Seq Scan on int4_tbl t3
25212522
-> Materialize
25222523
-> Seq Scan on int4_tbl t4
2523-
(13 rows)
2524+
(14 rows)
25242525

25252526
explain (costs off)
25262527
select * from int4_tbl t1
@@ -8014,3 +8015,24 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
80148015

80158016
RESET enable_indexonlyscan;
80168017
RESET enable_seqscan;
8018+
-- Test that we do not account for nullingrels when looking up statistics
8019+
CREATE TABLE group_tbl (a INT, b INT);
8020+
INSERT INTO group_tbl SELECT 1, 1;
8021+
CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
8022+
ANALYZE group_tbl;
8023+
EXPLAIN (COSTS OFF)
8024+
SELECT 1 FROM group_tbl t1
8025+
LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
8026+
GROUP BY s.c1, s.c2;
8027+
QUERY PLAN
8028+
--------------------------------------------
8029+
Group
8030+
Group Key: t2.a, (COALESCE(t2.a))
8031+
-> Sort
8032+
Sort Key: t2.a, (COALESCE(t2.a))
8033+
-> Nested Loop Left Join
8034+
-> Seq Scan on group_tbl t1
8035+
-> Seq Scan on group_tbl t2
8036+
(7 rows)
8037+
8038+
DROP TABLE group_tbl;

src/test/regress/sql/join.sql

+13
Original file line numberDiff line numberDiff line change
@@ -2952,3 +2952,16 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
29522952

29532953
RESET enable_indexonlyscan;
29542954
RESET enable_seqscan;
2955+
2956+
-- Test that we do not account for nullingrels when looking up statistics
2957+
CREATE TABLE group_tbl (a INT, b INT);
2958+
INSERT INTO group_tbl SELECT 1, 1;
2959+
CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
2960+
ANALYZE group_tbl;
2961+
2962+
EXPLAIN (COSTS OFF)
2963+
SELECT 1 FROM group_tbl t1
2964+
LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
2965+
GROUP BY s.c1, s.c2;
2966+
2967+
DROP TABLE group_tbl;

0 commit comments

Comments
 (0)