Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e983ee9

Browse files
committed
Improve statistics estimation for single-column GROUP BY in sub-queries
This commit follows the idea of commit 4767bc8. If a sub-query has only one GROUP BY column, we can consider its output variable as being unique. We can employ this fact in the statistics to make more precise estimations in the upper query block.

Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
1 parent 8a695d7 commit e983ee9

File tree

4 files changed

+60
-23
lines changed

4 files changed

+60
-23
lines changed

src/backend/utils/adt/selfuncs.c

+31-22
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,10 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
322322
}
323323

324324
/*
325-
* If we matched the var to a unique index or DISTINCT clause, assume
326-
* there is exactly one match regardless of anything else. (This is
327-
* slightly bogus, since the index or clause's equality operator might be
328-
* different from ours, but it's much more likely to be right than
325+
* If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
326+
* assume there is exactly one match regardless of anything else. (This
327+
* is slightly bogus, since the index or clause's equality operator might
328+
* be different from ours, but it's much more likely to be right than
329329
* ignoring the information.)
330330
*/
331331
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
@@ -484,10 +484,10 @@ var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation,
484484
}
485485

486486
/*
487-
* If we matched the var to a unique index or DISTINCT clause, assume
488-
* there is exactly one match regardless of anything else. (This is
489-
* slightly bogus, since the index or clause's equality operator might be
490-
* different from ours, but it's much more likely to be right than
487+
* If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
488+
* assume there is exactly one match regardless of anything else. (This
489+
* is slightly bogus, since the index or clause's equality operator might
490+
* be different from ours, but it's much more likely to be right than
491491
* ignoring the information.)
492492
*/
493493
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
@@ -5018,11 +5018,11 @@ ReleaseDummy(HeapTuple tuple)
50185018
* atttype, atttypmod: actual type/typmod of the "var" expression. This is
50195019
* commonly the same as the exposed type of the variable argument,
50205020
* but can be different in binary-compatible-type cases.
5021-
* isunique: true if we were able to match the var to a unique index or a
5022-
* single-column DISTINCT clause, implying its values are unique for
5023-
* this query. (Caution: this should be trusted for statistical
5024-
* purposes only, since we do not check indimmediate nor verify that
5025-
* the exact same definition of equality applies.)
5021+
* isunique: true if we were able to match the var to a unique index, a
5022+
* single-column DISTINCT or GROUP-BY clause, implying its values are
5023+
* unique for this query. (Caution: this should be trusted for
5024+
* statistical purposes only, since we do not check indimmediate nor
5025+
* verify that the exact same definition of equality applies.)
50265026
* acl_ok: true if current user has permission to read the column(s)
50275027
* underlying the pg_statistic entry. This is consulted by
50285028
* statistic_proc_security_check().
@@ -5680,15 +5680,14 @@ examine_simple_variable(PlannerInfo *root, Var *var,
56805680
Assert(IsA(subquery, Query));
56815681

56825682
/*
5683-
* Punt if subquery uses set operations or GROUP BY, as these will
5684-
* mash underlying columns' stats beyond recognition. (Set ops are
5685-
* particularly nasty; if we forged ahead, we would return stats
5683+
* Punt if subquery uses set operations or grouping sets, as these
5684+
* will mash underlying columns' stats beyond recognition. (Set ops
5685+
* are particularly nasty; if we forged ahead, we would return stats
56865686
* relevant to only the leftmost subselect...) DISTINCT is also
56875687
* problematic, but we check that later because there is a possibility
56885688
* of learning something even with it.
56895689
*/
56905690
if (subquery->setOperations ||
5691-
subquery->groupClause ||
56925691
subquery->groupingSets)
56935692
return;
56945693

@@ -5718,6 +5717,16 @@ examine_simple_variable(PlannerInfo *root, Var *var,
57185717
return;
57195718
}
57205719

5720+
/* The same idea as with DISTINCT clause works for a GROUP-BY too */
5721+
if (subquery->groupClause)
5722+
{
5723+
if (list_length(subquery->groupClause) == 1 &&
5724+
targetIsInSortList(ste, InvalidOid, subquery->groupClause))
5725+
vardata->isunique = true;
5726+
/* cannot go further */
5727+
return;
5728+
}
5729+
57215730
/*
57225731
* If the sub-query originated from a view with the security_barrier
57235732
* attribute, we must not look at the variable's statistics, though it
@@ -5869,11 +5878,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
58695878
}
58705879

58715880
/*
5872-
* If there is a unique index or DISTINCT clause for the variable, assume
5873-
* it is unique no matter what pg_statistic says; the statistics could be
5874-
* out of date, or we might have found a partial unique index that proves
5875-
* the var is unique for this query. However, we'd better still believe
5876-
* the null-fraction statistic.
5881+
* If there is a unique index, DISTINCT or GROUP-BY clause for the
5882+
* variable, assume it is unique no matter what pg_statistic says; the
5883+
* statistics could be out of date, or we might have found a partial
5884+
* unique index that proves the var is unique for this query. However,
5885+
* we'd better still believe the null-fraction statistic.
58775886
*/
58785887
if (vardata->isunique)
58795888
stadistinct = -1.0 * (1.0 - stanullfrac);

src/include/utils/selfuncs.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ typedef struct VariableStatData
9494
Oid vartype; /* exposed type of expression */
9595
Oid atttype; /* actual type (after stripping relabel) */
9696
int32 atttypmod; /* actual typmod (after stripping relabel) */
97-
bool isunique; /* matches unique index or DISTINCT clause */
97+
bool isunique; /* matches unique index, DISTINCT or GROUP-BY
98+
* clause */
9899
bool acl_ok; /* result of ACL check on table or column */
99100
} VariableStatData;
100101

src/test/regress/expected/stats_ext.out

+15
Original file line numberDiff line numberDiff line change
@@ -3368,3 +3368,18 @@ NOTICE: drop cascades to 2 other objects
33683368
DETAIL: drop cascades to table tststats.priv_test_tbl
33693369
drop cascades to view tststats.priv_test_view
33703370
DROP USER regress_stats_user1;
3371+
CREATE TABLE grouping_unique (x integer);
3372+
INSERT INTO grouping_unique (x) SELECT gs FROM generate_series(1,1000) AS gs;
3373+
ANALYZE grouping_unique;
3374+
-- Optimiser treat GROUP-BY operator as an 'uniqueser' of the input
3375+
SELECT * FROM check_estimated_rows('
3376+
SELECT * FROM generate_series(1, 1) t1 LEFT JOIN (
3377+
SELECT x FROM grouping_unique t2 GROUP BY x) AS q1
3378+
ON t1.t1 = q1.x;
3379+
');
3380+
estimated | actual
3381+
-----------+--------
3382+
1 | 1
3383+
(1 row)
3384+
3385+
DROP TABLE grouping_unique;

src/test/regress/sql/stats_ext.sql

+12
Original file line numberDiff line numberDiff line change
@@ -1707,3 +1707,15 @@ RESET SESSION AUTHORIZATION;
17071707
DROP TABLE stats_ext_tbl;
17081708
DROP SCHEMA tststats CASCADE;
17091709
DROP USER regress_stats_user1;
1710+
1711+
CREATE TABLE grouping_unique (x integer);
1712+
INSERT INTO grouping_unique (x) SELECT gs FROM generate_series(1,1000) AS gs;
1713+
ANALYZE grouping_unique;
1714+
1715+
-- Optimiser treat GROUP-BY operator as an 'uniqueser' of the input
1716+
SELECT * FROM check_estimated_rows('
1717+
SELECT * FROM generate_series(1, 1) t1 LEFT JOIN (
1718+
SELECT x FROM grouping_unique t2 GROUP BY x) AS q1
1719+
ON t1.t1 = q1.x;
1720+
');
1721+
DROP TABLE grouping_unique;

0 commit comments

Comments
 (0)