Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 84c7cef

Browse files
committed
Fix estimate_num_groups so that it can use expression-index statistics
when an expression index matches a GROUP BY item.
1 parent 089fb6c commit 84c7cef

File tree

1 file changed

+112
-72
lines changed

1 file changed

+112
-72
lines changed

src/backend/utils/adt/selfuncs.c

+112-72
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.165 2004/08/30 02:54:39 momjian Exp $
18+
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -1869,6 +1869,71 @@ mergejoinscansel(Query *root, Node *clause,
18691869
ReleaseVariableStats(rightvar);
18701870
}
18711871

1872+
1873+
/*
1874+
* Helper routine for estimate_num_groups: add an item to a list of
1875+
* GroupVarInfos, but only if it's not known equal to any of the existing
1876+
* entries.
1877+
*/
1878+
typedef struct
1879+
{
1880+
Node *var; /* might be an expression, not just a Var */
1881+
RelOptInfo *rel; /* relation it belongs to */
1882+
double ndistinct; /* # distinct values */
1883+
} GroupVarInfo;
1884+
1885+
static List *
1886+
add_unique_group_var(Query *root, List *varinfos,
1887+
Node *var, VariableStatData *vardata)
1888+
{
1889+
GroupVarInfo *varinfo;
1890+
double ndistinct;
1891+
ListCell *lc;
1892+
1893+
ndistinct = get_variable_numdistinct(vardata);
1894+
1895+
/* cannot use foreach here because of possible list_delete */
1896+
lc = list_head(varinfos);
1897+
while (lc)
1898+
{
1899+
varinfo = (GroupVarInfo *) lfirst(lc);
1900+
1901+
/* must advance lc before list_delete possibly pfree's it */
1902+
lc = lnext(lc);
1903+
1904+
/* Drop exact duplicates */
1905+
if (equal(var, varinfo->var))
1906+
return varinfos;
1907+
1908+
/*
1909+
* Drop known-equal vars, but only if they belong to different
1910+
* relations (see comments for estimate_num_groups)
1911+
*/
1912+
if (vardata->rel != varinfo->rel &&
1913+
exprs_known_equal(root, var, varinfo->var))
1914+
{
1915+
if (varinfo->ndistinct <= ndistinct)
1916+
{
1917+
/* Keep older item, forget new one */
1918+
return varinfos;
1919+
}
1920+
else
1921+
{
1922+
/* Delete the older item */
1923+
varinfos = list_delete_ptr(varinfos, varinfo);
1924+
}
1925+
}
1926+
}
1927+
1928+
varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
1929+
1930+
varinfo->var = var;
1931+
varinfo->rel = vardata->rel;
1932+
varinfo->ndistinct = ndistinct;
1933+
varinfos = lappend(varinfos, varinfo);
1934+
return varinfos;
1935+
}
1936+
18721937
/*
18731938
* estimate_num_groups - Estimate number of groups in a grouped query
18741939
*
@@ -1900,6 +1965,9 @@ mergejoinscansel(Query *root, Node *clause,
19001965
* increase the number of distinct values (unless it is volatile,
19011966
* which we consider unlikely for grouping), but it probably won't
19021967
* reduce the number of distinct values much either.
1968+
* As a special case, if a GROUP BY expression can be matched to an
1969+
* expressional index for which we have statistics, then we treat the
1970+
* whole expression as though it were just a Var.
19031971
* 2. If the list contains Vars of different relations that are known equal
19041972
* due to equijoin clauses, then drop all but one of the Vars from each
19051973
* known-equal set, keeping the one with smallest estimated # of values
@@ -1926,25 +1994,44 @@ mergejoinscansel(Query *root, Node *clause,
19261994
double
19271995
estimate_num_groups(Query *root, List *groupExprs, double input_rows)
19281996
{
1929-
List *allvars = NIL;
19301997
List *varinfos = NIL;
19311998
double numdistinct;
19321999
ListCell *l;
1933-
typedef struct
1934-
{ /* varinfos is a List of these */
1935-
Var *var;
1936-
double ndistinct;
1937-
} MyVarInfo;
19382000

19392001
/* We should not be called unless query has GROUP BY (or DISTINCT) */
19402002
Assert(groupExprs != NIL);
19412003

1942-
/* Step 1: get the unique Vars used */
2004+
/*
2005+
* Steps 1/2: find the unique Vars used, treating an expression as a Var
2006+
* if we can find stats for it. For each one, record the statistical
2007+
* estimate of number of distinct values (total in its table, without
2008+
* regard for filtering).
2009+
*/
19432010
foreach(l, groupExprs)
19442011
{
19452012
Node *groupexpr = (Node *) lfirst(l);
2013+
VariableStatData vardata;
19462014
List *varshere;
2015+
ListCell *l2;
2016+
2017+
/*
2018+
* If examine_variable is able to deduce anything about the GROUP BY
2019+
* expression, treat it as a single variable even if it's really more
2020+
* complicated.
2021+
*/
2022+
examine_variable(root, groupexpr, 0, &vardata);
2023+
if (vardata.statsTuple != NULL || vardata.isunique)
2024+
{
2025+
varinfos = add_unique_group_var(root, varinfos,
2026+
groupexpr, &vardata);
2027+
ReleaseVariableStats(vardata);
2028+
continue;
2029+
}
2030+
ReleaseVariableStats(vardata);
19472031

2032+
/*
2033+
* Else pull out the component Vars
2034+
*/
19482035
varshere = pull_var_clause(groupexpr, false);
19492036

19502037
/*
@@ -1959,70 +2046,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
19592046
return input_rows;
19602047
continue;
19612048
}
1962-
allvars = list_concat(allvars, varshere);
1963-
}
1964-
1965-
/* If now no Vars, we must have an all-constant GROUP BY list. */
1966-
if (allvars == NIL)
1967-
return 1.0;
1968-
1969-
/* Use list_union() to discard duplicates */
1970-
allvars = list_union(NIL, allvars);
1971-
1972-
/*
1973-
* Step 2: acquire statistical estimate of number of distinct values
1974-
* of each Var (total in its table, without regard for filtering).
1975-
* Also, detect known-equal Vars and discard the ones we don't want.
1976-
*/
1977-
foreach(l, allvars)
1978-
{
1979-
Var *var = (Var *) lfirst(l);
1980-
VariableStatData vardata;
1981-
double ndistinct;
1982-
bool keep = true;
1983-
ListCell *l2;
1984-
1985-
examine_variable(root, (Node *) var, 0, &vardata);
1986-
ndistinct = get_variable_numdistinct(&vardata);
1987-
ReleaseVariableStats(vardata);
1988-
1989-
/* cannot use foreach here because of possible list_delete */
1990-
l2 = list_head(varinfos);
1991-
while (l2)
1992-
{
1993-
MyVarInfo *varinfo = (MyVarInfo *) lfirst(l2);
1994-
1995-
/* must advance l2 before list_delete possibly pfree's it */
1996-
l2 = lnext(l2);
1997-
1998-
if (var->varno != varinfo->var->varno &&
1999-
exprs_known_equal(root, (Node *) var, (Node *) varinfo->var))
2000-
{
2001-
/* Found a match */
2002-
if (varinfo->ndistinct <= ndistinct)
2003-
{
2004-
/* Keep older item, forget new one */
2005-
keep = false;
2006-
break;
2007-
}
2008-
else
2009-
{
2010-
/* Delete the older item */
2011-
varinfos = list_delete_ptr(varinfos, varinfo);
2012-
}
2013-
}
2014-
}
20152049

2016-
if (keep)
2050+
/*
2051+
* Else add variables to varinfos list
2052+
*/
2053+
foreach(l2, varshere)
20172054
{
2018-
MyVarInfo *varinfo = (MyVarInfo *) palloc(sizeof(MyVarInfo));
2055+
Node *var = (Node *) lfirst(l2);
20192056

2020-
varinfo->var = var;
2021-
varinfo->ndistinct = ndistinct;
2022-
varinfos = lcons(varinfo, varinfos);
2057+
examine_variable(root, var, 0, &vardata);
2058+
varinfos = add_unique_group_var(root, varinfos, var, &vardata);
2059+
ReleaseVariableStats(vardata);
20232060
}
20242061
}
20252062

2063+
/* If now no Vars, we must have an all-constant GROUP BY list. */
2064+
if (varinfos == NIL)
2065+
return 1.0;
2066+
20262067
/*
20272068
* Steps 3/4: group Vars by relation and estimate total numdistinct.
20282069
*
@@ -2031,25 +2072,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
20312072
* these Vars from the newvarinfos list for the next iteration. This
20322073
* is the easiest way to group Vars of same rel together.
20332074
*/
2034-
Assert(varinfos != NIL);
20352075
numdistinct = 1.0;
20362076

20372077
do
20382078
{
2039-
MyVarInfo *varinfo1 = (MyVarInfo *) linitial(varinfos);
2040-
RelOptInfo *rel = find_base_rel(root, varinfo1->var->varno);
2079+
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
2080+
RelOptInfo *rel = varinfo1->rel;
20412081
double reldistinct = varinfo1->ndistinct;
20422082
List *newvarinfos = NIL;
20432083

20442084
/*
2045-
* Get the largest numdistinct estimate of the Vars for this rel.
2085+
* Get the product of numdistinct estimates of the Vars for this rel.
20462086
* Also, construct new varinfos list of remaining Vars.
20472087
*/
20482088
for_each_cell(l, lnext(list_head(varinfos)))
20492089
{
2050-
MyVarInfo *varinfo2 = (MyVarInfo *) lfirst(l);
2090+
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
20512091

2052-
if (varinfo2->var->varno == varinfo1->var->varno)
2092+
if (varinfo2->rel == varinfo1->rel)
20532093
reldistinct *= varinfo2->ndistinct;
20542094
else
20552095
{

0 commit comments

Comments
 (0)