15
15
*
16
16
*
17
17
* IDENTIFICATION
18
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.165 2004/08/30 02:54:39 momjian Exp $
18
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
19
19
*
20
20
*-------------------------------------------------------------------------
21
21
*/
@@ -1869,6 +1869,71 @@ mergejoinscansel(Query *root, Node *clause,
1869
1869
ReleaseVariableStats (rightvar );
1870
1870
}
1871
1871
1872
+
1873
+ /*
1874
+ * Helper routine for estimate_num_groups: add an item to a list of
1875
+ * GroupVarInfos, but only if it's not known equal to any of the existing
1876
+ * entries.
1877
+ */
1878
+ typedef struct
1879
+ {
1880
+ Node * var ; /* might be an expression, not just a Var */
1881
+ RelOptInfo * rel ; /* relation it belongs to */
1882
+ double ndistinct ; /* # distinct values */
1883
+ } GroupVarInfo ;
1884
+
1885
+ static List *
1886
+ add_unique_group_var (Query * root , List * varinfos ,
1887
+ Node * var , VariableStatData * vardata )
1888
+ {
1889
+ GroupVarInfo * varinfo ;
1890
+ double ndistinct ;
1891
+ ListCell * lc ;
1892
+
1893
+ ndistinct = get_variable_numdistinct (vardata );
1894
+
1895
+ /* cannot use foreach here because of possible list_delete */
1896
+ lc = list_head (varinfos );
1897
+ while (lc )
1898
+ {
1899
+ varinfo = (GroupVarInfo * ) lfirst (lc );
1900
+
1901
+ /* must advance lc before list_delete possibly pfree's it */
1902
+ lc = lnext (lc );
1903
+
1904
+ /* Drop exact duplicates */
1905
+ if (equal (var , varinfo -> var ))
1906
+ return varinfos ;
1907
+
1908
+ /*
1909
+ * Drop known-equal vars, but only if they belong to different
1910
+ * relations (see comments for estimate_num_groups)
1911
+ */
1912
+ if (vardata -> rel != varinfo -> rel &&
1913
+ exprs_known_equal (root , var , varinfo -> var ))
1914
+ {
1915
+ if (varinfo -> ndistinct <= ndistinct )
1916
+ {
1917
+ /* Keep older item, forget new one */
1918
+ return varinfos ;
1919
+ }
1920
+ else
1921
+ {
1922
+ /* Delete the older item */
1923
+ varinfos = list_delete_ptr (varinfos , varinfo );
1924
+ }
1925
+ }
1926
+ }
1927
+
1928
+ varinfo = (GroupVarInfo * ) palloc (sizeof (GroupVarInfo ));
1929
+
1930
+ varinfo -> var = var ;
1931
+ varinfo -> rel = vardata -> rel ;
1932
+ varinfo -> ndistinct = ndistinct ;
1933
+ varinfos = lappend (varinfos , varinfo );
1934
+ return varinfos ;
1935
+ }
1936
+
1872
1937
/*
1873
1938
* estimate_num_groups - Estimate number of groups in a grouped query
1874
1939
*
@@ -1900,6 +1965,9 @@ mergejoinscansel(Query *root, Node *clause,
1900
1965
* increase the number of distinct values (unless it is volatile,
1901
1966
* which we consider unlikely for grouping), but it probably won't
1902
1967
* reduce the number of distinct values much either.
1968
+ * As a special case, if a GROUP BY expression can be matched to an
1969
+ * expressional index for which we have statistics, then we treat the
1970
+ * whole expression as though it were just a Var.
1903
1971
* 2. If the list contains Vars of different relations that are known equal
1904
1972
* due to equijoin clauses, then drop all but one of the Vars from each
1905
1973
* known-equal set, keeping the one with smallest estimated # of values
@@ -1926,25 +1994,44 @@ mergejoinscansel(Query *root, Node *clause,
1926
1994
double
1927
1995
estimate_num_groups (Query * root , List * groupExprs , double input_rows )
1928
1996
{
1929
- List * allvars = NIL ;
1930
1997
List * varinfos = NIL ;
1931
1998
double numdistinct ;
1932
1999
ListCell * l ;
1933
- typedef struct
1934
- { /* varinfos is a List of these */
1935
- Var * var ;
1936
- double ndistinct ;
1937
- } MyVarInfo ;
1938
2000
1939
2001
/* We should not be called unless query has GROUP BY (or DISTINCT) */
1940
2002
Assert (groupExprs != NIL );
1941
2003
1942
- /* Step 1: get the unique Vars used */
2004
+ /*
2005
+ * Steps 1/2: find the unique Vars used, treating an expression as a Var
2006
+ * if we can find stats for it. For each one, record the statistical
2007
+ * estimate of number of distinct values (total in its table, without
2008
+ * regard for filtering).
2009
+ */
1943
2010
foreach (l , groupExprs )
1944
2011
{
1945
2012
Node * groupexpr = (Node * ) lfirst (l );
2013
+ VariableStatData vardata ;
1946
2014
List * varshere ;
2015
+ ListCell * l2 ;
2016
+
2017
+ /*
2018
+ * If examine_variable is able to deduce anything about the GROUP BY
2019
+ * expression, treat it as a single variable even if it's really more
2020
+ * complicated.
2021
+ */
2022
+ examine_variable (root , groupexpr , 0 , & vardata );
2023
+ if (vardata .statsTuple != NULL || vardata .isunique )
2024
+ {
2025
+ varinfos = add_unique_group_var (root , varinfos ,
2026
+ groupexpr , & vardata );
2027
+ ReleaseVariableStats (vardata );
2028
+ continue ;
2029
+ }
2030
+ ReleaseVariableStats (vardata );
1947
2031
2032
+ /*
2033
+ * Else pull out the component Vars
2034
+ */
1948
2035
varshere = pull_var_clause (groupexpr , false);
1949
2036
1950
2037
/*
@@ -1959,70 +2046,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
1959
2046
return input_rows ;
1960
2047
continue ;
1961
2048
}
1962
- allvars = list_concat (allvars , varshere );
1963
- }
1964
-
1965
- /* If now no Vars, we must have an all-constant GROUP BY list. */
1966
- if (allvars == NIL )
1967
- return 1.0 ;
1968
-
1969
- /* Use list_union() to discard duplicates */
1970
- allvars = list_union (NIL , allvars );
1971
-
1972
- /*
1973
- * Step 2: acquire statistical estimate of number of distinct values
1974
- * of each Var (total in its table, without regard for filtering).
1975
- * Also, detect known-equal Vars and discard the ones we don't want.
1976
- */
1977
- foreach (l , allvars )
1978
- {
1979
- Var * var = (Var * ) lfirst (l );
1980
- VariableStatData vardata ;
1981
- double ndistinct ;
1982
- bool keep = true;
1983
- ListCell * l2 ;
1984
-
1985
- examine_variable (root , (Node * ) var , 0 , & vardata );
1986
- ndistinct = get_variable_numdistinct (& vardata );
1987
- ReleaseVariableStats (vardata );
1988
-
1989
- /* cannot use foreach here because of possible list_delete */
1990
- l2 = list_head (varinfos );
1991
- while (l2 )
1992
- {
1993
- MyVarInfo * varinfo = (MyVarInfo * ) lfirst (l2 );
1994
-
1995
- /* must advance l2 before list_delete possibly pfree's it */
1996
- l2 = lnext (l2 );
1997
-
1998
- if (var -> varno != varinfo -> var -> varno &&
1999
- exprs_known_equal (root , (Node * ) var , (Node * ) varinfo -> var ))
2000
- {
2001
- /* Found a match */
2002
- if (varinfo -> ndistinct <= ndistinct )
2003
- {
2004
- /* Keep older item, forget new one */
2005
- keep = false;
2006
- break ;
2007
- }
2008
- else
2009
- {
2010
- /* Delete the older item */
2011
- varinfos = list_delete_ptr (varinfos , varinfo );
2012
- }
2013
- }
2014
- }
2015
2049
2016
- if (keep )
2050
+ /*
2051
+ * Else add variables to varinfos list
2052
+ */
2053
+ foreach (l2 , varshere )
2017
2054
{
2018
- MyVarInfo * varinfo = (MyVarInfo * ) palloc ( sizeof ( MyVarInfo ) );
2055
+ Node * var = (Node * ) lfirst ( l2 );
2019
2056
2020
- varinfo -> var = var ;
2021
- varinfo -> ndistinct = ndistinct ;
2022
- varinfos = lcons ( varinfo , varinfos );
2057
+ examine_variable ( root , var , 0 , & vardata ) ;
2058
+ varinfos = add_unique_group_var ( root , varinfos , var , & vardata ) ;
2059
+ ReleaseVariableStats ( vardata );
2023
2060
}
2024
2061
}
2025
2062
2063
+ /* If now no Vars, we must have an all-constant GROUP BY list. */
2064
+ if (varinfos == NIL )
2065
+ return 1.0 ;
2066
+
2026
2067
/*
2027
2068
* Steps 3/4: group Vars by relation and estimate total numdistinct.
2028
2069
*
@@ -2031,25 +2072,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
2031
2072
* these Vars from the newvarinfos list for the next iteration. This
2032
2073
* is the easiest way to group Vars of same rel together.
2033
2074
*/
2034
- Assert (varinfos != NIL );
2035
2075
numdistinct = 1.0 ;
2036
2076
2037
2077
do
2038
2078
{
2039
- MyVarInfo * varinfo1 = (MyVarInfo * ) linitial (varinfos );
2040
- RelOptInfo * rel = find_base_rel ( root , varinfo1 -> var -> varno ) ;
2079
+ GroupVarInfo * varinfo1 = (GroupVarInfo * ) linitial (varinfos );
2080
+ RelOptInfo * rel = varinfo1 -> rel ;
2041
2081
double reldistinct = varinfo1 -> ndistinct ;
2042
2082
List * newvarinfos = NIL ;
2043
2083
2044
2084
/*
2045
- * Get the largest numdistinct estimate of the Vars for this rel.
2085
+ * Get the product of numdistinct estimates of the Vars for this rel.
2046
2086
* Also, construct new varinfos list of remaining Vars.
2047
2087
*/
2048
2088
for_each_cell (l , lnext (list_head (varinfos )))
2049
2089
{
2050
- MyVarInfo * varinfo2 = (MyVarInfo * ) lfirst (l );
2090
+ GroupVarInfo * varinfo2 = (GroupVarInfo * ) lfirst (l );
2051
2091
2052
- if (varinfo2 -> var -> varno == varinfo1 -> var -> varno )
2092
+ if (varinfo2 -> rel == varinfo1 -> rel )
2053
2093
reldistinct *= varinfo2 -> ndistinct ;
2054
2094
else
2055
2095
{
0 commit comments