Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 82e1ba7

Browse files
committed
Make ANALYZE compute basic statistics even for types with no "=" operator.
Previously, ANALYZE simply ignored columns of datatypes that have neither a btree nor hash opclass (which means they have no recognized equality operator). Without a notion of equality, we can't identify most-common values nor estimate the number of distinct values. But we can still count nulls and compute the average physical column width, and those stats might be of value. Moreover there are some tools out there that don't work so well if rows are missing from pg_statistic. So let's add suitable logic for this case. While this is arguably a bug fix, it also has the potential to change query plans, and the gain seems not worth taking a risk of that in stable branches. So back-patch into 9.5 but not further. Oleksandr Shulgin, rewritten a bit by me.
1 parent a0d9f6e commit 82e1ba7

File tree

1 file changed

+104
-14
lines changed

1 file changed

+104
-14
lines changed

src/backend/commands/analyze.c

+104-14
Original file line numberDiff line numberDiff line change
@@ -1689,10 +1689,14 @@ typedef struct
16891689
} CompareScalarsContext;
16901690

16911691

1692-
static void compute_minimal_stats(VacAttrStatsP stats,
1692+
static void compute_trivial_stats(VacAttrStatsP stats,
16931693
AnalyzeAttrFetchFunc fetchfunc,
16941694
int samplerows,
16951695
double totalrows);
1696+
static void compute_distinct_stats(VacAttrStatsP stats,
1697+
AnalyzeAttrFetchFunc fetchfunc,
1698+
int samplerows,
1699+
double totalrows);
16961700
static void compute_scalar_stats(VacAttrStatsP stats,
16971701
AnalyzeAttrFetchFunc fetchfunc,
16981702
int samplerows,
@@ -1723,21 +1727,17 @@ std_typanalyze(VacAttrStats *stats)
17231727
&ltopr, &eqopr, NULL,
17241728
NULL);
17251729

1726-
/* If column has no "=" operator, we can't do much of anything */
1727-
if (!OidIsValid(eqopr))
1728-
return false;
1729-
17301730
/* Save the operator info for compute_stats routines */
17311731
mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
17321732
mystats->eqopr = eqopr;
1733-
mystats->eqfunc = get_opcode(eqopr);
1733+
mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
17341734
mystats->ltopr = ltopr;
17351735
stats->extra_data = mystats;
17361736

17371737
/*
17381738
* Determine which standard statistics algorithm to use
17391739
*/
1740-
if (OidIsValid(ltopr))
1740+
if (OidIsValid(eqopr) && OidIsValid(ltopr))
17411741
{
17421742
/* Seems to be a scalar datatype */
17431743
stats->compute_stats = compute_scalar_stats;
@@ -1762,19 +1762,109 @@ std_typanalyze(VacAttrStats *stats)
17621762
*/
17631763
stats->minrows = 300 * attr->attstattarget;
17641764
}
1765+
else if (OidIsValid(eqopr))
1766+
{
1767+
/* We can still recognize distinct values */
1768+
stats->compute_stats = compute_distinct_stats;
1769+
/* Might as well use the same minrows as above */
1770+
stats->minrows = 300 * attr->attstattarget;
1771+
}
17651772
else
17661773
{
1767-
/* Can't do much but the minimal stuff */
1768-
stats->compute_stats = compute_minimal_stats;
1774+
/* Can't do much but the trivial stuff */
1775+
stats->compute_stats = compute_trivial_stats;
17691776
/* Might as well use the same minrows as above */
17701777
stats->minrows = 300 * attr->attstattarget;
17711778
}
17721779

17731780
return true;
17741781
}
17751782

1783+
1784+
/*
1785+
* compute_trivial_stats() -- compute very basic column statistics
1786+
*
1787+
* We use this when we cannot find any "=" operator for the datatype (std_typanalyze selects it when eqopr is invalid).
1788+
*
1789+
* We determine the fraction of null rows and the average datum width; stadistinct is set to 0.0 ("unknown"). If the sample contains no rows at all, the stats fields are left untouched. totalrows is not used here.
1790+
*/
1791+
static void
1792+
compute_trivial_stats(VacAttrStatsP stats,
1793+
AnalyzeAttrFetchFunc fetchfunc,
1794+
int samplerows,
1795+
double totalrows)
1796+
{
1797+
int i;
1798+
int null_cnt = 0;
1799+
int nonnull_cnt = 0;
1800+
double total_width = 0;
1801+
bool is_varlena = (!stats->attrtype->typbyval &&
1802+
stats->attrtype->typlen == -1);
1803+
bool is_varwidth = (!stats->attrtype->typbyval &&
1804+
stats->attrtype->typlen < 0); /* any negative typlen; the non-varlena case is handled as cstring below */
1805+
1806+
for (i = 0; i < samplerows; i++)
1807+
{
1808+
Datum value;
1809+
bool isnull;
1810+
1811+
vacuum_delay_point();
1812+
1813+
value = fetchfunc(stats, i, &isnull);
1814+
1815+
/* Count nulls separately; they contribute nothing to the width total */
1816+
if (isnull)
1817+
{
1818+
null_cnt++;
1819+
continue;
1820+
}
1821+
nonnull_cnt++;
1822+
1823+
/*
1824+
* If it's a variable-width field, add up widths for average width
1825+
* calculation. Note that if the value is toasted, we use the toasted
1826+
* width. We don't bother with this calculation if it's a fixed-width
1827+
* type.
1828+
*/
1829+
if (is_varlena)
1830+
{
1831+
total_width += VARSIZE_ANY(DatumGetPointer(value));
1832+
}
1833+
else if (is_varwidth)
1834+
{
1835+
/* must be cstring; the +1 counts the terminating NUL */
1836+
total_width += strlen(DatumGetCString(value)) + 1;
1837+
}
1838+
}
1839+
1840+
/* We can only compute average width if we found some non-null values. */
1841+
if (nonnull_cnt > 0)
1842+
{
1843+
stats->stats_valid = true;
1844+
/* Null fraction is relative to the whole sample; width is averaged over non-null values only */
1845+
stats->stanullfrac = (double) null_cnt / (double) samplerows;
1846+
if (is_varwidth)
1847+
stats->stawidth = total_width / (double) nonnull_cnt;
1848+
else
1849+
stats->stawidth = stats->attrtype->typlen;
1850+
stats->stadistinct = 0.0; /* "unknown" */
1851+
}
1852+
else if (null_cnt > 0)
1853+
{
1854+
/* We found only nulls; assume the column is entirely null */
1855+
stats->stats_valid = true;
1856+
stats->stanullfrac = 1.0;
1857+
if (is_varwidth)
1858+
stats->stawidth = 0; /* "unknown" */
1859+
else
1860+
stats->stawidth = stats->attrtype->typlen;
1861+
stats->stadistinct = 0.0; /* "unknown" */
1862+
}
1863+
}
1864+
1865+
17761866
/*
1777-
* compute_minimal_stats() -- compute minimal column statistics
1867+
* compute_distinct_stats() -- compute column statistics including ndistinct
17781868
*
17791869
* We use this when we can find only an "=" operator for the datatype.
17801870
*
@@ -1789,10 +1879,10 @@ std_typanalyze(VacAttrStats *stats)
17891879
* depend mainly on the length of the list we are willing to keep.
17901880
*/
17911881
static void
1792-
compute_minimal_stats(VacAttrStatsP stats,
1793-
AnalyzeAttrFetchFunc fetchfunc,
1794-
int samplerows,
1795-
double totalrows)
1882+
compute_distinct_stats(VacAttrStatsP stats,
1883+
AnalyzeAttrFetchFunc fetchfunc,
1884+
int samplerows,
1885+
double totalrows)
17961886
{
17971887
int i;
17981888
int null_cnt = 0;

0 commit comments

Comments
 (0)