@@ -1689,10 +1689,14 @@ typedef struct
1689
1689
} CompareScalarsContext ;
1690
1690
1691
1691
1692
- static void compute_minimal_stats (VacAttrStatsP stats ,
1692
+ static void compute_trivial_stats (VacAttrStatsP stats ,
1693
1693
AnalyzeAttrFetchFunc fetchfunc ,
1694
1694
int samplerows ,
1695
1695
double totalrows );
1696
+ static void compute_distinct_stats (VacAttrStatsP stats ,
1697
+ AnalyzeAttrFetchFunc fetchfunc ,
1698
+ int samplerows ,
1699
+ double totalrows );
1696
1700
static void compute_scalar_stats (VacAttrStatsP stats ,
1697
1701
AnalyzeAttrFetchFunc fetchfunc ,
1698
1702
int samplerows ,
@@ -1723,21 +1727,17 @@ std_typanalyze(VacAttrStats *stats)
1723
1727
& ltopr , & eqopr , NULL ,
1724
1728
NULL );
1725
1729
1726
- /* If column has no "=" operator, we can't do much of anything */
1727
- if (!OidIsValid (eqopr ))
1728
- return false;
1729
-
1730
1730
/* Save the operator info for compute_stats routines */
1731
1731
mystats = (StdAnalyzeData * ) palloc (sizeof (StdAnalyzeData ));
1732
1732
mystats -> eqopr = eqopr ;
1733
- mystats -> eqfunc = get_opcode (eqopr );
1733
+ mystats -> eqfunc = OidIsValid ( eqopr ) ? get_opcode (eqopr ) : InvalidOid ;
1734
1734
mystats -> ltopr = ltopr ;
1735
1735
stats -> extra_data = mystats ;
1736
1736
1737
1737
/*
1738
1738
* Determine which standard statistics algorithm to use
1739
1739
*/
1740
- if (OidIsValid (ltopr ))
1740
+ if (OidIsValid (eqopr ) && OidIsValid ( ltopr ))
1741
1741
{
1742
1742
/* Seems to be a scalar datatype */
1743
1743
stats -> compute_stats = compute_scalar_stats ;
@@ -1762,19 +1762,109 @@ std_typanalyze(VacAttrStats *stats)
1762
1762
*/
1763
1763
stats -> minrows = 300 * attr -> attstattarget ;
1764
1764
}
1765
+ else if (OidIsValid (eqopr ))
1766
+ {
1767
+ /* We can still recognize distinct values */
1768
+ stats -> compute_stats = compute_distinct_stats ;
1769
+ /* Might as well use the same minrows as above */
1770
+ stats -> minrows = 300 * attr -> attstattarget ;
1771
+ }
1765
1772
else
1766
1773
{
1767
- /* Can't do much but the minimal stuff */
1768
- stats -> compute_stats = compute_minimal_stats ;
1774
+ /* Can't do much but the trivial stuff */
1775
+ stats -> compute_stats = compute_trivial_stats ;
1769
1776
/* Might as well use the same minrows as above */
1770
1777
stats -> minrows = 300 * attr -> attstattarget ;
1771
1778
}
1772
1779
1773
1780
return true;
1774
1781
}
1775
1782
1783
+
1784
+ /*
1785
+ * compute_trivial_stats() -- compute very basic column statistics
1786
+ *
1787
+ * We use this when we cannot find a hash "=" operator for the datatype.
1788
+ *
1789
+ * We determine the fraction of non-null rows and the average datum width.
1790
+ */
1791
+ static void
1792
+ compute_trivial_stats (VacAttrStatsP stats ,
1793
+ AnalyzeAttrFetchFunc fetchfunc ,
1794
+ int samplerows ,
1795
+ double totalrows )
1796
+ {
1797
+ int i ;
1798
+ int null_cnt = 0 ;
1799
+ int nonnull_cnt = 0 ;
1800
+ double total_width = 0 ;
1801
+ bool is_varlena = (!stats -> attrtype -> typbyval &&
1802
+ stats -> attrtype -> typlen == -1 );
1803
+ bool is_varwidth = (!stats -> attrtype -> typbyval &&
1804
+ stats -> attrtype -> typlen < 0 );
1805
+
1806
+ for (i = 0 ; i < samplerows ; i ++ )
1807
+ {
1808
+ Datum value ;
1809
+ bool isnull ;
1810
+
1811
+ vacuum_delay_point ();
1812
+
1813
+ value = fetchfunc (stats , i , & isnull );
1814
+
1815
+ /* Check for null/nonnull */
1816
+ if (isnull )
1817
+ {
1818
+ null_cnt ++ ;
1819
+ continue ;
1820
+ }
1821
+ nonnull_cnt ++ ;
1822
+
1823
+ /*
1824
+ * If it's a variable-width field, add up widths for average width
1825
+ * calculation. Note that if the value is toasted, we use the toasted
1826
+ * width. We don't bother with this calculation if it's a fixed-width
1827
+ * type.
1828
+ */
1829
+ if (is_varlena )
1830
+ {
1831
+ total_width += VARSIZE_ANY (DatumGetPointer (value ));
1832
+ }
1833
+ else if (is_varwidth )
1834
+ {
1835
+ /* must be cstring */
1836
+ total_width += strlen (DatumGetCString (value )) + 1 ;
1837
+ }
1838
+ }
1839
+
1840
+ /* We can only compute average width if we found some non-null values. */
1841
+ if (nonnull_cnt > 0 )
1842
+ {
1843
+ stats -> stats_valid = true;
1844
+ /* Do the simple null-frac and width stats */
1845
+ stats -> stanullfrac = (double ) null_cnt / (double ) samplerows ;
1846
+ if (is_varwidth )
1847
+ stats -> stawidth = total_width / (double ) nonnull_cnt ;
1848
+ else
1849
+ stats -> stawidth = stats -> attrtype -> typlen ;
1850
+ stats -> stadistinct = 0.0 ; /* "unknown" */
1851
+ }
1852
+ else if (null_cnt > 0 )
1853
+ {
1854
+ /* We found only nulls; assume the column is entirely null */
1855
+ stats -> stats_valid = true;
1856
+ stats -> stanullfrac = 1.0 ;
1857
+ if (is_varwidth )
1858
+ stats -> stawidth = 0 ; /* "unknown" */
1859
+ else
1860
+ stats -> stawidth = stats -> attrtype -> typlen ;
1861
+ stats -> stadistinct = 0.0 ; /* "unknown" */
1862
+ }
1863
+ }
1864
+
1865
+
1776
1866
/*
1777
- * compute_minimal_stats () -- compute minimal column statistics
1867
+ * compute_distinct_stats () -- compute column statistics including ndistinct
1778
1868
*
1779
1869
* We use this when we can find only an "=" operator for the datatype.
1780
1870
*
@@ -1789,10 +1879,10 @@ std_typanalyze(VacAttrStats *stats)
1789
1879
* depend mainly on the length of the list we are willing to keep.
1790
1880
*/
1791
1881
static void
1792
- compute_minimal_stats (VacAttrStatsP stats ,
1793
- AnalyzeAttrFetchFunc fetchfunc ,
1794
- int samplerows ,
1795
- double totalrows )
1882
+ compute_distinct_stats (VacAttrStatsP stats ,
1883
+ AnalyzeAttrFetchFunc fetchfunc ,
1884
+ int samplerows ,
1885
+ double totalrows )
1796
1886
{
1797
1887
int i ;
1798
1888
int null_cnt = 0 ;
0 commit comments