Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 67becf8

Browse files
committed
Fix ANALYZE's ancient deficiency of not trying to collect stats for expression
indexes when the index column type (the opclass opckeytype) is different from the expression's datatype. When coded, this limitation wasn't worth worrying about because we had no intelligence to speak of in stats collection for the datatypes used by such opclasses. However, now that there's non-toy estimation capability for tsvector queries, it amounts to a bug that ANALYZE fails to do this. The fix changes struct VacAttrStats, and therefore constitutes an API break for custom typanalyze functions. Therefore we can't back-patch it into released branches, but it was agreed that 9.0 isn't yet frozen hard enough to make such a change unacceptable. Ergo, back-patch to 9.0 but no further. The API break had better be mentioned in 9.0 release notes.
1 parent 97532f7 commit 67becf8

File tree

2 files changed

+63
-44
lines changed

2 files changed

+63
-44
lines changed

src/backend/commands/analyze.c

+50-38
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.152 2010/02/26 02:00:37 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.153 2010/08/01 22:38:11 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -92,7 +92,8 @@ static void compute_index_stats(Relation onerel, double totalrows,
9292
AnlIndexData *indexdata, int nindexes,
9393
HeapTuple *rows, int numrows,
9494
MemoryContext col_context);
95-
static VacAttrStats *examine_attribute(Relation onerel, int attnum);
95+
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
96+
Node *index_expr);
9697
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
9798
int targrows, double *totalrows, double *totaldeadrows);
9899
static double random_fract(void);
@@ -339,7 +340,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
339340
(errcode(ERRCODE_UNDEFINED_COLUMN),
340341
errmsg("column \"%s\" of relation \"%s\" does not exist",
341342
col, RelationGetRelationName(onerel))));
342-
vacattrstats[tcnt] = examine_attribute(onerel, i);
343+
vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
343344
if (vacattrstats[tcnt] != NULL)
344345
tcnt++;
345346
}
@@ -353,7 +354,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
353354
tcnt = 0;
354355
for (i = 1; i <= attr_cnt; i++)
355356
{
356-
vacattrstats[tcnt] = examine_attribute(onerel, i);
357+
vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
357358
if (vacattrstats[tcnt] != NULL)
358359
tcnt++;
359360
}
@@ -407,21 +408,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
407408
elog(ERROR, "too few entries in indexprs list");
408409
indexkey = (Node *) lfirst(indexpr_item);
409410
indexpr_item = lnext(indexpr_item);
410-
411-
/*
412-
* Can't analyze if the opclass uses a storage type
413-
* different from the expression result type. We'd get
414-
* confused because the type shown in pg_attribute for
415-
* the index column doesn't match what we are getting
416-
* from the expression. Perhaps this can be fixed
417-
* someday, but for now, punt.
418-
*/
419-
if (exprType(indexkey) !=
420-
Irel[ind]->rd_att->attrs[i]->atttypid)
421-
continue;
422-
423411
thisdata->vacattrstats[tcnt] =
424-
examine_attribute(Irel[ind], i + 1);
412+
examine_attribute(Irel[ind], i + 1, indexkey);
425413
if (thisdata->vacattrstats[tcnt] != NULL)
426414
{
427415
tcnt++;
@@ -802,9 +790,12 @@ compute_index_stats(Relation onerel, double totalrows,
802790
*
803791
* Determine whether the column is analyzable; if so, create and initialize
804792
* a VacAttrStats struct for it. If not, return NULL.
793+
*
794+
* If index_expr isn't NULL, then we're trying to analyze an expression index,
795+
* and index_expr is the expression tree representing the column's data.
805796
*/
806797
static VacAttrStats *
807-
examine_attribute(Relation onerel, int attnum)
798+
examine_attribute(Relation onerel, int attnum, Node *index_expr)
808799
{
809800
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
810801
HeapTuple typtuple;
@@ -827,9 +818,30 @@ examine_attribute(Relation onerel, int attnum)
827818
stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
828819
stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
829820
memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
830-
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid));
821+
822+
/*
823+
* When analyzing an expression index, believe the expression tree's type
824+
* not the column datatype --- the latter might be the opckeytype storage
825+
* type of the opclass, which is not interesting for our purposes. (Note:
826+
* if we did anything with non-expression index columns, we'd need to
827+
* figure out where to get the correct type info from, but for now that's
828+
* not a problem.) It's not clear whether anyone will care about the
829+
* typmod, but we store that too just in case.
830+
*/
831+
if (index_expr)
832+
{
833+
stats->attrtypid = exprType(index_expr);
834+
stats->attrtypmod = exprTypmod(index_expr);
835+
}
836+
else
837+
{
838+
stats->attrtypid = attr->atttypid;
839+
stats->attrtypmod = attr->atttypmod;
840+
}
841+
842+
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
831843
if (!HeapTupleIsValid(typtuple))
832-
elog(ERROR, "cache lookup failed for type %u", attr->atttypid);
844+
elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
833845
stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
834846
memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
835847
ReleaseSysCache(typtuple);
@@ -838,12 +850,12 @@ examine_attribute(Relation onerel, int attnum)
838850

839851
/*
840852
* The fields describing the stats->stavalues[n] element types default to
841-
* the type of the field being analyzed, but the type-specific typanalyze
853+
* the type of the data being analyzed, but the type-specific typanalyze
842854
* function can change them if it wants to store something else.
843855
*/
844856
for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
845857
{
846-
stats->statypid[i] = stats->attr->atttypid;
858+
stats->statypid[i] = stats->attrtypid;
847859
stats->statyplen[i] = stats->attrtype->typlen;
848860
stats->statypbyval[i] = stats->attrtype->typbyval;
849861
stats->statypalign[i] = stats->attrtype->typalign;
@@ -1780,7 +1792,7 @@ std_typanalyze(VacAttrStats *stats)
17801792
attr->attstattarget = default_statistics_target;
17811793

17821794
/* Look for default "<" and "=" operators for column's type */
1783-
get_sort_group_operators(attr->atttypid,
1795+
get_sort_group_operators(stats->attrtypid,
17841796
false, false, false,
17851797
&ltopr, &eqopr, NULL);
17861798

@@ -1860,10 +1872,10 @@ compute_minimal_stats(VacAttrStatsP stats,
18601872
int nonnull_cnt = 0;
18611873
int toowide_cnt = 0;
18621874
double total_width = 0;
1863-
bool is_varlena = (!stats->attr->attbyval &&
1864-
stats->attr->attlen == -1);
1865-
bool is_varwidth = (!stats->attr->attbyval &&
1866-
stats->attr->attlen < 0);
1875+
bool is_varlena = (!stats->attrtype->typbyval &&
1876+
stats->attrtype->typlen == -1);
1877+
bool is_varwidth = (!stats->attrtype->typbyval &&
1878+
stats->attrtype->typlen < 0);
18671879
FmgrInfo f_cmpeq;
18681880
typedef struct
18691881
{
@@ -2126,8 +2138,8 @@ compute_minimal_stats(VacAttrStatsP stats,
21262138
for (i = 0; i < num_mcv; i++)
21272139
{
21282140
mcv_values[i] = datumCopy(track[i].value,
2129-
stats->attr->attbyval,
2130-
stats->attr->attlen);
2141+
stats->attrtype->typbyval,
2142+
stats->attrtype->typlen);
21312143
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
21322144
}
21332145
MemoryContextSwitchTo(old_context);
@@ -2184,10 +2196,10 @@ compute_scalar_stats(VacAttrStatsP stats,
21842196
int nonnull_cnt = 0;
21852197
int toowide_cnt = 0;
21862198
double total_width = 0;
2187-
bool is_varlena = (!stats->attr->attbyval &&
2188-
stats->attr->attlen == -1);
2189-
bool is_varwidth = (!stats->attr->attbyval &&
2190-
stats->attr->attlen < 0);
2199+
bool is_varlena = (!stats->attrtype->typbyval &&
2200+
stats->attrtype->typlen == -1);
2201+
bool is_varwidth = (!stats->attrtype->typbyval &&
2202+
stats->attrtype->typlen < 0);
21912203
double corr_xysum;
21922204
Oid cmpFn;
21932205
int cmpFlags;
@@ -2476,8 +2488,8 @@ compute_scalar_stats(VacAttrStatsP stats,
24762488
for (i = 0; i < num_mcv; i++)
24772489
{
24782490
mcv_values[i] = datumCopy(values[track[i].first].value,
2479-
stats->attr->attbyval,
2480-
stats->attr->attlen);
2491+
stats->attrtype->typbyval,
2492+
stats->attrtype->typlen);
24812493
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
24822494
}
24832495
MemoryContextSwitchTo(old_context);
@@ -2583,8 +2595,8 @@ compute_scalar_stats(VacAttrStatsP stats,
25832595
for (i = 0; i < num_hist; i++)
25842596
{
25852597
hist_values[i] = datumCopy(values[pos].value,
2586-
stats->attr->attbyval,
2587-
stats->attr->attlen);
2598+
stats->attrtype->typbyval,
2599+
stats->attrtype->typlen);
25882600
pos += delta;
25892601
posfrac += deltafrac;
25902602
if (posfrac >= (num_hist - 1))

src/include/commands/vacuum.h

+13-6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.89 2010/02/09 21:43:30 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.90 2010/08/01 22:38:11 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -62,9 +62,17 @@ typedef struct VacAttrStats
6262
/*
6363
* These fields are set up by the main ANALYZE code before invoking the
6464
* type-specific typanalyze function.
65+
*
66+
* Note: do not assume that the data being analyzed has the same datatype
67+
* shown in attr, ie do not trust attr->atttypid, attlen, etc. This is
68+
* because some index opclasses store a different type than the underlying
69+
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
70+
* information about the datatype being fed to the typanalyze function.
6571
*/
6672
Form_pg_attribute attr; /* copy of pg_attribute row for column */
67-
Form_pg_type attrtype; /* copy of pg_type row for column */
73+
Oid attrtypid; /* type of data being analyzed */
74+
int32 attrtypmod; /* typmod of data being analyzed */
75+
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
6876
MemoryContext anl_context; /* where to save long-lived data */
6977

7078
/*
@@ -95,10 +103,9 @@ typedef struct VacAttrStats
95103

96104
/*
97105
* These fields describe the stavalues[n] element types. They will be
98-
* initialized to be the same as the column's that's underlying the slot,
99-
* but a custom typanalyze function might want to store an array of
100-
* something other than the analyzed column's elements. It should then
101-
* overwrite these fields.
106+
* initialized to match attrtypid, but a custom typanalyze function might
107+
* want to store an array of something other than the analyzed column's
108+
* elements. It should then overwrite these fields.
102109
*/
103110
Oid statypid[STATISTIC_NUM_SLOTS];
104111
int2 statyplen[STATISTIC_NUM_SLOTS];

0 commit comments

Comments
 (0)