Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 696d1f7

Browse files
committed
Make all comparisons done for/with statistics use the default collation.
While this will give wrong answers when estimating selectivity for a comparison operator that's using a non-default collation, the estimation error probably won't be large; and anyway the former approach created estimation errors of its own by trying to use a histogram that might have been computed with some other collation. So we'll adopt this simplified approach for now and perhaps improve it sometime in the future.

This patch incorporates changes from Andres Freund to make sure that selfuncs.c passes a valid collation OID to any datatype-specific function it calls, in case that function wants collation information. Said OID will now always be DEFAULT_COLLATION_OID, but at least we won't get errors.
1 parent 94fe9c0 commit 696d1f7

File tree

5 files changed

+35
-29
lines changed

5 files changed

+35
-29
lines changed

src/backend/commands/analyze.c

+5 -4
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "catalog/index.h"
2525
#include "catalog/indexing.h"
2626
#include "catalog/namespace.h"
27+
#include "catalog/pg_collation.h"
2728
#include "catalog/pg_inherits_fn.h"
2829
#include "catalog/pg_namespace.h"
2930
#include "commands/dbcommands.h"
@@ -862,13 +863,11 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
862863
{
863864
stats->attrtypid = exprType(index_expr);
864865
stats->attrtypmod = exprTypmod(index_expr);
865-
stats->attrcollation = exprCollation(index_expr);
866866
}
867867
else
868868
{
869869
stats->attrtypid = attr->atttypid;
870870
stats->attrtypmod = attr->atttypmod;
871-
stats->attrcollation = attr->attcollation;
872871
}
873872

874873
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
@@ -1931,7 +1930,8 @@ compute_minimal_stats(VacAttrStatsP stats,
19311930
track_cnt = 0;
19321931

19331932
fmgr_info(mystats->eqfunc, &f_cmpeq);
1934-
fmgr_info_collation(stats->attrcollation, &f_cmpeq);
1933+
/* We always use the default collation for statistics */
1934+
fmgr_info_collation(DEFAULT_COLLATION_OID, &f_cmpeq);
19351935

19361936
for (i = 0; i < samplerows; i++)
19371937
{
@@ -2253,7 +2253,8 @@ compute_scalar_stats(VacAttrStatsP stats,
22532253

22542254
SelectSortFunction(mystats->ltopr, false, &cmpFn, &cmpFlags);
22552255
fmgr_info(cmpFn, &f_cmpfn);
2256-
fmgr_info_collation(stats->attrcollation, &f_cmpfn);
2256+
/* We always use the default collation for statistics */
2257+
fmgr_info_collation(DEFAULT_COLLATION_OID, &f_cmpfn);
22572258

22582259
/* Initial scan to find sortable values */
22592260
for (i = 0; i < samplerows; i++)

src/backend/optimizer/path/costsize.c

-1
Original file line number | Diff line number | Diff line change
@@ -2056,7 +2056,6 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
20562056
mergejoinscansel(root,
20572057
(Node *) rinfo->clause,
20582058
pathkey->pk_opfamily,
2059-
pathkey->pk_collation,
20602059
pathkey->pk_strategy,
20612060
pathkey->pk_nulls_first,
20622061
&leftstartsel,

src/backend/utils/adt/selfuncs.c

+24 -20
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ static double eqjoinsel_inner(Oid operator,
145145
static double eqjoinsel_semi(Oid operator,
146146
VariableStatData *vardata1, VariableStatData *vardata2);
147147
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
148-
Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid,
148+
Datum lobound, Datum hibound, Oid boundstypid,
149149
double *scaledlobound, double *scaledhibound);
150150
static double convert_numeric_to_scalar(Datum value, Oid typid);
151151
static void convert_string_to_scalar(char *value,
@@ -164,10 +164,10 @@ static double convert_one_string_to_scalar(char *value,
164164
int rangelo, int rangehi);
165165
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
166166
int rangelo, int rangehi);
167-
static char *convert_string_datum(Datum value, Oid typid, Oid collid);
167+
static char *convert_string_datum(Datum value, Oid typid);
168168
static double convert_timevalue_to_scalar(Datum value, Oid typid);
169169
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
170-
Oid sortop, Oid collation, Datum *min, Datum *max);
170+
Oid sortop, Datum *min, Datum *max);
171171
static bool get_actual_variable_range(PlannerInfo *root,
172172
VariableStatData *vardata,
173173
Oid sortop,
@@ -285,6 +285,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
285285
FmgrInfo eqproc;
286286

287287
fmgr_info(get_opcode(operator), &eqproc);
288+
fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc);
288289

289290
for (i = 0; i < nvalues; i++)
290291
{
@@ -514,7 +515,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
514515
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
515516

516517
fmgr_info(get_opcode(operator), &opproc);
517-
fmgr_info_collation(vardata->attcollation, &opproc);
518+
fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
518519

519520
/*
520521
* If we have most-common-values info, add up the fractions of the MCV
@@ -839,7 +840,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
839840
*/
840841
if (convert_to_scalar(constval, consttype, &val,
841842
values[i - 1], values[i],
842-
vardata->vartype, vardata->attcollation,
843+
vardata->vartype,
843844
&low, &high))
844845
{
845846
if (high <= low)
@@ -1700,6 +1701,7 @@ scalararraysel(PlannerInfo *root,
17001701
if (!oprsel)
17011702
return (Selectivity) 0.5;
17021703
fmgr_info(oprsel, &oprselproc);
1704+
fmgr_info_collation(DEFAULT_COLLATION_OID, &oprselproc);
17031705

17041706
/* deconstruct the expression */
17051707
Assert(list_length(clause->args) == 2);
@@ -2116,6 +2118,7 @@ eqjoinsel_inner(Oid operator,
21162118
nmatches;
21172119

21182120
fmgr_info(get_opcode(operator), &eqproc);
2121+
fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc);
21192122
hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
21202123
hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
21212124

@@ -2338,6 +2341,7 @@ eqjoinsel_semi(Oid operator,
23382341
nmatches;
23392342

23402343
fmgr_info(get_opcode(operator), &eqproc);
2344+
fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc);
23412345
hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
23422346
hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
23432347

@@ -2588,7 +2592,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
25882592
*/
25892593
void
25902594
mergejoinscansel(PlannerInfo *root, Node *clause,
2591-
Oid opfamily, Oid collation, int strategy, bool nulls_first,
2595+
Oid opfamily, int strategy, bool nulls_first,
25922596
Selectivity *leftstart, Selectivity *leftend,
25932597
Selectivity *rightstart, Selectivity *rightend)
25942598
{
@@ -2757,20 +2761,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
27572761
/* Try to get ranges of both inputs */
27582762
if (!isgt)
27592763
{
2760-
if (!get_variable_range(root, &leftvar, lstatop, collation,
2764+
if (!get_variable_range(root, &leftvar, lstatop,
27612765
&leftmin, &leftmax))
27622766
goto fail; /* no range available from stats */
2763-
if (!get_variable_range(root, &rightvar, rstatop, collation,
2767+
if (!get_variable_range(root, &rightvar, rstatop,
27642768
&rightmin, &rightmax))
27652769
goto fail; /* no range available from stats */
27662770
}
27672771
else
27682772
{
27692773
/* need to swap the max and min */
2770-
if (!get_variable_range(root, &leftvar, lstatop, collation,
2774+
if (!get_variable_range(root, &leftvar, lstatop,
27712775
&leftmax, &leftmin))
27722776
goto fail; /* no range available from stats */
2773-
if (!get_variable_range(root, &rightvar, rstatop, collation,
2777+
if (!get_variable_range(root, &rightvar, rstatop,
27742778
&rightmax, &rightmin))
27752779
goto fail; /* no range available from stats */
27762780
}
@@ -3371,7 +3375,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
33713375
*/
33723376
static bool
33733377
convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
3374-
Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid,
3378+
Datum lobound, Datum hibound, Oid boundstypid,
33753379
double *scaledlobound, double *scaledhibound)
33763380
{
33773381
/*
@@ -3424,9 +3428,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
34243428
case TEXTOID:
34253429
case NAMEOID:
34263430
{
3427-
char *valstr = convert_string_datum(value, valuetypid, boundscollid);
3428-
char *lostr = convert_string_datum(lobound, boundstypid, boundscollid);
3429-
char *histr = convert_string_datum(hibound, boundstypid, boundscollid);
3431+
char *valstr = convert_string_datum(value, valuetypid);
3432+
char *lostr = convert_string_datum(lobound, boundstypid);
3433+
char *histr = convert_string_datum(hibound, boundstypid);
34303434

34313435
convert_string_to_scalar(valstr, scaledvalue,
34323436
lostr, scaledlobound,
@@ -3670,7 +3674,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
36703674
* before continuing, so as to generate correct locale-specific results.
36713675
*/
36723676
static char *
3673-
convert_string_datum(Datum value, Oid typid, Oid collid)
3677+
convert_string_datum(Datum value, Oid typid)
36743678
{
36753679
char *val;
36763680

@@ -3703,7 +3707,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid)
37033707
return NULL;
37043708
}
37053709

3706-
if (!lc_collate_is_c(collid))
3710+
if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
37073711
{
37083712
char *xfrmstr;
37093713
size_t xfrmlen;
@@ -4102,7 +4106,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
41024106
vardata->rel = find_base_rel(root, var->varno);
41034107
vardata->atttype = var->vartype;
41044108
vardata->atttypmod = var->vartypmod;
4105-
vardata->attcollation = var->varcollid;
41064109
vardata->isunique = has_unique_index(vardata->rel, var->varattno);
41074110

41084111
rte = root->simple_rte_array[var->varno];
@@ -4188,7 +4191,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
41884191
vardata->var = node;
41894192
vardata->atttype = exprType(node);
41904193
vardata->atttypmod = exprTypmod(node);
4191-
vardata->attcollation = exprCollation(node);
41924194

41934195
if (onerel)
41944196
{
@@ -4397,7 +4399,7 @@ get_variable_numdistinct(VariableStatData *vardata)
43974399
* be "<" not ">", as only the former is likely to be found in pg_statistic.
43984400
*/
43994401
static bool
4400-
get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation,
4402+
get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
44014403
Datum *min, Datum *max)
44024404
{
44034405
Datum tmin = 0;
@@ -4482,7 +4484,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid
44824484
FmgrInfo opproc;
44834485

44844486
fmgr_info(get_opcode(sortop), &opproc);
4485-
fmgr_info_collation(collation, &opproc);
4487+
fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
44864488

44874489
for (i = 0; i < nvalues; i++)
44884490
{
@@ -5109,6 +5111,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
51095111
if (cmpopr == InvalidOid)
51105112
elog(ERROR, "no >= operator for opfamily %u", opfamily);
51115113
fmgr_info(get_opcode(cmpopr), &opproc);
5114+
fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
51125115

51135116
prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
51145117
prefixcon->constvalue,
@@ -5130,6 +5133,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
51305133
if (cmpopr == InvalidOid)
51315134
elog(ERROR, "no < operator for opfamily %u", opfamily);
51325135
fmgr_info(get_opcode(cmpopr), &opproc);
5136+
fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
51335137

51345138
greaterstrcon = make_greater_string(prefixcon, &opproc);
51355139
if (greaterstrcon)

src/include/commands/vacuum.h

+5 -2
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,10 @@
5050
* the information to be stored in a pg_statistic row for the column. Be
5151
* careful to allocate any pointed-to data in anl_context, which will NOT
5252
* be CurrentMemoryContext when compute_stats is called.
53+
*
54+
* Note: for the moment, all comparisons done for statistical purposes
55+
* should use the database's default collation (DEFAULT_COLLATION_OID).
56+
* This might change in some future release.
5357
*----------
5458
*/
5559
typedef struct VacAttrStats *VacAttrStatsP;
@@ -66,13 +70,12 @@ typedef struct VacAttrStats
6670
* Note: do not assume that the data being analyzed has the same datatype
6771
* shown in attr, ie do not trust attr->atttypid, attlen, etc. This is
6872
* because some index opclasses store a different type than the underlying
69-
* column/expression. Instead use attrtypid, attrtypmod, attrcollation, and attrtype for
73+
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
7074
* information about the datatype being fed to the typanalyze function.
7175
*/
7276
Form_pg_attribute attr; /* copy of pg_attribute row for column */
7377
Oid attrtypid; /* type of data being analyzed */
7478
int32 attrtypmod; /* typmod of data being analyzed */
75-
Oid attrcollation; /* collation of the data being analyzed */
7679
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
7780
MemoryContext anl_context; /* where to save long-lived data */
7881

src/include/utils/selfuncs.h

+1 -2
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,6 @@ typedef struct VariableStatData
7474
Oid vartype; /* exposed type of expression */
7575
Oid atttype; /* type to pass to get_attstatsslot */
7676
int32 atttypmod; /* typmod to pass to get_attstatsslot */
77-
Oid attcollation; /* collation of the variable */
7877
bool isunique; /* true if matched to a unique index */
7978
} VariableStatData;
8079

@@ -179,7 +178,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root,
179178
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
180179

181180
extern void mergejoinscansel(PlannerInfo *root, Node *clause,
182-
Oid opfamily, Oid collation, int strategy, bool nulls_first,
181+
Oid opfamily, int strategy, bool nulls_first,
183182
Selectivity *leftstart, Selectivity *leftend,
184183
Selectivity *rightstart, Selectivity *rightend);
185184

0 commit comments

Comments
 (0)