Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a5dda5d

Browse files
committed
Second try at making examine_variable and friends behave sanely in
cases with binary-compatible relabeling. My first try was implicitly assuming that all operators scalarineqsel is used for have binary- compatible datatypes on both sides ... which is very wrong of course. Per report from Michael Fuhr.
1 parent 06fb610 commit a5dda5d

File tree

1 file changed

+35
-36
lines changed

1 file changed

+35
-36
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.175 2005/03/27 23:53:03 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.176 2005/04/01 20:31:50 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -118,6 +118,7 @@ typedef struct
118118
RelOptInfo *rel; /* Relation, or NULL if not identifiable */
119119
HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */
120120
/* NB: if statsTuple!=NULL, it must be freed when caller is done */
121+
Oid vartype; /* exposed type of expression */
121122
Oid atttype; /* type to pass to get_attstatsslot */
122123
int32 atttypmod; /* typmod to pass to get_attstatsslot */
123124
bool isunique; /* true if matched to a unique index */
@@ -546,7 +547,7 @@ scalarineqsel(Query *root, Oid operator, bool isgt,
546547
*/
547548
if (convert_to_scalar(constval, consttype, &val,
548549
values[i - 1], values[i],
549-
vardata->atttype,
550+
vardata->vartype,
550551
&low, &high))
551552
{
552553
if (high <= low)
@@ -862,23 +863,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
862863
}
863864

864865
/*
865-
* The var, on the other hand, might be a binary-compatible type;
866-
* particularly a domain. Try to fold it if it's not recognized
867-
* immediately.
868-
*/
869-
vartype = vardata.atttype;
870-
if (vartype != consttype)
871-
vartype = getBaseType(vartype);
872-
873-
/*
874-
* We should now be able to recognize the var's datatype. Choose the
875-
* index opclass from which we must draw the comparison operators.
866+
* Similarly, the exposed type of the left-hand side should be one
867+
* of those we know. (Do not look at vardata.atttype, which might be
868+
* something binary-compatible but different.) We can use it to choose
869+
* the index opclass from which we must draw the comparison operators.
876870
*
877871
* NOTE: It would be more correct to use the PATTERN opclasses than the
878872
* simple ones, but at the moment ANALYZE will not generate statistics
879873
* for the PATTERN operators. But our results are so approximate
880874
* anyway that it probably hardly matters.
881875
*/
876+
vartype = vardata.vartype;
877+
882878
switch (vartype)
883879
{
884880
case TEXTOID:
@@ -2304,21 +2300,19 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
23042300
double *scaledlobound, double *scaledhibound)
23052301
{
23062302
/*
2307-
* In present usage, we can assume that the valuetypid exactly matches
2308-
* the declared input type of the operator we are invoked for (because
2309-
* constant-folding will ensure that any Const passed to the operator
2310-
* has been reduced to the correct type). However, the boundstypid is
2311-
* the type of some variable that might be only binary-compatible with
2312-
* the declared type; for example it might be a domain type. So we
2313-
* ignore it and work with the valuetypid only.
2303+
* Both the valuetypid and the boundstypid should exactly match
2304+
* the declared input type(s) of the operator we are invoked for,
2305+
* so we just error out if either is not recognized.
23142306
*
2315-
* XXX What's really going on here is that we assume that the scalar
2316-
* representations of binary-compatible types are enough alike that we
2317-
* can use a histogram generated with one type's operators to estimate
2318-
* selectivity for the other's. This is outright wrong in some cases ---
2319-
* in particular signed versus unsigned interpretation could trip us up.
2320-
* But it's useful enough in the majority of cases that we do it anyway.
2321-
* Should think about more rigorous ways to do it.
2307+
* XXX The histogram we are interpolating between points of could belong
2308+
* to a column that's only binary-compatible with the declared type.
2309+
* In essence we are assuming that the semantics of binary-compatible
2310+
* types are enough alike that we can use a histogram generated with one
2311+
* type's operators to estimate selectivity for the other's. This is
2312+
* outright wrong in some cases --- in particular signed versus unsigned
2313+
* interpretation could trip us up. But it's useful enough in the
2314+
* majority of cases that we do it anyway. Should think about more
2315+
* rigorous ways to do it.
23222316
*/
23232317
switch (valuetypid)
23242318
{
@@ -2340,8 +2334,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
23402334
case REGCLASSOID:
23412335
case REGTYPEOID:
23422336
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
2343-
*scaledlobound = convert_numeric_to_scalar(lobound, valuetypid);
2344-
*scaledhibound = convert_numeric_to_scalar(hibound, valuetypid);
2337+
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
2338+
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
23452339
return true;
23462340

23472341
/*
@@ -2354,8 +2348,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
23542348
case NAMEOID:
23552349
{
23562350
unsigned char *valstr = convert_string_datum(value, valuetypid);
2357-
unsigned char *lostr = convert_string_datum(lobound, valuetypid);
2358-
unsigned char *histr = convert_string_datum(hibound, valuetypid);
2351+
unsigned char *lostr = convert_string_datum(lobound, boundstypid);
2352+
unsigned char *histr = convert_string_datum(hibound, boundstypid);
23592353

23602354
convert_string_to_scalar(valstr, scaledvalue,
23612355
lostr, scaledlobound,
@@ -2390,8 +2384,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
23902384
case TIMEOID:
23912385
case TIMETZOID:
23922386
*scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
2393-
*scaledlobound = convert_timevalue_to_scalar(lobound, valuetypid);
2394-
*scaledhibound = convert_timevalue_to_scalar(hibound, valuetypid);
2387+
*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
2388+
*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
23952389
return true;
23962390

23972391
/*
@@ -2401,8 +2395,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
24012395
case CIDROID:
24022396
case MACADDROID:
24032397
*scaledvalue = convert_network_to_scalar(value, valuetypid);
2404-
*scaledlobound = convert_network_to_scalar(lobound, valuetypid);
2405-
*scaledhibound = convert_network_to_scalar(hibound, valuetypid);
2398+
*scaledlobound = convert_network_to_scalar(lobound, boundstypid);
2399+
*scaledhibound = convert_network_to_scalar(hibound, boundstypid);
24062400
return true;
24072401
}
24082402
/* Don't know how to convert */
@@ -2948,6 +2942,8 @@ get_join_variables(Query *root, List *args,
29482942
* subquery, not one in the current query).
29492943
* statsTuple: the pg_statistic entry for the variable, if one exists;
29502944
* otherwise NULL.
2945+
* vartype: exposed type of the expression; this should always match
2946+
* the declared input type of the operator we are estimating for.
29512947
* atttype, atttypmod: type data to pass to get_attstatsslot(). This is
29522948
* commonly the same as the exposed type of the variable argument,
29532949
* but can be different in binary-compatible-type cases.
@@ -2965,6 +2961,9 @@ examine_variable(Query *root, Node *node, int varRelid,
29652961
/* Make sure we don't return dangling pointers in vardata */
29662962
MemSet(vardata, 0, sizeof(VariableStatData));
29672963

2964+
/* Save the exposed type of the expression */
2965+
vardata->vartype = exprType(node);
2966+
29682967
/* Look inside any binary-compatible relabeling */
29692968

29702969
if (IsA(node, RelabelType))
@@ -3153,7 +3152,7 @@ get_variable_numdistinct(VariableStatData *vardata)
31533152
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
31543153
stadistinct = stats->stadistinct;
31553154
}
3156-
else if (vardata->atttype == BOOLOID)
3155+
else if (vardata->vartype == BOOLOID)
31573156
{
31583157
/*
31593158
* Special-case boolean columns: presumably, two distinct values.

0 commit comments

Comments
 (0)