Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit cad764f

Browse files
committed
Improve selectivity estimation involving string constants: pay attention
to more than one character, and try to do the right thing in non-ASCII locales.
1 parent 1d5e7a6 commit cad764f

File tree

1 file changed

+96
-28
lines changed

1 file changed

+96
-28
lines changed

src/backend/utils/adt/selfuncs.c

+96-28
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.60 2000/03/20 15:42:46 momjian Exp $
18+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.61 2000/03/23 00:55:42 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -48,6 +48,8 @@
4848
/* default selectivity estimate for inequalities such as "A < b" */
4949
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
5050

51+
static bool convert_string_to_scalar(char *str, int strlength,
52+
double *scaleval);
5153
static void getattproperties(Oid relid, AttrNumber attnum,
5254
Oid *typid,
5355
int *typlen,
@@ -472,9 +474,8 @@ scalargtjoinsel(Oid opid,
472474
* All numeric datatypes are simply converted to their equivalent
473475
* "double" values.
474476
*
475-
* String datatypes are converted to a crude scale using their first character
476-
* (only if it is in the ASCII range, to try to avoid problems with non-ASCII
477-
* collating sequences).
477+
* String datatypes are converted by convert_string_to_scalar(),
478+
* which is explained below.
478479
*
479480
* The several datatypes representing absolute times are all converted
480481
* to Timestamp, which is actually a double, and then we just use that
@@ -525,40 +526,25 @@ convert_to_scalar(Datum value, Oid typid,
525526
*/
526527
case CHAROID:
527528
{
528-
char ch = DatumGetChar(value);
529+
char ch = DatumGetChar(value);
529530

530-
if (ch >= 0 && ch < 127)
531-
{
532-
*scaleval = (double) ch;
533-
return true;
534-
}
535-
break;
531+
return convert_string_to_scalar(&ch, 1, scaleval);
536532
}
537533
case BPCHAROID:
538534
case VARCHAROID:
539535
case TEXTOID:
540-
if (VARSIZE(DatumGetPointer(value)) > VARHDRSZ)
541-
{
542-
char ch = * (char *) VARDATA(DatumGetPointer(value));
536+
{
537+
char *str = (char *) VARDATA(DatumGetPointer(value));
538+
int strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
543539

544-
if (ch >= 0 && ch < 127)
545-
{
546-
*scaleval = (double) ch;
547-
return true;
548-
}
549-
}
550-
break;
540+
return convert_string_to_scalar(str, strlength, scaleval);
541+
}
551542
case NAMEOID:
552543
{
553544
NameData *nm = (NameData *) DatumGetPointer(value);
554-
char ch = NameStr(*nm)[0];
555545

556-
if (ch >= 0 && ch < 127)
557-
{
558-
*scaleval = (double) ch;
559-
return true;
560-
}
561-
break;
546+
return convert_string_to_scalar(NameStr(*nm), strlen(NameStr(*nm)),
547+
scaleval);
562548
}
563549

564550
/*
@@ -644,6 +630,88 @@ convert_to_scalar(Datum value, Oid typid,
644630
return false;
645631
}
646632

/*
 * Number of leading string bytes folded into the scalar.  A double cannot
 * usefully resolve more base-256 digits than this, so later bytes are
 * ignored.
 */
#define STRING_SCALAR_MAX_BYTES 8

/*
 * convert_string_to_scalar
 *	  Do convert_to_scalar()'s work for any character-string data type.
 *
 * The string is mapped onto a scale running from 0 up to (but not reaching)
 * 1 by treating its leading bytes as the digits of a base-256 fraction:
 * the first byte contributes byte/256, the second byte/256^2, and so on.
 * Bytes are read as unsigned char, so values above 127 sort after ASCII
 * rather than turning negative.  Only the first STRING_SCALAR_MAX_BYTES
 * bytes are examined, since double has only limited precision (and we
 * can't expect huge accuracy in our selectivity predictions anyway!)
 *
 * If USE_LOCALE is defined, the string is first passed through strxfrm()
 * and the bytes of the transformed string are used instead, so that the
 * result follows the locale's collation order rather than raw byte order.
 *
 * Parameters:
 *	str			- the string bytes; need not be null-terminated
 *	strlength	- number of bytes at str; zero or less means empty string
 *	scaleval	- output: receives the computed scalar
 *
 * Returns true in all cases; an empty string yields a scalar of 0.
 */
static bool
convert_string_to_scalar(char *str, int strlength,
						 double *scaleval)
{
	unsigned char *sptr;
	int			slen;
#ifdef USE_LOCALE
	char	   *rawstr;
	char	   *xfrmstr;
	size_t		xfrmsize;
	size_t		xfrmlen;
#endif
	double		num,
				denom;

	if (strlength <= 0)
	{
		*scaleval = 0;			/* empty string has scalar value 0 */
		return true;
	}

#ifdef USE_LOCALE
	/* Need a null-terminated string to pass to strxfrm() */
	rawstr = (char *) palloc(strlength + 1);
	memcpy(rawstr, str, strlength);
	rawstr[strlength] = '\0';

	/* Guess that transformed string is not much bigger */
	xfrmsize = strlength + 32;	/* arbitrary pad value here... */
	xfrmstr = (char *) palloc(xfrmsize);
	xfrmlen = strxfrm(xfrmstr, rawstr, xfrmsize);
	if (xfrmlen >= xfrmsize)
	{
		/*
		 * Oops, didn't make it: strxfrm() reported the length it really
		 * needs, so retry with a buffer of exactly that size plus the
		 * terminator.
		 */
		pfree(xfrmstr);
		xfrmstr = (char *) palloc(xfrmlen + 1);
		xfrmlen = strxfrm(xfrmstr, rawstr, xfrmlen + 1);
	}
	pfree(rawstr);

	sptr = (unsigned char *) xfrmstr;

	/*
	 * Clamp while the length is still a size_t; narrowing first could turn
	 * an out-of-range length into a negative int and skip the loop below.
	 */
	slen = (xfrmlen > STRING_SCALAR_MAX_BYTES) ?
		STRING_SCALAR_MAX_BYTES : (int) xfrmlen;
#else
	sptr = (unsigned char *) str;
	slen = (strlength > STRING_SCALAR_MAX_BYTES) ?
		STRING_SCALAR_MAX_BYTES : strlength;
#endif

	/* Convert initial characters to fraction */
	num = 0.0;
	denom = 256.0;
	while (slen-- > 0)
	{
		num += ((double) (*sptr++)) / denom;
		denom *= 256.0;
	}

#ifdef USE_LOCALE
	pfree(xfrmstr);
#endif

	*scaleval = num;
	return true;
}
713+
714+
647715
/*
648716
* getattproperties
649717
* Retrieve pg_attribute properties for an attribute,

0 commit comments

Comments
 (0)