Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d8f3752

Browse files
committed
Generate double-sided LIKE indexquals that work even in weird locales,
by continuing to increment the rightmost character until we get a string that is demonstrably greater than the pattern prefix.
1 parent 5f68d5c commit d8f3752

File tree

1 file changed

+163
-45
lines changed

1 file changed

+163
-45
lines changed

src/backend/optimizer/path/indxpath.c

Lines changed: 163 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.74 1999/12/31 03:41:03 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.75 1999/12/31 05:38:25 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -23,6 +23,7 @@
2323
#include "catalog/pg_amop.h"
2424
#include "catalog/pg_operator.h"
2525
#include "executor/executor.h"
26+
#include "mb/pg_wchar.h"
2627
#include "nodes/makefuncs.h"
2728
#include "nodes/nodeFuncs.h"
2829
#include "optimizer/clauses.h"
@@ -92,7 +93,12 @@ static Prefix_Status regex_fixed_prefix(char *patt, bool case_insensitive,
9293
char **prefix);
9394
static List *prefix_quals(Var *leftop, Oid expr_op,
9495
char *prefix, Prefix_Status pstatus);
96+
static char *make_greater_string(const char * str, Oid datatype);
9597
static Oid find_operator(const char * opname, Oid datatype);
98+
static Datum string_to_datum(const char * str, Oid datatype);
99+
static Const *string_to_const(const char * str, Oid datatype);
100+
static bool string_lessthan(const char * str1, const char * str2,
101+
Oid datatype);
96102

97103

98104
/*
@@ -1653,31 +1659,31 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
16531659
case OID_TEXT_REGEXEQ_OP:
16541660
case OID_TEXT_ICREGEXEQ_OP:
16551661
if (! op_class(find_operator(">=", TEXTOID), opclass, relam) ||
1656-
! op_class(find_operator("<=", TEXTOID), opclass, relam))
1662+
! op_class(find_operator("<", TEXTOID), opclass, relam))
16571663
isIndexable = false;
16581664
break;
16591665

16601666
case OID_BPCHAR_LIKE_OP:
16611667
case OID_BPCHAR_REGEXEQ_OP:
16621668
case OID_BPCHAR_ICREGEXEQ_OP:
16631669
if (! op_class(find_operator(">=", BPCHAROID), opclass, relam) ||
1664-
! op_class(find_operator("<=", BPCHAROID), opclass, relam))
1670+
! op_class(find_operator("<", BPCHAROID), opclass, relam))
16651671
isIndexable = false;
16661672
break;
16671673

16681674
case OID_VARCHAR_LIKE_OP:
16691675
case OID_VARCHAR_REGEXEQ_OP:
16701676
case OID_VARCHAR_ICREGEXEQ_OP:
16711677
if (! op_class(find_operator(">=", VARCHAROID), opclass, relam) ||
1672-
! op_class(find_operator("<=", VARCHAROID), opclass, relam))
1678+
! op_class(find_operator("<", VARCHAROID), opclass, relam))
16731679
isIndexable = false;
16741680
break;
16751681

16761682
case OID_NAME_LIKE_OP:
16771683
case OID_NAME_REGEXEQ_OP:
16781684
case OID_NAME_ICREGEXEQ_OP:
16791685
if (! op_class(find_operator(">=", NAMEOID), opclass, relam) ||
1680-
! op_class(find_operator("<=", NAMEOID), opclass, relam))
1686+
! op_class(find_operator("<", NAMEOID), opclass, relam))
16811687
isIndexable = false;
16821688
break;
16831689
}
@@ -1774,7 +1780,7 @@ expand_indexqual_conditions(List *indexquals)
17741780

17751781
/*
17761782
* Extract the fixed prefix, if any, for a LIKE pattern.
1777-
* *prefix is set to a palloc'd prefix string with 1 spare byte,
1783+
* *prefix is set to a palloc'd prefix string,
17781784
* or to NULL if no fixed prefix exists for the pattern.
17791785
* The return value distinguishes no fixed prefix, a partial prefix,
17801786
* or an exact-match-only pattern.
@@ -1786,7 +1792,7 @@ like_fixed_prefix(char *patt, char **prefix)
17861792
int pos,
17871793
match_pos;
17881794

1789-
*prefix = match = palloc(strlen(patt)+2);
1795+
*prefix = match = palloc(strlen(patt)+1);
17901796
match_pos = 0;
17911797

17921798
for (pos = 0; patt[pos]; pos++)
@@ -1823,7 +1829,7 @@ like_fixed_prefix(char *patt, char **prefix)
18231829

18241830
/*
18251831
* Extract the fixed prefix, if any, for a regex pattern.
1826-
* *prefix is set to a palloc'd prefix string with 1 spare byte,
1832+
* *prefix is set to a palloc'd prefix string,
18271833
* or to NULL if no fixed prefix exists for the pattern.
18281834
* The return value distinguishes no fixed prefix, a partial prefix,
18291835
* or an exact-match-only pattern.
@@ -1858,7 +1864,7 @@ regex_fixed_prefix(char *patt, bool case_insensitive,
18581864
}
18591865

18601866
/* OK, allocate space for pattern */
1861-
*prefix = match = palloc(strlen(patt)+2);
1867+
*prefix = match = palloc(strlen(patt)+1);
18621868
match_pos = 0;
18631869

18641870
/* note start at pos 1 to skip leading ^ */
@@ -1906,11 +1912,10 @@ prefix_quals(Var *leftop, Oid expr_op,
19061912
List *result;
19071913
Oid datatype;
19081914
Oid oproid;
1909-
void *conval;
19101915
Const *con;
19111916
Oper *op;
19121917
Expr *expr;
1913-
int prefixlen;
1918+
char *greaterstr;
19141919

19151920
Assert(pstatus != Prefix_None);
19161921

@@ -1953,14 +1958,7 @@ prefix_quals(Var *leftop, Oid expr_op,
19531958
oproid = find_operator("=", datatype);
19541959
if (oproid == InvalidOid)
19551960
elog(ERROR, "prefix_quals: no = operator for type %u", datatype);
1956-
/* Note: we cheat a little by assuming that textin() will do for
1957-
* bpchar and varchar constants too...
1958-
*/
1959-
conval = (datatype == NAMEOID) ?
1960-
(void*) namein(prefix) : (void*) textin(prefix);
1961-
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
1962-
PointerGetDatum(conval),
1963-
false, false, false, false);
1961+
con = string_to_const(prefix, datatype);
19641962
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
19651963
expr = make_opclause(op, leftop, (Var *) con);
19661964
result = lcons(expr, NIL);
@@ -1975,43 +1973,92 @@ prefix_quals(Var *leftop, Oid expr_op,
19751973
oproid = find_operator(">=", datatype);
19761974
if (oproid == InvalidOid)
19771975
elog(ERROR, "prefix_quals: no >= operator for type %u", datatype);
1978-
conval = (datatype == NAMEOID) ?
1979-
(void*) namein(prefix) : (void*) textin(prefix);
1980-
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
1981-
PointerGetDatum(conval),
1982-
false, false, false, false);
1976+
con = string_to_const(prefix, datatype);
19831977
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
19841978
expr = make_opclause(op, leftop, (Var *) con);
19851979
result = lcons(expr, NIL);
19861980

19871981
/*
1988-
* In ASCII locale we say "x <= prefix\377". This does not
1989-
* work for non-ASCII collation orders, and it's not really
1990-
* right even for ASCII. FIX ME!
1991-
* Note we assume the passed prefix string is workspace with
1992-
* an extra byte, as created by the xxx_fixed_prefix routines above.
1982+
* If we can create a string larger than the prefix, say "x < greaterstr".
19931983
*/
1994-
#ifndef USE_LOCALE
1995-
prefixlen = strlen(prefix);
1996-
prefix[prefixlen] = '\377';
1997-
prefix[prefixlen+1] = '\0';
1984+
greaterstr = make_greater_string(prefix, datatype);
1985+
if (greaterstr)
1986+
{
1987+
oproid = find_operator("<", datatype);
1988+
if (oproid == InvalidOid)
1989+
elog(ERROR, "prefix_quals: no < operator for type %u", datatype);
1990+
con = string_to_const(greaterstr, datatype);
1991+
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
1992+
expr = make_opclause(op, leftop, (Var *) con);
1993+
result = lappend(result, expr);
1994+
pfree(greaterstr);
1995+
}
19981996

1999-
oproid = find_operator("<=", datatype);
2000-
if (oproid == InvalidOid)
2001-
elog(ERROR, "prefix_quals: no <= operator for type %u", datatype);
2002-
conval = (datatype == NAMEOID) ?
2003-
(void*) namein(prefix) : (void*) textin(prefix);
2004-
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
2005-
PointerGetDatum(conval),
2006-
false, false, false, false);
2007-
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
2008-
expr = make_opclause(op, leftop, (Var *) con);
2009-
result = lappend(result, expr);
1997+
return result;
1998+
}
1999+
2000+
/*
2001+
* Try to generate a string greater than the given string or any string it is
2002+
* a prefix of. If successful, return a palloc'd string; else return NULL.
2003+
*
2004+
* To work correctly in non-ASCII locales with weird collation orders,
2005+
* we cannot simply increment "foo" to "fop" --- we have to check whether
2006+
* we actually produced a string greater than the given one. If not,
2007+
* increment the righthand byte again and repeat. If we max out the righthand
2008+
* byte, truncate off the last character and start incrementing the next.
2009+
* For example, if "z" were the last character in the sort order, then we
2010+
* could produce "foo" as a string greater than "fonz".
2011+
*
2012+
* This could be rather slow in the worst case, but in most cases we won't
2013+
* have to try more than one or two strings before succeeding.
2014+
*
2015+
* XXX in a sufficiently weird locale, this might produce incorrect results?
2016+
* For example, in German I believe "ss" is treated specially --- if we are
2017+
* given "foos" and return "foot", will this actually be greater than "fooss"?
2018+
*/
2019+
static char *
2020+
make_greater_string(const char * str, Oid datatype)
2021+
{
2022+
char *workstr;
2023+
int len;
2024+
2025+
/* Make a modifiable copy, which will be our return value if successful */
2026+
workstr = pstrdup((char *) str);
2027+
2028+
while ((len = strlen(workstr)) > 0)
2029+
{
2030+
unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
2031+
2032+
/*
2033+
* Try to generate a larger string by incrementing the last byte.
2034+
*/
2035+
while (*lastchar < (unsigned char) 255)
2036+
{
2037+
(*lastchar)++;
2038+
if (string_lessthan(str, workstr, datatype))
2039+
return workstr; /* Success! */
2040+
}
2041+
/*
2042+
* Truncate off the last character, which might be more than 1 byte
2043+
* in MULTIBYTE case.
2044+
*/
2045+
#ifdef MULTIBYTE
2046+
len = pg_mbcliplen((const unsigned char *) workstr, len, len-1);
2047+
workstr[len] = '\0';
2048+
#else
2049+
*lastchar = '\0';
20102050
#endif
2051+
}
20112052

2012-
return result;
2053+
/* Failed... */
2054+
pfree(workstr);
2055+
return NULL;
20132056
}
20142057

2058+
/*
2059+
* Handy subroutines for match_special_index_operator() and friends.
2060+
*/
2061+
20152062
/* See if there is a binary op of the given name for the given datatype */
20162063
static Oid
20172064
find_operator(const char * opname, Oid datatype)
@@ -2027,3 +2074,74 @@ find_operator(const char * opname, Oid datatype)
20272074
return InvalidOid;
20282075
return optup->t_data->t_oid;
20292076
}
2077+
2078+
/*
2079+
* Generate a Datum of the appropriate type from a C string.
2080+
* Note that all of the supported types are pass-by-ref, so the
2081+
* returned value should be pfree'd if no longer needed.
2082+
*/
2083+
static Datum
2084+
string_to_datum(const char * str, Oid datatype)
2085+
{
2086+
/* We cheat a little by assuming that textin() will do for
2087+
* bpchar and varchar constants too...
2088+
*/
2089+
if (datatype == NAMEOID)
2090+
return PointerGetDatum(namein((char *) str));
2091+
else
2092+
return PointerGetDatum(textin((char *) str));
2093+
}
2094+
2095+
/*
2096+
* Generate a Const node of the appropriate type from a C string.
2097+
*/
2098+
static Const *
2099+
string_to_const(const char * str, Oid datatype)
2100+
{
2101+
Datum conval = string_to_datum(str, datatype);
2102+
2103+
return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
2104+
conval, false, false, false, false);
2105+
}
2106+
2107+
/*
2108+
* Test whether two strings are "<" according to the rules of the given
2109+
* datatype. We do this the hard way, ie, actually calling the type's
2110+
* "<" operator function, to ensure we get the right result...
2111+
*/
2112+
static bool
2113+
string_lessthan(const char * str1, const char * str2, Oid datatype)
2114+
{
2115+
Datum datum1 = string_to_datum(str1, datatype);
2116+
Datum datum2 = string_to_datum(str2, datatype);
2117+
bool result;
2118+
2119+
switch (datatype)
2120+
{
2121+
case TEXTOID:
2122+
result = text_lt((text *) datum1, (text *) datum2);
2123+
break;
2124+
2125+
case BPCHAROID:
2126+
result = bpcharlt((char *) datum1, (char *) datum2);
2127+
break;
2128+
2129+
case VARCHAROID:
2130+
result = varcharlt((char *) datum1, (char *) datum2);
2131+
break;
2132+
2133+
case NAMEOID:
2134+
result = namelt((NameData *) datum1, (NameData *) datum2);
2135+
break;
2136+
2137+
default:
2138+
elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
2139+
result = false;
2140+
break;
2141+
}
2142+
2143+
pfree(DatumGetPointer(datum1));
2144+
pfree(DatumGetPointer(datum2));
2145+
2146+
return result;
2147+
}

0 commit comments

Comments
 (0)