Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2de946b

Browse files
committed
Improve the performance of LIKE/regex estimation in non-C locales, by making
make_greater_string() try harder to generate a string that's actually greater than its input string. Previously we just assumed that making a string that was memcmp-greater was enough, but it is easy to generate examples where this is not so when the locale is not C. Instead, loop until the relevant comparison function agrees that the generated string is greater than the input. Unfortunately this is probably not enough to guarantee that the generated string is greater than all extensions of the input, so we cannot relax the restriction to C locale for the LIKE/regex index optimization. But it should at least improve the odds of getting a useful selectivity estimate in prefix_selectivity(). Per example from Guillaume Smet. Backpatch to 8.1, mainly because that's what the complainant is using...
1 parent 9542287 commit 2de946b

File tree

3 files changed

+43
-27
lines changed

3 files changed

+43
-27
lines changed

src/backend/optimizer/path/indxpath.c

+8-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.222 2007/05/22 01:40:33 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.223 2007/11/07 22:37:24 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -2668,6 +2668,7 @@ prefix_quals(Node *leftop, Oid opfamily,
26682668
Oid datatype;
26692669
Oid oproid;
26702670
Expr *expr;
2671+
FmgrInfo ltproc;
26712672
Const *greaterstr;
26722673

26732674
Assert(pstatus != Pattern_Prefix_None);
@@ -2759,13 +2760,14 @@ prefix_quals(Node *leftop, Oid opfamily,
27592760
* "x < greaterstr".
27602761
*-------
27612762
*/
2762-
greaterstr = make_greater_string(prefix_const);
2763+
oproid = get_opfamily_member(opfamily, datatype, datatype,
2764+
BTLessStrategyNumber);
2765+
if (oproid == InvalidOid)
2766+
elog(ERROR, "no < operator for opfamily %u", opfamily);
2767+
fmgr_info(get_opcode(oproid), &ltproc);
2768+
greaterstr = make_greater_string(prefix_const, &ltproc);
27632769
if (greaterstr)
27642770
{
2765-
oproid = get_opfamily_member(opfamily, datatype, datatype,
2766-
BTLessStrategyNumber);
2767-
if (oproid == InvalidOid)
2768-
elog(ERROR, "no < operator for opfamily %u", opfamily);
27692771
expr = make_opclause(oproid, BOOLOID, false,
27702772
(Expr *) leftop, (Expr *) greaterstr);
27712773
result = lappend(result,

src/backend/utils/adt/selfuncs.c

+33-19
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.237 2007/11/07 21:00:37 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.238 2007/11/07 22:37:24 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -4302,17 +4302,17 @@ prefix_selectivity(VariableStatData *vardata,
43024302
* "x < greaterstr".
43034303
*-------
43044304
*/
4305-
greaterstrcon = make_greater_string(prefixcon);
4305+
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
4306+
BTLessStrategyNumber);
4307+
if (cmpopr == InvalidOid)
4308+
elog(ERROR, "no < operator for opfamily %u", opfamily);
4309+
fmgr_info(get_opcode(cmpopr), &opproc);
4310+
4311+
greaterstrcon = make_greater_string(prefixcon, &opproc);
43064312
if (greaterstrcon)
43074313
{
43084314
Selectivity topsel;
43094315

4310-
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
4311-
BTLessStrategyNumber);
4312-
if (cmpopr == InvalidOid)
4313-
elog(ERROR, "no < operator for opfamily %u", opfamily);
4314-
fmgr_info(get_opcode(cmpopr), &opproc);
4315-
43164316
topsel = ineq_histogram_selectivity(vardata, &opproc, false,
43174317
greaterstrcon->constvalue,
43184318
greaterstrcon->consttype);
@@ -4589,8 +4589,17 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
45894589
* in the form of a Const pointer; else return NULL.
45904590
*
45914591
* The key requirement here is that given a prefix string, say "foo",
4592-
* we must be able to generate another string "fop" that is greater
4593-
* than all strings "foobar" starting with "foo".
4592+
* we must be able to generate another string "fop" that is greater than
4593+
* all strings "foobar" starting with "foo". We can test that we have
4594+
* generated a string greater than the prefix string, but in non-C locales
4595+
* that is not a bulletproof guarantee that an extension of the string might
4596+
* not sort after it; an example is that "foo " is less than "foo!", but it
4597+
* is not clear that a "dictionary" sort ordering will consider "foo!" less
4598+
* than "foo bar". Therefore, this function should be used only for
4599+
* estimation purposes when working in a non-C locale.
4600+
*
4601+
* The caller must provide the appropriate "less than" comparison function
4602+
* for testing the strings.
45944603
*
45954604
* If we max out the righthand byte, truncate off the last character
45964605
* and start incrementing the next. For example, if "z" were the last
@@ -4599,20 +4608,15 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
45994608
*
46004609
* This could be rather slow in the worst case, but in most cases we
46014610
* won't have to try more than one or two strings before succeeding.
4602-
*
4603-
* NOTE: at present this assumes we are in the C locale, so that simple
4604-
* bytewise comparison applies. However, we might be in a multibyte
4605-
* encoding such as UTF8, so we do have to watch out for generating
4606-
* invalid encoding sequences.
46074611
*/
46084612
Const *
4609-
make_greater_string(const Const *str_const)
4613+
make_greater_string(const Const *str_const, FmgrInfo *ltproc)
46104614
{
46114615
Oid datatype = str_const->consttype;
46124616
char *workstr;
46134617
int len;
46144618

4615-
/* Get the string and a modifiable copy */
4619+
/* Get a modifiable copy of the string in C-string format */
46164620
if (datatype == NAMEOID)
46174621
{
46184622
workstr = DatumGetCString(DirectFunctionCall1(nameout,
@@ -4660,8 +4664,18 @@ make_greater_string(const Const *str_const)
46604664
else
46614665
workstr_const = string_to_bytea_const(workstr, len);
46624666

4663-
pfree(workstr);
4664-
return workstr_const;
4667+
if (DatumGetBool(FunctionCall2(ltproc,
4668+
str_const->constvalue,
4669+
workstr_const->constvalue)))
4670+
{
4671+
/* Successfully made a string larger than the input */
4672+
pfree(workstr);
4673+
return workstr_const;
4674+
}
4675+
4676+
/* No good, release unusable value and try again */
4677+
pfree(DatumGetPointer(workstr_const->constvalue));
4678+
pfree(workstr_const);
46654679
}
46664680

46674681
/* restore last byte so we don't confuse pg_mbcliplen */

src/include/utils/selfuncs.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.40 2007/08/31 23:35:22 tgl Exp $
11+
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.41 2007/11/07 22:37:24 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -118,7 +118,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
118118
Pattern_Type ptype,
119119
Const **prefix,
120120
Const **rest);
121-
extern Const *make_greater_string(const Const *str_const);
121+
extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc);
122122

123123
extern Datum eqsel(PG_FUNCTION_ARGS);
124124
extern Datum neqsel(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)