Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b2a01b9

Browse files
committed
Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.
Extraction of trigrams did not process LIKE escape sequences properly, which could misidentify trigrams near escapes and so produce incorrect index search results.

Fujii Masao
1 parent 51fed14 commit b2a01b9

File tree

3 files changed

+39
-19
lines changed

3 files changed

+39
-19
lines changed

contrib/pg_trgm/expected/pg_trgm.out

+12
Original file line numberDiff line numberDiff line change
@@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
34973497
abcdef
34983498
(1 row)
34993499

3500+
select * from test2 where t like E'%\\bcd%';
3501+
t
3502+
--------
3503+
abcdef
3504+
(1 row)
3505+
35003506
select * from test2 where t ilike '%BCD%';
35013507
t
35023508
--------
@@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
35393545
abcdef
35403546
(1 row)
35413547

3548+
select * from test2 where t like E'%\\bcd%';
3549+
t
3550+
--------
3551+
abcdef
3552+
(1 row)
3553+
35423554
select * from test2 where t ilike '%BCD%';
35433555
t
35443556
--------

contrib/pg_trgm/sql/pg_trgm.sql

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ explain (costs off)
4949
select * from test2 where t ilike '%BCD%';
5050
select * from test2 where t like '%BCD%';
5151
select * from test2 where t like '%bcd%';
52+
select * from test2 where t like E'%\\bcd%';
5253
select * from test2 where t ilike '%BCD%';
5354
select * from test2 where t ilike 'qua%';
5455
drop index test2_idx_gin;
@@ -60,5 +61,6 @@ explain (costs off)
6061
select * from test2 where t ilike '%BCD%';
6162
select * from test2 where t like '%BCD%';
6263
select * from test2 where t like '%bcd%';
64+
select * from test2 where t like E'%\\bcd%';
6365
select * from test2 where t ilike '%BCD%';
6466
select * from test2 where t ilike 'qua%';

contrib/pg_trgm/trgm_op.c

+25 -19
Original file line numberDiff line numberDiff line change
@@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
272272
const char *beginword = str;
273273
const char *endword;
274274
char *s = buf;
275-
bool in_wildcard_meta = false;
275+
bool in_leading_wildcard_meta = false;
276+
bool in_trailing_wildcard_meta = false;
276277
bool in_escape = false;
277278
int clen;
278279

279280
/*
280-
* Find the first word character remembering whether last character was
281-
* wildcard meta-character.
281+
* Find the first word character, remembering whether preceding character
282+
* was wildcard meta-character. Note that the in_escape state persists
283+
* from this loop to the next one, since we may exit at a word character
284+
* that is in_escape.
282285
*/
283286
while (beginword - str < lenstr)
284287
{
285288
if (in_escape)
286289
{
287-
in_escape = false;
288-
in_wildcard_meta = false;
289290
if (iswordchr(beginword))
290291
break;
292+
in_escape = false;
293+
in_leading_wildcard_meta = false;
291294
}
292295
else
293296
{
294297
if (ISESCAPECHAR(beginword))
295298
in_escape = true;
296299
else if (ISWILDCARDCHAR(beginword))
297-
in_wildcard_meta = true;
300+
in_leading_wildcard_meta = true;
298301
else if (iswordchr(beginword))
299302
break;
300303
else
301-
in_wildcard_meta = false;
304+
in_leading_wildcard_meta = false;
302305
}
303306
beginword += pg_mblen(beginword);
304307
}
@@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
310313
return NULL;
311314

312315
/*
313-
* Add left padding spaces if last character wasn't wildcard
316+
* Add left padding spaces if preceding character wasn't wildcard
314317
* meta-character.
315318
*/
316319
*charlen = 0;
317-
if (!in_wildcard_meta)
320+
if (!in_leading_wildcard_meta)
318321
{
319322
if (LPADDING > 0)
320323
{
@@ -333,31 +336,37 @@ get_wildcard_part(const char *str, int lenstr,
333336
* string boundary. Strip escapes during copy.
334337
*/
335338
endword = beginword;
336-
in_wildcard_meta = false;
337-
in_escape = false;
338339
while (endword - str < lenstr)
339340
{
340341
clen = pg_mblen(endword);
341342
if (in_escape)
342343
{
343-
in_escape = false;
344-
in_wildcard_meta = false;
345344
if (iswordchr(endword))
346345
{
347346
memcpy(s, endword, clen);
348347
(*charlen)++;
349348
s += clen;
350349
}
351350
else
351+
{
352+
/*
353+
* Back up endword to the escape character when stopping at
354+
* an escaped char, so that subsequent get_wildcard_part will
355+
* restart from the escape character. We assume here that
356+
* escape chars are single-byte.
357+
*/
358+
endword--;
352359
break;
360+
}
361+
in_escape = false;
353362
}
354363
else
355364
{
356365
if (ISESCAPECHAR(endword))
357366
in_escape = true;
358367
else if (ISWILDCARDCHAR(endword))
359368
{
360-
in_wildcard_meta = true;
369+
in_trailing_wildcard_meta = true;
361370
break;
362371
}
363372
else if (iswordchr(endword))
@@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
367376
s += clen;
368377
}
369378
else
370-
{
371-
in_wildcard_meta = false;
372379
break;
373-
}
374380
}
375381
endword += clen;
376382
}
377383

378384
/*
379-
* Add right padding spaces if last character wasn't wildcard
385+
* Add right padding spaces if next character isn't wildcard
380386
* meta-character.
381387
*/
382-
if (!in_wildcard_meta)
388+
if (!in_trailing_wildcard_meta)
383389
{
384390
if (RPADDING > 0)
385391
{

0 commit comments

Comments
 (0)