Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ad004ce

Browse files
committed
Change ILIKE to invoke lower() and then do plain LIKE comparison when working in a multibyte encoding. This fixes the problems exhibited in bug #1931 and other reports of ILIKE misbehavior in UTF8 encoding. It's a pretty grotty solution though --- we should rethink how to do it after we install better locale support, someday.
1 parent 6dc920d commit ad004ce

File tree

1 file changed: +100 -63 lines changed

src/backend/utils/adt/like.c

Lines changed: 100 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.64 2006/03/05 15:58:42 momjian Exp $
14+
* $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.65 2006/09/04 18:32:55 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -64,50 +64,23 @@ wchareq(char *p1, char *p2)
6464
return 1;
6565
}
6666

67-
/*--------------------
68-
* Support routine for MatchTextIC. Compares given multibyte streams
69-
* as wide characters ignoring case.
70-
* If they match, returns 1 otherwise returns 0.
71-
*--------------------
67+
/*
68+
* Formerly we had a routine iwchareq() here that tried to do case-insensitive
69+
* comparison of multibyte characters. It did not work at all, however,
70+
* because it relied on tolower() which has a single-byte API ... and
71+
* towlower() wouldn't be much better since we have no suitably cheap way
72+
* of getting a single character transformed to the system's wchar_t format.
73+
* So now, we just downcase the strings using lower() and apply regular LIKE
74+
* comparison. This should be revisited when we install better locale support.
75+
*
76+
* Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
77+
* Is it worth refactoring to avoid duplicated code? They might become
78+
* different again in the future.
7279
*/
73-
#define CHARMAX 0x80
74-
75-
static int
76-
iwchareq(char *p1, char *p2)
77-
{
78-
pg_wchar c1[2],
79-
c2[2];
80-
int l;
81-
82-
/*
83-
* short cut. if *p1 and *p2 is lower than CHARMAX, then we could assume
84-
* they are ASCII
85-
*/
86-
if ((unsigned char) *p1 < CHARMAX && (unsigned char) *p2 < CHARMAX)
87-
return (tolower((unsigned char) *p1) == tolower((unsigned char) *p2));
88-
89-
/*
90-
* if one of them is an ASCII while the other is not, then they must be
91-
* different characters
92-
*/
93-
else if ((unsigned char) *p1 < CHARMAX || (unsigned char) *p2 < CHARMAX)
94-
return 0;
95-
96-
/*
97-
* ok, p1 and p2 are both > CHARMAX, then they must be multibyte
98-
* characters
99-
*/
100-
l = pg_mblen(p1);
101-
(void) pg_mb2wchar_with_len(p1, c1, l);
102-
c1[0] = tolower(c1[0]);
103-
l = pg_mblen(p2);
104-
(void) pg_mb2wchar_with_len(p2, c2, l);
105-
c2[0] = tolower(c2[0]);
106-
return (c1[0] == c2[0]);
107-
}
10880

81+
/* Set up to compile like_match.c for multibyte characters */
10982
#define CHAREQ(p1, p2) wchareq(p1, p2)
110-
#define ICHAREQ(p1, p2) iwchareq(p1, p2)
83+
#define ICHAREQ(p1, p2) wchareq(p1, p2)
11184
#define NextChar(p, plen) \
11285
do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
11386
#define CopyAdvChar(dst, src, srclen) \
@@ -120,7 +93,9 @@ iwchareq(char *p1, char *p2)
12093
#define MatchText MBMatchText
12194
#define MatchTextIC MBMatchTextIC
12295
#define do_like_escape MB_do_like_escape
96+
12397
#include "like_match.c"
98+
12499
#undef CHAREQ
125100
#undef ICHAREQ
126101
#undef NextChar
@@ -129,15 +104,19 @@ iwchareq(char *p1, char *p2)
129104
#undef MatchTextIC
130105
#undef do_like_escape
131106

107+
/* Set up to compile like_match.c for single-byte characters */
132108
#define CHAREQ(p1, p2) (*(p1) == *(p2))
133109
#define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
134110
#define NextChar(p, plen) ((p)++, (plen)--)
135111
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
136112

113+
#include "like_match.c"
114+
115+
/* And some support for BYTEA */
137116
#define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))
138117
#define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
139118
#define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
140-
#include "like_match.c"
119+
141120

142121
/*
143122
* interface routines called by the function manager
@@ -296,15 +275,32 @@ nameiclike(PG_FUNCTION_ARGS)
296275
int slen,
297276
plen;
298277

299-
s = NameStr(*str);
300-
slen = strlen(s);
301-
p = VARDATA(pat);
302-
plen = (VARSIZE(pat) - VARHDRSZ);
303-
304278
if (pg_database_encoding_max_length() == 1)
279+
{
280+
s = NameStr(*str);
281+
slen = strlen(s);
282+
p = VARDATA(pat);
283+
plen = (VARSIZE(pat) - VARHDRSZ);
305284
result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
285+
}
306286
else
287+
{
288+
/* Force inputs to lower case to achieve case insensitivity */
289+
text *strtext;
290+
291+
strtext = DatumGetTextP(DirectFunctionCall1(name_text,
292+
NameGetDatum(str)));
293+
strtext = DatumGetTextP(DirectFunctionCall1(lower,
294+
PointerGetDatum(strtext)));
295+
pat = DatumGetTextP(DirectFunctionCall1(lower,
296+
PointerGetDatum(pat)));
297+
298+
s = VARDATA(strtext);
299+
slen = (VARSIZE(strtext) - VARHDRSZ);
300+
p = VARDATA(pat);
301+
plen = (VARSIZE(pat) - VARHDRSZ);
307302
result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
303+
}
308304

309305
PG_RETURN_BOOL(result);
310306
}
@@ -320,15 +316,32 @@ nameicnlike(PG_FUNCTION_ARGS)
320316
int slen,
321317
plen;
322318

323-
s = NameStr(*str);
324-
slen = strlen(s);
325-
p = VARDATA(pat);
326-
plen = (VARSIZE(pat) - VARHDRSZ);
327-
328319
if (pg_database_encoding_max_length() == 1)
320+
{
321+
s = NameStr(*str);
322+
slen = strlen(s);
323+
p = VARDATA(pat);
324+
plen = (VARSIZE(pat) - VARHDRSZ);
329325
result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
326+
}
330327
else
328+
{
329+
/* Force inputs to lower case to achieve case insensitivity */
330+
text *strtext;
331+
332+
strtext = DatumGetTextP(DirectFunctionCall1(name_text,
333+
NameGetDatum(str)));
334+
strtext = DatumGetTextP(DirectFunctionCall1(lower,
335+
PointerGetDatum(strtext)));
336+
pat = DatumGetTextP(DirectFunctionCall1(lower,
337+
PointerGetDatum(pat)));
338+
339+
s = VARDATA(strtext);
340+
slen = (VARSIZE(strtext) - VARHDRSZ);
341+
p = VARDATA(pat);
342+
plen = (VARSIZE(pat) - VARHDRSZ);
331343
result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
344+
}
332345

333346
PG_RETURN_BOOL(result);
334347
}
@@ -344,15 +357,27 @@ texticlike(PG_FUNCTION_ARGS)
344357
int slen,
345358
plen;
346359

347-
s = VARDATA(str);
348-
slen = (VARSIZE(str) - VARHDRSZ);
349-
p = VARDATA(pat);
350-
plen = (VARSIZE(pat) - VARHDRSZ);
351-
352360
if (pg_database_encoding_max_length() == 1)
361+
{
362+
s = VARDATA(str);
363+
slen = (VARSIZE(str) - VARHDRSZ);
364+
p = VARDATA(pat);
365+
plen = (VARSIZE(pat) - VARHDRSZ);
353366
result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
367+
}
354368
else
369+
{
370+
/* Force inputs to lower case to achieve case insensitivity */
371+
str = DatumGetTextP(DirectFunctionCall1(lower,
372+
PointerGetDatum(str)));
373+
pat = DatumGetTextP(DirectFunctionCall1(lower,
374+
PointerGetDatum(pat)));
375+
s = VARDATA(str);
376+
slen = (VARSIZE(str) - VARHDRSZ);
377+
p = VARDATA(pat);
378+
plen = (VARSIZE(pat) - VARHDRSZ);
355379
result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
380+
}
356381

357382
PG_RETURN_BOOL(result);
358383
}
@@ -368,15 +393,27 @@ texticnlike(PG_FUNCTION_ARGS)
368393
int slen,
369394
plen;
370395

371-
s = VARDATA(str);
372-
slen = (VARSIZE(str) - VARHDRSZ);
373-
p = VARDATA(pat);
374-
plen = (VARSIZE(pat) - VARHDRSZ);
375-
376396
if (pg_database_encoding_max_length() == 1)
397+
{
398+
s = VARDATA(str);
399+
slen = (VARSIZE(str) - VARHDRSZ);
400+
p = VARDATA(pat);
401+
plen = (VARSIZE(pat) - VARHDRSZ);
377402
result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
403+
}
378404
else
405+
{
406+
/* Force inputs to lower case to achieve case insensitivity */
407+
str = DatumGetTextP(DirectFunctionCall1(lower,
408+
PointerGetDatum(str)));
409+
pat = DatumGetTextP(DirectFunctionCall1(lower,
410+
PointerGetDatum(pat)));
411+
s = VARDATA(str);
412+
slen = (VARSIZE(str) - VARHDRSZ);
413+
p = VARDATA(pat);
414+
plen = (VARSIZE(pat) - VARHDRSZ);
379415
result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
416+
}
380417

381418
PG_RETURN_BOOL(result);
382419
}

0 commit comments

Comments
 (0)