Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit fb1a188

Browse files
committed
Remove ts_locale.c's lowerstr()
lowerstr() and lowerstr_with_len() in ts_locale.c do the same thing as str_tolower() that the rest of the system uses, except that the former don't use the common locale provider framework but instead use the global libc locale settings.

This patch replaces uses of lowerstr*() with str_tolower(..., DEFAULT_COLLATION_OID). For instances that use a libc locale globally, this will result in exactly the same behavior. For instances that use other locale providers, you now get consistent behavior and are no longer dependent on the libc locale settings (for this case; there are others).

Most uses of these functions are for processing dictionary and configuration files. In those cases, using the default collation seems appropriate. At least we don't have a more specific collation available. But the code in contrib/pg_trgm should really depend on the collation of the columns being processed. This is not done here; it can be done in a separate patch.

(You can probably construct some edge cases where this change would create some locale-related upgrade incompatibility, for example if before you used a combination of ICU and a differently-behaving libc locale. We can document this in the release notes, but I don't think there is anything more we can do about this.)

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org
1 parent d3aad4a commit fb1a188

File tree

12 files changed

+43
-121
lines changed

12 files changed

+43
-121
lines changed

contrib/dict_xsyn/dict_xsyn.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414

1515
#include <ctype.h>
1616

17+
#include "catalog/pg_collation_d.h"
1718
#include "commands/defrem.h"
1819
#include "tsearch/ts_locale.h"
1920
#include "tsearch/ts_public.h"
21+
#include "utils/formatting.h"
2022

2123
PG_MODULE_MAGIC;
2224

@@ -93,7 +95,7 @@ read_dictionary(DictSyn *d, const char *filename)
9395
if (*line == '\0')
9496
continue;
9597

96-
value = lowerstr(line);
98+
value = str_tolower(line, strlen(line), DEFAULT_COLLATION_OID);
9799
pfree(line);
98100

99101
pos = value;
@@ -210,7 +212,7 @@ dxsyn_lexize(PG_FUNCTION_ARGS)
210212
{
211213
char *temp = pnstrdup(in, length);
212214

213-
word.key = lowerstr(temp);
215+
word.key = str_tolower(temp, length, DEFAULT_COLLATION_OID);
214216
pfree(temp);
215217
word.value = NULL;
216218
}

contrib/pg_trgm/trgm_op.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55

66
#include <ctype.h>
77

8+
#include "catalog/pg_collation_d.h"
89
#include "catalog/pg_type.h"
910
#include "common/int.h"
1011
#include "lib/qunique.h"
1112
#include "miscadmin.h"
1213
#include "trgm.h"
1314
#include "tsearch/ts_locale.h"
15+
#include "utils/formatting.h"
1416
#include "utils/guc.h"
1517
#include "utils/lsyscache.h"
1618
#include "utils/memutils.h"
@@ -303,7 +305,7 @@ generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
303305
while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
304306
{
305307
#ifdef IGNORECASE
306-
bword = lowerstr_with_len(bword, eword - bword);
308+
bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
307309
bytelen = strlen(bword);
308310
#else
309311
bytelen = eword - bword;
@@ -899,7 +901,7 @@ generate_wildcard_trgm(const char *str, int slen)
899901
buf, &bytelen, &charlen)) != NULL)
900902
{
901903
#ifdef IGNORECASE
902-
buf2 = lowerstr_with_len(buf, bytelen);
904+
buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
903905
bytelen = strlen(buf2);
904906
#else
905907
buf2 = buf;

contrib/pg_trgm/trgm_regexp.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -191,9 +191,11 @@
191191
*/
192192
#include "postgres.h"
193193

194+
#include "catalog/pg_collation_d.h"
194195
#include "regex/regexport.h"
195196
#include "trgm.h"
196197
#include "tsearch/ts_locale.h"
198+
#include "utils/formatting.h"
197199
#include "utils/hsearch.h"
198200
#include "utils/memutils.h"
199201
#include "varatt.h"
@@ -847,16 +849,16 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
847849
* within each color, since we used the REG_ICASE option; so there's no
848850
* need to process the uppercase version.
849851
*
850-
* XXX this code is dependent on the assumption that lowerstr() works the
851-
* same as the regex engine's internal case folding machinery. Might be
852-
* wiser to expose pg_wc_tolower and test whether c == pg_wc_tolower(c).
853-
* On the other hand, the trigrams in the index were created using
854-
* lowerstr(), so we're probably screwed if there's any incompatibility
855-
* anyway.
852+
* XXX this code is dependent on the assumption that str_tolower() works
853+
* the same as the regex engine's internal case folding machinery. Might
854+
* be wiser to expose pg_wc_tolower and test whether c ==
855+
* pg_wc_tolower(c). On the other hand, the trigrams in the index were
856+
* created using str_tolower(), so we're probably screwed if there's any
857+
* incompatibility anyway.
856858
*/
857859
#ifdef IGNORECASE
858860
{
859-
char *lowerCased = lowerstr(s);
861+
char *lowerCased = str_tolower(s, strlen(s), DEFAULT_COLLATION_OID);
860862

861863
if (strcmp(lowerCased, s) != 0)
862864
{

src/backend/snowball/dict_snowball.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
*/
1313
#include "postgres.h"
1414

15+
#include "catalog/pg_collation_d.h"
1516
#include "commands/defrem.h"
16-
#include "tsearch/ts_locale.h"
17+
#include "mb/pg_wchar.h"
1718
#include "tsearch/ts_public.h"
19+
#include "utils/formatting.h"
1820

1921
/* Some platforms define MAXINT and/or MININT, causing conflicts */
2022
#ifdef MAXINT
@@ -236,7 +238,7 @@ dsnowball_init(PG_FUNCTION_ARGS)
236238
ereport(ERROR,
237239
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
238240
errmsg("multiple StopWords parameters")));
239-
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
241+
readstoplist(defGetString(defel), &d->stoplist, str_tolower);
240242
stoploaded = true;
241243
}
242244
else if (strcmp(defel->defname, "language") == 0)
@@ -272,7 +274,7 @@ dsnowball_lexize(PG_FUNCTION_ARGS)
272274
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
273275
char *in = (char *) PG_GETARG_POINTER(1);
274276
int32 len = PG_GETARG_INT32(2);
275-
char *txt = lowerstr_with_len(in, len);
277+
char *txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
276278
TSLexeme *res = palloc0(sizeof(TSLexeme) * 2);
277279

278280
/*

src/backend/tsearch/dict_ispell.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@
1313
*/
1414
#include "postgres.h"
1515

16+
#include "catalog/pg_collation_d.h"
1617
#include "commands/defrem.h"
1718
#include "tsearch/dicts/spell.h"
18-
#include "tsearch/ts_locale.h"
1919
#include "tsearch/ts_public.h"
2020
#include "utils/fmgrprotos.h"
21+
#include "utils/formatting.h"
2122

2223

2324
typedef struct
@@ -72,7 +73,7 @@ dispell_init(PG_FUNCTION_ARGS)
7273
ereport(ERROR,
7374
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
7475
errmsg("multiple StopWords parameters")));
75-
readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
76+
readstoplist(defGetString(defel), &(d->stoplist), str_tolower);
7677
stoploaded = true;
7778
}
7879
else
@@ -121,7 +122,7 @@ dispell_lexize(PG_FUNCTION_ARGS)
121122
if (len <= 0)
122123
PG_RETURN_POINTER(NULL);
123124

124-
txt = lowerstr_with_len(in, len);
125+
txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
125126
res = NINormalizeWord(&(d->obj), txt);
126127

127128
if (res == NULL)

src/backend/tsearch/dict_simple.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
*/
1414
#include "postgres.h"
1515

16+
#include "catalog/pg_collation_d.h"
1617
#include "commands/defrem.h"
17-
#include "tsearch/ts_locale.h"
1818
#include "tsearch/ts_public.h"
1919
#include "utils/fmgrprotos.h"
20+
#include "utils/formatting.h"
2021

2122

2223
typedef struct
@@ -47,7 +48,7 @@ dsimple_init(PG_FUNCTION_ARGS)
4748
ereport(ERROR,
4849
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4950
errmsg("multiple StopWords parameters")));
50-
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
51+
readstoplist(defGetString(defel), &d->stoplist, str_tolower);
5152
stoploaded = true;
5253
}
5354
else if (strcmp(defel->defname, "accept") == 0)
@@ -80,7 +81,7 @@ dsimple_lexize(PG_FUNCTION_ARGS)
8081
char *txt;
8182
TSLexeme *res;
8283

83-
txt = lowerstr_with_len(in, len);
84+
txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
8485

8586
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
8687
{

src/backend/tsearch/dict_synonym.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
*/
1414
#include "postgres.h"
1515

16+
#include "catalog/pg_collation_d.h"
1617
#include "commands/defrem.h"
1718
#include "tsearch/ts_locale.h"
1819
#include "tsearch/ts_public.h"
1920
#include "utils/fmgrprotos.h"
21+
#include "utils/formatting.h"
2022

2123
typedef struct
2224
{
@@ -183,8 +185,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
183185
}
184186
else
185187
{
186-
d->syn[cur].in = lowerstr(starti);
187-
d->syn[cur].out = lowerstr(starto);
188+
d->syn[cur].in = str_tolower(starti, strlen(starti), DEFAULT_COLLATION_OID);
189+
d->syn[cur].out = str_tolower(starto, strlen(starto), DEFAULT_COLLATION_OID);
188190
}
189191

190192
d->syn[cur].outlen = strlen(starto);
@@ -223,7 +225,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
223225
if (d->case_sensitive)
224226
key.in = pnstrdup(in, len);
225227
else
226-
key.in = lowerstr_with_len(in, len);
228+
key.in = str_tolower(in, len, DEFAULT_COLLATION_OID);
227229

228230
key.out = NULL;
229231

src/backend/tsearch/spell.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
#include "miscadmin.h"
6767
#include "tsearch/dicts/spell.h"
6868
#include "tsearch/ts_locale.h"
69+
#include "utils/formatting.h"
6970
#include "utils/memutils.h"
7071

7172

@@ -169,7 +170,7 @@ cpstrdup(IspellDict *Conf, const char *str)
169170

170171

171172
/*
172-
* Apply lowerstr(), producing a temporary result (in the buildCxt).
173+
* Apply str_tolower(), producing a temporary result (in the buildCxt).
173174
*/
174175
static char *
175176
lowerstr_ctx(IspellDict *Conf, const char *src)
@@ -178,7 +179,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src)
178179
char *dst;
179180

180181
saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
181-
dst = lowerstr(src);
182+
dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
182183
MemoryContextSwitchTo(saveCtx);
183184

184185
return dst;
@@ -1449,7 +1450,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
14491450

14501451
while ((recoded = tsearch_readline(&trst)) != NULL)
14511452
{
1452-
pstr = lowerstr(recoded);
1453+
pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
14531454

14541455
/* Skip comments and empty lines */
14551456
if (*pstr == '#' || *pstr == '\n')

src/backend/tsearch/ts_locale.c

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -197,92 +197,3 @@ tsearch_readline_callback(void *arg)
197197
stp->lineno,
198198
stp->filename);
199199
}
200-
201-
202-
/*
203-
* lowerstr --- fold null-terminated string to lower case
204-
*
205-
* Returned string is palloc'd
206-
*/
207-
char *
208-
lowerstr(const char *str)
209-
{
210-
return lowerstr_with_len(str, strlen(str));
211-
}
212-
213-
/*
214-
* lowerstr_with_len --- fold string to lower case
215-
*
216-
* Input string need not be null-terminated.
217-
*
218-
* Returned string is palloc'd
219-
*/
220-
char *
221-
lowerstr_with_len(const char *str, int len)
222-
{
223-
char *out;
224-
pg_locale_t mylocale = 0; /* TODO */
225-
226-
if (len == 0)
227-
return pstrdup("");
228-
229-
/*
230-
* Use wide char code only when max encoding length > 1 and ctype != C.
231-
* Some operating systems fail with multi-byte encodings and a C locale.
232-
* Also, for a C locale there is no need to process as multibyte. From
233-
* backend/utils/adt/oracle_compat.c Teodor
234-
*/
235-
if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
236-
{
237-
wchar_t *wstr,
238-
*wptr;
239-
int wlen;
240-
241-
/*
242-
* alloc number of wchar_t for worst case, len contains number of
243-
* bytes >= number of characters and alloc 1 wchar_t for 0, because
244-
* wchar2char wants zero-terminated string
245-
*/
246-
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
247-
248-
wlen = char2wchar(wstr, len + 1, str, len, mylocale);
249-
Assert(wlen <= len);
250-
251-
while (*wptr)
252-
{
253-
*wptr = towlower((wint_t) *wptr);
254-
wptr++;
255-
}
256-
257-
/*
258-
* Alloc result string for worst case + '\0'
259-
*/
260-
len = pg_database_encoding_max_length() * wlen + 1;
261-
out = (char *) palloc(len);
262-
263-
wlen = wchar2char(out, wstr, len, mylocale);
264-
265-
pfree(wstr);
266-
267-
if (wlen < 0)
268-
ereport(ERROR,
269-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
270-
errmsg("conversion from wchar_t to server encoding failed: %m")));
271-
Assert(wlen < len);
272-
}
273-
else
274-
{
275-
const char *ptr = str;
276-
char *outptr;
277-
278-
outptr = out = (char *) palloc(sizeof(char) * (len + 1));
279-
while ((ptr - str) < len && *ptr)
280-
{
281-
*outptr++ = tolower(TOUCHAR(ptr));
282-
ptr++;
283-
}
284-
*outptr = '\0';
285-
}
286-
287-
return out;
288-
}

src/backend/tsearch/ts_utils.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include <ctype.h>
1818

19+
#include "catalog/pg_collation_d.h"
1920
#include "miscadmin.h"
2021
#include "tsearch/ts_locale.h"
2122
#include "tsearch/ts_public.h"
@@ -65,7 +66,7 @@ get_tsearch_config_filename(const char *basename,
6566
* or palloc a new version.
6667
*/
6768
void
68-
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
69+
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *, size_t, Oid))
6970
{
7071
char **stop = NULL;
7172

@@ -115,7 +116,7 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
115116

116117
if (wordop)
117118
{
118-
stop[s->len] = wordop(line);
119+
stop[s->len] = wordop(line, strlen(line), DEFAULT_COLLATION_OID);
119120
if (stop[s->len] != line)
120121
pfree(line);
121122
}

src/include/tsearch/ts_locale.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,6 @@ typedef struct
4242
extern int t_isalpha(const char *ptr);
4343
extern int t_isalnum(const char *ptr);
4444

45-
extern char *lowerstr(const char *str);
46-
extern char *lowerstr_with_len(const char *str, int len);
47-
4845
extern bool tsearch_readline_begin(tsearch_readline_state *stp,
4946
const char *filename);
5047
extern char *tsearch_readline(tsearch_readline_state *stp);

src/include/tsearch/ts_public.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ typedef struct
104104
} StopList;
105105

106106
extern void readstoplist(const char *fname, StopList *s,
107-
char *(*wordop) (const char *));
107+
char *(*wordop) (const char *, size_t, Oid));
108108
extern bool searchstoplist(StopList *s, char *key);
109109

110110
/*

0 commit comments

Comments
 (0)