Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 32032d4

Browse files
committed
Fix usage of char2wchar/wchar2char. Changes:
- pg_wchar and wchar_t can have different sizes, so char2wchar no longer calls pg_mb2wchar_with_len, to prevent an out-of-bounds memory bug. - Make char2wchar/wchar2char symmetric: they should no longer be called with C-locale, because mbstowcs/wcstombs often do not work correctly with C-locale. - Text parser uses pg_mb2wchar_with_len directly in case of C-locale and multibyte encoding. Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and following discussion. Backpatch to 8.2, when multibyte support was implemented in tsearch.
1 parent 876b37d commit 32032d4

File tree

2 files changed

+42
-34
lines changed

2 files changed

+42
-34
lines changed

src/backend/tsearch/wparser_def.c

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.21 2009/03/02 15:10:09 teodor Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -240,12 +240,12 @@ typedef struct TParser
240240
int lenstr; /* length of mbstring */
241241
#ifdef USE_WIDE_UPPER_LOWER
242242
wchar_t *wstr; /* wide character string */
243-
int lenwstr; /* length of wsting */
243+
pg_wchar *pgwstr; /* wide character string for C-locale */
244+
bool usewide;
244245
#endif
245246

246247
/* State of parse */
247248
int charmaxlen;
248-
bool usewide;
249249
TParserPosition *state;
250250
bool ignore;
251251
bool wanthost;
@@ -299,13 +299,24 @@ TParserInit(char *str, int len)
299299
if (prs->charmaxlen > 1)
300300
{
301301
prs->usewide = true;
302-
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
303-
prs->lenwstr = char2wchar(prs->wstr, prs->lenstr + 1,
304-
prs->str, prs->lenstr);
302+
if ( lc_ctype_is_c() )
303+
{
304+
/*
305+
* char2wchar doesn't work for C-locale and
306+
* sizeof(pg_wchar) might not be equal to sizeof(wchar_t)
307+
*/
308+
prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
309+
pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
310+
}
311+
else
312+
{
313+
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
314+
char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr);
315+
}
305316
}
306317
else
307-
#endif
308318
prs->usewide = false;
319+
#endif
309320

310321
prs->state = newTParserPosition(NULL);
311322
prs->state->state = TPS_Base;
@@ -331,17 +342,21 @@ TParserClose(TParser *prs)
331342
#ifdef USE_WIDE_UPPER_LOWER
332343
if (prs->wstr)
333344
pfree(prs->wstr);
345+
if (prs->pgwstr)
346+
pfree(prs->pgwstr);
334347
#endif
335348

336349
pfree(prs);
337350
}
338351

339352
/*
340353
* Character-type support functions, equivalent to is* macros, but
341-
* working with any possible encodings and locales. Note,
342-
* that with multibyte encoding and C-locale isw* function may fail
343-
* or give wrong result. Note 2: multibyte encoding and C-locale
344-
* often are used for Asian languages
354+
* working with any possible encodings and locales. Notes:
355+
* - with multibyte encoding and C-locale isw* function may fail
356+
* or give wrong result.
357+
* - multibyte encoding and C-locale often are used for
358+
* Asian languages.
359+
* - if locale is C then we use pgwstr instead of wstr
345360
*/
346361

347362
#ifdef USE_WIDE_UPPER_LOWER
@@ -352,14 +367,14 @@ p_is##type(TParser *prs) { \
352367
Assert( prs->state ); \
353368
if ( prs->usewide ) \
354369
{ \
355-
if ( lc_ctype_is_c() ) \
356-
return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \
370+
if ( prs->pgwstr ) \
371+
return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
357372
\
358373
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
359374
} \
360375
\
361376
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
362-
} \
377+
} \
363378
\
364379
static int \
365380
p_isnot##type(TParser *prs) { \
@@ -373,9 +388,9 @@ p_isalnum(TParser *prs)
373388

374389
if (prs->usewide)
375390
{
376-
if (lc_ctype_is_c())
391+
if (prs->pgwstr)
377392
{
378-
unsigned int c = *(prs->wstr + prs->state->poschar);
393+
unsigned int c = *(prs->pgwstr + prs->state->poschar);
379394

380395
/*
381396
* any non-ascii symbol with multibyte encoding with C-locale is
@@ -405,9 +420,9 @@ p_isalpha(TParser *prs)
405420

406421
if (prs->usewide)
407422
{
408-
if (lc_ctype_is_c())
423+
if (prs->pgwstr)
409424
{
410-
unsigned int c = *(prs->wstr + prs->state->poschar);
425+
unsigned int c = *(prs->pgwstr + prs->state->poschar);
411426

412427
/*
413428
* any non-ascii symbol with multibyte encoding with C-locale is

src/backend/utils/mb/mbutils.c

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
66
*
7-
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.78 2009/01/22 10:09:48 mha Exp $
7+
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.79 2009/03/02 15:10:09 teodor Exp $
88
*/
99
#include "postgres.h"
1010

@@ -601,7 +601,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen)
601601
}
602602
else
603603
#endif /* WIN32 */
604+
{
605+
Assert( !lc_ctype_is_c() );
604606
result = wcstombs(to, from, tolen);
607+
}
605608
return result;
606609
}
607610

@@ -647,22 +650,12 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
647650
else
648651
#endif /* WIN32 */
649652
{
650-
if (lc_ctype_is_c())
651-
{
652-
/*
653-
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
654-
* allocated with sufficient space
655-
*/
656-
result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
657-
}
658-
else
659-
{
660-
/* mbstowcs requires ending '\0' */
661-
char *str = pnstrdup(from, fromlen);
653+
/* mbstowcs requires ending '\0' */
654+
char *str = pnstrdup(from, fromlen);
662655

663-
result = mbstowcs(to, str, tolen);
664-
pfree(str);
665-
}
656+
Assert( !lc_ctype_is_c() );
657+
result = mbstowcs(to, str, tolen);
658+
pfree(str);
666659
}
667660

668661
if (result == -1)

0 commit comments

Comments
 (0)