7
7
*
8
8
*
9
9
* IDENTIFICATION
10
- * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $
10
+ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.21 2009/03/02 15:10:09 teodor Exp $
11
11
*
12
12
*-------------------------------------------------------------------------
13
13
*/
@@ -240,12 +240,12 @@ typedef struct TParser
240
240
int lenstr ; /* length of mbstring */
241
241
#ifdef USE_WIDE_UPPER_LOWER
242
242
wchar_t * wstr ; /* wide character string */
243
- int lenwstr ; /* length of wsting */
243
+ pg_wchar * pgwstr ; /* wide character string for C-locale */
244
+ bool usewide ;
244
245
#endif
245
246
246
247
/* State of parse */
247
248
int charmaxlen ;
248
- bool usewide ;
249
249
TParserPosition * state ;
250
250
bool ignore ;
251
251
bool wanthost ;
@@ -299,13 +299,24 @@ TParserInit(char *str, int len)
299
299
if (prs -> charmaxlen > 1 )
300
300
{
301
301
prs -> usewide = true;
302
- prs -> wstr = (wchar_t * ) palloc (sizeof (wchar_t ) * (prs -> lenstr + 1 ));
303
- prs -> lenwstr = char2wchar (prs -> wstr , prs -> lenstr + 1 ,
304
- prs -> str , prs -> lenstr );
302
+ if ( lc_ctype_is_c () )
303
+ {
304
+ /*
305
+ * char2wchar doesn't work for C-locale and
306
+ * sizeof(pg_wchar) could be not equal to sizeof(wchar_t)
307
+ */
308
+ prs -> pgwstr = (pg_wchar * ) palloc (sizeof (pg_wchar ) * (prs -> lenstr + 1 ));
309
+ pg_mb2wchar_with_len (prs -> str , prs -> pgwstr , prs -> lenstr );
310
+ }
311
+ else
312
+ {
313
+ prs -> wstr = (wchar_t * ) palloc (sizeof (wchar_t ) * (prs -> lenstr + 1 ));
314
+ char2wchar (prs -> wstr , prs -> lenstr + 1 , prs -> str , prs -> lenstr );
315
+ }
305
316
}
306
317
else
307
- #endif
308
318
prs -> usewide = false;
319
+ #endif
309
320
310
321
prs -> state = newTParserPosition (NULL );
311
322
prs -> state -> state = TPS_Base ;
@@ -331,17 +342,21 @@ TParserClose(TParser *prs)
331
342
#ifdef USE_WIDE_UPPER_LOWER
332
343
if (prs -> wstr )
333
344
pfree (prs -> wstr );
345
+ if (prs -> pgwstr )
346
+ pfree (prs -> pgwstr );
334
347
#endif
335
348
336
349
pfree (prs );
337
350
}
338
351
339
352
/*
340
353
* Character-type support functions, equivalent to is* macros, but
341
- * working with any possible encodings and locales. Note,
342
- * that with multibyte encoding and C-locale isw* function may fail
343
- * or give wrong result. Note 2: multibyte encoding and C-locale
344
- * often are used for Asian languages
354
+ * working with any possible encodings and locales. Notes:
355
+ * - with multibyte encoding and C-locale isw* function may fail
356
+ * or give wrong result.
357
+ * - multibyte encoding and C-locale often are used for
358
+ * Asian languages.
359
+ * - if locale is C then we use pgwstr instead of wstr
345
360
*/
346
361
347
362
#ifdef USE_WIDE_UPPER_LOWER
@@ -352,14 +367,14 @@ p_is##type(TParser *prs) { \
352
367
Assert( prs->state ); \
353
368
if ( prs->usewide ) \
354
369
{ \
355
- if ( lc_ctype_is_c() ) \
356
- return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \
370
+ if ( prs->pgwstr ) \
371
+ return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
357
372
\
358
373
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
359
374
} \
360
375
\
361
376
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
362
- } \
377
+ } \
363
378
\
364
379
static int \
365
380
p_isnot##type(TParser *prs) { \
@@ -373,9 +388,9 @@ p_isalnum(TParser *prs)
373
388
374
389
if (prs -> usewide )
375
390
{
376
- if (lc_ctype_is_c () )
391
+ if (prs -> pgwstr )
377
392
{
378
- unsigned int c = * (prs -> wstr + prs -> state -> poschar );
393
+ unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
379
394
380
395
/*
381
396
* any non-ascii symbol with multibyte encoding with C-locale is
@@ -405,9 +420,9 @@ p_isalpha(TParser *prs)
405
420
406
421
if (prs -> usewide )
407
422
{
408
- if (lc_ctype_is_c () )
423
+ if (prs -> pgwstr )
409
424
{
410
- unsigned int c = * (prs -> wstr + prs -> state -> poschar );
425
+ unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
411
426
412
427
/*
413
428
* any non-ascii symbol with multibyte encoding with C-locale is
0 commit comments