|
47 | 47 | * permission to use and distribute the software in accordance with the
|
48 | 48 | * terms specified in this license.
|
49 | 49 | *
|
50 |
| - * $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $ |
| 50 | + * $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.10 2009/12/01 21:00:24 tgl Exp $ |
51 | 51 | */
|
52 | 52 |
|
53 | 53 | /* ASCII character-name table */
|
@@ -349,75 +349,167 @@ static const struct cname
|
349 | 349 | }
|
350 | 350 | };
|
351 | 351 |
|
| 352 | + |
352 | 353 | /*
|
353 |
| - * some ctype functions with non-ascii-char guard |
| 354 | + * ctype functions adapted to work on pg_wchar (a/k/a chr) |
| 355 | + * |
| 356 | + * When working in UTF8 encoding, we use the <wctype.h> functions if |
| 357 | + * available. This assumes that every platform uses Unicode codepoints |
| 358 | + * directly as the wchar_t representation of Unicode. On some platforms |
| 359 | + * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. |
| 360 | + * |
| 361 | + * In all other encodings, we use the <ctype.h> functions for pg_wchar |
| 362 | + * values up to 255, and punt for values above that. This is only 100% |
| 363 | + * correct in single-byte encodings such as LATINn. However, non-Unicode |
| 364 | + * multibyte encodings are mostly Far Eastern character sets for which the |
| 365 | + * properties being tested here aren't relevant for higher code values anyway. |
| 366 | + * |
| 367 | + * NB: the coding here assumes pg_wchar is an unsigned type. |
354 | 368 | */
|
| 369 | + |
355 | 370 | static int
|
356 | 371 | pg_wc_isdigit(pg_wchar c)
|
357 | 372 | {
|
358 |
| - return (c >= 0 && c <= UCHAR_MAX && isdigit((unsigned char) c)); |
| 373 | +#ifdef USE_WIDE_UPPER_LOWER |
| 374 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 375 | + { |
| 376 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 377 | + return iswdigit((wint_t) c); |
| 378 | + } |
| 379 | +#endif |
| 380 | + return (c <= (pg_wchar) UCHAR_MAX && isdigit((unsigned char) c)); |
359 | 381 | }
|
360 | 382 |
|
361 | 383 | static int
|
362 | 384 | pg_wc_isalpha(pg_wchar c)
|
363 | 385 | {
|
364 |
| - return (c >= 0 && c <= UCHAR_MAX && isalpha((unsigned char) c)); |
| 386 | +#ifdef USE_WIDE_UPPER_LOWER |
| 387 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 388 | + { |
| 389 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 390 | + return iswalpha((wint_t) c); |
| 391 | + } |
| 392 | +#endif |
| 393 | + return (c <= (pg_wchar) UCHAR_MAX && isalpha((unsigned char) c)); |
365 | 394 | }
|
366 | 395 |
|
367 | 396 | static int
|
368 | 397 | pg_wc_isalnum(pg_wchar c)
|
369 | 398 | {
|
370 |
| - return (c >= 0 && c <= UCHAR_MAX && isalnum((unsigned char) c)); |
| 399 | +#ifdef USE_WIDE_UPPER_LOWER |
| 400 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 401 | + { |
| 402 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 403 | + return iswalnum((wint_t) c); |
| 404 | + } |
| 405 | +#endif |
| 406 | + return (c <= (pg_wchar) UCHAR_MAX && isalnum((unsigned char) c)); |
371 | 407 | }
|
372 | 408 |
|
373 | 409 | static int
|
374 | 410 | pg_wc_isupper(pg_wchar c)
|
375 | 411 | {
|
376 |
| - return (c >= 0 && c <= UCHAR_MAX && isupper((unsigned char) c)); |
| 412 | +#ifdef USE_WIDE_UPPER_LOWER |
| 413 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 414 | + { |
| 415 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 416 | + return iswupper((wint_t) c); |
| 417 | + } |
| 418 | +#endif |
| 419 | + return (c <= (pg_wchar) UCHAR_MAX && isupper((unsigned char) c)); |
377 | 420 | }
|
378 | 421 |
|
379 | 422 | static int
|
380 | 423 | pg_wc_islower(pg_wchar c)
|
381 | 424 | {
|
382 |
| - return (c >= 0 && c <= UCHAR_MAX && islower((unsigned char) c)); |
| 425 | +#ifdef USE_WIDE_UPPER_LOWER |
| 426 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 427 | + { |
| 428 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 429 | + return iswlower((wint_t) c); |
| 430 | + } |
| 431 | +#endif |
| 432 | + return (c <= (pg_wchar) UCHAR_MAX && islower((unsigned char) c)); |
383 | 433 | }
|
384 | 434 |
|
385 | 435 | static int
|
386 | 436 | pg_wc_isgraph(pg_wchar c)
|
387 | 437 | {
|
388 |
| - return (c >= 0 && c <= UCHAR_MAX && isgraph((unsigned char) c)); |
| 438 | +#ifdef USE_WIDE_UPPER_LOWER |
| 439 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 440 | + { |
| 441 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 442 | + return iswgraph((wint_t) c); |
| 443 | + } |
| 444 | +#endif |
| 445 | + return (c <= (pg_wchar) UCHAR_MAX && isgraph((unsigned char) c)); |
389 | 446 | }
|
390 | 447 |
|
391 | 448 | static int
|
392 | 449 | pg_wc_isprint(pg_wchar c)
|
393 | 450 | {
|
394 |
| - return (c >= 0 && c <= UCHAR_MAX && isprint((unsigned char) c)); |
| 451 | +#ifdef USE_WIDE_UPPER_LOWER |
| 452 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 453 | + { |
| 454 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 455 | + return iswprint((wint_t) c); |
| 456 | + } |
| 457 | +#endif |
| 458 | + return (c <= (pg_wchar) UCHAR_MAX && isprint((unsigned char) c)); |
395 | 459 | }
|
396 | 460 |
|
397 | 461 | static int
|
398 | 462 | pg_wc_ispunct(pg_wchar c)
|
399 | 463 | {
|
400 |
| - return (c >= 0 && c <= UCHAR_MAX && ispunct((unsigned char) c)); |
| 464 | +#ifdef USE_WIDE_UPPER_LOWER |
| 465 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 466 | + { |
| 467 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 468 | + return iswpunct((wint_t) c); |
| 469 | + } |
| 470 | +#endif |
| 471 | + return (c <= (pg_wchar) UCHAR_MAX && ispunct((unsigned char) c)); |
401 | 472 | }
|
402 | 473 |
|
403 | 474 | static int
|
404 | 475 | pg_wc_isspace(pg_wchar c)
|
405 | 476 | {
|
406 |
| - return (c >= 0 && c <= UCHAR_MAX && isspace((unsigned char) c)); |
| 477 | +#ifdef USE_WIDE_UPPER_LOWER |
| 478 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 479 | + { |
| 480 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 481 | + return iswspace((wint_t) c); |
| 482 | + } |
| 483 | +#endif |
| 484 | + return (c <= (pg_wchar) UCHAR_MAX && isspace((unsigned char) c)); |
407 | 485 | }
|
408 | 486 |
|
409 | 487 | static pg_wchar
|
410 | 488 | pg_wc_toupper(pg_wchar c)
|
411 | 489 | {
|
412 |
| - if (c >= 0 && c <= UCHAR_MAX) |
| 490 | +#ifdef USE_WIDE_UPPER_LOWER |
| 491 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 492 | + { |
| 493 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 494 | + return towupper((wint_t) c); |
| 495 | + } |
| 496 | +#endif |
| 497 | + if (c <= (pg_wchar) UCHAR_MAX) |
413 | 498 | return toupper((unsigned char) c);
|
414 | 499 | return c;
|
415 | 500 | }
|
416 | 501 |
|
417 | 502 | static pg_wchar
|
418 | 503 | pg_wc_tolower(pg_wchar c)
|
419 | 504 | {
|
420 |
| - if (c >= 0 && c <= UCHAR_MAX) |
| 505 | +#ifdef USE_WIDE_UPPER_LOWER |
| 506 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 507 | + { |
| 508 | + if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) |
| 509 | + return towlower((wint_t) c); |
| 510 | + } |
| 511 | +#endif |
| 512 | + if (c <= (pg_wchar) UCHAR_MAX) |
421 | 513 | return tolower((unsigned char) c);
|
422 | 514 | return c;
|
423 | 515 | }
|
|
0 commit comments