|
4 | 4 | * (currently mule internal code (mic) is used)
|
5 | 5 | * Tatsuo Ishii
|
6 | 6 | *
|
7 |
| - * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $ |
| 7 | + * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $ |
8 | 8 | */
|
9 | 9 | #include "postgres.h"
|
10 | 10 |
|
@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
|
555 | 555 | return result;
|
556 | 556 | }
|
557 | 557 |
|
| 558 | + |
| 559 | + |
| 560 | +#ifdef USE_WIDE_UPPER_LOWER |
| 561 | + |
/*
 * wchar2char --- turn a zero-terminated wide string into multibyte text
 *
 * The calling convention mirrors the C library's wcstombs(): no more than
 * tolen bytes are written to *to, the input at *from has to end with a zero
 * wide character, and a terminating zero byte is emitted only when the
 * output buffer still has space for it.
 *
 * The return value is the number of bytes produced, not counting any
 * terminator, or (size_t) -1 when the conversion fails.  A tolen of zero
 * yields zero without touching either buffer.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	/* With no room for output there is nothing to do. */
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * the C library's mbstowcs/wcstombs will not perform the UTF8 conversion
	 * for us.  For a UTF8 database we therefore call the Win32 API
	 * WideCharToMultiByte() directly.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		size_t		nbytes;

		nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);

		/* The API reports failure by returning zero. */
		if (nbytes == 0)
			return (size_t) -1;

		Assert(nbytes <= tolen);

		/* Microsoft's count includes the zero terminator; ours must not. */
		return nbytes - 1;
	}
#endif   /* WIN32 */

	/* Everywhere else the standard library routine does the job. */
	return wcstombs(to, from, tolen);
}
| 602 | + |
| 603 | +/* |
| 604 | + * char2wchar --- convert multibyte characters to wide characters |
| 605 | + * |
| 606 | + * This has almost the API of mbstowcs(), except that *from need not be |
| 607 | + * null-terminated; instead, the number of input bytes is specified as |
| 608 | + * fromlen. Also, we ereport() rather than returning -1 for invalid |
| 609 | + * input encoding. tolen is the maximum number of wchar_t's to store at *to. |
| 610 | + * The output will be zero-terminated iff there is room. |
| 611 | + */ |
| 612 | +size_t |
| 613 | +char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) |
| 614 | +{ |
| 615 | + size_t result; |
| 616 | + |
| 617 | + if (tolen == 0) |
| 618 | + return 0; |
| 619 | + |
| 620 | +#ifdef WIN32 |
| 621 | + /* See WIN32 "Unicode" comment above */ |
| 622 | + if (GetDatabaseEncoding() == PG_UTF8) |
| 623 | + { |
| 624 | + /* Win32 API does not work for zero-length input */ |
| 625 | + if (fromlen == 0) |
| 626 | + result = 0; |
| 627 | + else |
| 628 | + { |
| 629 | + result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); |
| 630 | + /* A zero return is failure */ |
| 631 | + if (result == 0) |
| 632 | + result = -1; |
| 633 | + } |
| 634 | + |
| 635 | + if (result != -1) |
| 636 | + { |
| 637 | + Assert(result < tolen); |
| 638 | + /* Append trailing null wchar (MultiByteToWideChar() does not) */ |
| 639 | + to[result] = 0; |
| 640 | + } |
| 641 | + } |
| 642 | + else |
| 643 | +#endif /* WIN32 */ |
| 644 | + { |
| 645 | + if (lc_ctype_is_c()) |
| 646 | + { |
| 647 | + /* |
| 648 | + * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be |
| 649 | + * allocated with sufficient space |
| 650 | + */ |
| 651 | + result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen); |
| 652 | + } |
| 653 | + else |
| 654 | + { |
| 655 | + /* mbstowcs requires ending '\0' */ |
| 656 | + char *str = pnstrdup(from, fromlen); |
| 657 | + |
| 658 | + result = mbstowcs(to, str, tolen); |
| 659 | + pfree(str); |
| 660 | + } |
| 661 | + } |
| 662 | + |
| 663 | + if (result == -1) |
| 664 | + { |
| 665 | + /* |
| 666 | + * Invalid multibyte character encountered. We try to give a useful |
| 667 | + * error message by letting pg_verifymbstr check the string. But it's |
| 668 | + * possible that the string is OK to us, and not OK to mbstowcs --- |
| 669 | + * this suggests that the LC_CTYPE locale is different from the |
| 670 | + * database encoding. Give a generic error message if verifymbstr |
| 671 | + * can't find anything wrong. |
| 672 | + */ |
| 673 | + pg_verifymbstr(from, fromlen, false); /* might not return */ |
| 674 | + /* but if it does ... */ |
| 675 | + ereport(ERROR, |
| 676 | + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), |
| 677 | + errmsg("invalid multibyte character for locale"), |
| 678 | + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); |
| 679 | + } |
| 680 | + |
| 681 | + return result; |
| 682 | +} |
| 683 | + |
| 684 | +#endif |
| 685 | + |
558 | 686 | /* convert a multibyte string to a wchar */
|
559 | 687 | int
|
560 | 688 | pg_mb2wchar(const char *from, pg_wchar *to)
|
|
0 commit comments