Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b46a212

Browse files
author
Marina Polyakova
committed
ICU: fix the use of locale names in Windows for ICU functions 2
The previous attempt did not work because the function _create_locale returns different locales for the same locale names. Therefore, use the Language Code Identifiers (LCIDs) to get the canonical locale names for both libc and ICU. Important: you can no longer use language and country strings in locale names, e.g. "Russian_Russia[.encoding]" or "English_United States[.encoding]". Instead, use "ru-RU[.encoding]" or "en-US[.encoding]", respectively. List of canonical names of Windows locales and their LCIDs: https://www.microsoft.com/resources/msdn/goglobal/default.mspx
1 parent c2ac42d commit b46a212

File tree

6 files changed

+451
-130
lines changed

6 files changed

+451
-130
lines changed

src/backend/commands/dbcommands.c

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,13 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
149149
char *dbcanonname = NULL;
150150
char dbcollprovider;
151151
char *dbcollate_full_name;
152+
char *icu_wincollate = NULL;
152153
char *langtag = NULL;
153154
const char *collate;
155+
#ifdef WIN32
156+
char *dbcollate_original;
157+
char *dbctype_original;
158+
#endif
154159

155160
/* Extract options from the statement node tree */
156161
foreach(option, stmt->options)
@@ -393,6 +398,11 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
393398

394399
/* Check that the chosen locales are valid, and get canonical spellings */
395400

401+
#ifdef WIN32
402+
dbcollate_original = dbcollate;
403+
dbctype_original = dbctype;
404+
#endif
405+
396406
if (!check_locale(LC_CTYPE, dbctype, &canonname, '\0'))
397407
ereport(ERROR,
398408
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -429,6 +439,15 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
429439
/* check lc_collate and lc_ctype for icu if we need it */
430440
if (dbcollprovider == COLLPROVIDER_ICU)
431441
{
442+
#ifdef WIN32
443+
/* set the libc canonnames for dbcollate and dbctype */
444+
445+
check_winlocale(dbcollate_original, &canonname, NULL);
446+
dbcollate = canonname;
447+
448+
check_winlocale(dbctype_original, &canonname, NULL);
449+
dbctype = canonname;
450+
#endif
432451
if (!check_locale(LC_COLLATE, dbcollate, NULL, dbcollprovider))
433452
ereport(ERROR,
434453
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -449,17 +468,19 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
449468
if (dbcollprovider == COLLPROVIDER_ICU)
450469
{
451470
#ifdef WIN32
452-
collate = (const char *) IsoLocaleName(dbcollate, LC_COLLATE);
453-
if (collate == NULL)
454-
#endif /* WIN32 */
471+
if (!locale_is_c(dbcollate))
455472
{
456-
collate = (const char *) dbcollate;
473+
check_winlocale(dbcollate, NULL, &icu_wincollate);
474+
collate = (const char *) icu_wincollate;
457475
}
476+
#else /* not WIN32 */
477+
collate = (const char *) dbcollate;
478+
#endif /* not WIN32 */
458479
langtag = get_icu_language_tag(collate);
459480
collate = get_icu_collate(collate, langtag);
460481
}
461482
else
462-
#endif /* USE_ICU */
483+
#endif
463484
{
464485
/* COLLPROVIDER_LIBC */
465486
collate = (const char *) dbcollate;
@@ -797,6 +818,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
797818
pfree(dbcanonname);
798819
if (langtag)
799820
pfree(langtag);
821+
if (icu_wincollate)
822+
pfree(icu_wincollate);
800823

801824
PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
802825
PointerGetDatum(&fparms));

src/backend/utils/adt/pg_locale.c

Lines changed: 126 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,10 @@ typedef struct
131131
static HTAB *collation_cache = NULL;
132132

133133

134+
#if defined(WIN32) && defined(LC_MESSAGES)
135+
static char *IsoLocaleName(const char *); /* MSVC specific */
136+
#endif
137+
134138
#ifdef USE_ICU
135139
static char *check_icu_locale(const char *locale);
136140
#endif
@@ -251,7 +255,7 @@ pg_perm_setlocale(int category, const char *locale, char collprovider)
251255
envvar = "LC_MESSAGES";
252256
envbuf = lc_messages_envbuf;
253257
#ifdef WIN32
254-
result = IsoLocaleName(locale, LC_CTYPE);
258+
result = IsoLocaleName(locale);
255259
if (result == NULL)
256260
result = locale;
257261
#endif /* WIN32 */
@@ -972,6 +976,114 @@ cache_locale_time(void)
972976
}
973977

974978

979+
#if defined(WIN32) && defined(LC_MESSAGES)
980+
/*
981+
* Convert a Windows setlocale() argument to a Unix-style one.
982+
*
983+
* Regardless of platform, we install message catalogs under a Unix-style
984+
* LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
985+
* following that style will elicit localized interface strings.
986+
*
987+
* Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
988+
* (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
989+
* case-insensitive. setlocale() returns the fully-qualified form; for
990+
* example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
991+
* setlocale() and _create_locale() select a "locale identifier"[1] and store
992+
* it in an undocumented _locale_t field. From that LCID, we can retrieve the
993+
* ISO 639 language and the ISO 3166 country. Character encoding does not
994+
* matter, because the server and client encodings govern that.
995+
*
996+
* Windows Vista introduced the "locale name" concept[2], closely following
997+
* RFC 4646. Locale identifiers are now deprecated. Starting with Visual
998+
* Studio 2012, setlocale() accepts locale names in addition to the strings it
999+
* accepted historically. It does not standardize them; setlocale("Th-tH")
1000+
* returns "Th-tH". setlocale(category, "") still returns a traditional
1001+
* string. Furthermore, msvcr110.dll changed the undocumented _locale_t
1002+
* content to carry locale names instead of locale identifiers.
1003+
*
1004+
* MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
1005+
* IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
1006+
* Unix-style values of the lc_messages GUC can elicit localized messages. In
1007+
* particular, every lc_messages setting that initdb can select automatically
1008+
* will yield only C-locale messages. XXX This could be fixed by running the
1009+
* fully-qualified locale name through a lookup table.
1010+
*
1011+
* This function returns a pointer to a static buffer bearing the converted
1012+
* name or NULL if conversion fails.
1013+
*
1014+
* [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
1015+
* [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
1016+
*/
1017+
static char *
1018+
IsoLocaleName(const char *winlocname)
1019+
{
1020+
#if (_MSC_VER >= 1400) /* VC8.0 or later */
1021+
static char iso_lc_messages[32];
1022+
_locale_t loct = NULL;
1023+
1024+
if (pg_strcasecmp("c", winlocname) == 0 ||
1025+
pg_strcasecmp("posix", winlocname) == 0)
1026+
{
1027+
strcpy(iso_lc_messages, "C");
1028+
return iso_lc_messages;
1029+
}
1030+
1031+
loct = _create_locale(LC_CTYPE, winlocname);
1032+
if (loct != NULL)
1033+
{
1034+
#if (_MSC_VER >= 1700) /* Visual Studio 2012 or later */
1035+
size_t rc;
1036+
char *hyphen;
1037+
1038+
/* Locale names use only ASCII, any conversion locale suffices. */
1039+
rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
1040+
sizeof(iso_lc_messages), NULL);
1041+
_free_locale(loct);
1042+
if (rc == -1 || rc == sizeof(iso_lc_messages))
1043+
return NULL;
1044+
1045+
/*
1046+
* Since the message catalogs sit on a case-insensitive filesystem, we
1047+
* need not standardize letter case here. So long as we do not ship
1048+
* message catalogs for which it would matter, we also need not
1049+
* translate the script/variant portion, e.g. uz-Cyrl-UZ to
1050+
* uz_UZ@cyrillic. Simply replace the hyphen with an underscore.
1051+
*
1052+
* Note that the locale name can be less-specific than the value we
1053+
* would derive under earlier Visual Studio releases. For example,
1054+
* French_France.1252 yields just "fr". This does not affect any of
1055+
* the country-specific message catalogs available as of this writing
1056+
* (pt_BR, zh_CN, zh_TW).
1057+
*/
1058+
hyphen = strchr(iso_lc_messages, '-');
1059+
if (hyphen)
1060+
*hyphen = '_';
1061+
#else
1062+
char isolang[32],
1063+
isocrty[32];
1064+
LCID lcid;
1065+
1066+
lcid = loct->locinfo->lc_handle[LC_CTYPE];
1067+
if (lcid == 0)
1068+
lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
1069+
_free_locale(loct);
1070+
1071+
if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
1072+
return NULL;
1073+
if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
1074+
return NULL;
1075+
snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
1076+
#endif
1077+
return iso_lc_messages;
1078+
}
1079+
return NULL;
1080+
#else
1081+
return NULL; /* Not supported on this version of msvc/mingw */
1082+
#endif /* _MSC_VER >= 1400 */
1083+
}
1084+
#endif /* WIN32 && LC_MESSAGES */
1085+
1086+
9751087
/*
9761088
* Detect aging strxfrm() implementations that, in a subset of locales, write
9771089
* past the specified buffer length. Affected users must update OS packages
@@ -1725,32 +1837,36 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
17251837
*
17261838
* Otherwise return a malloc'd copy of locale if it is not NULL.
17271839
*
1728-
* In both cases, return the name of the locale in the Unix style if possible.
1840+
* On Windows, check that the name is not in the long language/country form
1841+
* (for example, "English_United States[.encoding]").
17291842
*/
17301843
static char *
17311844
check_icu_locale(const char *locale)
17321845
{
17331846
char *canonname = NULL;
1847+
char *winlocale = NULL;
17341848
char *result;
17351849

1850+
#ifdef WIN32
1851+
if (!locale_is_c(locale))
1852+
{
1853+
check_winlocale(locale, NULL, &winlocale);
1854+
locale = (const char *) winlocale;
1855+
}
1856+
#else /* not WIN32 */
17361857
if (locale && strlen(locale) == 0)
17371858
{
17381859
check_locale(LC_COLLATE, locale, &canonname, COLLPROVIDER_LIBC);
17391860
locale = (const char *) canonname;
17401861
}
1741-
1742-
#ifdef WIN32
1743-
{
1744-
char *iso_locale = IsoLocaleName(locale, LC_COLLATE);
1745-
if (iso_locale)
1746-
locale = iso_locale;
1747-
}
1748-
#endif
1862+
#endif /* not WIN32 */
17491863

17501864
result = locale ? pstrdup(locale) : NULL;
17511865

17521866
if (canonname)
17531867
pfree(canonname);
1868+
if (winlocale)
1869+
pfree(winlocale);
17541870

17551871
return result;
17561872
}

src/backend/utils/init/postinit.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ CheckMyDatabase(const char *name, bool am_superuser)
295295
char *datcollate;
296296
char collprovider;
297297
char *collversion;
298+
char *wincollate = NULL;
298299
char *langtag = NULL;
299300
const char *collcollate;
300301
char *actual_versionstr;
@@ -437,18 +438,19 @@ CheckMyDatabase(const char *name, bool am_superuser)
437438
#ifdef USE_ICU
438439
if (collprovider == COLLPROVIDER_ICU)
439440
{
441+
collcollate = (const char *) collate;
440442
#ifdef WIN32
441-
collcollate = (const char *) IsoLocaleName(collate, LC_COLLATE);
442-
if (collcollate == NULL)
443-
#endif /* WIN32 */
443+
if (!locale_is_c(collcollate))
444444
{
445-
collcollate = (const char *) collate;
445+
check_winlocale(collcollate, NULL, &wincollate);
446+
collcollate = (const char *) wincollate;
446447
}
448+
#endif /* WIN32 */
447449
langtag = get_icu_language_tag(collcollate);
448450
collcollate = get_icu_collate(collcollate, langtag);
449451
}
450452
else
451-
#endif /* USE_ICU */
453+
#endif
452454
{
453455
/* COLLPROVIDER_LIBC */
454456
collcollate = (const char *) collate;
@@ -493,6 +495,8 @@ CheckMyDatabase(const char *name, bool am_superuser)
493495
pfree(langtag);
494496
if (actual_versionstr)
495497
pfree(actual_versionstr);
498+
if (wincollate)
499+
pfree(wincollate);
496500

497501
check_strxfrm_bug();
498502

0 commit comments

Comments
 (0)