Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f3a01af

Browse files
committed
ICU: do not convert locale 'C' to 'en-US-u-va-posix'.
Older versions of ICU canonicalize "C" to "en-US-u-va-posix"; but starting in ICU version 64, the "C" locale is considered obsolete. Postgres commit ea1db8a introduced code to always canonicalize "C" to "en-US-u-va-posix" for consistency and convenience, but it was deemed too confusing.

This commit removes that code, so that "C" is treated like other ICU locale names: canonicalization is attempted, and if it fails, the behavior is controlled by icu_validation_level.

A similar change was previously committed as f7faa99, then reverted due to an ICU-version-dependent test failure. This commit un-reverts it, omitting the test because we now expect the behavior to depend on the version of ICU being used.

Discussion: https://postgr.es/m/3a200aca-4672-4b37-fc91-5d198a323503%40eisentraut.org
Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95.camel@j-davis.com
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16.camel@j-davis.com
1 parent 2535c74 commit f3a01af

File tree

4 files changed

+6
-34
lines changed

4 files changed

+6
-34
lines changed

src/backend/utils/adt/pg_locale.c

+1-18
Original file line numberDiff line numberDiff line change
@@ -2784,26 +2784,10 @@ icu_language_tag(const char *loc_str, int elevel)
27842784
{
27852785
#ifdef USE_ICU
27862786
UErrorCode status;
2787-
char lang[ULOC_LANG_CAPACITY];
27882787
char *langtag;
27892788
size_t buflen = 32; /* arbitrary starting buffer size */
27902789
const bool strict = true;
27912790

2792-
status = U_ZERO_ERROR;
2793-
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2794-
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2795-
{
2796-
if (elevel > 0)
2797-
ereport(elevel,
2798-
(errmsg("could not get language from locale \"%s\": %s",
2799-
loc_str, u_errorName(status))));
2800-
return NULL;
2801-
}
2802-
2803-
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2804-
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2805-
return pstrdup("en-US-u-va-posix");
2806-
28072791
/*
28082792
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
28092793
* RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2884,8 +2868,7 @@ icu_validate_locale(const char *loc_str)
28842868

28852869
/* check for special language name */
28862870
if (strcmp(lang, "") == 0 ||
2887-
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2888-
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2871+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
28892872
found = true;
28902873

28912874
/* search for matching language within ICU */

src/bin/initdb/initdb.c

+1-16
Original file line numberDiff line numberDiff line change
@@ -2244,24 +2244,10 @@ icu_language_tag(const char *loc_str)
22442244
{
22452245
#ifdef USE_ICU
22462246
UErrorCode status;
2247-
char lang[ULOC_LANG_CAPACITY];
22482247
char *langtag;
22492248
size_t buflen = 32; /* arbitrary starting buffer size */
22502249
const bool strict = true;
22512250

2252-
status = U_ZERO_ERROR;
2253-
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2254-
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2255-
{
2256-
pg_fatal("could not get language from locale \"%s\": %s",
2257-
loc_str, u_errorName(status));
2258-
return NULL;
2259-
}
2260-
2261-
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2262-
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2263-
return pstrdup("en-US-u-va-posix");
2264-
22652251
/*
22662252
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
22672253
* RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2326,8 +2312,7 @@ icu_validate_locale(const char *loc_str)
23262312

23272313
/* check for special language name */
23282314
if (strcmp(lang, "") == 0 ||
2329-
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2330-
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2315+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
23312316
found = true;
23322317

23332318
/* search for matching language within ICU */

src/test/regress/expected/collate.icu.utf8.out

+2
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ CREATE ROLE regress_test_role;
10201020
CREATE SCHEMA test_schema;
10211021
-- We need to do this this way to cope with varying names for encodings:
10221022
SET client_min_messages TO WARNING;
1023+
SET icu_validation_level = disabled;
10231024
do $$
10241025
BEGIN
10251026
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1034,6 +1035,7 @@ BEGIN
10341035
quote_literal((SELECT CASE WHEN datlocprovider='i' THEN daticulocale ELSE datcollate END FROM pg_database WHERE datname = current_database())) || ');';
10351036
END
10361037
$$;
1038+
RESET icu_validation_level;
10371039
RESET client_min_messages;
10381040
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
10391041
ERROR: parameter "locale" must be specified

src/test/regress/sql/collate.icu.utf8.sql

+2
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ CREATE SCHEMA test_schema;
358358

359359
-- We need to do this this way to cope with varying names for encodings:
360360
SET client_min_messages TO WARNING;
361+
SET icu_validation_level = disabled;
361362

362363
do $$
363364
BEGIN
@@ -373,6 +374,7 @@ BEGIN
373374
END
374375
$$;
375376

377+
RESET icu_validation_level;
376378
RESET client_min_messages;
377379

378380
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"

0 commit comments

Comments
 (0)