diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/commands/collationcmds.c | 46 | ||||
-rw-r--r-- | src/backend/commands/dbcommands.c | 20 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 85 |
3 files changed, 130 insertions, 21 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 45de78352c7..c91fe66d9b2 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -165,6 +165,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else colliculocale = NULL; + /* + * When the ICU locale comes from an existing collation, do not + * canonicalize to a language tag. + */ + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); if (!isnull) collicurules = TextDatumGetCString(datum); @@ -259,6 +264,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"locale\" must be specified"))); + /* + * During binary upgrade, preserve the locale string. Otherwise, + * canonicalize to a language tag. + */ + if (!IsBinaryUpgrade) + { + char *langtag = icu_language_tag(colliculocale, + icu_validation_level); + + if (langtag && strcmp(colliculocale, langtag) != 0) + { + ereport(NOTICE, + (errmsg("using standard form \"%s\" for locale \"%s\"", + langtag, colliculocale))); + + colliculocale = langtag; + } + } + icu_validate_locale(colliculocale); } @@ -570,26 +594,6 @@ cmpaliases(const void *a, const void *b) #ifdef USE_ICU /* - * Get the ICU language tag for a locale name. - * The result is a palloc'd string. - */ -static char * -get_icu_language_tag(const char *localename) -{ - char buf[ULOC_FULLNAME_CAPACITY]; - UErrorCode status; - - status = U_ZERO_ERROR; - uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not convert locale name \"%s\" to language tag: %s", - localename, u_errorName(status)))); - - return pstrdup(buf); -} - -/* * Get a comment (specifically, the display name) for an ICU locale. * The result is a palloc'd string, or NULL if we can't get a comment * or find that it's not all ASCII. 
(We can *not* accept non-ASCII @@ -950,7 +954,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) else name = uloc_getAvailable(i); - langtag = get_icu_language_tag(name); + langtag = icu_language_tag(name, ERROR); /* * Be paranoid about not allowing any non-ASCII strings into diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 24bcc5adfe8..2e242eeff24 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1058,6 +1058,26 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ICU locale must be specified"))); + /* + * During binary upgrade, or when the locale came from the template + * database, preserve locale string. Otherwise, canonicalize to a + * language tag. + */ + if (!IsBinaryUpgrade && dbiculocale != src_iculocale) + { + char *langtag = icu_language_tag(dbiculocale, + icu_validation_level); + + if (langtag && strcmp(dbiculocale, langtag) != 0) + { + ereport(NOTICE, + (errmsg("using standard form \"%s\" for locale \"%s\"", + langtag, dbiculocale))); + + dbiculocale = langtag; + } + } + icu_validate_locale(dbiculocale); } else diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 9497c20d123..06e73aa012f 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -2827,6 +2827,91 @@ icu_set_collation_attributes(UCollator *collator, const char *loc, #endif /* + * Return the BCP47 language tag representation of the requested locale. + * + * This function should be called before passing the string to ucol_open(), + * because conversion to a language tag also performs "level 2 + * canonicalization". In addition to producing a consistent format, level 2 + * canonicalization is able to more accurately interpret different input + * locale string formats, such as POSIX and .NET IDs. 
+ */ +char * +icu_language_tag(const char *loc_str, int elevel) +{ +#ifdef USE_ICU + UErrorCode status; + char lang[ULOC_LANG_CAPACITY]; + char *langtag; + size_t buflen = 32; /* arbitrary starting buffer size */ + const bool strict = true; + + status = U_ZERO_ERROR; + uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status); + if (U_FAILURE(status)) + { + if (elevel > 0) + ereport(elevel, + (errmsg("could not get language from locale \"%s\": %s", + loc_str, u_errorName(status)))); + return NULL; + } + + /* C/POSIX locales aren't handled by uloc_toLanguageTag(); return a fixed POSIX-variant tag instead */ + if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0) + return pstrdup("en-US-u-va-posix"); + + /* + * A BCP47 language tag doesn't have a clearly-defined upper limit + * (cf. RFC5646 section 4.4). Additionally, in older ICU versions, + * uloc_toLanguageTag() doesn't always return the ultimate length on the + * first call, necessitating a loop. The buffer is doubled on each retry, up to MaxAllocSize. + */ + langtag = palloc(buflen); + while (true) + { + int32_t len; + + status = U_ZERO_ERROR; + len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status); + + /* + * If the result fits in the buffer exactly (len == buflen), + * uloc_toLanguageTag() will return success without nul-terminating + * the result. Check for either U_BUFFER_OVERFLOW_ERROR or len >= + * buflen and try again. 
+ */ + if ((status == U_BUFFER_OVERFLOW_ERROR || + (U_SUCCESS(status) && len >= buflen)) && + buflen < MaxAllocSize) + { + buflen = Min(buflen * 2, MaxAllocSize); + langtag = repalloc(langtag, buflen); + continue; + } + + break; + } + + if (U_FAILURE(status)) + { + pfree(langtag); + + if (elevel > 0) + ereport(elevel, + (errmsg("could not convert locale name \"%s\" to language tag: %s", + loc_str, u_errorName(status)))); + return NULL; + } + + return langtag; +#else /* not USE_ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"))); +#endif /* not USE_ICU */ +} + +/* * Perform best-effort check that the locale is a valid one. */ void |