Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/commands/collationcmds.c 46
-rw-r--r-- src/backend/commands/dbcommands.c 20
-rw-r--r-- src/backend/utils/adt/pg_locale.c 85
3 files changed, 130 insertions, 21 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 45de78352c7..c91fe66d9b2 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -165,6 +165,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
else
colliculocale = NULL;
+ /*
+ * When the ICU locale comes from an existing collation, do not
+ * canonicalize to a language tag.
+ */
+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
collicurules = TextDatumGetCString(datum);
@@ -259,6 +264,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"locale\" must be specified")));
+ /*
+ * During binary upgrade, preserve the locale string. Otherwise,
+ * canonicalize to a language tag.
+ */
+ if (!IsBinaryUpgrade)
+ {
+ char *langtag = icu_language_tag(colliculocale,
+ icu_validation_level);
+
+ if (langtag && strcmp(colliculocale, langtag) != 0)
+ {
+ ereport(NOTICE,
+ (errmsg("using standard form \"%s\" for locale \"%s\"",
+ langtag, colliculocale)));
+
+ colliculocale = langtag;
+ }
+ }
+
icu_validate_locale(colliculocale);
}
@@ -570,26 +594,6 @@ cmpaliases(const void *a, const void *b)
#ifdef USE_ICU
/*
- * Get the ICU language tag for a locale name.
- * The result is a palloc'd string.
- */
-static char *
-get_icu_language_tag(const char *localename)
-{
- char buf[ULOC_FULLNAME_CAPACITY];
- UErrorCode status;
-
- status = U_ZERO_ERROR;
- uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
- if (U_FAILURE(status))
- ereport(ERROR,
- (errmsg("could not convert locale name \"%s\" to language tag: %s",
- localename, u_errorName(status))));
-
- return pstrdup(buf);
-}
-
-/*
* Get a comment (specifically, the display name) for an ICU locale.
* The result is a palloc'd string, or NULL if we can't get a comment
* or find that it's not all ASCII. (We can *not* accept non-ASCII
@@ -950,7 +954,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
else
name = uloc_getAvailable(i);
- langtag = get_icu_language_tag(name);
+ langtag = icu_language_tag(name, ERROR);
/*
* Be paranoid about not allowing any non-ASCII strings into
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 24bcc5adfe8..2e242eeff24 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1058,6 +1058,26 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ICU locale must be specified")));
+ /*
+ * During binary upgrade, or when the locale came from the template
+ * database, preserve the locale string. Otherwise, canonicalize to a
+ * language tag.
+ */
+ if (!IsBinaryUpgrade && dbiculocale != src_iculocale)
+ {
+ char *langtag = icu_language_tag(dbiculocale,
+ icu_validation_level);
+
+ if (langtag && strcmp(dbiculocale, langtag) != 0)
+ {
+ ereport(NOTICE,
+ (errmsg("using standard form \"%s\" for locale \"%s\"",
+ langtag, dbiculocale)));
+
+ dbiculocale = langtag;
+ }
+ }
+
icu_validate_locale(dbiculocale);
}
else
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 9497c20d123..06e73aa012f 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2827,6 +2827,91 @@ icu_set_collation_attributes(UCollator *collator, const char *loc,
#endif
/*
+ * Return the BCP47 language tag representation of the requested locale.
+ *
+ * This function should be called before passing the string to ucol_open(),
+ * because conversion to a language tag also performs "level 2
+ * canonicalization". In addition to producing a consistent format, level 2
+ * canonicalization is able to more accurately interpret different input
+ * locale string formats, such as POSIX and .NET IDs.
+ */
+char *
+icu_language_tag(const char *loc_str, int elevel)
+{
+#ifdef USE_ICU
+ UErrorCode status;
+ char lang[ULOC_LANG_CAPACITY];
+ char *langtag;
+ size_t buflen = 32; /* arbitrary starting buffer size */
+ const bool strict = true;
+
+ status = U_ZERO_ERROR;
+ uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+ if (U_FAILURE(status))
+ {
+ if (elevel > 0)
+ ereport(elevel,
+ (errmsg("could not get language from locale \"%s\": %s",
+ loc_str, u_errorName(status))));
+ return NULL;
+ }
+
+ /* C/POSIX locales aren't handled by uloc_toLanguageTag() */
+ if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+ return pstrdup("en-US-u-va-posix");
+
+ /*
+ * A BCP47 language tag doesn't have a clearly-defined upper limit
+ * (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
+ * uloc_toLanguageTag() doesn't always return the ultimate length on the
+ * first call, necessitating a loop.
+ */
+ langtag = palloc(buflen);
+ while (true)
+ {
+ int32_t len;
+
+ status = U_ZERO_ERROR;
+ len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
+
+ /*
+ * If the result fits in the buffer exactly (len == buflen),
+ * uloc_toLanguageTag() will return success without nul-terminating
+ * the result. Check for either U_BUFFER_OVERFLOW_ERROR or len >=
+ * buflen and try again.
+ */
+ if ((status == U_BUFFER_OVERFLOW_ERROR ||
+ (U_SUCCESS(status) && len >= buflen)) &&
+ buflen < MaxAllocSize)
+ {
+ buflen = Min(buflen * 2, MaxAllocSize);
+ langtag = repalloc(langtag, buflen);
+ continue;
+ }
+
+ break;
+ }
+
+ if (U_FAILURE(status))
+ {
+ pfree(langtag);
+
+ if (elevel > 0)
+ ereport(elevel,
+ (errmsg("could not convert locale name \"%s\" to language tag: %s",
+ loc_str, u_errorName(status))));
+ return NULL;
+ }
+
+ return langtag;
+#else /* not USE_ICU */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ICU is not supported in this build")));
+#endif /* not USE_ICU */
+}
+
+/*
* Perform best-effort check that the locale is a valid one.
*/
void