Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 869650f

Browse files
committed
Support language tags in older ICU versions (53 and earlier).
By calling uloc_canonicalize() before parsing the attributes, the existing locale attribute parsing logic works on language tags as well.

Fix a small memory leak, too.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
Reviewed-by: Peter Eisentraut
1 parent e8e1f96 commit 869650f

File tree

4 files changed

+50
-11
lines changed

4 files changed

+50
-11
lines changed

src/backend/commands/collationcmds.c

+3-5
Original file line number | Diff line number | Diff line change
@@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
950950
const char *name;
951951
char *langtag;
952952
char *icucomment;
953-
const char *iculocstr;
954953
Oid collid;
955954

956955
if (i == -1)
@@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
959958
name = uloc_getAvailable(i);
960959

961960
langtag = get_icu_language_tag(name);
962-
iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
963961

964962
/*
965963
* Be paranoid about not allowing any non-ASCII strings into
966964
* pg_collation
967965
*/
968-
if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
966+
if (!pg_is_ascii(langtag))
969967
continue;
970968

971969
collid = CollationCreate(psprintf("%s-x-icu", langtag),
972970
nspid, GetUserId(),
973971
COLLPROVIDER_ICU, true, -1,
974-
NULL, NULL, iculocstr, NULL,
975-
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
972+
NULL, NULL, langtag, NULL,
973+
get_collation_actual_version(COLLPROVIDER_ICU, langtag),
976974
true, true);
977975
if (OidIsValid(collid))
978976
{

src/backend/utils/adt/pg_locale.c

+35-6
Original file line number | Diff line number | Diff line change
@@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
26342634
}
26352635

26362636
/*
2637-
* Parse collation attributes and apply them to the open collator. This takes
2638-
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
2639-
* applies the key-value arguments.
2637+
* Parse collation attributes from the given locale string and apply them to
2638+
* the open collator.
2639+
*
2640+
* First, the locale string is canonicalized to an ICU format locale ID such
2641+
* as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
2642+
* the key-value arguments.
26402643
*
26412644
* Starting with ICU version 54, the attributes are processed automatically by
26422645
* ucol_open(), so this is only necessary for emulating this behavior on older
@@ -2646,9 +2649,34 @@ pg_attribute_unused()
26462649
static void
26472650
icu_set_collation_attributes(UCollator *collator, const char *loc)
26482651
{
2649-
char *str = asc_tolower(loc, strlen(loc));
2652+
UErrorCode status;
2653+
int32_t len;
2654+
char *icu_locale_id;
2655+
char *lower_str;
2656+
char *str;
2657+
2658+
/*
2659+
* The input locale may be a BCP 47 language tag, e.g.
2660+
* "und-u-kc-ks-level1", which expresses the same attributes in a
2661+
* different form. It will be converted to the equivalent ICU format
2662+
* locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
2663+
* uloc_canonicalize().
2664+
*/
2665+
status = U_ZERO_ERROR;
2666+
len = uloc_canonicalize(loc, NULL, 0, &status);
2667+
icu_locale_id = palloc(len + 1);
2668+
status = U_ZERO_ERROR;
2669+
len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
2670+
if (U_FAILURE(status))
2671+
ereport(ERROR,
2672+
(errmsg("canonicalization failed for locale string \"%s\": %s",
2673+
loc, u_errorName(status))));
26502674

2651-
str = strchr(str, '@');
2675+
lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
2676+
2677+
pfree(icu_locale_id);
2678+
2679+
str = strchr(lower_str, '@');
26522680
if (!str)
26532681
return;
26542682
str++;
@@ -2663,7 +2691,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
26632691
char *value;
26642692
UColAttribute uattr;
26652693
UColAttributeValue uvalue;
2666-
UErrorCode status;
26672694

26682695
status = U_ZERO_ERROR;
26692696

@@ -2730,6 +2757,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
27302757
loc, u_errorName(status))));
27312758
}
27322759
}
2760+
2761+
pfree(lower_str);
27332762
}
27342763

27352764
#endif /* USE_ICU */

src/test/regress/expected/collate.icu.utf8.out

+8
Original file line number | Diff line number | Diff line change
@@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
13041304
t | t
13051305
(1 row)
13061306

1307+
-- test language tags
1308+
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
1309+
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
1310+
?column?
1311+
----------
1312+
t
1313+
(1 row)
1314+
13071315
CREATE TABLE test1cs (x text COLLATE case_sensitive);
13081316
CREATE TABLE test2cs (x text COLLATE case_sensitive);
13091317
CREATE TABLE test3cs (x text COLLATE case_sensitive);

src/test/regress/sql/collate.icu.utf8.sql

+4
Original file line number | Diff line number | Diff line change
@@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
518518
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
519519
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
520520

521+
-- test language tags
522+
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
523+
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
524+
521525
CREATE TABLE test1cs (x text COLLATE case_sensitive);
522526
CREATE TABLE test2cs (x text COLLATE case_sensitive);
523527
CREATE TABLE test3cs (x text COLLATE case_sensitive);

0 commit comments

Comments
 (0)