Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0d21d4b

Browse files
committed
Add standard collation UNICODE
This adds a new predefined collation named UNICODE, which sorts according to the default Unicode Collation Algorithm specification, per the SQL standard. This only works if ICU support is built.

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/1293e382-2093-a2bf-a397-c04e8f83d3c2@enterprisedb.com
1 parent 6ad5793 commit 0d21d4b

File tree

5 files changed

+46
-7
lines changed

5 files changed

+46
-7
lines changed

doc/src/sgml/charset.sgml

+28-3
Original file line numberDiff line numberDiff line change
@@ -659,9 +659,34 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
659659
</para>
660660

661661
<para>
662-
Additionally, the SQL standard collation name <literal>ucs_basic</literal>
663-
is available for encoding <literal>UTF8</literal>. It is equivalent
664-
to <literal>C</literal> and sorts by Unicode code point.
662+
Additionally, two SQL standard collation names are available:
663+
664+
<variablelist>
665+
<varlistentry>
666+
<term><literal>unicode</literal></term>
667+
<listitem>
668+
<para>
669+
This collation sorts using the Unicode Collation Algorithm with the
670+
Default Unicode Collation Element Table. It is available in all
671+
encodings. ICU support is required to use this collation. (This
672+
collation has the same behavior as the ICU root locale; see <xref
673+
linkend="collation-managing-predefined-icu-und-x-icu"/>.)
674+
</para>
675+
</listitem>
676+
</varlistentry>
677+
678+
<varlistentry>
679+
<term><literal>ucs_basic</literal></term>
680+
<listitem>
681+
<para>
682+
This collation sorts by Unicode code point. It is only available for
683+
encoding <literal>UTF8</literal>. (This collation has the same
684+
behavior as the libc locale specification <literal>C</literal> in
685+
<literal>UTF8</literal> encoding.)
686+
</para>
687+
</listitem>
688+
</varlistentry>
689+
</variablelist>
665690
</para>
666691
</sect3>
667692

src/bin/initdb/initdb.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -1493,10 +1493,14 @@ static void
14931493
setup_collation(FILE *cmdfd)
14941494
{
14951495
/*
1496-
* Add an SQL-standard name. We don't want to pin this, so it doesn't go
1497-
* in pg_collation.h. But add it before reading system collations, so
1498-
* that it wins if libc defines a locale named ucs_basic.
1496+
* Add SQL-standard names. We don't want to pin these, so they don't go
1497+
* in pg_collation.dat. But add them before reading system collations, so
1498+
* that they win if libc defines a locale with the same name.
14991499
*/
1500+
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
1501+
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
1502+
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
1503+
15001504
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
15011505
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n",
15021506
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8);

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202303081
60+
#define CATALOG_VERSION_NO 202303101
6161

6262
#endif

src/test/regress/expected/collate.icu.utf8.out

+9
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,15 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
11511151
2 | äbc
11521152
(4 rows)
11531153

1154+
SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
1155+
a | b
1156+
---+-----
1157+
1 | abc
1158+
4 | ABC
1159+
2 | äbc
1160+
3 | bbc
1161+
(4 rows)
1162+
11541163
-- test ICU collation customization
11551164
-- test the attributes handled by icu_set_collation_attributes()
11561165
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');

src/test/regress/sql/collate.icu.utf8.sql

+1
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ drop type textrange_en_us;
447447
-- standard collations
448448

449449
SELECT * FROM collate_test2 ORDER BY b COLLATE UCS_BASIC;
450+
SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
450451

451452

452453
-- test ICU collation customization

0 commit comments

Comments
 (0)