Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 27b6237

Browse files
committed
Use ICU by default at initdb time.
If the ICU locale is not specified, initialize the default collator and retrieve the locale name from that. Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.camel@j-davis.com Reviewed-by: Peter Eisentraut
1 parent a7e584a commit 27b6237

File tree

18 files changed

+147
-42
lines changed

18 files changed

+147
-42
lines changed

contrib/citext/expected/citext_utf8.out

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
/*
22
* This test must be run in a database with UTF-8 encoding
33
* and a Unicode-aware locale.
4+
*
5+
* Also disable this file for ICU, because the test for the
6+
* Turkish dotted I is not correct for many ICU locales. citext always
7+
* uses the default collation, so it's not easy to restrict the test
8+
* to the "tr-TR-x-icu" collation where it will succeed.
49
*/
510
SELECT getdatabaseencoding() <> 'UTF8' OR
6-
current_setting('lc_ctype') = 'C'
11+
current_setting('lc_ctype') = 'C' OR
12+
(SELECT datlocprovider='i' FROM pg_database
13+
WHERE datname=current_database())
714
AS skip_test \gset
815
\if :skip_test
916
\quit
+8-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
/*
22
* This test must be run in a database with UTF-8 encoding
33
* and a Unicode-aware locale.
4+
*
5+
* Also disable this file for ICU, because the test for the
6+
* Turkish dotted I is not correct for many ICU locales. citext always
7+
* uses the default collation, so it's not easy to restrict the test
8+
* to the "tr-TR-x-icu" collation where it will succeed.
49
*/
510
SELECT getdatabaseencoding() <> 'UTF8' OR
6-
current_setting('lc_ctype') = 'C'
11+
current_setting('lc_ctype') = 'C' OR
12+
(SELECT datlocprovider='i' FROM pg_database
13+
WHERE datname=current_database())
714
AS skip_test \gset
815
\if :skip_test
916
\quit

contrib/citext/sql/citext_utf8.sql

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
/*
22
* This test must be run in a database with UTF-8 encoding
33
* and a Unicode-aware locale.
4+
*
5+
* Also disable this file for ICU, because the test for the
6+
* Turkish dotted I is not correct for many ICU locales. citext always
7+
* uses the default collation, so it's not easy to restrict the test
8+
* to the "tr-TR-x-icu" collation where it will succeed.
49
*/
510

611
SELECT getdatabaseencoding() <> 'UTF8' OR
7-
current_setting('lc_ctype') = 'C'
12+
current_setting('lc_ctype') = 'C' OR
13+
(SELECT datlocprovider='i' FROM pg_database
14+
WHERE datname=current_database())
815
AS skip_test \gset
916
\if :skip_test
1017
\quit

contrib/unaccent/expected/unaccent.out

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
-- unaccent is broken if the default collation is provided by ICU and
2+
-- LC_CTYPE=C
3+
SELECT current_setting('lc_ctype') = 'C' AND
4+
(SELECT datlocprovider='i' FROM pg_database
5+
WHERE datname=current_database())
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit
9+
\endif
110
CREATE EXTENSION unaccent;
211
-- must have a UTF8 database
312
SELECT getdatabaseencoding();
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- unaccent is broken if the default collation is provided by ICU and
2+
-- LC_CTYPE=C
3+
SELECT current_setting('lc_ctype') = 'C' AND
4+
(SELECT datlocprovider='i' FROM pg_database
5+
WHERE datname=current_database())
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit

contrib/unaccent/sql/unaccent.sql

+11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
2+
-- unaccent is broken if the default collation is provided by ICU and
3+
-- LC_CTYPE=C
4+
SELECT current_setting('lc_ctype') = 'C' AND
5+
(SELECT datlocprovider='i' FROM pg_database
6+
WHERE datname=current_database())
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit
10+
\endif
11+
112
CREATE EXTENSION unaccent;
213

314
-- must have a UTF8 database

doc/src/sgml/ref/initdb.sgml

+34-19
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,28 @@ PostgreSQL documentation
8989
and character set encoding. These can also be set separately for each
9090
database when it is created. <command>initdb</command> determines those
9191
settings for the template databases, which will serve as the default for
92-
all other databases. By default, <command>initdb</command> uses the
93-
locale provider <literal>libc</literal>, takes the locale settings from
94-
the environment, and determines the encoding from the locale settings.
95-
This is almost always sufficient, unless there are special requirements.
92+
all other databases.
93+
</para>
94+
95+
<para>
96+
By default, <command>initdb</command> uses the ICU library to provide
97+
locale services if the server was built with ICU support; otherwise it uses
98+
the <literal>libc</literal> locale provider (see <xref
99+
linkend="locale-providers"/>). To choose the specific ICU locale ID to
100+
apply, use the option <option>--icu-locale</option>. Note that for
101+
implementation reasons and to support legacy code,
102+
<command>initdb</command> will still select and initialize libc locale
103+
settings when the ICU locale provider is used.
104+
</para>
105+
106+
<para>
107+
Alternatively, <command>initdb</command> can use the locale provider
108+
<literal>libc</literal>. To select this option, specify
109+
<literal>--locale-provider=libc</literal>, or build the server without ICU
110+
support. The <literal>libc</literal> locale provider takes the locale
111+
settings from the environment, and determines the encoding from the locale
112+
settings. This is almost always sufficient, unless there are special
113+
requirements.
96114
</para>
97115

98116
<para>
@@ -103,17 +121,6 @@ PostgreSQL documentation
103121
categories can give nonsensical results, so this should be used with care.
104122
</para>
105123

106-
<para>
107-
Alternatively, the ICU library can be used to provide locale services.
108-
(Again, this only sets the default for subsequently created databases.) To
109-
select this option, specify <literal>--locale-provider=icu</literal>.
110-
To choose the specific ICU locale ID to apply, use the option
111-
<option>--icu-locale</option>. Note that
112-
for implementation reasons and to support legacy code,
113-
<command>initdb</command> will still select and initialize libc locale
114-
settings when the ICU locale provider is used.
115-
</para>
116-
117124
<para>
118125
When <command>initdb</command> runs, it will print out the locale settings
119126
it has chosen. If you have complex requirements or specified multiple
@@ -234,7 +241,13 @@ PostgreSQL documentation
234241
<term><option>--icu-locale=<replaceable>locale</replaceable></option></term>
235242
<listitem>
236243
<para>
237-
Specifies the ICU locale ID, if the ICU locale provider is used.
244+
Specifies the ICU locale when the ICU provider is used. Locale support
245+
is described in <xref linkend="locale"/>.
246+
</para>
247+
<para>
248+
If this option is not specified, the locale is inherited from the
249+
environment in which <command>initdb</command> runs. The environment's
250+
locale is matched to a similar ICU locale name, if possible.
238251
</para>
239252
</listitem>
240253
</varlistentry>
@@ -307,10 +320,12 @@ PostgreSQL documentation
307320
<term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
308321
<listitem>
309322
<para>
310-
This option sets the locale provider for databases created in the
311-
new cluster. It can be overridden in the <command>CREATE
323+
This option sets the locale provider for databases created in the new
324+
cluster. It can be overridden in the <command>CREATE
312325
DATABASE</command> command when new databases are subsequently
313-
created. The default is <literal>libc</literal>.
326+
created. The default is <literal>icu</literal> if the server was
327+
built with ICU support; otherwise the default is
328+
<literal>libc</literal> (see <xref linkend="locale-providers"/>).
314329
</para>
315330
</listitem>
316331
</varlistentry>

src/bin/initdb/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ subdir = src/bin/initdb
1616
top_builddir = ../../..
1717
include $(top_builddir)/src/Makefile.global
1818

19-
override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
19+
override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS)
2020

2121
# Note: it's important that we link to encnames.o from libpgcommon, not
2222
# from libpq, else we have risks of version skew if we run with a libpq
2323
# shared library from a different PG version. The libpq_pgport macro
2424
# should ensure that that happens.
2525
#
2626
# We need libpq only because fe_utils does.
27-
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
27+
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS)
2828

2929
# use system timezone data?
3030
ifneq (,$(with_system_tzdata))

src/bin/initdb/initdb.c

+52-2
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353
#include <netdb.h>
5454
#include <sys/socket.h>
5555
#include <sys/stat.h>
56+
#ifdef USE_ICU
57+
#include <unicode/ucol.h>
58+
#endif
5659
#include <unistd.h>
5760
#include <signal.h>
5861
#include <time.h>
@@ -133,7 +136,11 @@ static char *lc_monetary = NULL;
133136
static char *lc_numeric = NULL;
134137
static char *lc_time = NULL;
135138
static char *lc_messages = NULL;
139+
#ifdef USE_ICU
140+
static char locale_provider = COLLPROVIDER_ICU;
141+
#else
136142
static char locale_provider = COLLPROVIDER_LIBC;
143+
#endif
137144
static char *icu_locale = NULL;
138145
static char *icu_rules = NULL;
139146
static const char *default_text_search_config = NULL;
@@ -2028,6 +2035,50 @@ check_icu_locale_encoding(int user_enc)
20282035
return true;
20292036
}
20302037

2038+
/*
2039+
* Check that ICU accepts the locale name; or if not specified, retrieve the
2040+
* default ICU locale.
2041+
*/
2042+
static void
2043+
check_icu_locale(void)
2044+
{
2045+
#ifdef USE_ICU
2046+
UCollator *collator;
2047+
UErrorCode status;
2048+
2049+
status = U_ZERO_ERROR;
2050+
collator = ucol_open(icu_locale, &status);
2051+
if (U_FAILURE(status))
2052+
{
2053+
if (icu_locale)
2054+
pg_fatal("could not open collator for locale \"%s\": %s",
2055+
icu_locale, u_errorName(status));
2056+
else
2057+
pg_fatal("could not open collator for default locale: %s",
2058+
u_errorName(status));
2059+
}
2060+
2061+
/* if not specified, get locale from default collator */
2062+
if (icu_locale == NULL)
2063+
{
2064+
const char *default_locale;
2065+
2066+
status = U_ZERO_ERROR;
2067+
default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
2068+
&status);
2069+
if (U_FAILURE(status))
2070+
{
2071+
ucol_close(collator);
2072+
pg_fatal("could not determine default ICU locale");
2073+
}
2074+
2075+
icu_locale = pg_strdup(default_locale);
2076+
}
2077+
2078+
ucol_close(collator);
2079+
#endif
2080+
}
2081+
20312082
/*
20322083
* set up the locale variables
20332084
*
@@ -2081,8 +2132,7 @@ setlocales(void)
20812132

20822133
if (locale_provider == COLLPROVIDER_ICU)
20832134
{
2084-
if (!icu_locale)
2085-
pg_fatal("ICU locale must be specified");
2135+
check_icu_locale();
20862136

20872137
/*
20882138
* In supported builds, the ICU locale ID will be checked by the

src/bin/initdb/t/001_initdb.pl

+1-6
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,6 @@
9797

9898
if ($ENV{with_icu} eq 'yes')
9999
{
100-
command_fails_like(
101-
[ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
102-
qr/initdb: error: ICU locale must be specified/,
103-
'locale provider ICU requires --icu-locale');
104-
105100
command_ok(
106101
[
107102
'initdb', '--no-sync',
@@ -116,7 +111,7 @@
116111
'--locale-provider=icu', '--icu-locale=@colNumeric=lower',
117112
"$tempdir/dataX"
118113
],
119-
qr/FATAL: could not open collator for locale/,
114+
qr/error: could not open collator for locale/,
120115
'fails for invalid ICU locale');
121116

122117
command_fails_like(

src/bin/pg_dump/t/002_pg_dump.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,7 @@
17581758
create_sql =>
17591759
"CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;",
17601760
regexp => qr/^
1761-
\QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C';\E
1761+
\QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C'\E
17621762
/xm,
17631763
like => { pg_dumpall_dbprivs => 1, },
17641764
},

src/bin/scripts/t/020_createdb.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
program_options_handling_ok('createdb');
1414

1515
my $node = PostgreSQL::Test::Cluster->new('main');
16-
$node->init;
16+
$node->init(extra => ['--locale-provider=libc']);
1717
$node->start;
1818

1919
$node->issues_sql_like(

src/interfaces/ecpg/test/Makefile

-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ override CPPFLAGS := \
1414
'-DSHELLPROG="$(SHELL)"' \
1515
$(CPPFLAGS)
1616

17-
# default encoding for regression tests
18-
ENCODING = SQL_ASCII
19-
2017
ifneq ($(build_os),mingw32)
2118
abs_builddir := $(shell pwd)
2219
else

src/interfaces/ecpg/test/connect/test5.pgc

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ exec sql end declare section;
5555
exec sql connect to 'unix:postgresql://localhost/ecpg2_regression' as main user :user USING "connectpw";
5656
exec sql disconnect main;
5757

58-
exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=latin1 as main user regress_ecpg_user1/connectpw;
58+
exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=sql_ascii as main user regress_ecpg_user1/connectpw;
5959
exec sql disconnect main;
6060

6161
exec sql connect to "unix:postgresql://200.46.204.71/ecpg2_regression" as main user regress_ecpg_user1/connectpw;

src/interfaces/ecpg/test/expected/connect-test5.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ main(void)
117117
#line 56 "test5.pgc"
118118

119119

120-
{ ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=latin1" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
120+
{ ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=sql_ascii" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
121121
#line 58 "test5.pgc"
122122

123123
{ ECPGdisconnect(__LINE__, "main");}

src/interfaces/ecpg/test/expected/connect-test5.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
[NO_PID]: sqlca: code: 0, state: 00000
5151
[NO_PID]: ecpg_finish: connection main closed
5252
[NO_PID]: sqlca: code: 0, state: 00000
53-
[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=latin1 for user regress_ecpg_user1
53+
[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=sql_ascii for user regress_ecpg_user1
5454
[NO_PID]: sqlca: code: 0, state: 00000
5555
[NO_PID]: ecpg_finish: connection main closed
5656
[NO_PID]: sqlca: code: 0, state: 00000

src/interfaces/ecpg/test/meson.build

-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ ecpg_test_files = files(
6969
ecpg_regress_args = [
7070
'--dbname=ecpg1_regression,ecpg2_regression',
7171
'--create-role=regress_ecpg_user1,regress_ecpg_user2',
72-
'--encoding=SQL_ASCII',
7372
]
7473

7574
tests += {

src/test/icu/t/010_database.pl

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
}
1313

1414
my $node1 = PostgreSQL::Test::Cluster->new('node1');
15-
$node1->init;
15+
$node1->init(extra => ['--locale-provider=libc']);
1616
$node1->start;
1717

1818
$node1->safe_psql('postgres',

0 commit comments

Comments
 (0)