Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit c2d4526

Browse files
committed
Tighten the check in initdb and CREATE DATABASE that the chosen encoding
matches the encoding of the locale. LC_COLLATE is now checked in addition to LC_CTYPE.
1 parent 61d9674 commit c2d4526

File tree

3 files changed

+81
-52
lines changed

3 files changed

+81
-52
lines changed

doc/src/sgml/charset.sgml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.88 2008/09/23 09:20:34 heikki Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.89 2008/09/23 10:58:03 heikki Exp $ -->
22

33
<chapter id="charset">
44
<title>Localization</>
@@ -320,10 +320,10 @@ initdb --locale=sv_SE
320320

321321
<para>
322322
An important restriction, however, is that each database's character set
323-
must be compatible with the database's <envar>LC_CTYPE</> setting.
324-
When <envar>LC_CTYPE</> is <literal>C</> or <literal>POSIX</>, any
325-
character set is allowed, but for other settings of <envar>LC_CTYPE</>
326-
there is only one character set that will work correctly.
323+
must be compatible with the database's <envar>LC_CTYPE</> and
324+
<envar>LC_COLLATE</> locale settings. For <literal>C</> or
325+
<literal>POSIX</> locale, any character set is allowed, but for other
326+
locales there is only one character set that will work correctly.
327327
</para>
328328

329329
<sect2 id="multibyte-charset-supported">

src/backend/commands/dbcommands.c

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
*
1515
* IDENTIFICATION
16-
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.211 2008/09/23 09:20:35 heikki Exp $
16+
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.212 2008/09/23 10:58:03 heikki Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -118,6 +118,7 @@ createdb(const CreatedbStmt *stmt)
118118
int encoding = -1;
119119
int dbconnlimit = -1;
120120
int ctype_encoding;
121+
int collate_encoding;
121122
int notherbackends;
122123
int npreparedxacts;
123124
createdb_failure_params fparms;
@@ -334,6 +335,7 @@ createdb(const CreatedbStmt *stmt)
334335
* Note: if you change this policy, fix initdb to match.
335336
*/
336337
ctype_encoding = pg_get_encoding_from_locale(dbctype);
338+
collate_encoding = pg_get_encoding_from_locale(dbcollate);
337339

338340
if (!(ctype_encoding == encoding ||
339341
ctype_encoding == PG_SQL_ASCII ||
@@ -345,9 +347,22 @@ createdb(const CreatedbStmt *stmt)
345347
(errmsg("encoding %s does not match locale %s",
346348
pg_encoding_to_char(encoding),
347349
dbctype),
348-
errdetail("The chosen LC_CTYPE setting requires encoding %s.",
350+
errdetail("The chosen CTYPE setting requires encoding %s.",
349351
pg_encoding_to_char(ctype_encoding))));
350352

353+
if (!(collate_encoding == encoding ||
354+
collate_encoding == PG_SQL_ASCII ||
355+
#ifdef WIN32
356+
encoding == PG_UTF8 ||
357+
#endif
358+
(encoding == PG_SQL_ASCII && superuser())))
359+
ereport(ERROR,
360+
(errmsg("encoding %s does not match locale %s",
361+
pg_encoding_to_char(encoding),
362+
dbcollate),
363+
errdetail("The chosen COLLATE setting requires encoding %s.",
364+
pg_encoding_to_char(collate_encoding))));
365+
351366
/*
352367
* Check that the new locale is compatible with the source database.
353368
*

src/bin/initdb/initdb.c

Lines changed: 59 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1994, Regents of the University of California
4343
* Portions taken from FreeBSD.
4444
*
45-
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.160 2008/09/23 09:20:37 heikki Exp $
45+
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.161 2008/09/23 10:58:03 heikki Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -188,7 +188,8 @@ static void trapsig(int signum);
188188
static void check_ok(void);
189189
static char *escape_quotes(const char *src);
190190
static int locale_date_order(const char *locale);
191-
static bool chklocale(const char *locale);
191+
static bool check_locale_name(const char *locale);
192+
static bool check_locale_encoding(const char *locale, int encoding);
192193
static void setlocales(void);
193194
static void usage(const char *progname);
194195

@@ -2187,7 +2188,7 @@ locale_date_order(const char *locale)
21872188
* this should match the backend check_locale() function
21882189
*/
21892190
static bool
2190-
chklocale(const char *locale)
2191+
check_locale_name(const char *locale)
21912192
{
21922193
bool ret;
21932194
int category = LC_CTYPE;
@@ -2211,6 +2212,50 @@ chklocale(const char *locale)
22112212
return ret;
22122213
}
22132214

2215+
/*
2216+
* check if the chosen encoding matches the encoding required by the locale
2217+
*
* locale is the locale name whose encoding is looked up with
* pg_get_encoding_from_locale(); user_enc is the encoding id that is
* compared against the result.
*
* The combination is accepted when the two encodings are equal, when the
* locale's encoding is PG_SQL_ASCII, or when user_enc is PG_SQL_ASCII
* (and, when built with WIN32 defined, also when user_enc is PG_UTF8).
*
* Returns true if the combination is accepted.  Otherwise an
* "encoding mismatch" explanation is written to stderr and false is
* returned; this function does not exit by itself, so the caller decides
* how to react.
*
2218+
* this should match the similar check in the backend createdb() function
2219+
*/
2220+
static bool
2221+
check_locale_encoding(const char *locale, int user_enc)
2222+
{
2223+
int locale_enc;
2224+
2225+
locale_enc = pg_get_encoding_from_locale(locale);
2226+
2227+
/* We allow selection of SQL_ASCII --- see notes in createdb() */
2228+
if (!(locale_enc == user_enc ||
2229+
locale_enc == PG_SQL_ASCII ||
2230+
user_enc == PG_SQL_ASCII
2231+
#ifdef WIN32
2232+
2233+
/*
2234+
* On win32, if the encoding chosen is UTF8, all locales are OK
2235+
* (assuming the actual locale name passed the checks above). This is
2236+
* because UTF8 is a pseudo-codepage, that we convert to UTF16 before
2237+
* doing any operations on, and UTF16 supports all locales.
2238+
*/
2239+
|| user_enc == PG_UTF8
2240+
#endif
2241+
))
2242+
{
2243+
/* None of the accepted combinations applied: report both encoding names */
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
2244+
fprintf(stderr,
2245+
_("The encoding you selected (%s) and the encoding that the\n"
2246+
"selected locale uses (%s) do not match. This would lead to\n"
2247+
"misbehavior in various character string processing functions.\n"
2248+
"Rerun %s and either do not specify an encoding explicitly,\n"
2249+
"or choose a matching combination.\n"),
2250+
pg_encoding_to_char(user_enc),
2251+
pg_encoding_to_char(locale_enc),
2252+
progname);
2253+
/* the error has been printed; leave the reaction to the caller */
return false;
2254+
}
2255+
return true;
2256+
}
2257+
2258+
22142259
/*
22152260
* set up the locale variables
22162261
*
@@ -2241,17 +2286,17 @@ setlocales(void)
22412286
* override absent/invalid config settings from initdb's locale settings
22422287
*/
22432288

2244-
if (strlen(lc_ctype) == 0 || !chklocale(lc_ctype))
2289+
if (strlen(lc_ctype) == 0 || !check_locale_name(lc_ctype))
22452290
lc_ctype = xstrdup(setlocale(LC_CTYPE, NULL));
2246-
if (strlen(lc_collate) == 0 || !chklocale(lc_collate))
2291+
if (strlen(lc_collate) == 0 || !check_locale_name(lc_collate))
22472292
lc_collate = xstrdup(setlocale(LC_COLLATE, NULL));
2248-
if (strlen(lc_numeric) == 0 || !chklocale(lc_numeric))
2293+
if (strlen(lc_numeric) == 0 || !check_locale_name(lc_numeric))
22492294
lc_numeric = xstrdup(setlocale(LC_NUMERIC, NULL));
2250-
if (strlen(lc_time) == 0 || !chklocale(lc_time))
2295+
if (strlen(lc_time) == 0 || !check_locale_name(lc_time))
22512296
lc_time = xstrdup(setlocale(LC_TIME, NULL));
2252-
if (strlen(lc_monetary) == 0 || !chklocale(lc_monetary))
2297+
if (strlen(lc_monetary) == 0 || !check_locale_name(lc_monetary))
22532298
lc_monetary = xstrdup(setlocale(LC_MONETARY, NULL));
2254-
if (strlen(lc_messages) == 0 || !chklocale(lc_messages))
2299+
if (strlen(lc_messages) == 0 || !check_locale_name(lc_messages))
22552300
#if defined(LC_MESSAGES) && !defined(WIN32)
22562301
{
22572302
/* when available get the current locale setting */
@@ -2452,6 +2497,7 @@ main(int argc, char *argv[])
24522497
* environment */
24532498
char bin_dir[MAXPGPATH];
24542499
char *pg_data_native;
2500+
int user_enc;
24552501

24562502
#ifdef WIN32
24572503
char *restrict_env;
@@ -2868,44 +2914,12 @@ main(int argc, char *argv[])
28682914
}
28692915
}
28702916
else
2871-
{
2872-
int user_enc;
2873-
int ctype_enc;
2874-
28752917
encodingid = get_encoding_id(encoding);
2876-
user_enc = atoi(encodingid);
2877-
2878-
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
28792918

2880-
/* We allow selection of SQL_ASCII --- see notes in createdb() */
2881-
if (!(ctype_enc == user_enc ||
2882-
ctype_enc == PG_SQL_ASCII ||
2883-
user_enc == PG_SQL_ASCII
2884-
#ifdef WIN32
2885-
2886-
/*
2887-
* On win32, if the encoding chosen is UTF8, all locales are OK
2888-
* (assuming the actual locale name passed the checks above). This is
2889-
* because UTF8 is a pseudo-codepage, that we convert to UTF16 before
2890-
* doing any operations on, and UTF16 supports all locales.
2891-
*/
2892-
|| user_enc == PG_UTF8
2893-
#endif
2894-
))
2895-
{
2896-
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
2897-
fprintf(stderr,
2898-
_("The encoding you selected (%s) and the encoding that the\n"
2899-
"selected locale uses (%s) do not match. This would lead to\n"
2900-
"misbehavior in various character string processing functions.\n"
2901-
"Rerun %s and either do not specify an encoding explicitly,\n"
2902-
"or choose a matching combination.\n"),
2903-
pg_encoding_to_char(user_enc),
2904-
pg_encoding_to_char(ctype_enc),
2905-
progname);
2906-
exit(1);
2907-
}
2908-
}
2919+
user_enc = atoi(encodingid);
2920+
if (!check_locale_encoding(lc_ctype, user_enc) ||
2921+
!check_locale_encoding(lc_collate, user_enc))
2922+
exit(1); /* check_locale_encoding printed the error */
29092923

29102924
if (strlen(default_text_search_config) == 0)
29112925
{

0 commit comments

Comments
 (0)