Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 33755e8

Browse files
committed
Change the way encoding and locale checks are done in pg_upgrade.
The lc_collate and lc_ctype settings have been per-database since server version 8.4, but pg_upgrade was still treating them as cluster-wide options. It fetched the values for the template0 databases in the old and new clusters, and compared them. That's backwards; the encoding and locale of the template0 database don't matter, as template0 is guaranteed to contain only ASCII characters. But if there are any other databases that exist on both clusters (in particular the template1 and postgres databases), their encodings and locales must be compatible. Also, make the locale comparison more lenient. If the locale names are not equal, try to canonicalize both of them by passing them to setlocale(). We used to do that only when upgrading from 9.1 or below, but it seems like a good idea even with newer versions. If we change the canonical form of a locale, this allows pg_upgrade to still work. I'm about to do just that to fix bug #11431, by mapping a locale name that contains non-ASCII characters to a pure-ASCII alias of the same locale. No backpatching, because earlier versions of pg_upgrade still support upgrading from 8.3 servers. Back-patching would therefore be more complicated, so it doesn't seem worth it, given that we haven't received any complaints about this from users.
1 parent f19f0ee commit 33755e8

File tree

4 files changed

+87
-173
lines changed

4 files changed

+87
-173
lines changed

contrib/pg_upgrade/check.c

+72-134
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,10 @@
1414
#include "pg_upgrade.h"
1515

1616

17-
static void set_locale_and_encoding(ClusterInfo *cluster);
1817
static void check_new_cluster_is_empty(void);
19-
static void check_locale_and_encoding(ControlData *oldctrl,
20-
ControlData *newctrl);
21-
static bool equivalent_locale(const char *loca, const char *locb);
22-
static bool equivalent_encoding(const char *chara, const char *charb);
18+
static void check_databases_are_compatible(void);
19+
static void check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb);
20+
static bool equivalent_locale(int category, const char *loca, const char *locb);
2321
static void check_is_install_user(ClusterInfo *cluster);
2422
static void check_for_prepared_transactions(ClusterInfo *cluster);
2523
static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
@@ -81,8 +79,6 @@ check_and_dump_old_cluster(bool live_check)
8179
if (!live_check)
8280
start_postmaster(&old_cluster, true);
8381

84-
set_locale_and_encoding(&old_cluster);
85-
8682
get_pg_database_relfilenode(&old_cluster);
8783

8884
/* Extract a list of databases and tables from the old cluster */
@@ -127,13 +123,10 @@ check_and_dump_old_cluster(bool live_check)
127123
void
128124
check_new_cluster(void)
129125
{
130-
set_locale_and_encoding(&new_cluster);
131-
132-
check_locale_and_encoding(&old_cluster.controldata, &new_cluster.controldata);
133-
134126
get_db_and_rel_infos(&new_cluster);
135127

136128
check_new_cluster_is_empty();
129+
check_databases_are_compatible();
137130

138131
check_loadable_libraries();
139132

@@ -278,156 +271,73 @@ check_cluster_compatibility(bool live_check)
278271
}
279272

280273

281-
/*
282-
* set_locale_and_encoding()
283-
*
284-
* query the database to get the template0 locale
285-
*/
286-
static void
287-
set_locale_and_encoding(ClusterInfo *cluster)
288-
{
289-
ControlData *ctrl = &cluster->controldata;
290-
PGconn *conn;
291-
PGresult *res;
292-
int i_encoding;
293-
int cluster_version = cluster->major_version;
294-
295-
conn = connectToServer(cluster, "template1");
296-
297-
/* for pg < 80400, we got the values from pg_controldata */
298-
if (cluster_version >= 80400)
299-
{
300-
int i_datcollate;
301-
int i_datctype;
302-
303-
res = executeQueryOrDie(conn,
304-
"SELECT datcollate, datctype "
305-
"FROM pg_catalog.pg_database "
306-
"WHERE datname = 'template0' ");
307-
assert(PQntuples(res) == 1);
308-
309-
i_datcollate = PQfnumber(res, "datcollate");
310-
i_datctype = PQfnumber(res, "datctype");
311-
312-
if (GET_MAJOR_VERSION(cluster->major_version) < 902)
313-
{
314-
/*
315-
* Pre-9.2 did not canonicalize the supplied locale names to match
316-
* what the system returns, while 9.2+ does, so convert pre-9.2 to
317-
* match.
318-
*/
319-
ctrl->lc_collate = get_canonical_locale_name(LC_COLLATE,
320-
pg_strdup(PQgetvalue(res, 0, i_datcollate)));
321-
ctrl->lc_ctype = get_canonical_locale_name(LC_CTYPE,
322-
pg_strdup(PQgetvalue(res, 0, i_datctype)));
323-
}
324-
else
325-
{
326-
ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
327-
ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
328-
}
329-
330-
PQclear(res);
331-
}
332-
333-
res = executeQueryOrDie(conn,
334-
"SELECT pg_catalog.pg_encoding_to_char(encoding) "
335-
"FROM pg_catalog.pg_database "
336-
"WHERE datname = 'template0' ");
337-
assert(PQntuples(res) == 1);
338-
339-
i_encoding = PQfnumber(res, "pg_encoding_to_char");
340-
ctrl->encoding = pg_strdup(PQgetvalue(res, 0, i_encoding));
341-
342-
PQclear(res);
343-
344-
PQfinish(conn);
345-
}
346-
347-
348274
/*
349275
* check_locale_and_encoding()
350276
*
351-
* Check that old and new locale and encoding match. Even though the backend
352-
* tries to canonicalize stored locale names, the platform often doesn't
353-
* cooperate, so it's entirely possible that one DB thinks its locale is
354-
* "en_US.UTF-8" while the other says "en_US.utf8". Try to be forgiving.
277+
* Check that locale and encoding of a database in the old and new clusters
278+
* are compatible.
355279
*/
356280
static void
357-
check_locale_and_encoding(ControlData *oldctrl,
358-
ControlData *newctrl)
281+
check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb)
359282
{
360-
if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
361-
pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
362-
oldctrl->lc_collate, newctrl->lc_collate);
363-
if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
364-
pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
365-
oldctrl->lc_ctype, newctrl->lc_ctype);
366-
if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
367-
pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
368-
oldctrl->encoding, newctrl->encoding);
283+
if (olddb->db_encoding != newdb->db_encoding)
284+
pg_fatal("encodings for database \"%s\" do not match: old \"%s\", new \"%s\"\n",
285+
olddb->db_name,
286+
pg_encoding_to_char(olddb->db_encoding),
287+
pg_encoding_to_char(newdb->db_encoding));
288+
if (!equivalent_locale(LC_COLLATE, olddb->db_collate, newdb->db_collate))
289+
pg_fatal("lc_collate values for database \"%s\" do not match: old \"%s\", new \"%s\"\n",
290+
olddb->db_name, olddb->db_collate, newdb->db_collate);
291+
if (!equivalent_locale(LC_CTYPE, olddb->db_ctype, newdb->db_ctype))
292+
pg_fatal("lc_ctype values for database \"%s\" do not match: old \"%s\", new \"%s\"\n",
293+
olddb->db_name, olddb->db_ctype, newdb->db_ctype);
369294
}
370295

371296
/*
372297
* equivalent_locale()
373298
*
374299
* Best effort locale-name comparison. Return false if we are not 100% sure
375300
* the locales are equivalent.
301+
*
302+
* Note: The encoding parts of the names are ignored. This function is
303+
* currently used to compare locale names stored in pg_database, and
304+
* pg_database contains a separate encoding field. That's compared directly
305+
* in check_locale_and_encoding().
376306
*/
377307
static bool
378-
equivalent_locale(const char *loca, const char *locb)
308+
equivalent_locale(int category, const char *loca, const char *locb)
379309
{
380310
const char *chara = strrchr(loca, '.');
381311
const char *charb = strrchr(locb, '.');
382-
int lencmp;
383-
384-
/* If they don't both contain an encoding part, just do strcasecmp(). */
385-
if (!chara || !charb)
386-
return (pg_strcasecmp(loca, locb) == 0);
312+
char *canona;
313+
char *canonb;
314+
int lena;
315+
int lenb;
387316

388317
/*
389-
* Compare the encoding parts. Windows tends to use code page numbers for
390-
* the encoding part, which equivalent_encoding() won't like, so accept if
391-
* the strings are case-insensitive equal; otherwise use
392-
* equivalent_encoding() to compare.
318+
* If the names are equal, the locales are equivalent. Checking this
319+
* first avoids calling setlocale() in the common case that the names
320+
* are equal. That's a good thing, if setlocale() is buggy, for example.
393321
*/
394-
if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
395-
!equivalent_encoding(chara + 1, charb + 1))
396-
return false;
322+
if (pg_strcasecmp(loca, locb) == 0)
323+
return true;
397324

398325
/*
399-
* OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
400-
*
401-
* It's tempting to ignore non-alphanumeric chars here, but for now it's
402-
* not clear that that's necessary; just do case-insensitive comparison.
326+
* Not identical. Canonicalize both names, remove the encoding parts,
327+
* and try again.
403328
*/
404-
lencmp = chara - loca;
405-
if (lencmp != charb - locb)
406-
return false;
329+
canona = get_canonical_locale_name(category, loca);
330+
chara = strrchr(canona, '.');
331+
lena = chara ? (chara - canona) : strlen(canona);
407332

408-
return (pg_strncasecmp(loca, locb, lencmp) == 0);
409-
}
333+
canonb = get_canonical_locale_name(category, locb);
334+
charb = strrchr(canonb, '.');
335+
lenb = charb ? (charb - canonb) : strlen(canonb);
410336

411-
/*
412-
* equivalent_encoding()
413-
*
414-
* Best effort encoding-name comparison. Return true only if the encodings
415-
* are valid server-side encodings and known equivalent.
416-
*
417-
* Because the lookup in pg_valid_server_encoding() does case folding and
418-
* ignores non-alphanumeric characters, this will recognize many popular
419-
* variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
420-
*/
421-
static bool
422-
equivalent_encoding(const char *chara, const char *charb)
423-
{
424-
int enca = pg_valid_server_encoding(chara);
425-
int encb = pg_valid_server_encoding(charb);
337+
if (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0)
338+
return true;
426339

427-
if (enca < 0 || encb < 0)
428-
return false;
429-
430-
return (enca == encb);
340+
return false;
431341
}
432342

433343

@@ -450,7 +360,35 @@ check_new_cluster_is_empty(void)
450360
new_cluster.dbarr.dbs[dbnum].db_name);
451361
}
452362
}
363+
}
364+
365+
/*
366+
* Check that every database that already exists in the new cluster is
367+
* compatible with the corresponding database in the old one.
368+
*/
369+
static void
370+
check_databases_are_compatible(void)
371+
{
372+
int newdbnum;
373+
int olddbnum;
374+
DbInfo *newdbinfo;
375+
DbInfo *olddbinfo;
453376

377+
for (newdbnum = 0; newdbnum < new_cluster.dbarr.ndbs; newdbnum++)
378+
{
379+
newdbinfo = &new_cluster.dbarr.dbs[newdbnum];
380+
381+
/* Find the corresponding database in the old cluster */
382+
for (olddbnum = 0; olddbnum < old_cluster.dbarr.ndbs; olddbnum++)
383+
{
384+
olddbinfo = &old_cluster.dbarr.dbs[olddbnum];
385+
if (strcmp(newdbinfo->db_name, olddbinfo->db_name) == 0)
386+
{
387+
check_locale_and_encoding(olddbinfo, newdbinfo);
388+
break;
389+
}
390+
}
391+
}
454392
}
455393

456394

contrib/pg_upgrade/controldata.c

-34
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,6 @@ get_control_data(ClusterInfo *cluster, bool live_check)
122122
pg_fatal("Could not get control data using %s: %s\n",
123123
cmd, getErrorText(errno));
124124

125-
/* Only pre-8.4 has these so if they are not set below we will check later */
126-
cluster->controldata.lc_collate = NULL;
127-
cluster->controldata.lc_ctype = NULL;
128-
129125
/* Only in <= 9.2 */
130126
if (GET_MAJOR_VERSION(cluster->major_version) <= 902)
131127
{
@@ -404,36 +400,6 @@ get_control_data(ClusterInfo *cluster, bool live_check)
404400
cluster->controldata.data_checksum_version = str2uint(p);
405401
got_data_checksum_version = true;
406402
}
407-
/* In pre-8.4 only */
408-
else if ((p = strstr(bufin, "LC_COLLATE:")) != NULL)
409-
{
410-
p = strchr(p, ':');
411-
412-
if (p == NULL || strlen(p) <= 1)
413-
pg_fatal("%d: controldata retrieval problem\n", __LINE__);
414-
415-
p++; /* remove ':' char */
416-
/* skip leading spaces and remove trailing newline */
417-
p += strspn(p, " ");
418-
if (strlen(p) > 0 && *(p + strlen(p) - 1) == '\n')
419-
*(p + strlen(p) - 1) = '\0';
420-
cluster->controldata.lc_collate = pg_strdup(p);
421-
}
422-
/* In pre-8.4 only */
423-
else if ((p = strstr(bufin, "LC_CTYPE:")) != NULL)
424-
{
425-
p = strchr(p, ':');
426-
427-
if (p == NULL || strlen(p) <= 1)
428-
pg_fatal("%d: controldata retrieval problem\n", __LINE__);
429-
430-
p++; /* remove ':' char */
431-
/* skip leading spaces and remove trailing newline */
432-
p += strspn(p, " ");
433-
if (strlen(p) > 0 && *(p + strlen(p) - 1) == '\n')
434-
*(p + strlen(p) - 1) = '\0';
435-
cluster->controldata.lc_ctype = pg_strdup(p);
436-
}
437403
}
438404

439405
if (output)

contrib/pg_upgrade/info.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -239,11 +239,15 @@ get_db_infos(ClusterInfo *cluster)
239239
DbInfo *dbinfos;
240240
int i_datname,
241241
i_oid,
242+
i_encoding,
243+
i_datcollate,
244+
i_datctype,
242245
i_spclocation;
243246
char query[QUERY_ALLOC];
244247

245248
snprintf(query, sizeof(query),
246-
"SELECT d.oid, d.datname, %s "
249+
"SELECT d.oid, d.datname, d.encoding, d.datcollate, d.datctype, "
250+
"%s AS spclocation "
247251
"FROM pg_catalog.pg_database d "
248252
" LEFT OUTER JOIN pg_catalog.pg_tablespace t "
249253
" ON d.dattablespace = t.oid "
@@ -252,12 +256,15 @@ get_db_infos(ClusterInfo *cluster)
252256
"ORDER BY 2",
253257
/* 9.2 removed the spclocation column */
254258
(GET_MAJOR_VERSION(cluster->major_version) <= 901) ?
255-
"t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid) AS spclocation");
259+
"t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid)");
256260

257261
res = executeQueryOrDie(conn, "%s", query);
258262

259263
i_oid = PQfnumber(res, "oid");
260264
i_datname = PQfnumber(res, "datname");
265+
i_encoding = PQfnumber(res, "encoding");
266+
i_datcollate = PQfnumber(res, "datcollate");
267+
i_datctype = PQfnumber(res, "datctype");
261268
i_spclocation = PQfnumber(res, "spclocation");
262269

263270
ntups = PQntuples(res);
@@ -267,6 +274,9 @@ get_db_infos(ClusterInfo *cluster)
267274
{
268275
dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid));
269276
dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname));
277+
dbinfos[tupnum].db_encoding = atoi(PQgetvalue(res, tupnum, i_encoding));
278+
dbinfos[tupnum].db_collate = pg_strdup(PQgetvalue(res, tupnum, i_datcollate));
279+
dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype));
270280
snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s",
271281
PQgetvalue(res, tupnum, i_spclocation));
272282
}

contrib/pg_upgrade/pg_upgrade.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ typedef struct
180180
char *db_name; /* database name */
181181
char db_tablespace[MAXPGPATH]; /* database default tablespace
182182
* path */
183+
char *db_collate;
184+
char *db_ctype;
185+
int db_encoding;
183186
RelInfoArr rel_arr; /* array of all user relinfos */
184187
} DbInfo;
185188

@@ -218,9 +221,6 @@ typedef struct
218221
bool date_is_int;
219222
bool float8_pass_by_value;
220223
bool data_checksum_version;
221-
char *lc_collate;
222-
char *lc_ctype;
223-
char *encoding;
224224
} ControlData;
225225

226226
/*

0 commit comments

Comments
 (0)