Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4bdf5e5

Browse files
committed
Make the locale comparison in pg_upgrade more lenient
If the locale names are not equal, try to canonicalize both of them by passing them to setlocale(). Before, we only canonicalized the old cluster's locale if upgrading from an 8.4-9.2 server, but we also need to canonicalize when upgrading from a pre-8.4 server. That was an oversight in the code. But we should also canonicalize on newer server versions, so that we cope if the canonical form changes from one release to another. I'm about to do just that to fix bug #11431, by mapping a locale name that contains non-ASCII characters to a pure-ASCII alias of the same locale. This is a partial backpatch of commit 33755e8 in master. Apply to 9.2, 9.3 and 9.4. The canonicalization code didn't exist before 9.2. In 9.2 and 9.3, this effectively also back-patches the changes from commit 5827472, to be more lax about the spelling of the encoding in the locale names.
1 parent 1cf54b0 commit 4bdf5e5

File tree

1 file changed

+35
-43
lines changed

1 file changed

+35
-43
lines changed

contrib/pg_upgrade/check.c

+35-43
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
1717
static void check_new_cluster_is_empty(void);
1818
static void check_locale_and_encoding(ControlData *oldctrl,
1919
ControlData *newctrl);
20-
static bool equivalent_locale(const char *loca, const char *locb);
20+
static bool equivalent_locale(int category, const char *loca, const char *locb);
2121
static bool equivalent_encoding(const char *chara, const char *charb);
2222
static void check_is_super_user(ClusterInfo *cluster);
2323
static void check_for_prepared_transactions(ClusterInfo *cluster);
@@ -370,23 +370,8 @@ set_locale_and_encoding(ClusterInfo *cluster)
370370
i_datcollate = PQfnumber(res, "datcollate");
371371
i_datctype = PQfnumber(res, "datctype");
372372

373-
if (GET_MAJOR_VERSION(cluster->major_version) < 902)
374-
{
375-
/*
376-
* Pre-9.2 did not canonicalize the supplied locale names to match
377-
* what the system returns, while 9.2+ does, so convert pre-9.2 to
378-
* match.
379-
*/
380-
ctrl->lc_collate = get_canonical_locale_name(LC_COLLATE,
381-
pg_strdup(PQgetvalue(res, 0, i_datcollate)));
382-
ctrl->lc_ctype = get_canonical_locale_name(LC_CTYPE,
383-
pg_strdup(PQgetvalue(res, 0, i_datctype)));
384-
}
385-
else
386-
{
387-
ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
388-
ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
389-
}
373+
ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
374+
ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
390375

391376
PQclear(res);
392377
}
@@ -418,10 +403,10 @@ static void
418403
check_locale_and_encoding(ControlData *oldctrl,
419404
ControlData *newctrl)
420405
{
421-
if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
406+
if (!equivalent_locale(LC_COLLATE, oldctrl->lc_collate, newctrl->lc_collate))
422407
pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
423408
oldctrl->lc_collate, newctrl->lc_collate);
424-
if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
409+
if (!equivalent_locale(LC_CTYPE, oldctrl->lc_ctype, newctrl->lc_ctype))
425410
pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
426411
oldctrl->lc_ctype, newctrl->lc_ctype);
427412
if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
@@ -434,39 +419,46 @@ check_locale_and_encoding(ControlData *oldctrl,
434419
*
435420
* Best effort locale-name comparison. Return false if we are not 100% sure
436421
* the locales are equivalent.
422+
*
423+
* Note: The encoding parts of the names are ignored. This function is
424+
* currently used to compare locale names stored in pg_database, and
425+
* pg_database contains a separate encoding field. That's compared directly
426+
* in check_locale_and_encoding().
437427
*/
438428
static bool
439-
equivalent_locale(const char *loca, const char *locb)
429+
equivalent_locale(int category, const char *loca, const char *locb)
440430
{
441-
const char *chara = strrchr(loca, '.');
442-
const char *charb = strrchr(locb, '.');
443-
int lencmp;
444-
445-
/* If they don't both contain an encoding part, just do strcasecmp(). */
446-
if (!chara || !charb)
447-
return (pg_strcasecmp(loca, locb) == 0);
431+
const char *chara;
432+
const char *charb;
433+
char *canona;
434+
char *canonb;
435+
int lena;
436+
int lenb;
448437

449438
/*
450-
* Compare the encoding parts. Windows tends to use code page numbers for
451-
* the encoding part, which equivalent_encoding() won't like, so accept if
452-
* the strings are case-insensitive equal; otherwise use
453-
* equivalent_encoding() to compare.
439+
* If the names are equal, the locales are equivalent. Checking this
440+
* first avoids calling setlocale() in the common case that the names
441+
* are equal. That's a good thing, if setlocale() is buggy, for example.
454442
*/
455-
if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
456-
!equivalent_encoding(chara + 1, charb + 1))
457-
return false;
443+
if (pg_strcasecmp(loca, locb) == 0)
444+
return true;
458445

459446
/*
460-
* OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
461-
*
462-
* It's tempting to ignore non-alphanumeric chars here, but for now it's
463-
* not clear that that's necessary; just do case-insensitive comparison.
447+
* Not identical. Canonicalize both names, remove the encoding parts,
448+
* and try again.
464449
*/
465-
lencmp = chara - loca;
466-
if (lencmp != charb - locb)
467-
return false;
450+
canona = get_canonical_locale_name(category, loca);
451+
chara = strrchr(canona, '.');
452+
lena = chara ? (chara - canona) : strlen(canona);
453+
454+
canonb = get_canonical_locale_name(category, locb);
455+
charb = strrchr(canonb, '.');
456+
lenb = charb ? (charb - canonb) : strlen(canonb);
457+
458+
if (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0)
459+
return true;
468460

469-
return (pg_strncasecmp(loca, locb, lencmp) == 0);
461+
return false;
470462
}
471463

472464
/*

0 commit comments

Comments
 (0)