|
9 | 9 |
|
10 | 10 | #include "postgres_fe.h"
|
11 | 11 |
|
| 12 | +#include "mb/pg_wchar.h" |
12 | 13 | #include "pg_upgrade.h"
|
13 | 14 |
|
14 | 15 |
|
15 | 16 | static void set_locale_and_encoding(ClusterInfo *cluster);
|
16 | 17 | static void check_new_cluster_is_empty(void);
|
17 | 18 | static void check_locale_and_encoding(ControlData *oldctrl,
|
18 | 19 | ControlData *newctrl);
|
| 20 | +static bool equivalent_locale(const char *loca, const char *locb); |
| 21 | +static bool equivalent_encoding(const char *chara, const char *charb); |
19 | 22 | static void check_is_super_user(ClusterInfo *cluster);
|
20 | 23 | static void check_for_prepared_transactions(ClusterInfo *cluster);
|
21 | 24 | static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
|
@@ -397,27 +400,80 @@ set_locale_and_encoding(ClusterInfo *cluster)
|
397 | 400 | /*
|
398 | 401 | * check_locale_and_encoding()
|
399 | 402 | *
|
400 |
| - * locale is not in pg_controldata in 8.4 and later so |
401 |
| - * we probably had to get via a database query. |
| 403 | + * Check that old and new locale and encoding match. Even though the backend |
| 404 | + * tries to canonicalize stored locale names, the platform often doesn't |
| 405 | + * cooperate, so it's entirely possible that one DB thinks its locale is |
| 406 | + * "en_US.UTF-8" while the other says "en_US.utf8". Try to be forgiving. |
402 | 407 | */
|
403 | 408 | static void
|
404 | 409 | check_locale_and_encoding(ControlData *oldctrl,
|
405 | 410 | ControlData *newctrl)
|
406 | 411 | {
|
407 |
| - /* |
408 |
| - * These are often defined with inconsistent case, so use pg_strcasecmp(). |
409 |
| - * They also often use inconsistent hyphenation, which we cannot fix, e.g. |
410 |
| - * UTF-8 vs. UTF8, so at least we display the mismatching values. |
411 |
| - */ |
412 |
| - if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0) |
| 412 | + if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate)) |
413 | 413 | pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
|
414 |
| - oldctrl->lc_collate, newctrl->lc_collate); |
415 |
| - if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0) |
| 414 | + oldctrl->lc_collate, newctrl->lc_collate); |
| 415 | + if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype)) |
416 | 416 | pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
|
417 |
| - oldctrl->lc_ctype, newctrl->lc_ctype); |
418 |
| - if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0) |
| 417 | + oldctrl->lc_ctype, newctrl->lc_ctype); |
| 418 | + if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding)) |
419 | 419 | pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
|
420 |
| - oldctrl->encoding, newctrl->encoding); |
| 420 | + oldctrl->encoding, newctrl->encoding); |
| 421 | +} |
| 422 | + |
/*
 * equivalent_locale()
 *
 * Best effort locale-name comparison.  Return false if we are not 100% sure
 * the locales are equivalent.
 *
 * loca, locb: the locale names to compare (e.g. "en_US.UTF-8"); both must be
 * non-NULL, NUL-terminated strings.
 *
 * Two names are considered equivalent when either
 *	- they are equal ignoring letter case, or
 *	- both contain a '.', the parts after the last '.' are accepted as the
 *	  same encoding by equivalent_encoding(), and the parts before it are
 *	  equal ignoring letter case.
 */
static bool
equivalent_locale(const char *loca, const char *locb)
{
	const char *chara;
	const char *charb;
	int			lencmp;

	/*
	 * Names that differ at most in letter case are equivalent no matter what
	 * their encoding part looks like.  Testing this first matters when the
	 * text after the dot is not something equivalent_encoding() accepts
	 * (possibly a bare code page number, as in "English_United States.1252"):
	 * without this test such a locale would be reported as not equivalent
	 * even to itself.
	 */
	if (pg_strcasecmp(loca, locb) == 0)
		return true;

	chara = strrchr(loca, '.');
	charb = strrchr(locb, '.');

	/*
	 * If they don't both contain an encoding part, only a whole-name match
	 * could make them equivalent, and that has just been ruled out.
	 */
	if (!chara || !charb)
		return false;

	/* Compare the encoding parts. */
	if (!equivalent_encoding(chara + 1, charb + 1))
		return false;

	/*
	 * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
	 *
	 * It's tempting to ignore non-alphanumeric chars here, but for now it's
	 * not clear that that's necessary; just do case-insensitive comparison.
	 */
	lencmp = chara - loca;
	if (lencmp != charb - locb)
		return false;

	return (pg_strncasecmp(loca, locb, lencmp) == 0);
}
| 456 | + |
/*
 * equivalent_encoding()
 *
 * Best effort encoding-name comparison.  Both names are looked up with
 * pg_valid_server_encoding(); the result is true only when both lookups
 * succeed (return a non-negative value) and yield the same value.
 *
 * Because that lookup does case folding and ignores non-alphanumeric
 * characters, many popular variant spellings are recognized as equivalent,
 * eg "utf8" and "UTF-8" will match.
 */
static bool
equivalent_encoding(const char *chara, const char *charb)
{
	const int	first_id = pg_valid_server_encoding(chara);
	const int	second_id = pg_valid_server_encoding(charb);

	/* A failed lookup on either side can never count as a match. */
	return (first_id >= 0 && second_id >= 0 && first_id == second_id);
}
|
422 | 478 |
|
423 | 479 |
|
|
0 commit comments