Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e8f9c00

Browse files
author
Commitfest Bot
committed
[CF 5563] pg_upgrade check for Unicode update
This branch was automatically generated by a robot using patches from an email thread registered at:

https://commitfest.postgresql.org/patch/5563

The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch.

Patch(es): https://www.postgresql.org/message-id/3fb775608d4af58c2cf8b94d86710b40f92e8858.camel@j-davis.com

Author(s): Jeff Davis
2 parents 7e05df4 + 37e24d8 commit e8f9c00

11 files changed

+7442
-6609
lines changed

src/Makefile.global.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps
358358
# Pick a release from here: <https://www.unicode.org/Public/>. Note
359359
# that the most recent release listed there is often a pre-release;
360360
# don't pick that one, except for testing.
361-
UNICODE_VERSION = 15.1.0
361+
UNICODE_VERSION = 16.0.0
362362

363363
# Pick a release from here: <http://cldr.unicode.org/index/downloads>
364364
CLDR_VERSION = 45

src/bin/pg_upgrade/check.c

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "catalog/pg_class_d.h"
1414
#include "fe_utils/string_utils.h"
1515
#include "pg_upgrade.h"
16+
#include "common/unicode_version.h"
1617

1718
static void check_new_cluster_is_empty(void);
1819
static void check_is_install_user(ClusterInfo *cluster);
@@ -25,6 +26,7 @@ static void check_for_tables_with_oids(ClusterInfo *cluster);
2526
static void check_for_pg_role_prefix(ClusterInfo *cluster);
2627
static void check_for_new_tablespace_dir(void);
2728
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
29+
static void check_for_unicode_update(ClusterInfo *cluster);
2830
static void check_new_cluster_logical_replication_slots(void);
2931
static void check_new_cluster_subscription_configuration(void);
3032
static void check_old_cluster_for_valid_slots(void);
@@ -633,6 +635,12 @@ check_and_dump_old_cluster(void)
633635

634636
check_for_data_types_usage(&old_cluster);
635637

638+
/*
639+
* Unicode updates can affect some objects that use expressions with
640+
* functions dependent on Unicode.
641+
*/
642+
check_for_unicode_update(&old_cluster);
643+
636644
/*
637645
* PG 14 changed the function signature of encoding conversion functions.
638646
* Conversions from older versions cannot be upgraded automatically
@@ -1727,6 +1735,178 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
17271735
check_ok();
17281736
}
17291737

1738+
/*
1739+
* Callback function for processing results of query for
1740+
* check_for_unicode_update()'s UpgradeTask. If the query returned any rows
1741+
* (i.e., the check failed), write the details to the report file.
1742+
*/
1743+
static void
1744+
process_unicode_update(DbInfo *dbinfo, PGresult *res, void *arg)
1745+
{
1746+
UpgradeTaskReport *report = (UpgradeTaskReport *) arg;
1747+
int ntups = PQntuples(res);
1748+
int i_reloid = PQfnumber(res, "reloid");
1749+
int i_nspname = PQfnumber(res, "nspname");
1750+
int i_relname = PQfnumber(res, "relname");
1751+
1752+
if (ntups == 0)
1753+
return;
1754+
1755+
if (report->file == NULL &&
1756+
(report->file = fopen_priv(report->path, "w")) == NULL)
1757+
pg_fatal("could not open file \"%s\": %m", report->path);
1758+
1759+
fprintf(report->file, "In database: %s\n", dbinfo->db_name);
1760+
1761+
for (int rowno = 0; rowno < ntups; rowno++)
1762+
fprintf(report->file, " (oid=%s) %s.%s\n",
1763+
PQgetvalue(res, rowno, i_reloid),
1764+
PQgetvalue(res, rowno, i_nspname),
1765+
PQgetvalue(res, rowno, i_relname));
1766+
}
1767+
1768+
/*
1769+
* Check if the Unicode version built into Postgres changed between the old
1770+
* cluster and the new cluster.
1771+
*/
1772+
static bool
1773+
unicode_version_changed(ClusterInfo *cluster)
1774+
{
1775+
PGconn *conn_template1 = connectToServer(cluster, "template1");
1776+
PGresult *res;
1777+
char *old_unicode_version;
1778+
bool unicode_updated;
1779+
1780+
res = executeQueryOrDie(conn_template1, "SELECT unicode_version()");
1781+
old_unicode_version = PQgetvalue(res, 0, 0);
1782+
unicode_updated = (strcmp(old_unicode_version, PG_UNICODE_VERSION) != 0);
1783+
1784+
PQclear(res);
1785+
PQfinish(conn_template1);
1786+
1787+
return unicode_updated;
1788+
}
1789+
1790+
/*
1791+
* check_for_unicode_update()
1792+
*
1793+
* Check if the version of Unicode in the old server and the new server
1794+
* differ. If so, check for indexes, partitioned tables, or constraints that
1795+
* use expressions with functions dependent on Unicode behavior.
1796+
*/
1797+
static void
1798+
check_for_unicode_update(ClusterInfo *cluster)
1799+
{
1800+
UpgradeTaskReport report;
1801+
UpgradeTask *task = upgrade_task_create();
1802+
const char *query;
1803+
1804+
/*
1805+
* The builtin provider did not exist prior to version 17. While there are
1806+
* still problems that could potentially be caught from earlier versions,
1807+
* such as an index on NORMALIZE(), we don't check for that here.
1808+
*/
1809+
if (GET_MAJOR_VERSION(cluster->major_version) < 1700)
1810+
return;
1811+
1812+
prep_status("Checking for objects affected by Unicode update");
1813+
1814+
if (!unicode_version_changed(cluster))
1815+
{
1816+
check_ok();
1817+
return;
1818+
}
1819+
1820+
report.file = NULL;
1821+
snprintf(report.path, sizeof(report.path), "%s/%s",
1822+
log_opts.basedir,
1823+
"unicode_dependent_rels.txt");
1824+
1825+
query =
1826+
/* collations that use built-in Unicode for character semantics */
1827+
"WITH collations(collid) AS ( "
1828+
" SELECT oid FROM pg_collation "
1829+
" WHERE collprovider='b' AND colllocale IN ('C.UTF-8','PG_UNICODE_FAST') "
1830+
/* include default collation, if appropriate */
1831+
" UNION "
1832+
" SELECT 'pg_catalog.default'::regcollation FROM pg_database "
1833+
" WHERE datname = current_database() AND "
1834+
" datlocprovider='b' AND datlocale IN ('C.UTF-8','PG_UNICODE_FAST') "
1835+
"), "
1836+
/* functions that use built-in Unicode */
1837+
"functions(procid) AS ( "
1838+
" SELECT proc.oid FROM pg_proc proc "
1839+
" WHERE proname IN ('normalize','unicode_assigned','unicode_version','is_normalized') AND "
1840+
" pronamespace='pg_catalog'::regnamespace "
1841+
"), "
1842+
/* operators that use the input collation for character semantics */
1843+
"coll_operators(operid, procid, collid) AS ( "
1844+
" SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations "
1845+
" WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND "
1846+
" oprnamespace='pg_catalog'::regnamespace AND "
1847+
" oprright='text'::regtype "
1848+
"), "
1849+
/* functions that use the input collation for character semantics */
1850+
"coll_functions(procid, collid) AS ( "
1851+
" SELECT proc.oid, collid FROM pg_proc proc, collations "
1852+
" WHERE proname IN ('lower','initcap','upper') AND "
1853+
" pronamespace='pg_catalog'::regnamespace AND "
1854+
" proargtypes[0] = 'text'::regtype "
1855+
/* include functions behind the operators listed above */
1856+
" UNION "
1857+
" SELECT procid, collid FROM coll_operators "
1858+
"), "
1859+
1860+
/*
1861+
* Generate patterns to search a pg_node_tree for the above functions and
1862+
* operators.
1863+
*/
1864+
"patterns(p) AS ( "
1865+
" SELECT '{FUNCEXPR :funcid ' || procid::text || '[ }]' FROM functions "
1866+
" UNION "
1867+
" SELECT '{OPEXPR :opno ' || operid::text || ' (:\\w+ \\w+ )*' || "
1868+
" ':inputcollid ' || collid::text || '[ }]' FROM coll_operators "
1869+
" UNION "
1870+
" SELECT '{FUNCEXPR :funcid ' || procid::text || ' (:\\w+ \\w+ )*' || "
1871+
" ':inputcollid ' || collid::text || '[ }]' FROM coll_functions "
1872+
") "
1873+
1874+
/*
1875+
* Match the patterns against expressions used for relation contents.
1876+
*/
1877+
"SELECT reloid, relkind, nspname, relname "
1878+
" FROM ( "
1879+
" SELECT conrelid "
1880+
" FROM pg_constraint, patterns WHERE conbin::text ~ p "
1881+
" UNION "
1882+
" SELECT indexrelid "
1883+
" FROM pg_index, patterns WHERE indexprs::text ~ p OR indpred::text ~ p "
1884+
" UNION "
1885+
" SELECT partrelid "
1886+
" FROM pg_partitioned_table, patterns WHERE partexprs::text ~ p "
1887+
" ) s(reloid), pg_class c, pg_namespace n, pg_database d "
1888+
" WHERE s.reloid = c.oid AND c.relnamespace = n.oid AND "
1889+
" d.datname = current_database() AND "
1890+
" d.encoding = pg_char_to_encoding('UTF8');";
1891+
1892+
upgrade_task_add_step(task, query,
1893+
process_unicode_update,
1894+
true, &report);
1895+
upgrade_task_run(task, cluster);
1896+
upgrade_task_free(task);
1897+
1898+
if (report.file)
1899+
{
1900+
fclose(report.file);
1901+
report_status(PG_WARNING, "warning");
1902+
pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n"
1903+
"A list of potentially-affected relations is in the file:\n"
1904+
" %s", report.path);
1905+
}
1906+
else
1907+
check_ok();
1908+
}
1909+
17301910
/*
17311911
* check_new_cluster_logical_replication_slots()
17321912
*

src/common/unicode/meson.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) 2022-2025, PostgreSQL Global Development Group
22

3-
UNICODE_VERSION = '15.1.0'
3+
UNICODE_VERSION = '16.0.0'
44

55
unicode_data = {}
66
unicode_baseurl = 'https://www.unicode.org/Public/@0@/ucd/@1@'

0 commit comments

Comments
 (0)