Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 0ccfc28

Browse files
committed
Check for tables with sql_identifier during pg_upgrade
Commit 7c15cef changed sql_identifier data type to be based on name instead of varchar. Unfortunately, this breaks on-disk format for this data type. Luckily, that should be a very rare problem, as this data type is used only in information_schema views, so this only affects user objects (tables, materialized views and indexes). One way to end in such situation is to do CTAS with a query on those system views. There are two options to deal with this - we can either abort pg_upgrade if there are user objects with sql_identifier columns in pg_upgrade, or we could replace the sql_identifier type with varchar. Considering how rare the issue is expected to be, and the complexity of replacing the data type (e.g. in matviews), we've decided to go with the simple check. The query is somewhat complex - the sql_identifier data type may be used indirectly - through a domain, a composite type or both, possibly in multiple levels. Detecting this requires a recursive CTE. Backpatch to 12, where the sql_identifier definition changed. Reported-by: Hans Buschmann Author: Tomas Vondra Reviewed-by: Tom Lane Backpatch-to: 12 Discussion: https://postgr.es/m/16045-673e8fa6b5ace196%40postgresql.org
1 parent 14ac423 commit 0ccfc28

File tree

3 files changed

+129
-0
lines changed

3 files changed

+129
-0
lines changed

src/bin/pg_upgrade/check.c

+8
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ check_and_dump_old_cluster(bool live_check)
108108
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
109109
check_for_tables_with_oids(&old_cluster);
110110

111+
/*
112+
* PG 12 changed the 'sql_identifier' type storage to be based on name,
113+
* not varchar, which breaks on-disk format for existing data. So we need
114+
* to prevent upgrade when used in user objects (tables, indexes, ...).
115+
*/
116+
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
117+
old_11_check_for_sql_identifier_data_type_usage(&old_cluster);
118+
111119
/*
112120
* Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged
113121
* hash indexes

src/bin/pg_upgrade/pg_upgrade.h

+2
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,8 @@ void old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster);
451451
void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
452452
bool check_mode);
453453

454+
void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster);
455+
454456
/* parallel.c */
455457
void parallel_exec_prog(const char *log_file, const char *opt_log_file,
456458
const char *fmt,...) pg_attribute_printf(3, 4);

src/bin/pg_upgrade/version.c

+119
Original file line numberDiff line numberDiff line change
@@ -399,3 +399,122 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
399399
else
400400
check_ok();
401401
}
402+
403+
/*
404+
* old_11_check_for_sql_identifier_data_type_usage()
405+
* 11 -> 12
406+
* In 12, the sql_identifier data type was switched from name to varchar,
407+
* which does affect the storage (name is by-ref, but not varlena). This
408+
* means user tables using sql_identifier for columns are broken because
409+
* the on-disk format is different.
410+
*
411+
* We need to check all objects that might store sql_identifier on disk,
412+
* i.e. tables, matviews and indexes. Also check composite types in case
413+
* they are used in this context.
414+
*/
415+
void
416+
old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster)
417+
{
418+
int dbnum;
419+
FILE *script = NULL;
420+
bool found = false;
421+
char output_path[MAXPGPATH];
422+
423+
prep_status("Checking for invalid \"sql_identifier\" user columns");
424+
425+
snprintf(output_path, sizeof(output_path), "tables_using_sql_identifier.txt");
426+
427+
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
428+
{
429+
PGresult *res;
430+
bool db_used = false;
431+
int ntups;
432+
int rowno;
433+
int i_nspname,
434+
i_relname,
435+
i_attname;
436+
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
437+
PGconn *conn = connectToServer(cluster, active_db->db_name);
438+
439+
/*
440+
* We need the recursive CTE because the sql_identifier may be wrapped
441+
* either in a domain or composite type, or both (in arbitrary order).
442+
*/
443+
res = executeQueryOrDie(conn,
444+
"WITH RECURSIVE oids AS ( "
445+
/* the sql_identifier type itself */
446+
" SELECT 'information_schema.sql_identifier'::regtype AS oid "
447+
" UNION ALL "
448+
" SELECT * FROM ( "
449+
/* domains on the type */
450+
" WITH x AS (SELECT oid FROM oids) "
451+
" SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' "
452+
" UNION "
453+
/* composite types containing the type */
454+
" SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x "
455+
" WHERE t.typtype = 'c' AND "
456+
" t.oid = c.reltype AND "
457+
" c.oid = a.attrelid AND "
458+
" NOT a.attisdropped AND "
459+
" a.atttypid = x.oid "
460+
" ) foo "
461+
") "
462+
"SELECT n.nspname, c.relname, a.attname "
463+
"FROM pg_catalog.pg_class c, "
464+
" pg_catalog.pg_namespace n, "
465+
" pg_catalog.pg_attribute a "
466+
"WHERE c.oid = a.attrelid AND "
467+
" NOT a.attisdropped AND "
468+
" a.atttypid IN (SELECT oid FROM oids) AND "
469+
" c.relkind IN ("
470+
CppAsString2(RELKIND_RELATION) ", "
471+
CppAsString2(RELKIND_MATVIEW) ", "
472+
CppAsString2(RELKIND_INDEX) ") AND "
473+
" c.relnamespace = n.oid AND "
474+
/* exclude possible orphaned temp tables */
475+
" n.nspname !~ '^pg_temp_' AND "
476+
" n.nspname !~ '^pg_toast_temp_' AND "
477+
" n.nspname NOT IN ('pg_catalog', 'information_schema')");
478+
479+
ntups = PQntuples(res);
480+
i_nspname = PQfnumber(res, "nspname");
481+
i_relname = PQfnumber(res, "relname");
482+
i_attname = PQfnumber(res, "attname");
483+
for (rowno = 0; rowno < ntups; rowno++)
484+
{
485+
found = true;
486+
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
487+
pg_fatal("could not open file \"%s\": %s\n", output_path,
488+
strerror(errno));
489+
if (!db_used)
490+
{
491+
fprintf(script, "Database: %s\n", active_db->db_name);
492+
db_used = true;
493+
}
494+
fprintf(script, " %s.%s.%s\n",
495+
PQgetvalue(res, rowno, i_nspname),
496+
PQgetvalue(res, rowno, i_relname),
497+
PQgetvalue(res, rowno, i_attname));
498+
}
499+
500+
PQclear(res);
501+
502+
PQfinish(conn);
503+
}
504+
505+
if (script)
506+
fclose(script);
507+
508+
if (found)
509+
{
510+
pg_log(PG_REPORT, "fatal\n");
511+
pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables\n"
512+
"and/or indexes. The on-disk format for this data type has changed, so this\n"
513+
"cluster cannot currently be upgraded. You can remove the problem tables or\n"
514+
"change the data type to \"name\" and restart the upgrade.\n"
515+
"A list of the problem columns is in the file:\n"
516+
" %s\n\n", output_path);
517+
}
518+
else
519+
check_ok();
520+
}

0 commit comments

Comments
 (0)