Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Fix some more omissions in pg_upgrade's tests for non-upgradable types.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 29 Apr 2021 19:24:37 +0000 (15:24 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 29 Apr 2021 19:24:37 +0000 (15:24 -0400)
Commits 29aeda6e4 et al closed up some oversights involving not checking
for non-upgradable types within container types, such as arrays and
ranges.  However, I only looked at version.c, failing to notice that
there were substantially-equivalent tests in check.c.  (The division
of responsibility between those files is less than clear...)

In addition, because genbki.pl does not guarantee that auto-generated
rowtype OIDs will hold still across versions, we need to consider that
the composite type associated with a system catalog or view is
non-upgradable.  It seems unlikely that someone would have a user
column declared that way, but if they did, trying to read it in another
PG version would likely draw "no such pg_type OID" failures, thanks
to the type OID embedded in composite Datums.

To support the composite and reg*-type cases, extend the recursive
query that does the search to allow any base query that returns
a column of pg_type OIDs, rather than limiting it to exactly one
starting type.

As before, back-patch to all supported branches.

Discussion: https://postgr.es/m/2798740.1619622555@sss.pgh.pa.us

src/bin/pg_upgrade/check.c
src/bin/pg_upgrade/pg_upgrade.h
src/bin/pg_upgrade/version.c

index de607c67f428ca99910f3cfd08891460bc341e67..1a00eae0b8bd8c701f85d5878e4aa4713ed8fc66 100644 (file)
@@ -24,6 +24,7 @@ static void check_proper_datallowconn(ClusterInfo *cluster);
 static void check_for_prepared_transactions(ClusterInfo *cluster);
 static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
 static void check_for_tables_with_oids(ClusterInfo *cluster);
+static void check_for_composite_data_type_usage(ClusterInfo *cluster);
 static void check_for_reg_data_type_usage(ClusterInfo *cluster);
 static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
 static void check_for_pg_role_prefix(ClusterInfo *cluster);
@@ -99,6 +100,7 @@ check_and_dump_old_cluster(bool live_check)
    check_is_install_user(&old_cluster);
    check_proper_datallowconn(&old_cluster);
    check_for_prepared_transactions(&old_cluster);
+   check_for_composite_data_type_usage(&old_cluster);
    check_for_reg_data_type_usage(&old_cluster);
    check_for_isn_and_int8_passing_mismatch(&old_cluster);
 
@@ -1001,6 +1003,63 @@ check_for_tables_with_oids(ClusterInfo *cluster)
 }
 
 
+/*
+ * check_for_composite_data_type_usage()
+ * Check for system-defined composite types used in user tables.
+ *
+ * The OIDs of rowtypes of system catalogs and information_schema views
+ * can change across major versions; unlike user-defined types, we have
+ * no mechanism for forcing them to be the same in the new cluster.
+ * Hence, if any user table uses one, that's problematic for pg_upgrade.
+ */
+static void
+check_for_composite_data_type_usage(ClusterInfo *cluster)
+{
+   bool        found;
+   Oid         firstUserOid;
+   char        output_path[MAXPGPATH];
+   char       *base_query;
+
+   prep_status("Checking for system-defined composite types in user tables");
+
+   snprintf(output_path, sizeof(output_path), "tables_using_composite.txt");
+
+   /*
+    * Look for composite types that were made during initdb *or* belong to
+    * information_schema; that's important in case information_schema was
+    * dropped and reloaded.
+    *
+    * The cutoff OID here should match the source cluster's value of
+    * FirstNormalObjectId.  We hardcode it rather than using that C #define
+    * because, if that #define is ever changed, our own version's value is
+    * NOT what to use.  Eventually we may need a test on the source cluster's
+    * version to select the correct value.
+    */
+   firstUserOid = 16384;
+
+   base_query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t "
+                         "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid "
+                         " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')",
+                         firstUserOid);
+
+   found = check_for_data_types_usage(cluster, base_query, output_path);
+
+   free(base_query);
+
+   if (found)
+   {
+       pg_log(PG_REPORT, "fatal\n");
+       pg_fatal("Your installation contains system-defined composite type(s) in user tables.\n"
+                "These type OIDs are not stable across PostgreSQL versions,\n"
+                "so this cluster cannot currently be upgraded.  You can\n"
+                "drop the problem columns and restart the upgrade.\n"
+                "A list of the problem columns is in the file:\n"
+                "    %s\n\n", output_path);
+   }
+   else
+       check_ok();
+}
+
 /*
  * check_for_reg_data_type_usage()
  * pg_upgrade only preserves these system values:
@@ -1015,87 +1074,36 @@ check_for_tables_with_oids(ClusterInfo *cluster)
 static void
 check_for_reg_data_type_usage(ClusterInfo *cluster)
 {
-   int         dbnum;
-   FILE       *script = NULL;
-   bool        found = false;
+   bool        found;
    char        output_path[MAXPGPATH];
 
    prep_status("Checking for reg* data types in user tables");
 
    snprintf(output_path, sizeof(output_path), "tables_using_reg.txt");
 
-   for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
-   {
-       PGresult   *res;
-       bool        db_used = false;
-       int         ntups;
-       int         rowno;
-       int         i_nspname,
-                   i_relname,
-                   i_attname;
-       DbInfo     *active_db = &cluster->dbarr.dbs[dbnum];
-       PGconn     *conn = connectToServer(cluster, active_db->db_name);
-
-       /*
-        * While several relkinds don't store any data, e.g. views, they can
-        * be used to define data types of other columns, so we check all
-        * relkinds.
-        */
-       res = executeQueryOrDie(conn,
-                               "SELECT n.nspname, c.relname, a.attname "
-                               "FROM   pg_catalog.pg_class c, "
-                               "       pg_catalog.pg_namespace n, "
-                               "       pg_catalog.pg_attribute a, "
-                               "       pg_catalog.pg_type t "
-                               "WHERE  c.oid = a.attrelid AND "
-                               "       NOT a.attisdropped AND "
-                               "       a.atttypid = t.oid AND "
-                               "       t.typnamespace = "
-                               "           (SELECT oid FROM pg_namespace "
-                               "            WHERE nspname = 'pg_catalog') AND"
-                               "       t.typname IN ( "
-       /* regclass.oid is preserved, so 'regclass' is OK */
-                               "           'regconfig', "
-                               "           'regdictionary', "
-                               "           'regnamespace', "
-                               "           'regoper', "
-                               "           'regoperator', "
-                               "           'regproc', "
-                               "           'regprocedure' "
-       /* regrole.oid is preserved, so 'regrole' is OK */
-       /* regtype.oid is preserved, so 'regtype' is OK */
-                               "           ) AND "
-                               "       c.relnamespace = n.oid AND "
-                               "       n.nspname NOT IN ('pg_catalog', 'information_schema')");
-
-       ntups = PQntuples(res);
-       i_nspname = PQfnumber(res, "nspname");
-       i_relname = PQfnumber(res, "relname");
-       i_attname = PQfnumber(res, "attname");
-       for (rowno = 0; rowno < ntups; rowno++)
-       {
-           found = true;
-           if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
-               pg_fatal("could not open file \"%s\": %s\n",
-                        output_path, strerror(errno));
-           if (!db_used)
-           {
-               fprintf(script, "Database: %s\n", active_db->db_name);
-               db_used = true;
-           }
-           fprintf(script, "  %s.%s.%s\n",
-                   PQgetvalue(res, rowno, i_nspname),
-                   PQgetvalue(res, rowno, i_relname),
-                   PQgetvalue(res, rowno, i_attname));
-       }
-
-       PQclear(res);
-
-       PQfinish(conn);
-   }
-
-   if (script)
-       fclose(script);
+   /*
+    * Note: older servers will not have all of these reg* types, so we have
+    * to write the query like this rather than depending on casts to regtype.
+    */
+   found = check_for_data_types_usage(cluster,
+                                      "SELECT oid FROM pg_catalog.pg_type t "
+                                      "WHERE t.typnamespace = "
+                                      "        (SELECT oid FROM pg_catalog.pg_namespace "
+                                      "         WHERE nspname = 'pg_catalog') "
+                                      "  AND t.typname IN ( "
+   /* pg_class.oid is preserved, so 'regclass' is OK */
+                                      "           'regcollation', "
+                                      "           'regconfig', "
+                                      "           'regdictionary', "
+                                      "           'regnamespace', "
+                                      "           'regoper', "
+                                      "           'regoperator', "
+                                      "           'regproc', "
+                                      "           'regprocedure' "
+   /* pg_authid.oid is preserved, so 'regrole' is OK */
+   /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */
+                                      "         )",
+                                      output_path);
 
    if (found)
    {
@@ -1120,75 +1128,13 @@ check_for_reg_data_type_usage(ClusterInfo *cluster)
 static void
 check_for_jsonb_9_4_usage(ClusterInfo *cluster)
 {
-   int         dbnum;
-   FILE       *script = NULL;
-   bool        found = false;
    char        output_path[MAXPGPATH];
 
    prep_status("Checking for incompatible \"jsonb\" data type");
 
    snprintf(output_path, sizeof(output_path), "tables_using_jsonb.txt");
 
-   for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
-   {
-       PGresult   *res;
-       bool        db_used = false;
-       int         ntups;
-       int         rowno;
-       int         i_nspname,
-                   i_relname,
-                   i_attname;
-       DbInfo     *active_db = &cluster->dbarr.dbs[dbnum];
-       PGconn     *conn = connectToServer(cluster, active_db->db_name);
-
-       /*
-        * While several relkinds don't store any data, e.g. views, they can
-        * be used to define data types of other columns, so we check all
-        * relkinds.
-        */
-       res = executeQueryOrDie(conn,
-                               "SELECT n.nspname, c.relname, a.attname "
-                               "FROM   pg_catalog.pg_class c, "
-                               "       pg_catalog.pg_namespace n, "
-                               "       pg_catalog.pg_attribute a "
-                               "WHERE  c.oid = a.attrelid AND "
-                               "       NOT a.attisdropped AND "
-                               "       a.atttypid = 'pg_catalog.jsonb'::pg_catalog.regtype AND "
-                               "       c.relnamespace = n.oid AND "
-       /* exclude possible orphaned temp tables */
-                               "       n.nspname !~ '^pg_temp_' AND "
-                               "       n.nspname NOT IN ('pg_catalog', 'information_schema')");
-
-       ntups = PQntuples(res);
-       i_nspname = PQfnumber(res, "nspname");
-       i_relname = PQfnumber(res, "relname");
-       i_attname = PQfnumber(res, "attname");
-       for (rowno = 0; rowno < ntups; rowno++)
-       {
-           found = true;
-           if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
-               pg_fatal("could not open file \"%s\": %s\n",
-                        output_path, strerror(errno));
-           if (!db_used)
-           {
-               fprintf(script, "Database: %s\n", active_db->db_name);
-               db_used = true;
-           }
-           fprintf(script, "  %s.%s.%s\n",
-                   PQgetvalue(res, rowno, i_nspname),
-                   PQgetvalue(res, rowno, i_relname),
-                   PQgetvalue(res, rowno, i_attname));
-       }
-
-       PQclear(res);
-
-       PQfinish(conn);
-   }
-
-   if (script)
-       fclose(script);
-
-   if (found)
+   if (check_for_data_type_usage(cluster, "pg_catalog.jsonb", output_path))
    {
        pg_log(PG_REPORT, "fatal\n");
        pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n"
index 63574b51bc7bb4a736a1281e044466be02af7fd9..3922e0a002877c1755397c56cb90e4257d797c7b 100644 (file)
@@ -451,6 +451,12 @@ void       pg_putenv(const char *var, const char *val);
 
 /* version.c */
 
+bool       check_for_data_types_usage(ClusterInfo *cluster,
+                                      const char *base_query,
+                                      const char *output_path);
+bool       check_for_data_type_usage(ClusterInfo *cluster,
+                                     const char *typename,
+                                     const char *output_path);
 void       new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster,
                                                     bool check_mode);
 void       old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
index cee793d5610600aff0b902231f74fadb0681dcbb..8ec70cd0ea865a5e44affd32cd96dbd9751fcb0d 100644 (file)
@@ -100,17 +100,22 @@ new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster, bool check_mode)
 
 
 /*
- * check_for_data_type_usage
- * Detect whether there are any stored columns depending on the given type
+ * check_for_data_types_usage()
+ * Detect whether there are any stored columns depending on given type(s)
  *
  * If so, write a report to the given file name, and return true.
  *
- * We check for the type in tables, matviews, and indexes, but not views;
+ * base_query should be a SELECT yielding a single column named "oid",
+ * containing the pg_type OIDs of one or more types that are known to have
+ * inconsistent on-disk representations across server versions.
+ *
+ * We check for the type(s) in tables, matviews, and indexes, but not views;
  * there's no storage involved in a view.
  */
-static bool
-check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
-                         char *output_path)
+bool
+check_for_data_types_usage(ClusterInfo *cluster,
+                          const char *base_query,
+                          const char *output_path)
 {
    bool        found = false;
    FILE       *script = NULL;
@@ -130,7 +135,7 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
                    i_attname;
 
        /*
-        * The type of interest might be wrapped in a domain, array,
+        * The type(s) of interest might be wrapped in a domain, array,
         * composite, or range, and these container types can be nested (to
         * varying extents depending on server version, but that's not of
         * concern here).  To handle all these cases we need a recursive CTE.
@@ -138,8 +143,8 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
        initPQExpBuffer(&querybuf);
        appendPQExpBuffer(&querybuf,
                          "WITH RECURSIVE oids AS ( "
-       /* the target type itself */
-                         " SELECT '%s'::pg_catalog.regtype AS oid "
+       /* start with the type(s) returned by base_query */
+                         " %s "
                          " UNION ALL "
                          " SELECT * FROM ( "
        /* inner WITH because we can only reference the CTE once */
@@ -157,7 +162,7 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
                          "               c.oid = a.attrelid AND "
                          "               NOT a.attisdropped AND "
                          "               a.atttypid = x.oid ",
-                         typename);
+                         base_query);
 
        /* Ranges came in in 9.2 */
        if (GET_MAJOR_VERSION(cluster->major_version) >= 902)
@@ -225,6 +230,34 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
    return found;
 }
 
+/*
+ * check_for_data_type_usage()
+ * Detect whether there are any stored columns depending on the given type
+ *
+ * If so, write a report to the given file name, and return true.
+ *
+ * typename should be a fully qualified type name.  This is just a
+ * trivial wrapper around check_for_data_types_usage() to convert a
+ * type name into a base query.
+ */
+bool
+check_for_data_type_usage(ClusterInfo *cluster,
+                         const char *typename,
+                         const char *output_path)
+{
+   bool        found;
+   char       *base_query;
+
+   base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid",
+                         typename);
+
+   found = check_for_data_types_usage(cluster, base_query, output_path);
+
+   free(base_query);
+
+   return found;
+}
+
 
 /*
  * old_9_3_check_for_line_data_type_usage()