Fix creation of partition descriptor during concurrent detach+drop
author      Alvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 12 Aug 2024 22:17:56 +0000 (18:17 -0400)
committer   Alvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 12 Aug 2024 22:17:56 +0000 (18:17 -0400)
If a partition undergoes DETACH CONCURRENTLY immediately followed by
DROP, this could cause a problem for a concurrent transaction
recomputing the partition descriptor when running a prepared statement,
because the catalog scan for the partition's pg_class tuple finds
nothing and the code then dereferences a NULL tuple pointer.
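
Concretely, the problematic spot is the unguarded attribute fetch in
RelationBuildPartitionDesc(), condensed here from the removed lines of
the hunk below (a sketch, not a verbatim quote):

    tuple = systable_getnext(scan);     /* NULL once the detached partition's
                                         * pg_class row has been dropped */
    datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
                         RelationGetDescr(pg_class), &isnull);  /* dereferences NULL */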

The existing retry logic added in commit dbca3469ebf8 is sufficient to
cope with the overall problem, provided we don't try to dereference a
non-existent heap tuple.
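
In code form, the guarded lookup the patch arrives at looks roughly
like this (a simplified sketch of the patched path in
RelationBuildPartitionDesc(): local declarations are gathered for
readability, the retry bookkeeping is reduced to a comment, and the
scan-key attribute Anum_pg_class_oid with BTEqualStrategyNumber/F_OIDEQ
is assumed, since those arguments are elided from the hunk below):

    Relation    pg_class;
    SysScanDesc scan;
    ScanKeyData key[1];
    HeapTuple   tuple;
    PartitionBoundSpec *boundspec = NULL;

    pg_class = table_open(RelationRelationId, AccessShareLock);
    ScanKeyInit(&key[0],
                Anum_pg_class_oid,          /* assumed; not shown in the hunk */
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(inhrelid));
    scan = systable_beginscan(pg_class, ClassOidIndexId, true,
                              NULL, 1, key);

    /* One tuple in the normal case; zero if the table was dropped meanwhile. */
    tuple = systable_getnext(scan);
    if (HeapTupleIsValid(tuple))
    {
        Datum       datum;
        bool        isnull;

        datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
                             RelationGetDescr(pg_class), &isnull);
        if (!isnull)
            boundspec = stringToNode(TextDatumGetCString(datum));
    }
    systable_endscan(scan);
    table_close(pg_class, AccessShareLock);

    /*
     * If boundspec is still NULL, DETACH CONCURRENTLY (possibly followed by
     * DROP) intervened; fall through to the existing one-shot retry.
     */

The only behavioral change relative to the old code is the
HeapTupleIsValid() guard: a concurrently dropped partition now leaves
boundspec NULL and takes the existing retry path instead of passing a
NULL tuple to heap_getattr().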

Arguably, the code in RelationBuildPartitionDesc() has been wrong all
along, since commit 898e5e3290a7 added no check against receiving a
NULL tuple from the catalog scan; that bug has only become user-visible
with DETACH CONCURRENTLY, which was added in branch 14.  Therefore,
even though there's no known way to trigger a crash in the older
branches, backpatch the addition of such a check to all supported
branches.  In branches prior to 14, the new check would cause the code
to fail with a "missing relpartbound for relation XYZ" error instead of
crashing; that's okay, because there are no reports of such behavior
anyway.

Author: Kuntal Ghosh <kuntalghosh.2007@gmail.com>
Reviewed-by: Junwang Zhao <zhjwpku@gmail.com>
Reviewed-by: Tender Wang <tndrwang@gmail.com>
Discussion: https://postgr.es/m/18559-b48286d2eacd9a4e@postgresql.org

src/backend/partitioning/partdesc.c

index 5b1353cfc6c64ff1ed72ac331fca6678f7da2541..e2f47b0cd64496b1bfa5643304a2ead375145081 100644
@@ -210,6 +210,10 @@ retry:
         * shared queue.  We solve this problem by reading pg_class directly
         * for the desired tuple.
         *
+        * If the partition recently detached is also dropped, we get no tuple
+        * from the scan.  In that case, we also retry, and next time through
+        * here, we don't see that partition anymore.
+        *
         * The other problem is that DETACH CONCURRENTLY is in the process of
         * removing a partition, which happens in two steps: first it marks it
         * as "detach pending", commits, then unsets relpartbound.  If
@@ -224,8 +228,6 @@ retry:
            Relation    pg_class;
            SysScanDesc scan;
            ScanKeyData key[1];
-           Datum       datum;
-           bool        isnull;
 
            pg_class = table_open(RelationRelationId, AccessShareLock);
            ScanKeyInit(&key[0],
@@ -234,17 +236,29 @@ retry:
                        ObjectIdGetDatum(inhrelid));
            scan = systable_beginscan(pg_class, ClassOidIndexId, true,
                                      NULL, 1, key);
+
+           /*
+            * We could get one tuple from the scan (the normal case), or zero
+            * tuples if the table has been dropped meanwhile.
+            */
            tuple = systable_getnext(scan);
-           datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
-                                RelationGetDescr(pg_class), &isnull);
-           if (!isnull)
-               boundspec = stringToNode(TextDatumGetCString(datum));
+           if (HeapTupleIsValid(tuple))
+           {
+               Datum       datum;
+               bool        isnull;
+
+               datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
+                                    RelationGetDescr(pg_class), &isnull);
+               if (!isnull)
+                   boundspec = stringToNode(TextDatumGetCString(datum));
+           }
            systable_endscan(scan);
            table_close(pg_class, AccessShareLock);
 
            /*
-            * If we still don't get a relpartbound value, then it must be
-            * because of DETACH CONCURRENTLY.  Restart from the top, as
+            * If we still don't get a relpartbound value (either because
+            * boundspec is null or because there was no tuple), then it must
+            * be because of DETACH CONCURRENTLY.  Restart from the top, as
             * explained above.  We only do this once, for two reasons: first,
             * only one DETACH CONCURRENTLY session could affect us at a time,
             * since each of them would have to wait for the snapshot under