Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Fix relfilenodemap.c's handling of cache invalidations.
authorRobert Haas <rhaas@postgresql.org>
Wed, 13 Nov 2013 15:52:59 +0000 (10:52 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 13 Nov 2013 15:52:59 +0000 (10:52 -0500)
The old code entered a new hash table entry first, then scanned
pg_class to determine what value to fill in, and then populated the
entry.  This fails to work properly if a cache invalidation happens
as a result of opening pg_class.  Repair.

Along the way, get rid of the idea of blowing away the entire hash
table as a method of processing invalidations.  Instead, just delete
all the entries one by one.  This is probably not quite as cheap but
it's simpler, and shouldn't happen often.

Andres Freund

src/backend/utils/cache/relfilenodemap.c

index 904b2cd9e7487314563d8123628b357cdb9ee361..1469bfaba2f2d0580c981f078bd230fb1c6d162e 100644 (file)
@@ -57,23 +57,20 @@ RelfilenodeMapInvalidateCallback(Datum arg, Oid relid)
    HASH_SEQ_STATUS status;
    RelfilenodeMapEntry *entry;
 
-   /* nothing to do if not active or deleted */
-   if (RelfilenodeMapHash == NULL)
-       return;
-
-   /* if relid is InvalidOid, we must invalidate the entire cache */
-   if (relid == InvalidOid)
-   {
-       hash_destroy(RelfilenodeMapHash);
-       RelfilenodeMapHash = NULL;
-       return;
-   }
+   /* callback only gets registered after creating the hash */
+   Assert(RelfilenodeMapHash != NULL);
 
    hash_seq_init(&status, RelfilenodeMapHash);
    while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL)
    {
-       /* Same OID may occur in more than one tablespace. */
-       if (entry->relid == relid)
+       /*
+        * If relid is InvalidOid, signalling a complete reset, we must remove
+        * all entries, otherwise just remove the specific relation's entry.
+        * Always remove negative cache entries.
+        */
+       if (relid == InvalidOid ||      /* complete reset */
+           entry->relid == InvalidOid ||       /* negative cache entry */
+           entry->relid == relid)      /* individual flushed relation */
        {
            if (hash_search(RelfilenodeMapHash,
                            (void *) &entry->key,
@@ -92,32 +89,12 @@ static void
 InitializeRelfilenodeMap(void)
 {
    HASHCTL     ctl;
-   static bool initial_init_done = false;
-   int i;
+   int         i;
 
    /* Make sure we've initialized CacheMemoryContext. */
    if (CacheMemoryContext == NULL)
        CreateCacheMemoryContext();
 
-   /* Initialize the hash table. */
-   MemSet(&ctl, 0, sizeof(ctl));
-   ctl.keysize = sizeof(RelfilenodeMapKey);
-   ctl.entrysize = sizeof(RelfilenodeMapEntry);
-   ctl.hash = tag_hash;
-   ctl.hcxt = CacheMemoryContext;
-
-   RelfilenodeMapHash =
-       hash_create("RelfilenodeMap cache", 1024, &ctl,
-                   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
-
-   /*
-    * For complete resets we simply delete the entire hash, but there's no
-    * need to do the other stuff multiple times. Especially the initialization
-    * of the relcche invalidation should only be done once.
-    */
-   if (initial_init_done)
-       return;
-
    /* build skey */
    MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey));
 
@@ -134,10 +111,25 @@ InitializeRelfilenodeMap(void)
    relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace;
    relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode;
 
+   /* Initialize the hash table. */
+   MemSet(&ctl, 0, sizeof(ctl));
+   ctl.keysize = sizeof(RelfilenodeMapKey);
+   ctl.entrysize = sizeof(RelfilenodeMapEntry);
+   ctl.hash = tag_hash;
+   ctl.hcxt = CacheMemoryContext;
+
+   /*
+    * Only create the RelfilenodeMapHash now, so we don't end up partially
+    * initialized when fmgr_info_cxt() above ERRORs out with an out of memory
+    * error.
+    */
+   RelfilenodeMapHash =
+       hash_create("RelfilenodeMap cache", 1024, &ctl,
+                   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
    /* Watch for invalidation events. */
    CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback,
                                  (Datum) 0);
-   initial_init_done = true;
 }
 
 /*
@@ -156,6 +148,7 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
    Relation relation;
    HeapTuple ntp;
    ScanKeyData skey[2];
+   Oid         relid;
 
    if (RelfilenodeMapHash == NULL)
        InitializeRelfilenodeMap();
@@ -169,81 +162,99 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
    key.relfilenode = relfilenode;
 
    /*
-    * Check cache and enter entry if nothing could be found. Even if no target
+    * Check cache and return entry if one is found. Even if no target
     * relation can be found later on we store the negative match and return a
-    * InvalidOid from cache. That's not really necessary for performance since
-    * querying invalid values isn't supposed to be a frequent thing, but the
-    * implementation is simpler this way.
+    * InvalidOid from cache. That's not really necessary for performance
+    * since querying invalid values isn't supposed to be a frequent thing,
+    * but it's basically free.
     */
-   entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
+   entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_FIND, &found);
 
    if (found)
        return entry->relid;
 
-   /* initialize empty/negative cache entry before doing the actual lookup */
-   entry->relid = InvalidOid;
-
    /* ok, no previous cache entry, do it the hard way */
 
-   /* check shared tables */
+   /* initialize empty/negative cache entry before doing the actual lookups */
+   relid = InvalidOid;
+
    if (reltablespace == GLOBALTABLESPACE_OID)
    {
-       entry->relid = RelationMapFilenodeToOid(relfilenode, true);
-       return entry->relid;
+       /*
+        * Ok, shared table, check relmapper.
+        */
+       relid = RelationMapFilenodeToOid(relfilenode, true);
    }
+   else
+   {
+       /*
+        * Not a shared table, could either be a plain relation or a
+        * non-shared, nailed one, like e.g. pg_class.
+        */
 
-   /* check plain relations by looking in pg_class */
-   relation = heap_open(RelationRelationId, AccessShareLock);
+       /* check for plain relations by looking in pg_class */
+       relation = heap_open(RelationRelationId, AccessShareLock);
 
-   /* copy scankey to local copy, it will be modified during the scan */
-   memcpy(skey, relfilenode_skey, sizeof(skey));
+       /* copy scankey to local copy, it will be modified during the scan */
+       memcpy(skey, relfilenode_skey, sizeof(skey));
 
-   /* set scan arguments */
-   skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
-   skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
+       /* set scan arguments */
+       skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
+       skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
 
-   scandesc = systable_beginscan(relation,
-                                 ClassTblspcRelfilenodeIndexId,
-                                 true,
-                                 NULL,
-                                 2,
-                                 skey);
+       scandesc = systable_beginscan(relation,
+                                     ClassTblspcRelfilenodeIndexId,
+                                     true,
+                                     NULL,
+                                     2,
+                                     skey);
 
-   found = false;
+       found = false;
 
-   while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
-   {
-       if (found)
-           elog(ERROR,
-                "unexpected duplicate for tablespace %u, relfilenode %u",
-                reltablespace, relfilenode);
-       found = true;
+       while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
+       {
+           if (found)
+               elog(ERROR,
+                    "unexpected duplicate for tablespace %u, relfilenode %u",
+                    reltablespace, relfilenode);
+           found = true;
 
 #ifdef USE_ASSERT_CHECKING
-       if (assert_enabled)
-       {
-           bool isnull;
-           Oid check;
-           check = fastgetattr(ntp, Anum_pg_class_reltablespace,
-                               RelationGetDescr(relation),
-                               &isnull);
-           Assert(!isnull && check == reltablespace);
-
-           check = fastgetattr(ntp, Anum_pg_class_relfilenode,
-                               RelationGetDescr(relation),
-                               &isnull);
-           Assert(!isnull && check == relfilenode);
-       }
+           if (assert_enabled)
+           {
+               bool isnull;
+               Oid check;
+               check = fastgetattr(ntp, Anum_pg_class_reltablespace,
+                                   RelationGetDescr(relation),
+                                   &isnull);
+               Assert(!isnull && check == reltablespace);
+
+               check = fastgetattr(ntp, Anum_pg_class_relfilenode,
+                                   RelationGetDescr(relation),
+                                   &isnull);
+               Assert(!isnull && check == relfilenode);
+           }
 #endif
-       entry->relid = HeapTupleGetOid(ntp);
-   }
+           relid = HeapTupleGetOid(ntp);
+       }
 
-   systable_endscan(scandesc);
-   heap_close(relation, AccessShareLock);
+       systable_endscan(scandesc);
+       heap_close(relation, AccessShareLock);
 
-   /* check for tables that are mapped but not shared */
-   if (!found)
-       entry->relid = RelationMapFilenodeToOid(relfilenode, false);
+       /* check for tables that are mapped but not shared */
+       if (!found)
+           relid = RelationMapFilenodeToOid(relfilenode, false);
+   }
+
+   /*
+    * Only enter entry into cache now, our opening of pg_class could have
+    * caused cache invalidations to be executed which would have deleted a
+    * new entry if we had entered it above.
+    */
+   entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found);
+   if (found)
+       elog(ERROR, "corrupted hashtable");
+   entry->relid = relid;
 
-   return entry->relid;
+   return relid;
 }