Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Drop slot's LWLock before returning from SaveSlotToPath()
author: Peter Eisentraut <peter@eisentraut.org>
Thu, 26 Mar 2020 10:51:39 +0000 (11:51 +0100)
committer: Peter Eisentraut <peter@eisentraut.org>
Thu, 26 Mar 2020 12:51:22 +0000 (13:51 +0100)
When SaveSlotToPath() is called with elevel=LOG, the early exits didn't
release the slot's io_in_progress_lock.

This could leave a walsender stuck waiting on the lock forever.  One
way to get into this situation is for the offending code paths to be
triggered when disk space is low.

Author: Pavan Deolasee <pavan.deolasee@2ndquadrant.com>
Reported-by: Craig Ringer <craig@2ndquadrant.com>
Discussion: https://www.postgresql.org/message-id/flat/56a138c5-de61-f553-7e8f-6789296de785%402ndquadrant.com

src/backend/replication/slot.c

index d59c5488c7fddae485165a4258935c1cdcc7422f..137e9f5dc528df40948dfd871a7a4dc495f086dd 100644 (file)
@@ -1251,6 +1251,12 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
                           S_IRUSR | S_IWUSR);
    if (fd < 0)
    {
+       /*
+        * If not an ERROR, then release the lock before returning.  In case
+        * of an ERROR, the error recovery path automatically releases the
+        * lock, but no harm in explicitly releasing even in that case.
+        */
+       LWLockRelease(&slot->io_in_progress_lock);
        ereport(elevel,
                (errcode_for_file_access(),
                 errmsg("could not create file \"%s\": %m",
@@ -1282,6 +1288,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
 
        pgstat_report_wait_end();
        CloseTransientFile(fd);
+       LWLockRelease(&slot->io_in_progress_lock);
 
        /* if write didn't set errno, assume problem is no disk space */
        errno = save_errno ? save_errno : ENOSPC;
@@ -1301,6 +1308,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
 
        pgstat_report_wait_end();
        CloseTransientFile(fd);
+       LWLockRelease(&slot->io_in_progress_lock);
        errno = save_errno;
        ereport(elevel,
                (errcode_for_file_access(),
@@ -1315,6 +1323,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
    /* rename to permanent file, fsync file and directory */
    if (rename(tmppath, path) != 0)
    {
+       LWLockRelease(&slot->io_in_progress_lock);
        ereport(elevel,
                (errcode_for_file_access(),
                 errmsg("could not rename file \"%s\" to \"%s\": %m",