Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 66c2922

Browse files
committed
Take care to de-duplicate entries in standby.c's table of locks.
The RecoveryLockLists data structure, which tracks all exclusive locks that the startup process is holding on behalf of transactions being replayed, did not have any provision for avoiding duplicate entries for the same lock. Maybe that was okay when the code was first written. However, modern practice is for checkpoints to write fresh lists of all active exclusive locks into the WAL. Thus, an exclusive lock that survives across multiple checkpoints causes bloat in standbys' startup processes. If there are a lot of such locks this can look like a memory leak, and it's even possible to drive the startup process into a palloc failure from an over-length List. To fix, use a hash table instead of simple lists to track the locks being held. Allowing for dynahash overhead, this requires a little more space per lock than the old way (although it's the same size as what we were allocating prior to c6e0fe1). It's probably a shade slower too. However, testing indicates that the penalty is negligible on ordinary workloads, so let's make this change to improve robustness in extreme cases. Patch by me, per report from Dmitriy Kuzmin. No back-patch (for now anyway), since it seems that a significant improvement would only occur in corner cases. Discussion: https://postgr.es/m/CAHLDt=_ts0A7Agn=hCpUh+RCFkxd+G6uuT=kcTfqFtGur0dp=A@mail.gmail.com
1 parent 58640f3 commit 66c2922

File tree

1 file changed

+106
-65
lines changed

1 file changed

+106
-65
lines changed

src/backend/storage/ipc/standby.c

Lines changed: 106 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,29 @@ int max_standby_archive_delay = 30 * 1000;
4242
int max_standby_streaming_delay = 30 * 1000;
4343
bool log_recovery_conflict_waits = false;
4444

45-
static HTAB *RecoveryLockLists;
45+
/*
46+
* Keep track of all the exclusive locks owned by original transactions.
47+
* For each known exclusive lock, there is a RecoveryLockEntry in the
48+
* RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
49+
* given XID are chained together so that we can find them easily.
50+
* For each original transaction that is known to have any such locks,
51+
* there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
52+
* which stores the head of the chain of its locks.
53+
*/
54+
typedef struct RecoveryLockEntry
55+
{
56+
xl_standby_lock key; /* hash key: xid, dbOid, relOid */
57+
struct RecoveryLockEntry *next; /* chain link */
58+
} RecoveryLockEntry;
59+
60+
typedef struct RecoveryLockXidEntry
61+
{
62+
TransactionId xid; /* hash key -- must be first */
63+
struct RecoveryLockEntry *head; /* chain head */
64+
} RecoveryLockXidEntry;
65+
66+
static HTAB *RecoveryLockHash = NULL;
67+
static HTAB *RecoveryLockXidHash = NULL;
4668

4769
/* Flags set by timeout handlers */
4870
static volatile sig_atomic_t got_standby_deadlock_timeout = false;
@@ -58,15 +80,6 @@ static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
5880
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
5981
static const char *get_recovery_conflict_desc(ProcSignalReason reason);
6082

61-
/*
62-
* Keep track of all the locks owned by a given transaction.
63-
*/
64-
typedef struct RecoveryLockListsEntry
65-
{
66-
TransactionId xid;
67-
List *locks;
68-
} RecoveryLockListsEntry;
69-
7083
/*
7184
* InitRecoveryTransactionEnvironment
7285
* Initialize tracking of our primary's in-progress transactions.
@@ -85,16 +98,24 @@ InitRecoveryTransactionEnvironment(void)
8598
VirtualTransactionId vxid;
8699
HASHCTL hash_ctl;
87100

101+
Assert(RecoveryLockHash == NULL); /* don't run this twice */
102+
88103
/*
89-
* Initialize the hash table for tracking the list of locks held by each
104+
* Initialize the hash tables for tracking the locks held by each
90105
* transaction.
91106
*/
107+
hash_ctl.keysize = sizeof(xl_standby_lock);
108+
hash_ctl.entrysize = sizeof(RecoveryLockEntry);
109+
RecoveryLockHash = hash_create("RecoveryLockHash",
110+
64,
111+
&hash_ctl,
112+
HASH_ELEM | HASH_BLOBS);
92113
hash_ctl.keysize = sizeof(TransactionId);
93-
hash_ctl.entrysize = sizeof(RecoveryLockListsEntry);
94-
RecoveryLockLists = hash_create("RecoveryLockLists",
95-
64,
96-
&hash_ctl,
97-
HASH_ELEM | HASH_BLOBS);
114+
hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
115+
RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
116+
64,
117+
&hash_ctl,
118+
HASH_ELEM | HASH_BLOBS);
98119

99120
/*
100121
* Initialize shared invalidation management for Startup process, being
@@ -140,12 +161,12 @@ void
140161
ShutdownRecoveryTransactionEnvironment(void)
141162
{
142163
/*
143-
* Do nothing if RecoveryLockLists is NULL because which means that
144-
* transaction tracking has not been yet initialized or has been already
145-
* shutdowned. This prevents transaction tracking from being shutdowned
146-
* unexpectedly more than once.
164+
* Do nothing if RecoveryLockHash is NULL because that means that
165+
* transaction tracking has not yet been initialized or has already been
166+
* shut down. This makes it safe to have possibly-redundant calls of this
167+
* function during process exit.
147168
*/
148-
if (RecoveryLockLists == NULL)
169+
if (RecoveryLockHash == NULL)
149170
return;
150171

151172
/* Mark all tracked in-progress transactions as finished. */
@@ -154,9 +175,11 @@ ShutdownRecoveryTransactionEnvironment(void)
154175
/* Release all locks the tracked transactions were holding */
155176
StandbyReleaseAllLocks();
156177

157-
/* Destroy the hash table of locks. */
158-
hash_destroy(RecoveryLockLists);
159-
RecoveryLockLists = NULL;
178+
/* Destroy the lock hash tables. */
179+
hash_destroy(RecoveryLockHash);
180+
hash_destroy(RecoveryLockXidHash);
181+
RecoveryLockHash = NULL;
182+
RecoveryLockXidHash = NULL;
160183

161184
/* Cleanup our VirtualTransaction */
162185
VirtualXactLockTableCleanup();
@@ -932,12 +955,12 @@ StandbyLockTimeoutHandler(void)
932955
* We only keep track of AccessExclusiveLocks, which are only ever held by
933956
* one transaction on one relation.
934957
*
935-
* We keep a hash table of lists of locks in local memory keyed by xid,
936-
* RecoveryLockLists, so we can keep track of the various entries made by
937-
* the Startup process's virtual xid in the shared lock table.
938-
*
939-
* List elements use type xl_standby_lock, since the WAL record type exactly
940-
* matches the information that we need to keep track of.
958+
* We keep a table of known locks in the RecoveryLockHash hash table.
959+
* The point of that table is to let us efficiently de-duplicate locks,
960+
* which is important because checkpoints will re-report the same locks
961+
* already held. There is also a RecoveryLockXidHash table with one entry
962+
* per xid, which allows us to efficiently find all the locks held by a
963+
* given original transaction.
941964
*
942965
* We use session locks rather than normal locks so we don't need
943966
* ResourceOwners.
@@ -947,8 +970,9 @@ StandbyLockTimeoutHandler(void)
947970
void
948971
StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
949972
{
950-
RecoveryLockListsEntry *entry;
951-
xl_standby_lock *newlock;
973+
RecoveryLockXidEntry *xidentry;
974+
RecoveryLockEntry *lockentry;
975+
xl_standby_lock key;
952976
LOCKTAG locktag;
953977
bool found;
954978

@@ -964,62 +988,79 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
964988
/* dbOid is InvalidOid when we are locking a shared relation. */
965989
Assert(OidIsValid(relOid));
966990

967-
/* Create a new list for this xid, if we don't have one already. */
968-
entry = hash_search(RecoveryLockLists, &xid, HASH_ENTER, &found);
991+
/* Create a hash entry for this xid, if we don't have one already. */
992+
xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found);
969993
if (!found)
970994
{
971-
entry->xid = xid;
972-
entry->locks = NIL;
995+
Assert(xidentry->xid == xid); /* dynahash should have set this */
996+
xidentry->head = NULL;
973997
}
974998

975-
newlock = palloc(sizeof(xl_standby_lock));
976-
newlock->xid = xid;
977-
newlock->dbOid = dbOid;
978-
newlock->relOid = relOid;
979-
entry->locks = lappend(entry->locks, newlock);
999+
/* Create a hash entry for this lock, unless we have one already. */
1000+
key.xid = xid;
1001+
key.dbOid = dbOid;
1002+
key.relOid = relOid;
1003+
lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found);
1004+
if (!found)
1005+
{
1006+
/* It's new, so link it into the XID's list ... */
1007+
lockentry->next = xidentry->head;
1008+
xidentry->head = lockentry;
9801009

981-
SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
1010+
/* ... and acquire the lock locally. */
1011+
SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
9821012

983-
(void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1013+
(void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1014+
}
9841015
}
9851016

1017+
/*
1018+
* Release all the locks associated with this RecoveryLockXidEntry.
1019+
*/
9861020
static void
987-
StandbyReleaseLockList(List *locks)
1021+
StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
9881022
{
989-
ListCell *lc;
1023+
RecoveryLockEntry *entry;
1024+
RecoveryLockEntry *next;
9901025

991-
foreach(lc, locks)
1026+
for (entry = xidentry->head; entry != NULL; entry = next)
9921027
{
993-
xl_standby_lock *lock = (xl_standby_lock *) lfirst(lc);
9941028
LOCKTAG locktag;
9951029

9961030
elog(trace_recovery(DEBUG4),
9971031
"releasing recovery lock: xid %u db %u rel %u",
998-
lock->xid, lock->dbOid, lock->relOid);
999-
SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
1032+
entry->key.xid, entry->key.dbOid, entry->key.relOid);
1033+
/* Release the lock ... */
1034+
SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
10001035
if (!LockRelease(&locktag, AccessExclusiveLock, true))
10011036
{
10021037
elog(LOG,
1003-
"RecoveryLockLists contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1004-
lock->xid, lock->dbOid, lock->relOid);
1038+
"RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1039+
entry->key.xid, entry->key.dbOid, entry->key.relOid);
10051040
Assert(false);
10061041
}
1042+
/* ... and remove the per-lock hash entry */
1043+
next = entry->next;
1044+
hash_search(RecoveryLockHash, entry, HASH_REMOVE, NULL);
10071045
}
10081046

1009-
list_free_deep(locks);
1047+
xidentry->head = NULL; /* just for paranoia */
10101048
}
10111049

1050+
/*
1051+
* Release locks for specific XID, or all locks if it's InvalidXid.
1052+
*/
10121053
static void
10131054
StandbyReleaseLocks(TransactionId xid)
10141055
{
1015-
RecoveryLockListsEntry *entry;
1056+
RecoveryLockXidEntry *entry;
10161057

10171058
if (TransactionIdIsValid(xid))
10181059
{
1019-
if ((entry = hash_search(RecoveryLockLists, &xid, HASH_FIND, NULL)))
1060+
if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
10201061
{
1021-
StandbyReleaseLockList(entry->locks);
1022-
hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL);
1062+
StandbyReleaseXidEntryLocks(entry);
1063+
hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
10231064
}
10241065
}
10251066
else
@@ -1028,7 +1069,7 @@ StandbyReleaseLocks(TransactionId xid)
10281069

10291070
/*
10301071
* Release locks for a transaction tree, starting at xid down, from
1031-
* RecoveryLockLists.
1072+
* RecoveryLockXidHash.
10321073
*
10331074
* Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
10341075
* to remove any AccessExclusiveLocks requested by a transaction.
@@ -1051,15 +1092,15 @@ void
10511092
StandbyReleaseAllLocks(void)
10521093
{
10531094
HASH_SEQ_STATUS status;
1054-
RecoveryLockListsEntry *entry;
1095+
RecoveryLockXidEntry *entry;
10551096

10561097
elog(trace_recovery(DEBUG2), "release all standby locks");
10571098

1058-
hash_seq_init(&status, RecoveryLockLists);
1099+
hash_seq_init(&status, RecoveryLockXidHash);
10591100
while ((entry = hash_seq_search(&status)))
10601101
{
1061-
StandbyReleaseLockList(entry->locks);
1062-
hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL);
1102+
StandbyReleaseXidEntryLocks(entry);
1103+
hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
10631104
}
10641105
}
10651106

@@ -1072,9 +1113,9 @@ void
10721113
StandbyReleaseOldLocks(TransactionId oldxid)
10731114
{
10741115
HASH_SEQ_STATUS status;
1075-
RecoveryLockListsEntry *entry;
1116+
RecoveryLockXidEntry *entry;
10761117

1077-
hash_seq_init(&status, RecoveryLockLists);
1118+
hash_seq_init(&status, RecoveryLockXidHash);
10781119
while ((entry = hash_seq_search(&status)))
10791120
{
10801121
Assert(TransactionIdIsValid(entry->xid));
@@ -1088,8 +1129,8 @@ StandbyReleaseOldLocks(TransactionId oldxid)
10881129
continue;
10891130

10901131
/* Remove all locks and hash table entry. */
1091-
StandbyReleaseLockList(entry->locks);
1092-
hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL);
1132+
StandbyReleaseXidEntryLocks(entry);
1133+
hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
10931134
}
10941135
}
10951136

0 commit comments

Comments
 (0)