Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a0e0fb1

Browse files
committed
Use condition variable to wait for next MultiXact offset
In one multixact.c edge case, we need a mechanism to wait for one multixact offset to be written before being allowed to read the next one. We used to handle this case by sleeping for one millisecond and retrying, but such sleeps have been reported as problematic in production cases. We can avoid the problem by using a condition variable: readers sleep on it, and every creator of multixacts broadcasts on the CV once creation is sufficiently far along.

Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Andrey Borodin <amborodin@acm.org>
Discussion: https://postgr.es/m/47A598F4-B4E7-4029-8FEC-A06A6C3CB4B5@yandex-team.ru
Discussion: https://postgr.es/m/20200515.090333.24867479329066911.horikyota.ntt
1 parent 473411f commit a0e0fb1

File tree

2 files changed

+29
-2
lines changed

2 files changed

+29
-2
lines changed

src/backend/access/transam/multixact.c

+28-2
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
#include "lib/ilist.h"
8383
#include "miscadmin.h"
8484
#include "pg_trace.h"
85+
#include "pgstat.h"
8586
#include "postmaster/autovacuum.h"
8687
#include "storage/pmsignal.h"
8788
#include "storage/proc.h"
@@ -232,6 +233,12 @@ typedef struct MultiXactStateData
232233
/* support for members anti-wraparound measures */
233234
MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
234235

236+
/*
237+
* This is used to sleep until a multixact offset is written when we want
238+
* to create the next one.
239+
*/
240+
ConditionVariable nextoff_cv;
241+
235242
/*
236243
* Per-backend data starts here. We have two arrays stored in the area
237244
* immediately following the MultiXactStateData struct. Each is indexed by
@@ -895,6 +902,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
895902
/* Release MultiXactOffset SLRU lock. */
896903
LWLockRelease(lock);
897904

905+
/*
906+
* If anybody was waiting to know the offset of this multixact ID we just
907+
* wrote, they can read it now, so wake them up.
908+
*/
909+
ConditionVariableBroadcast(&MultiXactState->nextoff_cv);
910+
898911
prev_pageno = -1;
899912

900913
for (i = 0; i < nmembers; i++, offset++)
@@ -1253,6 +1266,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
12531266
MultiXactOffset nextOffset;
12541267
MultiXactMember *ptr;
12551268
LWLock *lock;
1269+
bool slept = false;
12561270

12571271
debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
12581272

@@ -1340,7 +1354,9 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
13401354
* (because we are careful to pre-zero offset pages). Because
13411355
* GetNewMultiXactId will never return zero as the starting offset for a
13421356
* multixact, when we read zero as the next multixact's offset, we know we
1343-
* have this case. We sleep for a bit and try again.
1357+
* have this case. We handle this by sleeping on the condition variable
1358+
* we have just for this; the process in charge will signal the CV as soon
1359+
* as it has finished writing the multixact offset.
13441360
*
13451361
* 3. Because GetNewMultiXactId increments offset zero to offset one to
13461362
* handle case #2, there is an ambiguity near the point of offset
@@ -1422,7 +1438,10 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
14221438
/* Corner case 2: next multixact is still being filled in */
14231439
LWLockRelease(lock);
14241440
CHECK_FOR_INTERRUPTS();
1425-
pg_usleep(1000L);
1441+
1442+
ConditionVariableSleep(&MultiXactState->nextoff_cv,
1443+
WAIT_EVENT_MULTIXACT_CREATION);
1444+
slept = true;
14261445
goto retry;
14271446
}
14281447

@@ -1432,6 +1451,12 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
14321451
LWLockRelease(lock);
14331452
lock = NULL;
14341453

1454+
/*
1455+
* If we slept above, clean up state; it's no longer needed.
1456+
*/
1457+
if (slept)
1458+
ConditionVariableCancelSleep();
1459+
14351460
ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
14361461

14371462
truelength = 0;
@@ -1921,6 +1946,7 @@ MultiXactShmemInit(void)
19211946

19221947
/* Make sure we zero out the per-backend state */
19231948
MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
1949+
ConditionVariableInit(&MultiXactState->nextoff_cv);
19241950
}
19251951
else
19261952
Assert(found);

src/backend/utils/activity/wait_event_names.txt

+1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ MESSAGE_QUEUE_INTERNAL "Waiting for another process to be attached to a shared m
139139
MESSAGE_QUEUE_PUT_MESSAGE "Waiting to write a protocol message to a shared message queue."
140140
MESSAGE_QUEUE_RECEIVE "Waiting to receive bytes from a shared message queue."
141141
MESSAGE_QUEUE_SEND "Waiting to send bytes to a shared message queue."
142+
MULTIXACT_CREATION "Waiting for a multixact creation to complete."
142143
PARALLEL_BITMAP_SCAN "Waiting for parallel bitmap scan to become initialized."
143144
PARALLEL_CREATE_INDEX_SCAN "Waiting for parallel <command>CREATE INDEX</command> workers to finish heap scan."
144145
PARALLEL_FINISH "Waiting for parallel workers to finish computing."

0 commit comments

Comments
 (0)