
Commit d73c359

Committed by: Commitfest Bot

[CF 5784] Optimize shared LWLock acquisition for high-core-count systems

This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5784

The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch.

Patch(es): https://www.postgresql.org/message-id/73d53acf-4f66-41df-b438-5c2e6115d4de@intel.com
Author(s): Zhiguo Zhou

2 parents: c3eda50 + d9d940e, commit d73c359

File tree: 1 file changed, +57 -16 lines

src/backend/storage/lmgr/lwlock.c

Lines changed: 57 additions & 16 deletions
@@ -97,20 +97,41 @@
 #define LW_FLAG_BITS	3
 #define LW_FLAG_MASK	(((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
 
-/* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
-#define LW_VAL_EXCLUSIVE	(MAX_BACKENDS + 1)
+/*
+ * already (power of 2)-1, i.e. suitable for a mask
+ *
+ * Originally, the LW_SHARED lock reference count was maintained in bits
+ * [MAX_BACKEND_BITS-1:0] of LWLock.state, with a theoretical maximum of
+ * MAX_BACKENDS (when all MAX_BACKENDS processes hold the lock concurrently).
+ *
+ * To reduce lock acquisition overhead, we optimized LWLockAttemptLock by
+ * merging the read and update operations for the LW_SHARED lock's state.
+ * This eliminates the need for separate atomic instructions - a critical
+ * improvement given the high cost of atomic operations on high-core-count
+ * systems.
+ *
+ * This optimization introduces a scenario where the reference count may
+ * temporarily increment even when a reader fails to acquire an exclusive lock.
+ * However, since each process retries lock acquisition up to *twice* before
+ * waiting on a semaphore, the reference count is bounded by MAX_BACKENDS * 2.
+ *
+ * To ensure compatibility with this upper bound:
+ * 1. LW_SHARED_MASK has been extended by 1 bit
+ * 2. LW_VAL_EXCLUSIVE is left-shifted by 1 bit
+ */
+#define LW_SHARED_MASK	((MAX_BACKENDS << 1) + 1)
+#define LW_VAL_EXCLUSIVE	(LW_SHARED_MASK + 1)
+#define LW_LOCK_MASK	(LW_SHARED_MASK | LW_VAL_EXCLUSIVE)
 #define LW_VAL_SHARED	1
 
-/* already (power of 2)-1, i.e. suitable for a mask */
-#define LW_SHARED_MASK	MAX_BACKENDS
-#define LW_LOCK_MASK	(MAX_BACKENDS | LW_VAL_EXCLUSIVE)
+/* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
 
 
 StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
 				 "MAX_BACKENDS + 1 needs to be a power of 2");
 
-StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
-				 "MAX_BACKENDS and LW_FLAG_MASK overlap");
+StaticAssertDecl((LW_SHARED_MASK & LW_FLAG_MASK) == 0,
+				 "LW_SHARED_MASK and LW_FLAG_MASK overlap");
 
 StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
 				 "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
@@ -277,15 +298,17 @@ PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
 	if (Trace_lwlocks)
 	{
 		uint32		state = pg_atomic_read_u32(&lock->state);
+		uint32		excl = (state & LW_VAL_EXCLUSIVE) != 0;
+		uint32		shared = excl ? 0 : state & LW_SHARED_MASK;
 
 		ereport(LOG,
 				(errhidestmt(true),
 				 errhidecontext(true),
 				 errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
 								 MyProcPid,
 								 where, T_NAME(lock), lock,
-								 (state & LW_VAL_EXCLUSIVE) != 0,
-								 state & LW_SHARED_MASK,
+								 excl,
+								 shared,
 								 (state & LW_FLAG_HAS_WAITERS) != 0,
 								 pg_atomic_read_u32(&lock->nwaiters),
 								 (state & LW_FLAG_RELEASE_OK) != 0)));
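
For readers following the trace output: once the exclusive bit is set, any residual shared count can only be a transient increment left by the fetch-add fast path introduced further below, so it is now reported as 0. A minimal standalone decoding sketch of that convention (not part of the patch; the MAX_BACKENDS value is assumed):

#include <stdint.h>
#include <stdio.h>

#define MAX_BACKENDS      0x3FFFF	/* assumed value */
#define LW_SHARED_MASK    ((MAX_BACKENDS << 1) + 1)
#define LW_VAL_EXCLUSIVE  (LW_SHARED_MASK + 1)

/* Decode a raw state word the way the new trace output does. */
static void
decode_state(uint32_t state)
{
	unsigned	excl = (state & LW_VAL_EXCLUSIVE) != 0;
	unsigned	shared = excl ? 0 : (state & LW_SHARED_MASK);

	printf("state 0x%08X: excl %u shared %u\n", (unsigned) state, excl, shared);
}

int
main(void)
{
	decode_state(3);					/* three shared holders */
	decode_state(LW_VAL_EXCLUSIVE | 2);	/* exclusive held, two stale increments */
	return 0;
}
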
@@ -790,15 +813,30 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId)
  * This function will not block waiting for a lock to become free - that's the
  * caller's job.
  *
+ * willwait: true if the caller is willing to wait for the lock to become free,
+ *           false if the caller is not willing to wait.
+ *
  * Returns true if the lock isn't free and we need to wait.
  */
 static bool
-LWLockAttemptLock(LWLock *lock, LWLockMode mode)
+LWLockAttemptLock(LWLock *lock, LWLockMode mode, bool willwait)
 {
 	uint32		old_state;
 
 	Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
 
+	/*
+	 * To avoid conflicts between the reference count and the LW_VAL_EXCLUSIVE
+	 * flag, this optimization is disabled when willwait is false. See the
+	 * detailed comments where LW_SHARED_MASK is defined for more explanation.
+	 */
+	if (willwait && mode == LW_SHARED)
+	{
+		old_state = pg_atomic_fetch_add_u32(&lock->state, LW_VAL_SHARED);
+		Assert((old_state & LW_LOCK_MASK) != LW_LOCK_MASK);
+		return (old_state & LW_VAL_EXCLUSIVE) != 0;
+	}
+
 	/*
 	 * Read once outside the loop, later iterations will get the newer value
 	 * via compare & exchange.
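
The shared fast path above is the heart of the change: one unconditional atomic fetch-add replaces the read plus compare-and-exchange retry loop. The following standalone model (C11 atomics instead of the pg_atomic_* wrappers, assumed MAX_BACKENDS value; not PostgreSQL code) shows what happens when a writer already holds the lock: the caller learns it must wait, and its increment stays in the state word until the exclusive holder clears it on release:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_BACKENDS      0x3FFFF	/* assumed value */
#define LW_VAL_SHARED     1u
#define LW_SHARED_MASK    ((MAX_BACKENDS << 1) + 1)
#define LW_VAL_EXCLUSIVE  ((unsigned) LW_SHARED_MASK + 1)
#define LW_LOCK_MASK      (LW_SHARED_MASK | LW_VAL_EXCLUSIVE)

static _Atomic unsigned lock_state;

/* Returns true if the lock isn't free and the caller must wait. */
static bool
attempt_shared_willwait(void)
{
	/* one fetch-add replaces the read + compare-and-exchange retry loop */
	unsigned	old_state = atomic_fetch_add(&lock_state, LW_VAL_SHARED);

	/* sanity check mirroring the patch: the widened field cannot saturate */
	assert((old_state & LW_LOCK_MASK) != LW_LOCK_MASK);
	return (old_state & LW_VAL_EXCLUSIVE) != 0;
}

int
main(void)
{
	/* pretend a writer already holds the lock */
	atomic_store(&lock_state, LW_VAL_EXCLUSIVE);

	bool		mustwait = attempt_shared_willwait();

	/* the reader must wait, but its increment remains in the state word
	 * until the writer releases and clears LW_LOCK_MASK (see below) */
	printf("mustwait=%d state=0x%X\n", mustwait, atomic_load(&lock_state));
	return 0;
}
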
@@ -1242,7 +1280,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		 * Try to grab the lock the first time, we're not in the waitqueue
 		 * yet/anymore.
 		 */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		if (!mustwait)
 		{
@@ -1265,7 +1303,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		LWLockQueueSelf(lock, mode);
 
 		/* we're now guaranteed to be woken up if necessary */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		/* ok, grabbed the lock the second time round, need to undo queueing */
 		if (!mustwait)
@@ -1368,7 +1406,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
 	HOLD_INTERRUPTS();
 
 	/* Check for the lock */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, false);
 
 	if (mustwait)
 	{
@@ -1435,13 +1473,13 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 	 * NB: We're using nearly the same twice-in-a-row lock acquisition
 	 * protocol as LWLockAcquire(). Check its comments for details.
 	 */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, true);
 
 	if (mustwait)
 	{
 		LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
 
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		if (mustwait)
 		{
@@ -1843,7 +1881,10 @@ LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
 	 * others, even if we still have to wakeup other waiters.
 	 */
 	if (mode == LW_EXCLUSIVE)
-		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
+	{
+		oldstate = pg_atomic_fetch_and_u32(&lock->state, ~LW_LOCK_MASK);
+		oldstate &= ~LW_LOCK_MASK;
+	}
 	else
 		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
 
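
The exclusive-release hunk above is the other half of the bargain: clearing all of LW_LOCK_MASK with an atomic AND drops the exclusive bit and simultaneously discards any stale shared increments left by readers that lost the race. A standalone model of that step (C11 atomics, assumed constants, and an illustrative flag bit; not the pg_atomic_* API):

#include <stdatomic.h>
#include <stdio.h>

#define MAX_BACKENDS        0x3FFFF	/* assumed value */
#define LW_SHARED_MASK      ((MAX_BACKENDS << 1) + 1)
#define LW_VAL_EXCLUSIVE    ((unsigned) LW_SHARED_MASK + 1)
#define LW_LOCK_MASK        (LW_SHARED_MASK | LW_VAL_EXCLUSIVE)
#define LW_FLAG_HAS_WAITERS (1u << 30)	/* illustrative flag bit, assumed */

static _Atomic unsigned lock_state;

int
main(void)
{
	/* writer holds the lock, two readers left transient increments,
	 * and a waiter flag is set in the high bits */
	atomic_store(&lock_state, LW_VAL_EXCLUSIVE | 2 | LW_FLAG_HAS_WAITERS);

	/* release: clear the exclusive bit and any stale shared count in one
	 * atomic AND; flag bits outside LW_LOCK_MASK survive untouched */
	unsigned	oldstate = atomic_fetch_and(&lock_state, ~LW_LOCK_MASK);

	oldstate &= ~LW_LOCK_MASK;	/* mimic the patch: treat it as the new value */

	printf("state after release = 0x%08X (oldstate = 0x%08X)\n",
		   atomic_load(&lock_state), oldstate);
	return 0;
}
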
