Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a4b25cd

Browse files
committed
Fix fallback implementation of pg_atomic_write_u32().
I somehow had assumed that in the spinlock (in turn possibly using semaphores) based fallback atomics implementation 32 bit writes could be done without a lock. As far as the write goes that's correct, since postgres supports only platforms with single-copy atomicity for aligned 32bit writes. But writing without holding the spinlock breaks read-modify-write operations like pg_atomic_compare_exchange_u32(), since they'll potentially "miss" a concurrent write, which can't happen in actual hardware implementations. In 9.6+ when using the fallback atomics implementation this could lead to buffer header locks not being properly marked as released, and potentially some related state corruption. I don't see a related danger in 9.5 (earliest release with the API), because pg_atomic_write_u32() wasn't used in a concurrent manner there. The state variable of local buffers, before this change, were manipulated using pg_atomic_write_u32(), to avoid unnecessary synchronization overhead. As that'd not be the case anymore, introduce and use pg_atomic_unlocked_write_u32(), which does not correctly interact with RMW operations. This bug only caused issues when postgres is compiled on platforms without atomics support (i.e. no common new platform), or when compiled with --disable-atomics, which explains why this wasn't noticed in testing. Reported-By: Tom Lane Discussion: <14947.1475690465@sss.pgh.pa.us> Backpatch: 9.5-, where the atomic operations API was introduced.
1 parent 2efed55 commit a4b25cd

File tree

7 files changed

+61
-14
lines changed

7 files changed

+61
-14
lines changed

src/backend/port/atomics.c

+13
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,19 @@ pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
104104
ptr->value = val_;
105105
}
106106

107+
void
108+
pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
109+
{
110+
/*
111+
* One might think that an unlocked write doesn't need to acquire the
112+
* spinlock, but one would be wrong. Even an unlocked write has to cause a
113+
* concurrent pg_atomic_compare_exchange_u32() (et al) to fail.
114+
*/
115+
SpinLockAcquire((slock_t *) &ptr->sema);
116+
ptr->value = val;
117+
SpinLockRelease((slock_t *) &ptr->sema);
118+
}
119+
107120
bool
108121
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
109122
uint32 *expected, uint32 newval)

src/backend/storage/buffer/bufmgr.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
821821

822822
Assert(buf_state & BM_VALID);
823823
buf_state &= ~BM_VALID;
824-
pg_atomic_write_u32(&bufHdr->state, buf_state);
824+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
825825
}
826826
else
827827
{
@@ -941,7 +941,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
941941
uint32 buf_state = pg_atomic_read_u32(&bufHdr->state);
942942

943943
buf_state |= BM_VALID;
944-
pg_atomic_write_u32(&bufHdr->state, buf_state);
944+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
945945
}
946946
else
947947
{
@@ -3167,7 +3167,7 @@ FlushRelationBuffers(Relation rel)
31673167
false);
31683168

31693169
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
3170-
pg_atomic_write_u32(&bufHdr->state, buf_state);
3170+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
31713171

31723172
/* Pop the error context stack */
31733173
error_context_stack = errcallback.previous;

src/backend/storage/buffer/localbuf.c

+8-8
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
138138
if (BUF_STATE_GET_USAGECOUNT(buf_state) < BM_MAX_USAGE_COUNT)
139139
{
140140
buf_state += BUF_USAGECOUNT_ONE;
141-
pg_atomic_write_u32(&bufHdr->state, buf_state);
141+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
142142
}
143143
}
144144
LocalRefCount[b]++;
@@ -181,7 +181,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
181181
if (BUF_STATE_GET_USAGECOUNT(buf_state) > 0)
182182
{
183183
buf_state -= BUF_USAGECOUNT_ONE;
184-
pg_atomic_write_u32(&bufHdr->state, buf_state);
184+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
185185
trycounter = NLocBuffer;
186186
}
187187
else
@@ -222,7 +222,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
222222

223223
/* Mark not-dirty now in case we error out below */
224224
buf_state &= ~BM_DIRTY;
225-
pg_atomic_write_u32(&bufHdr->state, buf_state);
225+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
226226

227227
pgBufferUsage.local_blks_written++;
228228
}
@@ -249,7 +249,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
249249
/* mark buffer invalid just in case hash insert fails */
250250
CLEAR_BUFFERTAG(bufHdr->tag);
251251
buf_state &= ~(BM_VALID | BM_TAG_VALID);
252-
pg_atomic_write_u32(&bufHdr->state, buf_state);
252+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
253253
}
254254

255255
hresult = (LocalBufferLookupEnt *)
@@ -266,7 +266,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
266266
buf_state |= BM_TAG_VALID;
267267
buf_state &= ~BUF_USAGECOUNT_MASK;
268268
buf_state += BUF_USAGECOUNT_ONE;
269-
pg_atomic_write_u32(&bufHdr->state, buf_state);
269+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
270270

271271
*foundPtr = FALSE;
272272
return bufHdr;
@@ -302,7 +302,7 @@ MarkLocalBufferDirty(Buffer buffer)
302302

303303
buf_state |= BM_DIRTY;
304304

305-
pg_atomic_write_u32(&bufHdr->state, buf_state);
305+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
306306
}
307307

308308
/*
@@ -351,7 +351,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
351351
CLEAR_BUFFERTAG(bufHdr->tag);
352352
buf_state &= ~BUF_FLAG_MASK;
353353
buf_state &= ~BUF_USAGECOUNT_MASK;
354-
pg_atomic_write_u32(&bufHdr->state, buf_state);
354+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
355355
}
356356
}
357357
}
@@ -395,7 +395,7 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
395395
CLEAR_BUFFERTAG(bufHdr->tag);
396396
buf_state &= ~BUF_FLAG_MASK;
397397
buf_state &= ~BUF_USAGECOUNT_MASK;
398-
pg_atomic_write_u32(&bufHdr->state, buf_state);
398+
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
399399
}
400400
}
401401
}

src/include/port/atomics.h

+23-2
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,12 @@ pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
255255
}
256256

257257
/*
258-
* pg_atomic_write_u32 - unlocked write to atomic variable.
258+
* pg_atomic_write_u32 - write to atomic variable.
259259
*
260260
* The write is guaranteed to succeed as a whole, i.e. it's not possible to
261-
* observe a partial write for any reader.
261+
* observe a partial write for any reader. Note that this correctly interacts
262+
* with pg_atomic_compare_exchange_u32, in contrast to
263+
* pg_atomic_unlocked_write_u32().
262264
*
263265
* No barrier semantics.
264266
*/
@@ -270,6 +272,25 @@ pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
270272
pg_atomic_write_u32_impl(ptr, val);
271273
}
272274

275+
/*
276+
* pg_atomic_unlocked_write_u32 - unlocked write to atomic variable.
277+
*
278+
* The write is guaranteed to succeed as a whole, i.e. it's not possible to
279+
* observe a partial write for any reader. But note that writing this way is
280+
* not guaranteed to correctly interact with read-modify-write operations like
281+
* pg_atomic_compare_exchange_u32. This should only be used in cases where
282+
* minor performance regressions due to atomics emulation are unacceptable.
283+
*
284+
* No barrier semantics.
285+
*/
286+
static inline void
287+
pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
288+
{
289+
AssertPointerAlignment(ptr, 4);
290+
291+
pg_atomic_unlocked_write_u32_impl(ptr, val);
292+
}
293+
273294
/*
274295
* pg_atomic_exchange_u32 - exchange newval with current value
275296
*

src/include/port/atomics/fallback.h

+3
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
133133
#define PG_HAVE_ATOMIC_INIT_U32
134134
extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_);
135135

136+
#define PG_HAVE_ATOMIC_WRITE_U32
137+
extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val);
138+
136139
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
137140
extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
138141
uint32 *expected, uint32 newval);

src/include/port/atomics/generic.h

+9
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
5858
}
5959
#endif
6060

61+
#ifndef PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32
62+
#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32
63+
static inline void
64+
pg_atomic_unlocked_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
65+
{
66+
ptr->value = val;
67+
}
68+
#endif
69+
6170
/*
6271
* provide fallback for test_and_set using atomic_exchange if available
6372
*/

src/include/storage/buf_internals.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ typedef struct buftag
168168
* We use this same struct for local buffer headers, but the locks are not
169169
* used and not all of the flag bits are useful either. To avoid unnecessary
170170
* overhead, manipulations of the state field should be done without actual
171-
* atomic operations (i.e. only pg_atomic_read/write).
171+
* atomic operations (i.e. only pg_atomic_read_u32() and
172+
* pg_atomic_unlocked_write_u32()).
172173
*
173174
* Be careful to avoid increasing the size of the struct when adding or
174175
* reordering members. Keeping it below 64 bytes (the most common CPU

0 commit comments

Comments
 (0)