Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 12f3867

Browse files
committed
bufmgr: Support multiple in-progress IOs by using resowner
A future patch will add support for extending relations by multiple blocks at once. To be concurrency safe, the buffers for those blocks need to be marked as BM_IO_IN_PROGRESS. Until now we only had infrastructure for recovering from an IO error for a single buffer. This commit extends that infrastructure to multiple buffers by using the resource owner infrastructure. This commit increases the size of the ResourceOwnerData struct, which appears to have a just about measurable overhead in very extreme workloads. Medium term we are planning to substantially shrink the size of ResourceOwnerData. Short term the increase is small enough to not worry about it for now. Reviewed-by: Melanie Plageman <melanieplageman@gmail.com> Discussion: https://postgr.es/m/20221029025420.eplyow6k7tgu6he3@awork3.anarazel.de Discussion: https://postgr.es/m/20221029200025.w7bvlgvamjfo6z44@awork3.anarazel.de
1 parent 16dc270 commit 12f3867

File tree

9 files changed

+102
-53
lines changed

9 files changed

+102
-53
lines changed

src/backend/access/transam/xact.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -2719,8 +2719,7 @@ AbortTransaction(void)
27192719
pgstat_report_wait_end();
27202720
pgstat_progress_end_command();
27212721

2722-
/* Clean up buffer I/O and buffer context locks, too */
2723-
AbortBufferIO();
2722+
/* Clean up buffer context locks, too */
27242723
UnlockBuffers();
27252724

27262725
/* Reset WAL record construction state */
@@ -5080,7 +5079,6 @@ AbortSubTransaction(void)
50805079

50815080
pgstat_report_wait_end();
50825081
pgstat_progress_end_command();
5083-
AbortBufferIO();
50845082
UnlockBuffers();
50855083

50865084
/* Reset WAL record construction state */

src/backend/postmaster/autovacuum.c

-1
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,6 @@ AutoVacLauncherMain(int argc, char *argv[])
526526
*/
527527
LWLockReleaseAll();
528528
pgstat_report_wait_end();
529-
AbortBufferIO();
530529
UnlockBuffers();
531530
/* this is probably dead code, but let's be safe: */
532531
if (AuxProcessResourceOwner)

src/backend/postmaster/bgwriter.c

-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ BackgroundWriterMain(void)
167167
*/
168168
LWLockReleaseAll();
169169
ConditionVariableCancelSleep();
170-
AbortBufferIO();
171170
UnlockBuffers();
172171
ReleaseAuxProcessResources(false);
173172
AtEOXact_Buffers(false);

src/backend/postmaster/checkpointer.c

-1
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,6 @@ CheckpointerMain(void)
271271
LWLockReleaseAll();
272272
ConditionVariableCancelSleep();
273273
pgstat_report_wait_end();
274-
AbortBufferIO();
275274
UnlockBuffers();
276275
ReleaseAuxProcessResources(false);
277276
AtEOXact_Buffers(false);

src/backend/postmaster/walwriter.c

-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ WalWriterMain(void)
163163
LWLockReleaseAll();
164164
ConditionVariableCancelSleep();
165165
pgstat_report_wait_end();
166-
AbortBufferIO();
167166
UnlockBuffers();
168167
ReleaseAuxProcessResources(false);
169168
AtEOXact_Buffers(false);

src/backend/storage/buffer/bufmgr.c

+35-45
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,6 @@ int checkpoint_flush_after = DEFAULT_CHECKPOINT_FLUSH_AFTER;
159159
int bgwriter_flush_after = DEFAULT_BGWRITER_FLUSH_AFTER;
160160
int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER;
161161

162-
/* local state for StartBufferIO and related functions */
163-
static BufferDesc *InProgressBuf = NULL;
164-
static bool IsForInput;
165-
166162
/* local state for LockBufferForCleanup */
167163
static BufferDesc *PinCountWaitBuf = NULL;
168164

@@ -2705,7 +2701,6 @@ InitBufferPoolAccess(void)
27052701
static void
27062702
AtProcExit_Buffers(int code, Datum arg)
27072703
{
2708-
AbortBufferIO();
27092704
UnlockBuffers();
27102705

27112706
CheckForBufferLeaks();
@@ -4647,7 +4642,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
46474642
{
46484643
uint32 buf_state;
46494644

4650-
Assert(!InProgressBuf);
4645+
ResourceOwnerEnlargeBufferIOs(CurrentResourceOwner);
46514646

46524647
for (;;)
46534648
{
@@ -4671,8 +4666,8 @@ StartBufferIO(BufferDesc *buf, bool forInput)
46714666
buf_state |= BM_IO_IN_PROGRESS;
46724667
UnlockBufHdr(buf, buf_state);
46734668

4674-
InProgressBuf = buf;
4675-
IsForInput = forInput;
4669+
ResourceOwnerRememberBufferIO(CurrentResourceOwner,
4670+
BufferDescriptorGetBuffer(buf));
46764671

46774672
return true;
46784673
}
@@ -4698,8 +4693,6 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
46984693
{
46994694
uint32 buf_state;
47004695

4701-
Assert(buf == InProgressBuf);
4702-
47034696
buf_state = LockBufHdr(buf);
47044697

47054698
Assert(buf_state & BM_IO_IN_PROGRESS);
@@ -4711,13 +4704,14 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
47114704
buf_state |= set_flag_bits;
47124705
UnlockBufHdr(buf, buf_state);
47134706

4714-
InProgressBuf = NULL;
4707+
ResourceOwnerForgetBufferIO(CurrentResourceOwner,
4708+
BufferDescriptorGetBuffer(buf));
47154709

47164710
ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf));
47174711
}
47184712

47194713
/*
4720-
* AbortBufferIO: Clean up any active buffer I/O after an error.
4714+
* AbortBufferIO: Clean up active buffer I/O after an error.
47214715
*
47224716
* All LWLocks we might have held have been released,
47234717
* but we haven't yet released buffer pins, so the buffer is still pinned.
@@ -4726,46 +4720,42 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
47264720
* possible the error condition wasn't related to the I/O.
47274721
*/
47284722
void
4729-
AbortBufferIO(void)
4723+
AbortBufferIO(Buffer buf)
47304724
{
4731-
BufferDesc *buf = InProgressBuf;
4725+
BufferDesc *buf_hdr = GetBufferDescriptor(buf - 1);
4726+
uint32 buf_state;
47324727

4733-
if (buf)
4734-
{
4735-
uint32 buf_state;
4728+
buf_state = LockBufHdr(buf_hdr);
4729+
Assert(buf_state & (BM_IO_IN_PROGRESS | BM_TAG_VALID));
47364730

4737-
buf_state = LockBufHdr(buf);
4738-
Assert(buf_state & BM_IO_IN_PROGRESS);
4739-
if (IsForInput)
4740-
{
4741-
Assert(!(buf_state & BM_DIRTY));
4731+
if (!(buf_state & BM_VALID))
4732+
{
4733+
Assert(!(buf_state & BM_DIRTY));
4734+
UnlockBufHdr(buf_hdr, buf_state);
4735+
}
4736+
else
4737+
{
4738+
Assert(!(buf_state & BM_DIRTY));
4739+
UnlockBufHdr(buf_hdr, buf_state);
47424740

4743-
/* We'd better not think buffer is valid yet */
4744-
Assert(!(buf_state & BM_VALID));
4745-
UnlockBufHdr(buf, buf_state);
4746-
}
4747-
else
4741+
/* Issue notice if this is not the first failure... */
4742+
if (buf_state & BM_IO_ERROR)
47484743
{
4749-
Assert(buf_state & BM_DIRTY);
4750-
UnlockBufHdr(buf, buf_state);
4751-
/* Issue notice if this is not the first failure... */
4752-
if (buf_state & BM_IO_ERROR)
4753-
{
4754-
/* Buffer is pinned, so we can read tag without spinlock */
4755-
char *path;
4756-
4757-
path = relpathperm(BufTagGetRelFileLocator(&buf->tag),
4758-
BufTagGetForkNum(&buf->tag));
4759-
ereport(WARNING,
4760-
(errcode(ERRCODE_IO_ERROR),
4761-
errmsg("could not write block %u of %s",
4762-
buf->tag.blockNum, path),
4763-
errdetail("Multiple failures --- write error might be permanent.")));
4764-
pfree(path);
4765-
}
4744+
/* Buffer is pinned, so we can read tag without spinlock */
4745+
char *path;
4746+
4747+
path = relpathperm(BufTagGetRelFileLocator(&buf_hdr->tag),
4748+
BufTagGetForkNum(&buf_hdr->tag));
4749+
ereport(WARNING,
4750+
(errcode(ERRCODE_IO_ERROR),
4751+
errmsg("could not write block %u of %s",
4752+
buf_hdr->tag.blockNum, path),
4753+
errdetail("Multiple failures --- write error might be permanent.")));
4754+
pfree(path);
47664755
}
4767-
TerminateBufferIO(buf, false, BM_IO_ERROR);
47684756
}
4757+
4758+
TerminateBufferIO(buf_hdr, false, BM_IO_ERROR);
47694759
}
47704760

47714761
/*

src/backend/utils/resowner/resowner.c

+60
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ typedef struct ResourceOwnerData
121121

122122
/* We have built-in support for remembering: */
123123
ResourceArray bufferarr; /* owned buffers */
124+
ResourceArray bufferioarr; /* in-progress buffer IO */
124125
ResourceArray catrefarr; /* catcache references */
125126
ResourceArray catlistrefarr; /* catcache-list pins */
126127
ResourceArray relrefarr; /* relcache references */
@@ -441,6 +442,7 @@ ResourceOwnerCreate(ResourceOwner parent, const char *name)
441442
}
442443

443444
ResourceArrayInit(&(owner->bufferarr), BufferGetDatum(InvalidBuffer));
445+
ResourceArrayInit(&(owner->bufferioarr), BufferGetDatum(InvalidBuffer));
444446
ResourceArrayInit(&(owner->catrefarr), PointerGetDatum(NULL));
445447
ResourceArrayInit(&(owner->catlistrefarr), PointerGetDatum(NULL));
446448
ResourceArrayInit(&(owner->relrefarr), PointerGetDatum(NULL));
@@ -517,6 +519,24 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
517519

518520
if (phase == RESOURCE_RELEASE_BEFORE_LOCKS)
519521
{
522+
/*
523+
* Abort failed buffer IO. AbortBufferIO()->TerminateBufferIO() calls
524+
* ResourceOwnerForgetBufferIOs(), so we just have to iterate till
525+
* there are none.
526+
*
527+
* Needs to be before we release buffer pins.
528+
*
529+
* During a commit, there shouldn't be any in-progress IO.
530+
*/
531+
while (ResourceArrayGetAny(&(owner->bufferioarr), &foundres))
532+
{
533+
Buffer res = DatumGetBuffer(foundres);
534+
535+
if (isCommit)
536+
elog(PANIC, "lost track of buffer IO on buffer %u", res);
537+
AbortBufferIO(res);
538+
}
539+
520540
/*
521541
* Release buffer pins. Note that ReleaseBuffer will remove the
522542
* buffer entry from our array, so we just have to iterate till there
@@ -746,6 +766,7 @@ ResourceOwnerDelete(ResourceOwner owner)
746766

747767
/* And it better not own any resources, either */
748768
Assert(owner->bufferarr.nitems == 0);
769+
Assert(owner->bufferioarr.nitems == 0);
749770
Assert(owner->catrefarr.nitems == 0);
750771
Assert(owner->catlistrefarr.nitems == 0);
751772
Assert(owner->relrefarr.nitems == 0);
@@ -775,6 +796,7 @@ ResourceOwnerDelete(ResourceOwner owner)
775796

776797
/* And free the object. */
777798
ResourceArrayFree(&(owner->bufferarr));
799+
ResourceArrayFree(&(owner->bufferioarr));
778800
ResourceArrayFree(&(owner->catrefarr));
779801
ResourceArrayFree(&(owner->catlistrefarr));
780802
ResourceArrayFree(&(owner->relrefarr));
@@ -976,6 +998,44 @@ ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
976998
buffer, owner->name);
977999
}
9781000

1001+
1002+
/*
1003+
* Make sure there is room for at least one more entry in a ResourceOwner's
1004+
* buffer array.
1005+
*
1006+
* This is separate from actually inserting an entry because if we run out
1007+
* of memory, it's critical to do so *before* acquiring the resource.
1008+
*/
1009+
void
1010+
ResourceOwnerEnlargeBufferIOs(ResourceOwner owner)
1011+
{
1012+
/* We used to allow pinning buffers without a resowner, but no more */
1013+
Assert(owner != NULL);
1014+
ResourceArrayEnlarge(&(owner->bufferioarr));
1015+
}
1016+
1017+
/*
1018+
* Remember that a buffer IO is owned by a ResourceOwner
1019+
*
1020+
* Caller must have previously done ResourceOwnerEnlargeBufferIOs()
1021+
*/
1022+
void
1023+
ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer)
1024+
{
1025+
ResourceArrayAdd(&(owner->bufferioarr), BufferGetDatum(buffer));
1026+
}
1027+
1028+
/*
1029+
* Forget that a buffer IO is owned by a ResourceOwner
1030+
*/
1031+
void
1032+
ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
1033+
{
1034+
if (!ResourceArrayRemove(&(owner->bufferioarr), BufferGetDatum(buffer)))
1035+
elog(PANIC, "buffer IO %d is not owned by resource owner %s",
1036+
buffer, owner->name);
1037+
}
1038+
9791039
/*
9801040
* Remember that a Local Lock is owned by a ResourceOwner
9811041
*

src/include/storage/bufmgr.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ extern bool ConditionalLockBufferForCleanup(Buffer buffer);
181181
extern bool IsBufferCleanupOK(Buffer buffer);
182182
extern bool HoldingBufferPinThatDelaysRecovery(void);
183183

184-
extern void AbortBufferIO(void);
184+
extern void AbortBufferIO(Buffer buffer);
185185

186186
extern bool BgBufferSync(struct WritebackContext *wb_context);
187187

src/include/utils/resowner_private.h

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ extern void ResourceOwnerEnlargeBuffers(ResourceOwner owner);
3030
extern void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer);
3131
extern void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer);
3232

33+
/* support for IO-in-progress management */
34+
extern void ResourceOwnerEnlargeBufferIOs(ResourceOwner owner);
35+
extern void ResourceOwnerRememberBufferIO(ResourceOwner owner, Buffer buffer);
36+
extern void ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer);
37+
3338
/* support for local lock management */
3439
extern void ResourceOwnerRememberLock(ResourceOwner owner, LOCALLOCK *locallock);
3540
extern void ResourceOwnerForgetLock(ResourceOwner owner, LOCALLOCK *locallock);

0 commit comments

Comments
 (0)