Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit a3ed4d1

Browse files
committed
Allow for error or refusal while absorbing a ProcSignalBarrier.
Previously, the per-barrier-type functions tasked with absorbing them were expected to always succeed and never throw an error. However, that's a bit inconvenient. Further study has revealed that there are realistic cases where it might not be possible to absorb a ProcSignalBarrier without terminating the transaction, or even the whole backend. Similarly, for some barrier types, there might be other reasons why it's not reasonably possible to absorb the barrier at certain points in the code, so provide a way for a per-barrier-type function to reject absorbing the barrier.

Unfortunately, there's still no committed code making use of this infrastructure; hopefully, we'll get there. :-(

Patch by me, reviewed by Andres Freund and Amul Sul.

Discussion: http://postgr.es/m/20200908182005.xya7wetdh3pndzim@alap3.anarazel.de
Discussion: http://postgr.es/m/CA+Tgmob56Pk1-5aTJdVPCWFHon7me4M96ENpGe9n_R4JUjjhZA@mail.gmail.com
1 parent b2f87b4 commit a3ed4d1

File tree

1 file changed

+113
-12
lines changed

1 file changed

+113
-12
lines changed

src/backend/storage/ipc/procsignal.c

Lines changed: 113 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include <unistd.h>
1919

2020
#include "access/parallel.h"
21+
#include "port/pg_bitutils.h"
2122
#include "commands/async.h"
2223
#include "miscadmin.h"
2324
#include "pgstat.h"
@@ -87,12 +88,17 @@ typedef struct
8788
#define BARRIER_SHOULD_CHECK(flags, type) \
8889
(((flags) & (((uint32) 1) << (uint32) (type))) != 0)
8990

91+
/* Clear the relevant type bit from the flags. */
92+
#define BARRIER_CLEAR_BIT(flags, type) \
93+
((flags) &= ~(((uint32) 1) << (uint32) (type)))
94+
9095
static ProcSignalHeader *ProcSignal = NULL;
9196
static volatile ProcSignalSlot *MyProcSignalSlot = NULL;
9297

9398
static bool CheckProcSignal(ProcSignalReason reason);
9499
static void CleanupProcSignalState(int status, Datum arg);
95-
static void ProcessBarrierPlaceholder(void);
100+
static void ResetProcSignalBarrierBits(uint32 flags);
101+
static bool ProcessBarrierPlaceholder(void);
96102

97103
/*
98104
* ProcSignalShmemSize
@@ -394,6 +400,12 @@ WaitForProcSignalBarrier(uint64 generation)
394400
volatile ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
395401
uint64 oldval;
396402

403+
/*
404+
* It's important that we check only pss_barrierGeneration here and
405+
* not pss_barrierCheckMask. Bits in pss_barrierCheckMask get cleared
406+
* before the barrier is actually absorbed, but pss_barrierGeneration
407+
* is updated only afterward.
408+
*/
397409
oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration);
398410
while (oldval < generation)
399411
{
@@ -453,7 +465,7 @@ ProcessProcSignalBarrier(void)
453465
{
454466
uint64 local_gen;
455467
uint64 shared_gen;
456-
uint32 flags;
468+
volatile uint32 flags;
457469

458470
Assert(MyProcSignalSlot);
459471

@@ -482,21 +494,92 @@ ProcessProcSignalBarrier(void)
482494
* read of the barrier generation above happens before we atomically
483495
* extract the flags, and that any subsequent state changes happen
484496
* afterward.
497+
*
498+
* NB: In order to avoid race conditions, we must zero pss_barrierCheckMask
499+
* first and only afterwards try to do barrier processing. If we did it
500+
* in the other order, someone could send us another barrier of some
501+
* type right after we called the barrier-processing function but before
502+
* we cleared the bit. We would have no way of knowing that the bit needs
503+
* to stay set in that case, so the need to call the barrier-processing
504+
* function again would just get forgotten. So instead, we tentatively
505+
* clear all the bits and then put back any for which we don't manage
506+
* to successfully absorb the barrier.
485507
*/
486508
flags = pg_atomic_exchange_u32(&MyProcSignalSlot->pss_barrierCheckMask, 0);
487509

488510
/*
489-
* Process each type of barrier. It's important that nothing we call from
490-
* here throws an error, because pss_barrierCheckMask has already been
491-
* cleared. If we jumped out of here before processing all barrier types,
492-
* then we'd forget about the need to do so later.
493-
*
494-
* NB: It ought to be OK to call the barrier-processing functions
495-
* unconditionally, but it's more efficient to call only the ones that
496-
* might need us to do something based on the flags.
511+
* If there are no flags set, then we can skip doing any real work.
512+
* Otherwise, establish a PG_TRY block, so that we don't lose track of
513+
* which types of barrier processing are needed if an ERROR occurs.
497514
*/
498-
if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_PLACEHOLDER))
499-
ProcessBarrierPlaceholder();
515+
if (flags != 0)
516+
{
517+
bool success = true;
518+
519+
PG_TRY();
520+
{
521+
/*
522+
* Process each type of barrier. The barrier-processing functions
523+
* should normally return true, but may return false if the barrier
524+
* can't be absorbed at the current time. This should be rare,
525+
* because it's pretty expensive. Every single
526+
* CHECK_FOR_INTERRUPTS() will return here until we manage to
527+
* absorb the barrier, and that cost will add up in a hurry.
528+
*
529+
* NB: It ought to be OK to call the barrier-processing functions
530+
* unconditionally, but it's more efficient to call only the ones
531+
* that might need us to do something based on the flags.
532+
*/
533+
while (flags != 0)
534+
{
535+
ProcSignalBarrierType type;
536+
bool processed = true;
537+
538+
type = (ProcSignalBarrierType) pg_rightmost_one_pos32(flags);
539+
switch (type)
540+
{
541+
case PROCSIGNAL_BARRIER_PLACEHOLDER:
542+
processed = ProcessBarrierPlaceholder();
543+
break;
544+
}
545+
546+
/*
547+
* To avoid an infinite loop, we must always unset the bit
548+
* in flags.
549+
*/
550+
BARRIER_CLEAR_BIT(flags, type);
551+
552+
/*
553+
* If we failed to process the barrier, reset the shared bit
554+
* so we try again later, and set a flag so that we don't bump
555+
* our generation.
556+
*/
557+
if (!processed)
558+
{
559+
ResetProcSignalBarrierBits(((uint32) 1) << type);
560+
success = false;
561+
}
562+
}
563+
}
564+
PG_CATCH();
565+
{
566+
/*
567+
* If an ERROR occurred, we'll need to try again later to handle
568+
* that barrier type and any others that haven't been handled yet
569+
* or weren't successfully absorbed.
570+
*/
571+
ResetProcSignalBarrierBits(flags);
572+
PG_RE_THROW();
573+
}
574+
PG_END_TRY();
575+
576+
/*
577+
* If some barrier types were not successfully absorbed, we will have
578+
* to try again later.
579+
*/
580+
if (!success)
581+
return;
582+
}
500583

501584
/*
502585
* State changes related to all types of barriers that might have been
@@ -508,7 +591,20 @@ ProcessProcSignalBarrier(void)
508591
pg_atomic_write_u64(&MyProcSignalSlot->pss_barrierGeneration, shared_gen);
509592
}
510593

594+
/*
595+
* If it turns out that we couldn't absorb one or more barrier types, either
596+
* because the barrier-processing functions returned false or due to an error,
597+
* arrange for processing to be retried later.
598+
*/
511599
static void
600+
ResetProcSignalBarrierBits(uint32 flags)
601+
{
602+
pg_atomic_fetch_or_u32(&MyProcSignalSlot->pss_barrierCheckMask, flags);
603+
ProcSignalBarrierPending = true;
604+
InterruptPending = true;
605+
}
606+
607+
static bool
512608
ProcessBarrierPlaceholder(void)
513609
{
514610
/*
@@ -518,7 +614,12 @@ ProcessBarrierPlaceholder(void)
518614
* appropriately descriptive. Get rid of this function and instead have
519615
* ProcessBarrierSomethingElse. Most likely, that function should live in
520616
* the file pertaining to that subsystem, rather than here.
617+
*
618+
* The return value should be 'true' if the barrier was successfully
619+
* absorbed and 'false' if not. Note that returning 'false' can lead to
620+
* very frequent retries, so try hard to make that an uncommon case.
521621
*/
622+
return true;
522623
}
523624

524625
/*

0 commit comments

Comments
 (0)