Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 496ee64

Browse files
committed
Prefer standby promotion over recovery pause.
Previously, if a promotion was triggered while recovery was paused, the paused state continued. Also, recovery could be paused by executing pg_wal_replay_pause() even while a promotion was ongoing. That is, recovery pause had higher priority than a standby promotion. But this behavior was not desirable because most users basically wanted the recovery to complete as soon as possible and the server to become the master when they requested a promotion. This commit changes recovery so that it prefers a promotion over recovery pause. That is, if a promotion is triggered while recovery is paused, the paused state ends and the promotion continues. Also, this commit makes recovery pause functions like pg_wal_replay_pause() throw an error if they are executed while a promotion is ongoing. Internally, this commit adds a new internal function PromoteIsTriggered() that returns true if a promotion is triggered. Since the name of this function and the existing function IsPromoteTriggered() are confusingly similar, the commit renames IsPromoteTriggered() to IsPromoteSignaled(), as a more appropriate name. Author: Fujii Masao Reviewed-by: Atsushi Torikoshi, Sergei Kornilov Discussion: https://postgr.es/m/00c194b2-dbbb-2e8a-5b39-13f14048ef0a@oss.nttdata.com
1 parent e09ad07 commit 496ee64

File tree

7 files changed

+93
-15
lines changed

7 files changed

+93
-15
lines changed

doc/src/sgml/config.sgml

+3
Original file line number | Diff line number | Diff line change
@@ -3590,6 +3590,9 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
35903590
This setting has no effect if no recovery target is set.
35913591
If <xref linkend="guc-hot-standby"/> is not enabled, a setting of
35923592
<literal>pause</literal> will act the same as <literal>shutdown</literal>.
3593+
If the recovery target is reached while a promotion is ongoing,
3594+
a setting of <literal>pause</literal> will act the same as
3595+
<literal>promote</literal>.
35933596
</para>
35943597
<para>
35953598
In any case, if a recovery target is configured but the archive

doc/src/sgml/func.sgml

+7
Original file line number | Diff line number | Diff line change
@@ -20177,6 +20177,13 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
2017720177
recovery is resumed.
2017820178
</para>
2017920179

20180+
<para>
20181+
<function>pg_wal_replay_pause</function> and
20182+
<function>pg_wal_replay_resume</function> cannot be executed while
20183+
a promotion is ongoing. If a promotion is triggered while recovery
20184+
is paused, the paused state ends and a promotion continues.
20185+
</para>
20186+
2018020187
<para>
2018120188
If streaming replication is disabled, the paused state may continue
2018220189
indefinitely without problem. While streaming replication is in

src/backend/access/transam/xlog.c

+60-7
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,12 @@ static bool LocalRecoveryInProgress = true;
229229
*/
230230
static bool LocalHotStandbyActive = false;
231231

232+
/*
233+
* Local copy of SharedPromoteIsTriggered variable. False actually means "not
234+
* known, need to check the shared state".
235+
*/
236+
static bool LocalPromoteIsTriggered = false;
237+
232238
/*
233239
* Local state for XLogInsertAllowed():
234240
* 1: unconditionally allowed to insert XLOG
@@ -654,6 +660,12 @@ typedef struct XLogCtlData
654660
*/
655661
bool SharedHotStandbyActive;
656662

663+
/*
664+
* SharedPromoteIsTriggered indicates if a standby promotion has been
665+
* triggered. Protected by info_lck.
666+
*/
667+
bool SharedPromoteIsTriggered;
668+
657669
/*
658670
* WalWriterSleeping indicates whether the WAL writer is currently in
659671
* low-power mode (and hence should be nudged if an async commit occurs).
@@ -912,6 +924,7 @@ static void InitControlFile(uint64 sysidentifier);
912924
static void WriteControlFile(void);
913925
static void ReadControlFile(void);
914926
static char *str_time(pg_time_t tnow);
927+
static void SetPromoteIsTriggered(void);
915928
static bool CheckForStandbyTrigger(void);
916929

917930
#ifdef WAL_DEBUG
@@ -5112,6 +5125,7 @@ XLOGShmemInit(void)
51125125
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
51135126
XLogCtl->SharedRecoveryInProgress = true;
51145127
XLogCtl->SharedHotStandbyActive = false;
5128+
XLogCtl->SharedPromoteIsTriggered = false;
51155129
XLogCtl->WalWriterSleeping = false;
51165130

51175131
SpinLockInit(&XLogCtl->Insert.insertpos_lck);
@@ -5940,16 +5954,22 @@ recoveryPausesHere(void)
59405954
if (!LocalHotStandbyActive)
59415955
return;
59425956

5957+
/* Don't pause after standby promotion has been triggered */
5958+
if (LocalPromoteIsTriggered)
5959+
return;
5960+
59435961
ereport(LOG,
59445962
(errmsg("recovery has paused"),
59455963
errhint("Execute pg_wal_replay_resume() to continue.")));
59465964

59475965
while (RecoveryIsPaused())
59485966
{
5967+
HandleStartupProcInterrupts();
5968+
if (CheckForStandbyTrigger())
5969+
return;
59495970
pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
59505971
pg_usleep(1000000L); /* 1000 ms */
59515972
pgstat_report_wait_end();
5952-
HandleStartupProcInterrupts();
59535973
}
59545974
}
59555975

@@ -12277,6 +12297,40 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
1227712297
return emode;
1227812298
}
1227912299

12300+
/*
12301+
* Has a standby promotion already been triggered?
12302+
*
12303+
* Unlike CheckForStandbyTrigger(), this works in any process
12304+
* that's connected to shared memory.
12305+
*/
12306+
bool
12307+
PromoteIsTriggered(void)
12308+
{
12309+
/*
12310+
* We check shared state each time only until a standby promotion is
12311+
* triggered. We can't trigger a promotion again, so there's no need to
12312+
* keep checking after the shared variable has once been seen true.
12313+
*/
12314+
if (LocalPromoteIsTriggered)
12315+
return true;
12316+
12317+
SpinLockAcquire(&XLogCtl->info_lck);
12318+
LocalPromoteIsTriggered = XLogCtl->SharedPromoteIsTriggered;
12319+
SpinLockRelease(&XLogCtl->info_lck);
12320+
12321+
return LocalPromoteIsTriggered;
12322+
}
12323+
12324+
static void
12325+
SetPromoteIsTriggered(void)
12326+
{
12327+
SpinLockAcquire(&XLogCtl->info_lck);
12328+
XLogCtl->SharedPromoteIsTriggered = true;
12329+
SpinLockRelease(&XLogCtl->info_lck);
12330+
12331+
LocalPromoteIsTriggered = true;
12332+
}
12333+
1228012334
/*
1228112335
* Check to see whether the user-specified trigger file exists and whether a
1228212336
* promote request has arrived. If either condition holds, return true.
@@ -12285,12 +12339,11 @@ static bool
1228512339
CheckForStandbyTrigger(void)
1228612340
{
1228712341
struct stat stat_buf;
12288-
static bool triggered = false;
1228912342

12290-
if (triggered)
12343+
if (LocalPromoteIsTriggered)
1229112344
return true;
1229212345

12293-
if (IsPromoteTriggered())
12346+
if (IsPromoteSignaled())
1229412347
{
1229512348
/*
1229612349
* In 9.1 and 9.2 the postmaster unlinked the promote file inside the
@@ -12313,8 +12366,8 @@ CheckForStandbyTrigger(void)
1231312366

1231412367
ereport(LOG, (errmsg("received promote request")));
1231512368

12316-
ResetPromoteTriggered();
12317-
triggered = true;
12369+
ResetPromoteSignaled();
12370+
SetPromoteIsTriggered();
1231812371
return true;
1231912372
}
1232012373

@@ -12326,7 +12379,7 @@ CheckForStandbyTrigger(void)
1232612379
ereport(LOG,
1232712380
(errmsg("promote trigger file found: %s", PromoteTriggerFile)));
1232812381
unlink(PromoteTriggerFile);
12329-
triggered = true;
12382+
SetPromoteIsTriggered();
1233012383
fast_promote = true;
1233112384
return true;
1233212385
}

src/backend/access/transam/xlogfuncs.c

+14
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,13 @@ pg_wal_replay_pause(PG_FUNCTION_ARGS)
531531
errmsg("recovery is not in progress"),
532532
errhint("Recovery control functions can only be executed during recovery.")));
533533

534+
if (PromoteIsTriggered())
535+
ereport(ERROR,
536+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
537+
errmsg("standby promotion is ongoing"),
538+
errhint("%s cannot be executed after promotion is triggered.",
539+
"pg_wal_replay_pause()")));
540+
534541
SetRecoveryPause(true);
535542

536543
PG_RETURN_VOID();
@@ -551,6 +558,13 @@ pg_wal_replay_resume(PG_FUNCTION_ARGS)
551558
errmsg("recovery is not in progress"),
552559
errhint("Recovery control functions can only be executed during recovery.")));
553560

561+
if (PromoteIsTriggered())
562+
ereport(ERROR,
563+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
564+
errmsg("standby promotion is ongoing"),
565+
errhint("%s cannot be executed after promotion is triggered.",
566+
"pg_wal_replay_resume()")));
567+
554568
SetRecoveryPause(false);
555569

556570
PG_RETURN_VOID();

src/backend/postmaster/startup.c

+6-6
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
*/
4040
static volatile sig_atomic_t got_SIGHUP = false;
4141
static volatile sig_atomic_t shutdown_requested = false;
42-
static volatile sig_atomic_t promote_triggered = false;
42+
static volatile sig_atomic_t promote_signaled = false;
4343

4444
/*
4545
* Flag set when executing a restore command, to tell SIGTERM signal handler
@@ -63,7 +63,7 @@ StartupProcTriggerHandler(SIGNAL_ARGS)
6363
{
6464
int save_errno = errno;
6565

66-
promote_triggered = true;
66+
promote_signaled = true;
6767
WakeupRecovery();
6868

6969
errno = save_errno;
@@ -197,13 +197,13 @@ PostRestoreCommand(void)
197197
}
198198

199199
bool
200-
IsPromoteTriggered(void)
200+
IsPromoteSignaled(void)
201201
{
202-
return promote_triggered;
202+
return promote_signaled;
203203
}
204204

205205
void
206-
ResetPromoteTriggered(void)
206+
ResetPromoteSignaled(void)
207207
{
208-
promote_triggered = false;
208+
promote_signaled = false;
209209
}

src/include/access/xlog.h

+1
Original file line number | Diff line number | Diff line change
@@ -313,6 +313,7 @@ extern XLogRecPtr GetFlushRecPtr(void);
313313
extern XLogRecPtr GetLastImportantRecPtr(void);
314314
extern void RemovePromoteSignalFiles(void);
315315

316+
extern bool PromoteIsTriggered(void);
316317
extern bool CheckPromoteSignal(void);
317318
extern void WakeupRecovery(void);
318319
extern void SetWalWriterSleeping(bool sleeping);

src/include/postmaster/startup.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ extern void HandleStartupProcInterrupts(void);
1616
extern void StartupProcessMain(void) pg_attribute_noreturn();
1717
extern void PreRestoreCommand(void);
1818
extern void PostRestoreCommand(void);
19-
extern bool IsPromoteTriggered(void);
20-
extern void ResetPromoteTriggered(void);
19+
extern bool IsPromoteSignaled(void);
20+
extern void ResetPromoteSignaled(void);
2121

2222
#endif /* _STARTUP_H */

0 commit comments

Comments
 (0)