Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7e48b77

Browse files
committed
Fix some serious bugs in archive recovery, now that bgwriter is active
during it: When bgwriter is active, the startup process can't perform mdsync() correctly because it won't see the fsync requests accumulated in bgwriter's private pendingOpsTable. Therefore make bgwriter responsible for the end-of-recovery checkpoint as well, when it's active. When bgwriter is active (= archive recovery), the startup process must not accumulate fsync requests to its own pendingOpsTable, since bgwriter won't see them there when it performs restartpoints. Make startup process drop its pendingOpsTable when bgwriter is launched to avoid that. Update minimum recovery point one last time when leaving archive recovery. It won't be updated by the end-of-recovery checkpoint because XLogFlush() sees us as out of recovery already. This fixes bug #4879 reported by Fujii Masao.
1 parent e6b8f47 commit 7e48b77

File tree

5 files changed

+100
-38
lines changed

5 files changed

+100
-38
lines changed

src/backend/access/transam/xlog.c

+68-33
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.343 2009/06/11 14:48:54 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.344 2009/06/25 21:36:00 heikki Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -4912,12 +4912,18 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
49124912
{
49134913
char recoveryPath[MAXPGPATH];
49144914
char xlogpath[MAXPGPATH];
4915+
XLogRecPtr InvalidXLogRecPtr = {0, 0};
49154916

49164917
/*
49174918
* We are no longer in archive recovery state.
49184919
*/
49194920
InArchiveRecovery = false;
49204921

4922+
/*
4923+
* Update min recovery point one last time.
4924+
*/
4925+
UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
4926+
49214927
/*
49224928
* We should have the ending log segment currently open. Verify, and then
49234929
* close it (to avoid problems on Windows with trying to rename or delete
@@ -5156,6 +5162,7 @@ StartupXLOG(void)
51565162
XLogRecord *record;
51575163
uint32 freespace;
51585164
TransactionId oldestActiveXID;
5165+
bool bgwriterLaunched = false;
51595166

51605167
XLogCtl->SharedRecoveryInProgress = true;
51615168

@@ -5472,7 +5479,11 @@ StartupXLOG(void)
54725479
* process in addition to postmaster!
54735480
*/
54745481
if (InArchiveRecovery && IsUnderPostmaster)
5482+
{
5483+
SetForwardFsyncRequests();
54755484
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
5485+
bgwriterLaunched = true;
5486+
}
54765487

54775488
/*
54785489
* main redo apply loop
@@ -5709,12 +5720,6 @@ StartupXLOG(void)
57095720
/* Pre-scan prepared transactions to find out the range of XIDs present */
57105721
oldestActiveXID = PrescanPreparedTransactions();
57115722

5712-
/*
5713-
* Allow writing WAL for us, so that we can create a checkpoint record.
5714-
* But not yet for other backends!
5715-
*/
5716-
LocalRecoveryInProgress = false;
5717-
57185723
if (InRecovery)
57195724
{
57205725
int rmid;
@@ -5743,7 +5748,12 @@ StartupXLOG(void)
57435748
* the rule that TLI only changes in shutdown checkpoints, which
57445749
* allows some extra error checking in xlog_redo.
57455750
*/
5746-
CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
5751+
if (bgwriterLaunched)
5752+
RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
5753+
CHECKPOINT_IMMEDIATE |
5754+
CHECKPOINT_WAIT);
5755+
else
5756+
CreateCheckPoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE);
57475757

57485758
/*
57495759
* And finally, execute the recovery_end_command, if any.
@@ -5806,7 +5816,7 @@ StartupXLOG(void)
58065816
}
58075817

58085818
/*
5809-
* All done. Allow others to write WAL.
5819+
* All done. Allow backends to write WAL.
58105820
*/
58115821
XLogCtl->SharedRecoveryInProgress = false;
58125822
}
@@ -6123,12 +6133,13 @@ LogCheckpointStart(int flags, bool restartpoint)
61236133
* the main message, but what about all the flags?
61246134
*/
61256135
if (restartpoint)
6126-
msg = "restartpoint starting:%s%s%s%s%s%s";
6136+
msg = "restartpoint starting:%s%s%s%s%s%s%s";
61276137
else
6128-
msg = "checkpoint starting:%s%s%s%s%s%s";
6138+
msg = "checkpoint starting:%s%s%s%s%s%s%s";
61296139

61306140
elog(LOG, msg,
61316141
(flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6142+
(flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
61326143
(flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
61336144
(flags & CHECKPOINT_FORCE) ? " force" : "",
61346145
(flags & CHECKPOINT_WAIT) ? " wait" : "",
@@ -6190,10 +6201,12 @@ LogCheckpointEnd(bool restartpoint)
61906201
*
61916202
* flags is a bitwise OR of the following:
61926203
* CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
6204+
* CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
61936205
* CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
61946206
* ignoring checkpoint_completion_target parameter.
61956207
* CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured
6196-
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN).
6208+
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN and
6209+
* CHECKPOINT_END_OF_RECOVERY).
61976210
*
61986211
* Note: flags contains other bits, of interest here only for logging purposes.
61996212
* In particular note that this routine is synchronous and does not pay
@@ -6202,7 +6215,7 @@ LogCheckpointEnd(bool restartpoint)
62026215
void
62036216
CreateCheckPoint(int flags)
62046217
{
6205-
bool shutdown = (flags & CHECKPOINT_IS_SHUTDOWN) != 0;
6218+
bool shutdown;
62066219
CheckPoint checkPoint;
62076220
XLogRecPtr recptr;
62086221
XLogCtlInsert *Insert = &XLogCtl->Insert;
@@ -6212,35 +6225,53 @@ CreateCheckPoint(int flags)
62126225
uint32 _logSeg;
62136226
TransactionId *inCommitXids;
62146227
int nInCommit;
6228+
bool OldInRecovery = InRecovery;
62156229

6216-
/* shouldn't happen */
6217-
if (RecoveryInProgress())
6218-
elog(ERROR, "can't create a checkpoint during recovery");
6230+
/*
6231+
* An end-of-recovery checkpoint is really a shutdown checkpoint, just
6232+
* issued at a different time.
6233+
*/
6234+
if (flags & ((CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY) != 0))
6235+
shutdown = true;
6236+
else
6237+
shutdown = false;
62196238

62206239
/*
6221-
* Acquire CheckpointLock to ensure only one checkpoint happens at a time.
6222-
* During normal operation, bgwriter is the only process that creates
6223-
* checkpoints, but at the end of archive recovery, the bgwriter can be
6224-
* busy creating a restartpoint while the startup process tries to perform
6225-
* the startup checkpoint.
6240+
* A startup checkpoint is created before anyone else is allowed to
6241+
* write WAL. To allow us to write the checkpoint record, set
6242+
* LocalRecoveryInProgress to false. This lets us write WAL, but others
6243+
* are still not allowed to do so.
62266244
*/
6227-
if (!LWLockConditionalAcquire(CheckpointLock, LW_EXCLUSIVE))
6245+
if (flags & CHECKPOINT_END_OF_RECOVERY)
62286246
{
6229-
Assert(InRecovery);
6247+
Assert(RecoveryInProgress());
6248+
LocalRecoveryInProgress = false;
6249+
InitXLOGAccess();
62306250

62316251
/*
6232-
* A restartpoint is in progress. Wait until it finishes. This can
6233-
* cause an extra restartpoint to be performed, but that's OK because
6234-
* we're just about to perform a checkpoint anyway. Flushing the
6235-
* buffers in this restartpoint can take some time, but that time is
6236-
* saved from the upcoming checkpoint so the net effect is zero.
6252+
* Before 8.4, end-of-recovery checkpoints were always performed by
6253+
* the startup process, and InRecovery was set true. InRecovery is not
6254+
* normally set in bgwriter, but we set it here temporarily to avoid
6255+
* confusing old code in the end-of-recovery checkpoint code path that
6256+
* rely on it.
62376257
*/
6238-
ereport(DEBUG2, (errmsg("hurrying in-progress restartpoint")));
6239-
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT);
6240-
6241-
LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
6258+
InRecovery = true;
6259+
}
6260+
else
6261+
{
6262+
/* shouldn't happen */
6263+
if (RecoveryInProgress())
6264+
elog(ERROR, "can't create a checkpoint during recovery");
62426265
}
62436266

6267+
/*
6268+
* Acquire CheckpointLock to ensure only one checkpoint happens at a time.
6269+
* (This is just pro forma, since in the present system structure there is
6270+
* only one process that is allowed to issue checkpoints at any given
6271+
* time.)
6272+
*/
6273+
LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
6274+
62446275
/*
62456276
* Prepare to accumulate statistics.
62466277
*
@@ -6298,7 +6329,8 @@ CreateCheckPoint(int flags)
62986329
* the end of the last checkpoint record, and its redo pointer must point
62996330
* to itself.
63006331
*/
6301-
if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FORCE)) == 0)
6332+
if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
6333+
CHECKPOINT_FORCE)) == 0)
63026334
{
63036335
XLogRecPtr curInsert;
63046336

@@ -6542,6 +6574,9 @@ CreateCheckPoint(int flags)
65426574
CheckpointStats.ckpt_segs_recycled);
65436575

65446576
LWLockRelease(CheckpointLock);
6577+
6578+
/* Restore old value */
6579+
InRecovery = OldInRecovery;
65456580
}
65466581

65476582
/*

src/backend/postmaster/bgwriter.c

+11-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*
3838
*
3939
* IDENTIFICATION
40-
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.60 2009/06/11 14:49:01 momjian Exp $
40+
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.61 2009/06/25 21:36:00 heikki Exp $
4141
*
4242
*-------------------------------------------------------------------------
4343
*/
@@ -448,6 +448,13 @@ BackgroundWriterMain(void)
448448
bgs->ckpt_started++;
449449
SpinLockRelease(&bgs->ckpt_lck);
450450

451+
/*
452+
* The end-of-recovery checkpoint is a real checkpoint that's
453+
* performed while we're still in recovery.
454+
*/
455+
if (flags & CHECKPOINT_END_OF_RECOVERY)
456+
do_restartpoint = false;
457+
451458
/*
452459
* We will warn if (a) too soon since last checkpoint (whatever
453460
* caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag
@@ -895,10 +902,12 @@ BgWriterShmemInit(void)
895902
*
896903
* flags is a bitwise OR of the following:
897904
* CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
905+
* CHECKPOINT_END_OF_RECOVERY: checkpoint is to finish WAL recovery.
898906
* CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
899907
* ignoring checkpoint_completion_target parameter.
900908
* CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occured
901-
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN).
909+
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN and
910+
* CHECKPOINT_END_OF_RECOVERY).
902911
* CHECKPOINT_WAIT: wait for completion before returning (otherwise,
903912
* just signal bgwriter to do it, and return).
904913
* CHECKPOINT_CAUSE_XLOG: checkpoint is requested due to xlog filling.

src/backend/storage/smgr/md.c

+16-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.146 2009/06/11 14:49:02 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.147 2009/06/25 21:36:00 heikki Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -203,6 +203,21 @@ mdinit(void)
203203
}
204204
}
205205

206+
/*
207+
* In archive recovery, we rely on bgwriter to do fsyncs(), but we don't
208+
* know that we do archive recovery at process startup when pendingOpsTable
209+
* has already been created. Calling this function drops pendingOpsTable
210+
* and causes any subsequent requests to be forwarded to bgwriter.
211+
*/
212+
void
213+
SetForwardFsyncRequests(void)
214+
{
215+
/* Perform any pending ops we may have queued up */
216+
if (pendingOpsTable)
217+
mdsync();
218+
pendingOpsTable = NULL;
219+
}
220+
206221
/*
207222
* mdexists() -- Does the physical file exist?
208223
*

src/include/access/xlog.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.91 2009/02/18 15:58:41 heikki Exp $
9+
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.92 2009/06/25 21:36:00 heikki Exp $
1010
*/
1111
#ifndef XLOG_H
1212
#define XLOG_H
@@ -166,6 +166,8 @@ extern bool XLOG_DEBUG;
166166
/* These indicate the cause of a checkpoint request */
167167
#define CHECKPOINT_CAUSE_XLOG 0x0010 /* XLOG consumption */
168168
#define CHECKPOINT_CAUSE_TIME 0x0020 /* Elapsed time */
169+
#define CHECKPOINT_END_OF_RECOVERY 0x0040 /* Like shutdown checkpoint, but
170+
* issued at end of WAL recovery */
169171

170172
/* Checkpoint statistics */
171173
typedef struct CheckpointStatsData

src/include/storage/smgr.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.67 2009/06/11 14:49:12 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.68 2009/06/25 21:36:00 heikki Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -109,6 +109,7 @@ extern void mdpreckpt(void);
109109
extern void mdsync(void);
110110
extern void mdpostckpt(void);
111111

112+
extern void SetForwardFsyncRequests(void);
112113
extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum,
113114
BlockNumber segno);
114115
extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum);

0 commit comments

Comments
 (0)