Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3fdf649

Browse files
committed
Fix failure to guarantee that a checkpoint will write out pg_clog updates
for transaction commits that occurred just before the checkpoint. This is an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a reproducible test case to prove its existence.
1 parent bc8a1fc commit 3fdf649

File tree

3 files changed

+45
-8
lines changed

3 files changed

+45
-8
lines changed

src/backend/access/transam/xact.c

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.177 2004/08/03 15:57:26 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.178 2004/08/11 04:07:15 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -574,13 +574,28 @@ RecordTransactionCommit(void)
574574
START_CRIT_SECTION();
575575

576576
/*
577-
* We only need to log the commit in XLOG if the transaction made
578-
* any transaction-controlled XLOG entries or will delete files.
577+
* If our transaction made any transaction-controlled XLOG entries,
578+
* we need to lock out checkpoint start between writing our XLOG
579+
* record and updating pg_clog. Otherwise it is possible for the
580+
* checkpoint to set REDO after the XLOG record but fail to flush the
581+
* pg_clog update to disk, leading to loss of the transaction commit
582+
* if we crash a little later. Slightly klugy fix for problem
583+
* discovered 2004-08-10.
584+
*
579585
* (If it made no transaction-controlled XLOG entries, its XID
580586
* appears nowhere in permanent storage, so no one else will ever care
581-
* if it committed.)
587+
* if it committed; so it doesn't matter if we lose the commit flag.)
588+
*
589+
* Note we only need a shared lock.
582590
*/
583591
madeTCentries = (MyLastRecPtr.xrecoff != 0);
592+
if (madeTCentries)
593+
LWLockAcquire(CheckpointStartLock, LW_SHARED);
594+
595+
/*
596+
* We only need to log the commit in XLOG if the transaction made
597+
* any transaction-controlled XLOG entries or will delete files.
598+
*/
584599
if (madeTCentries || nrels > 0)
585600
{
586601
XLogRecData rdata[3];
@@ -668,6 +683,10 @@ RecordTransactionCommit(void)
668683
TransactionIdCommitTree(nchildren, children);
669684
}
670685

686+
/* Unlock checkpoint lock if we acquired it */
687+
if (madeTCentries)
688+
LWLockRelease(CheckpointStartLock);
689+
671690
END_CRIT_SECTION();
672691
}
673692

@@ -850,6 +869,8 @@ RecordTransactionAbort(void)
850869
*
851870
* We do not flush XLOG to disk unless deleting files, since the
852871
* default assumption after a crash would be that we aborted, anyway.
872+
* For the same reason, we don't need to worry about interlocking
873+
* against checkpoint start.
853874
*/
854875
if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
855876
{

src/backend/access/transam/xlog.c

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.158 2004/08/09 16:26:01 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.159 2004/08/11 04:07:15 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -4699,6 +4699,15 @@ CreateCheckPoint(bool shutdown, bool force)
46994699
checkPoint.ThisTimeLineID = ThisTimeLineID;
47004700
checkPoint.time = time(NULL);
47014701

4702+
/*
4703+
* We must hold CheckpointStartLock while determining the checkpoint
4704+
* REDO pointer. This ensures that any concurrent transaction commits
4705+
* will be either not yet logged, or logged and recorded in pg_clog.
4706+
* See notes in RecordTransactionCommit().
4707+
*/
4708+
LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE);
4709+
4710+
/* And we need WALInsertLock too */
47024711
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
47034712

47044713
/*
@@ -4731,6 +4740,7 @@ CreateCheckPoint(bool shutdown, bool force)
47314740
ControlFile->checkPointCopy.redo.xrecoff)
47324741
{
47334742
LWLockRelease(WALInsertLock);
4743+
LWLockRelease(CheckpointStartLock);
47344744
LWLockRelease(CheckpointLock);
47354745
END_CRIT_SECTION();
47364746
return;
@@ -4789,6 +4799,9 @@ CreateCheckPoint(bool shutdown, bool force)
47894799
* GetSnapshotData needs to get XidGenLock while holding SInvalLock,
47904800
* so there's a risk of deadlock. Need to find a better solution. See
47914801
* pgsql-hackers discussion of 17-Dec-01.
4802+
*
4803+
* XXX actually, the whole UNDO code is dead code and unlikely to ever
4804+
* be revived, so the lack of a good solution here is not troubling.
47924805
*/
47934806
#ifdef NOT_USED
47944807
checkPoint.undo = GetUndoRecPtr();
@@ -4798,11 +4811,13 @@ CreateCheckPoint(bool shutdown, bool force)
47984811
#endif
47994812

48004813
/*
4801-
* Now we can release insert lock, allowing other xacts to proceed
4802-
* even while we are flushing disk buffers.
4814+
* Now we can release insert lock and checkpoint start lock, allowing
4815+
* other xacts to proceed even while we are flushing disk buffers.
48034816
*/
48044817
LWLockRelease(WALInsertLock);
48054818

4819+
LWLockRelease(CheckpointStartLock);
4820+
48064821
/*
48074822
* Get the other info we need for the checkpoint record.
48084823
*/

src/include/storage/lwlock.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.12 2004/06/11 16:43:24 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.13 2004/08/11 04:07:16 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -36,6 +36,7 @@ typedef enum LWLockId
3636
WALWriteLock,
3737
ControlFileLock,
3838
CheckpointLock,
39+
CheckpointStartLock,
3940
RelCacheInitLock,
4041
BgWriterCommLock,
4142

0 commit comments

Comments
 (0)