Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4b0d28d

Browse files
Remove secondary checkpoint
Previously the server reserved WAL for the last two checkpoints, which used too much disk space for small servers.

Bumps PG_CONTROL_VERSION.

Author: Simon Riggs <simon@2ndQuadrant.com>
Reviewed-by: Michael Paquier <michael.paquier@gmail.com>
1 parent 98267ee commit 4b0d28d

File tree

8 files changed

+62
-126
lines changed

8 files changed

+62
-126
lines changed

doc/src/sgml/backup.sgml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ tar -cf backup.tar /usr/local/pgsql/data
568568
normally creates just a few segment files and then
569569
<quote>recycles</quote> them by renaming no-longer-needed segment files
570570
to higher segment numbers. It's assumed that segment files whose
571-
contents precede the checkpoint-before-last are no longer of
571+
contents precede the last checkpoint are no longer of
572572
interest and can be recycled.
573573
</para>
574574

doc/src/sgml/func.sgml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17948,11 +17948,6 @@ SELECT collation for ('foo' COLLATE "de_DE");
1794817948
<entry><type>pg_lsn</type></entry>
1794917949
</row>
1795017950

17951-
<row>
17952-
<entry><literal>prior_lsn</literal></entry>
17953-
<entry><type>pg_lsn</type></entry>
17954-
</row>
17955-
1795617951
<row>
1795717952
<entry><literal>redo_lsn</literal></entry>
1795817953
<entry><type>pg_lsn</type></entry>

src/backend/access/transam/xlog.c

Lines changed: 22 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,13 +2221,18 @@ CalculateCheckpointSegments(void)
22212221
* Calculate the distance at which to trigger a checkpoint, to avoid
22222222
* exceeding max_wal_size_mb. This is based on two assumptions:
22232223
*
2224-
* a) we keep WAL for two checkpoint cycles, back to the "prev" checkpoint.
2224+
* a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2225+
* WAL for two checkpoint cycles to allow us to recover from the
2226+
* secondary checkpoint if the first checkpoint failed, though we
2227+
* only did this on the master anyway, not on standby. Keeping just
2228+
* one checkpoint simplifies processing and reduces disk space in
2229+
* many smaller databases.)
22252230
* b) during checkpoint, we consume checkpoint_completion_target *
22262231
* number of segments consumed between checkpoints.
22272232
*-------
22282233
*/
22292234
target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2230-
(2.0 + CheckPointCompletionTarget);
2235+
(1.0 + CheckPointCompletionTarget);
22312236

22322237
/* round down */
22332238
CheckPointSegments = (int) target;
@@ -2279,23 +2284,8 @@ XLOGfileslop(XLogRecPtr PriorRedoPtr)
22792284
* To estimate where the next checkpoint will finish, assume that the
22802285
* system runs steadily consuming CheckPointDistanceEstimate bytes between
22812286
* every checkpoint.
2282-
*
2283-
* The reason this calculation is done from the prior checkpoint, not the
2284-
* one that just finished, is that this behaves better if some checkpoint
2285-
* cycles are abnormally short, like if you perform a manual checkpoint
2286-
* right after a timed one. The manual checkpoint will make almost a full
2287-
* cycle's worth of WAL segments available for recycling, because the
2288-
* segments from the prior's prior, fully-sized checkpoint cycle are no
2289-
* longer needed. However, the next checkpoint will make only few segments
2290-
* available for recycling, the ones generated between the timed
2291-
* checkpoint and the manual one right after that. If at the manual
2292-
* checkpoint we only retained enough segments to get us to the next timed
2293-
* one, and removed the rest, then at the next checkpoint we would not
2294-
* have enough segments around for recycling, to get us to the checkpoint
2295-
* after that. Basing the calculations on the distance from the prior redo
2296-
* pointer largely fixes that problem.
2297-
*/
2298-
distance = (2.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
2287+
*/
2288+
distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
22992289
/* add 10% for good measure. */
23002290
distance *= 1.10;
23012291

@@ -6593,30 +6583,17 @@ StartupXLOG(void)
65936583
(errmsg("checkpoint record is at %X/%X",
65946584
(uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
65956585
}
6596-
else if (StandbyMode)
6586+
else
65976587
{
65986588
/*
6599-
* The last valid checkpoint record required for a streaming
6600-
* recovery exists in neither standby nor the primary.
6589+
* We used to attempt to go back to a secondary checkpoint
6590+
* record here, but only when not in standby_mode. We now
6591+
* just fail if we can't read the last checkpoint because
6592+
* this allows us to simplify processing around checkpoints.
66016593
*/
66026594
ereport(PANIC,
66036595
(errmsg("could not locate a valid checkpoint record")));
66046596
}
6605-
else
6606-
{
6607-
checkPointLoc = ControlFile->prevCheckPoint;
6608-
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 2, true);
6609-
if (record != NULL)
6610-
{
6611-
ereport(LOG,
6612-
(errmsg("using previous checkpoint record at %X/%X",
6613-
(uint32) (checkPointLoc >> 32), (uint32) checkPointLoc)));
6614-
InRecovery = true; /* force recovery even if SHUTDOWNED */
6615-
}
6616-
else
6617-
ereport(PANIC,
6618-
(errmsg("could not locate a valid checkpoint record")));
6619-
}
66206597
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
66216598
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
66226599
}
@@ -6845,7 +6822,6 @@ StartupXLOG(void)
68456822
recoveryTargetTLI)));
68466823
ControlFile->state = DB_IN_CRASH_RECOVERY;
68476824
}
6848-
ControlFile->prevCheckPoint = ControlFile->checkPoint;
68496825
ControlFile->checkPoint = checkPointLoc;
68506826
ControlFile->checkPointCopy = checkPoint;
68516827
if (InArchiveRecovery)
@@ -7619,12 +7595,11 @@ StartupXLOG(void)
76197595
{
76207596
if (fast_promote)
76217597
{
7622-
checkPointLoc = ControlFile->prevCheckPoint;
7598+
checkPointLoc = ControlFile->checkPoint;
76237599

76247600
/*
76257601
* Confirm the last checkpoint is available for us to recover
7626-
* from if we fail. Note that we don't check for the secondary
7627-
* checkpoint since that isn't available in most base backups.
7602+
* from if we fail.
76287603
*/
76297604
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, false);
76307605
if (record != NULL)
@@ -8090,7 +8065,7 @@ LocalSetXLogInsertAllowed(void)
80908065
* Subroutine to try to fetch and validate a prior checkpoint record.
80918066
*
80928067
* whichChkpt identifies the checkpoint (merely for reporting purposes).
8093-
* 1 for "primary", 2 for "secondary", 0 for "other" (backup_label)
8068+
* 1 for "primary", 0 for "other" (backup_label)
80948069
*/
80958070
static XLogRecord *
80968071
ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
@@ -8110,10 +8085,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
81108085
ereport(LOG,
81118086
(errmsg("invalid primary checkpoint link in control file")));
81128087
break;
8113-
case 2:
8114-
ereport(LOG,
8115-
(errmsg("invalid secondary checkpoint link in control file")));
8116-
break;
81178088
default:
81188089
ereport(LOG,
81198090
(errmsg("invalid checkpoint link in backup_label file")));
@@ -8135,10 +8106,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
81358106
ereport(LOG,
81368107
(errmsg("invalid primary checkpoint record")));
81378108
break;
8138-
case 2:
8139-
ereport(LOG,
8140-
(errmsg("invalid secondary checkpoint record")));
8141-
break;
81428109
default:
81438110
ereport(LOG,
81448111
(errmsg("invalid checkpoint record")));
@@ -8154,10 +8121,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
81548121
ereport(LOG,
81558122
(errmsg("invalid resource manager ID in primary checkpoint record")));
81568123
break;
8157-
case 2:
8158-
ereport(LOG,
8159-
(errmsg("invalid resource manager ID in secondary checkpoint record")));
8160-
break;
81618124
default:
81628125
ereport(LOG,
81638126
(errmsg("invalid resource manager ID in checkpoint record")));
@@ -8175,10 +8138,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
81758138
ereport(LOG,
81768139
(errmsg("invalid xl_info in primary checkpoint record")));
81778140
break;
8178-
case 2:
8179-
ereport(LOG,
8180-
(errmsg("invalid xl_info in secondary checkpoint record")));
8181-
break;
81828141
default:
81838142
ereport(LOG,
81848143
(errmsg("invalid xl_info in checkpoint record")));
@@ -8194,10 +8153,6 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
81948153
ereport(LOG,
81958154
(errmsg("invalid length of primary checkpoint record")));
81968155
break;
8197-
case 2:
8198-
ereport(LOG,
8199-
(errmsg("invalid length of secondary checkpoint record")));
8200-
break;
82018156
default:
82028157
ereport(LOG,
82038158
(errmsg("invalid length of checkpoint record")));
@@ -8933,8 +8888,7 @@ CreateCheckPoint(int flags)
89338888
(errmsg("concurrent write-ahead log activity while database system is shutting down")));
89348889

89358890
/*
8936-
* Remember the prior checkpoint's redo pointer, used later to determine
8937-
* the point where the log can be truncated.
8891+
* Remember the prior checkpoint's redo ptr for UpdateCheckPointDistanceEstimate()
89388892
*/
89398893
PriorRedoPtr = ControlFile->checkPointCopy.redo;
89408894

@@ -8944,7 +8898,6 @@ CreateCheckPoint(int flags)
89448898
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
89458899
if (shutdown)
89468900
ControlFile->state = DB_SHUTDOWNED;
8947-
ControlFile->prevCheckPoint = ControlFile->checkPoint;
89488901
ControlFile->checkPoint = ProcLastRecPtr;
89498902
ControlFile->checkPointCopy = checkPoint;
89508903
ControlFile->time = (pg_time_t) time(NULL);
@@ -8982,8 +8935,7 @@ CreateCheckPoint(int flags)
89828935
smgrpostckpt();
89838936

89848937
/*
8985-
* Delete old log files (those no longer needed even for previous
8986-
* checkpoint or the standbys in XLOG streaming).
8938+
* Delete old log files and recycle them
89878939
*/
89888940
if (PriorRedoPtr != InvalidXLogRecPtr)
89898941
{
@@ -8992,7 +8944,8 @@ CreateCheckPoint(int flags)
89928944
/* Update the average distance between checkpoints. */
89938945
UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
89948946

8995-
XLByteToSeg(PriorRedoPtr, _logSegNo, wal_segment_size);
8947+
/* Trim from the last checkpoint, not the last - 1 */
8948+
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
89968949
KeepLogSeg(recptr, &_logSegNo);
89978950
_logSegNo--;
89988951
RemoveOldXlogFiles(_logSegNo, PriorRedoPtr, recptr);
@@ -9258,8 +9211,7 @@ CreateRestartPoint(int flags)
92589211
CheckPointGuts(lastCheckPoint.redo, flags);
92599212

92609213
/*
9261-
* Remember the prior checkpoint's redo pointer, used later to determine
9262-
* the point at which we can truncate the log.
9214+
* Remember the prior checkpoint's redo ptr for UpdateCheckPointDistanceEstimate()
92639215
*/
92649216
PriorRedoPtr = ControlFile->checkPointCopy.redo;
92659217

@@ -9273,7 +9225,6 @@ CreateRestartPoint(int flags)
92739225
if (ControlFile->state == DB_IN_ARCHIVE_RECOVERY &&
92749226
ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
92759227
{
9276-
ControlFile->prevCheckPoint = ControlFile->checkPoint;
92779228
ControlFile->checkPoint = lastCheckPointRecPtr;
92789229
ControlFile->checkPointCopy = lastCheckPoint;
92799230
ControlFile->time = (pg_time_t) time(NULL);

src/backend/utils/misc/pg_controldata.c

Lines changed: 37 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -93,41 +93,39 @@ pg_control_checkpoint(PG_FUNCTION_ARGS)
9393
tupdesc = CreateTemplateTupleDesc(19, false);
9494
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "checkpoint_lsn",
9595
LSNOID, -1, 0);
96-
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prior_lsn",
96+
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "redo_lsn",
9797
LSNOID, -1, 0);
98-
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "redo_lsn",
99-
LSNOID, -1, 0);
100-
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "redo_wal_file",
98+
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "redo_wal_file",
10199
TEXTOID, -1, 0);
102-
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "timeline_id",
100+
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "timeline_id",
103101
INT4OID, -1, 0);
104-
TupleDescInitEntry(tupdesc, (AttrNumber) 6, "prev_timeline_id",
102+
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "prev_timeline_id",
105103
INT4OID, -1, 0);
106-
TupleDescInitEntry(tupdesc, (AttrNumber) 7, "full_page_writes",
104+
TupleDescInitEntry(tupdesc, (AttrNumber) 6, "full_page_writes",
107105
BOOLOID, -1, 0);
108-
TupleDescInitEntry(tupdesc, (AttrNumber) 8, "next_xid",
106+
TupleDescInitEntry(tupdesc, (AttrNumber) 7, "next_xid",
109107
TEXTOID, -1, 0);
110-
TupleDescInitEntry(tupdesc, (AttrNumber) 9, "next_oid",
108+
TupleDescInitEntry(tupdesc, (AttrNumber) 8, "next_oid",
111109
OIDOID, -1, 0);
112-
TupleDescInitEntry(tupdesc, (AttrNumber) 10, "next_multixact_id",
110+
TupleDescInitEntry(tupdesc, (AttrNumber) 9, "next_multixact_id",
113111
XIDOID, -1, 0);
114-
TupleDescInitEntry(tupdesc, (AttrNumber) 11, "next_multi_offset",
112+
TupleDescInitEntry(tupdesc, (AttrNumber) 10, "next_multi_offset",
115113
XIDOID, -1, 0);
116-
TupleDescInitEntry(tupdesc, (AttrNumber) 12, "oldest_xid",
114+
TupleDescInitEntry(tupdesc, (AttrNumber) 11, "oldest_xid",
117115
XIDOID, -1, 0);
118-
TupleDescInitEntry(tupdesc, (AttrNumber) 13, "oldest_xid_dbid",
116+
TupleDescInitEntry(tupdesc, (AttrNumber) 12, "oldest_xid_dbid",
119117
OIDOID, -1, 0);
120-
TupleDescInitEntry(tupdesc, (AttrNumber) 14, "oldest_active_xid",
118+
TupleDescInitEntry(tupdesc, (AttrNumber) 13, "oldest_active_xid",
121119
XIDOID, -1, 0);
122-
TupleDescInitEntry(tupdesc, (AttrNumber) 15, "oldest_multi_xid",
120+
TupleDescInitEntry(tupdesc, (AttrNumber) 14, "oldest_multi_xid",
123121
XIDOID, -1, 0);
124-
TupleDescInitEntry(tupdesc, (AttrNumber) 16, "oldest_multi_dbid",
122+
TupleDescInitEntry(tupdesc, (AttrNumber) 15, "oldest_multi_dbid",
125123
OIDOID, -1, 0);
126-
TupleDescInitEntry(tupdesc, (AttrNumber) 17, "oldest_commit_ts_xid",
124+
TupleDescInitEntry(tupdesc, (AttrNumber) 16, "oldest_commit_ts_xid",
127125
XIDOID, -1, 0);
128-
TupleDescInitEntry(tupdesc, (AttrNumber) 18, "newest_commit_ts_xid",
126+
TupleDescInitEntry(tupdesc, (AttrNumber) 17, "newest_commit_ts_xid",
129127
XIDOID, -1, 0);
130-
TupleDescInitEntry(tupdesc, (AttrNumber) 19, "checkpoint_time",
128+
TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time",
131129
TIMESTAMPTZOID, -1, 0);
132130
tupdesc = BlessTupleDesc(tupdesc);
133131

@@ -149,62 +147,59 @@ pg_control_checkpoint(PG_FUNCTION_ARGS)
149147
values[0] = LSNGetDatum(ControlFile->checkPoint);
150148
nulls[0] = false;
151149

152-
values[1] = LSNGetDatum(ControlFile->prevCheckPoint);
150+
values[1] = LSNGetDatum(ControlFile->checkPointCopy.redo);
153151
nulls[1] = false;
154152

155-
values[2] = LSNGetDatum(ControlFile->checkPointCopy.redo);
153+
values[2] = CStringGetTextDatum(xlogfilename);
156154
nulls[2] = false;
157155

158-
values[3] = CStringGetTextDatum(xlogfilename);
156+
values[3] = Int32GetDatum(ControlFile->checkPointCopy.ThisTimeLineID);
159157
nulls[3] = false;
160158

161-
values[4] = Int32GetDatum(ControlFile->checkPointCopy.ThisTimeLineID);
159+
values[4] = Int32GetDatum(ControlFile->checkPointCopy.PrevTimeLineID);
162160
nulls[4] = false;
163161

164-
values[5] = Int32GetDatum(ControlFile->checkPointCopy.PrevTimeLineID);
162+
values[5] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites);
165163
nulls[5] = false;
166164

167-
values[6] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites);
168-
nulls[6] = false;
169-
170-
values[7] = CStringGetTextDatum(psprintf("%u:%u",
165+
values[6] = CStringGetTextDatum(psprintf("%u:%u",
171166
ControlFile->checkPointCopy.nextXidEpoch,
172167
ControlFile->checkPointCopy.nextXid));
168+
nulls[6] = false;
169+
170+
values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid);
173171
nulls[7] = false;
174172

175-
values[8] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid);
173+
values[8] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMulti);
176174
nulls[8] = false;
177175

178-
values[9] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMulti);
176+
values[9] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMultiOffset);
179177
nulls[9] = false;
180178

181-
values[10] = TransactionIdGetDatum(ControlFile->checkPointCopy.nextMultiOffset);
179+
values[10] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestXid);
182180
nulls[10] = false;
183181

184-
values[11] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestXid);
182+
values[11] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestXidDB);
185183
nulls[11] = false;
186184

187-
values[12] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestXidDB);
185+
values[12] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestActiveXid);
188186
nulls[12] = false;
189187

190-
values[13] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestActiveXid);
188+
values[13] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestMulti);
191189
nulls[13] = false;
192190

193-
values[14] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestMulti);
191+
values[14] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestMultiDB);
194192
nulls[14] = false;
195193

196-
values[15] = ObjectIdGetDatum(ControlFile->checkPointCopy.oldestMultiDB);
194+
values[15] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestCommitTsXid);
197195
nulls[15] = false;
198196

199-
values[16] = TransactionIdGetDatum(ControlFile->checkPointCopy.oldestCommitTsXid);
197+
values[16] = TransactionIdGetDatum(ControlFile->checkPointCopy.newestCommitTsXid);
200198
nulls[16] = false;
201199

202-
values[17] = TransactionIdGetDatum(ControlFile->checkPointCopy.newestCommitTsXid);
203-
nulls[17] = false;
204-
205-
values[18] = TimestampTzGetDatum(
200+
values[17] = TimestampTzGetDatum(
206201
time_t_to_timestamptz(ControlFile->checkPointCopy.time));
207-
nulls[18] = false;
202+
nulls[17] = false;
208203

209204
htup = heap_form_tuple(tupdesc, values, nulls);
210205

src/bin/pg_controldata/pg_controldata.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,6 @@ main(int argc, char *argv[])
222222
printf(_("Latest checkpoint location: %X/%X\n"),
223223
(uint32) (ControlFile->checkPoint >> 32),
224224
(uint32) ControlFile->checkPoint);
225-
printf(_("Prior checkpoint location: %X/%X\n"),
226-
(uint32) (ControlFile->prevCheckPoint >> 32),
227-
(uint32) ControlFile->prevCheckPoint);
228225
printf(_("Latest checkpoint's REDO location: %X/%X\n"),
229226
(uint32) (ControlFile->checkPointCopy.redo >> 32),
230227
(uint32) ControlFile->checkPointCopy.redo);

0 commit comments

Comments
 (0)