Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexander Korotkov, 2025-06-14 00:33:15 +0000
committer: Alexander Korotkov, 2025-06-14 01:15:24 +0000
commit: e2832bd961103a17e281919de7151f80f518cf24 (patch)
tree: f1d332a7b7367ad0b10d9027a76f05bdebeac7d2 /src/backend
parent: 7c7c0a77dc8a48ba4f88863d8138abd5fbac6cec (diff)
Keep WAL segments by the flushed value of the slot's restart LSN (branch: REL_14_STABLE)
The patch fixes the issue with the unexpected removal of old WAL segments after checkpoint, followed by an immediate restart. The issue occurs when a slot is advanced after the start of the checkpoint and before old WAL segments are removed at the end of the checkpoint.

The idea of the patch is to get the minimal restart_lsn at the beginning of checkpoint (or restart point) creation and use this value when calculating the oldest LSN for WAL segments removal at the end of checkpoint. This idea was proposed by Tomas Vondra in the discussion. Unlike 291221c46575, this fix doesn't affect ABI and is intended for back branches.

Discussion: https://postgr.es/m/flat/1d12d2-67235980-35-19a406a0%4063439497
Author: Vitaly Davydov <v.davydov@postgrespro.ru>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 13
Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/access/transam/xlog.c          55
-rw-r--r--  src/backend/replication/logical/logical.c  10
-rw-r--r--  src/backend/replication/walsender.c         4
3 files changed, 60 insertions, 9 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 9ce4297e3de..3f8de31b386 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -930,7 +930,8 @@ static void LocalSetXLogInsertAllowed(void);
static void CreateEndOfRecoveryRecord(void);
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn);
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
-static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
+static void KeepLogSeg(XLogRecPtr recptr, XLogRecPtr slotsMinLSN,
+ XLogSegNo *logSegNo);
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void);
static void AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic);
@@ -9122,6 +9123,7 @@ CreateCheckPoint(int flags)
XLogRecPtr last_important_lsn;
VirtualTransactionId *vxids;
int nvxids;
+ XLogRecPtr slotsMinReqLSN;
/*
* An end-of-recovery checkpoint is really a shutdown checkpoint, just
@@ -9336,6 +9338,15 @@ CreateCheckPoint(int flags)
END_CRIT_SECTION();
/*
+ * Get the current minimum LSN to be used later in the WAL segment
+ * cleanup. We may clean up only WAL segments, which are not needed
+ * according to synchronized LSNs of replication slots. The slot's LSN
+ * might be advanced concurrently, so we call this before
+ * CheckPointReplicationSlots() synchronizes replication slots.
+ */
+ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
+
+ /*
* In some cases there are groups of actions that must all occur on one
* side or the other of a checkpoint record. Before flushing the
* checkpoint record we must explicitly wait for any backend currently
@@ -9499,15 +9510,23 @@ CreateCheckPoint(int flags)
* prevent the disk holding the xlog from growing full.
*/
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
- KeepLogSeg(recptr, &_logSegNo);
+ KeepLogSeg(recptr, slotsMinReqLSN, &_logSegNo);
if (InvalidateObsoleteReplicationSlots(_logSegNo))
{
/*
+ * Recalculate the current minimum LSN to be used in the WAL segment
+ * cleanup. Then, we must synchronize the replication slots again in
+ * order to make this LSN safe to use.
+ */
+ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
+ CheckPointReplicationSlots();
+
+ /*
* Some slots have been invalidated; recalculate the old-segment
* horizon, starting again from RedoRecPtr.
*/
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
- KeepLogSeg(recptr, &_logSegNo);
+ KeepLogSeg(recptr, slotsMinReqLSN, &_logSegNo);
}
_logSegNo--;
RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
@@ -9740,6 +9759,7 @@ CreateRestartPoint(int flags)
XLogRecPtr endptr;
XLogSegNo _logSegNo;
TimestampTz xtime;
+ XLogRecPtr slotsMinReqLSN;
/* Get a local copy of the last safe checkpoint record. */
SpinLockAcquire(&XLogCtl->info_lck);
@@ -9820,6 +9840,15 @@ CreateRestartPoint(int flags)
MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
+ /*
+ * Get the current minimum LSN to be used later in the WAL segment
+ * cleanup. We may clean up only WAL segments, which are not needed
+ * according to synchronized LSNs of replication slots. The slot's LSN
+ * might be advanced concurrently, so we call this before
+ * CheckPointReplicationSlots() synchronizes replication slots.
+ */
+ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
+
if (log_checkpoints)
LogCheckpointStart(flags, true);
@@ -9908,15 +9937,23 @@ CreateRestartPoint(int flags)
receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
replayPtr = GetXLogReplayRecPtr(&replayTLI);
endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
- KeepLogSeg(endptr, &_logSegNo);
+ KeepLogSeg(endptr, slotsMinReqLSN, &_logSegNo);
if (InvalidateObsoleteReplicationSlots(_logSegNo))
{
/*
+ * Recalculate the current minimum LSN to be used in the WAL segment
+ * cleanup. Then, we must synchronize the replication slots again in
+ * order to make this LSN safe to use.
+ */
+ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
+ CheckPointReplicationSlots();
+
+ /*
* Some slots have been invalidated; recalculate the old-segment
* horizon, starting again from RedoRecPtr.
*/
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
- KeepLogSeg(endptr, &_logSegNo);
+ KeepLogSeg(endptr, slotsMinReqLSN, &_logSegNo);
}
_logSegNo--;
@@ -10019,6 +10056,7 @@ GetWALAvailability(XLogRecPtr targetLSN)
XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
uint64 keepSegs;
+ XLogRecPtr slotsMinReqLSN;
/*
* slot does not reserve WAL. Either deactivated, or has never been active
@@ -10032,8 +10070,9 @@ GetWALAvailability(XLogRecPtr targetLSN)
* oldestSlotSeg to the current segment.
*/
currpos = GetXLogWriteRecPtr();
+ slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
- KeepLogSeg(currpos, &oldestSlotSeg);
+ KeepLogSeg(currpos, slotsMinReqLSN, &oldestSlotSeg);
/*
* Find the oldest extant segment file. We get 1 until checkpoint removes
@@ -10094,7 +10133,7 @@ GetWALAvailability(XLogRecPtr targetLSN)
* invalidation is optionally done here, instead.
*/
static void
-KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
+KeepLogSeg(XLogRecPtr recptr, XLogRecPtr slotsMinReqLSN, XLogSegNo *logSegNo)
{
XLogSegNo currSegNo;
XLogSegNo segno;
@@ -10107,7 +10146,7 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
* Calculate how many segments are kept by slots first, adjusting for
* max_slot_wal_keep_size.
*/
- keep = XLogGetReplicationSlotMinimumLSN();
+ keep = slotsMinReqLSN;
if (keep != InvalidXLogRecPtr && keep < recptr)
{
XLByteToSeg(keep, segno, wal_segment_size);
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 897bdbf7364..f40b84592a6 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -1768,7 +1768,15 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
SpinLockRelease(&MyReplicationSlot->mutex);
- /* first write new xmin to disk, so we know what's up after a crash */
+ /*
+ * First, write new xmin and restart_lsn to disk so we know what's up
+ * after a crash. Even when we do this, the checkpointer can see the
+ * updated restart_lsn value in the shared memory; then, a crash can
+ * happen before we manage to write that value to the disk. Thus,
+ * checkpointer still needs to make special efforts to keep WAL
+ * segments required by the restart_lsn written to the disk. See
+ * CreateCheckPoint() and CreateRestartPoint() for details.
+ */
if (updated_xmin || updated_restart)
{
ReplicationSlotMarkDirty();
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 28f0a294736..5f66059c0f4 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1906,6 +1906,10 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
* be energy wasted - the worst lost information can do here is give us
* wrong information in a statistics view - we'll just potentially be more
* conservative in removing files.
+ *
+ * Checkpointer makes special efforts to keep the WAL segments required by
+ * the restart_lsn written to the disk. See CreateCheckPoint() and
+ * CreateRestartPoint() for details.
*/
}