Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/access/nbtree/nbtsearch.c       11
-rw-r--r--  src/backend/access/transam/xlog.c             4
-rw-r--r--  src/backend/executor/execGrouping.c           4
-rw-r--r--  src/backend/executor/nodeTidrangescan.c       6
-rw-r--r--  src/backend/replication/logical/logical.c    18
-rw-r--r--  src/backend/replication/slot.c               57
6 files changed, 92 insertions, 8 deletions
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 070f14c8b91..36544ecfd58 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -2282,9 +2282,12 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
* previously-saved right link or left link. lastcurrblkno is the page that
* was current at the point where the blkno link was saved, which we use to
* reason about concurrent page splits/page deletions during backwards scans.
+ * In the common case where seized=false, blkno is either so->currPos.nextPage
+ * or so->currPos.prevPage, and lastcurrblkno is so->currPos.currPage.
*
- * On entry, caller shouldn't hold any locks or pins on any page (we work
- * directly off of blkno and lastcurrblkno instead). Parallel scan callers
+ * On entry, so->currPos shouldn't be locked by caller. so->currPos.buf must
+ * be InvalidBuffer/unpinned as needed by caller (note that lastcurrblkno
+ * won't need to be read again in almost all cases). Parallel scan callers
* that seized the scan before calling here should pass seized=true; such a
* caller's blkno and lastcurrblkno arguments come from the seized scan.
* seized=false callers just pass us the blkno/lastcurrblkno taken from their
@@ -2301,8 +2304,8 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
* success exit (except during so->dropPin index scans, when we drop the pin
* eagerly to avoid blocking VACUUM).
*
- * If there are no more matching records in the given direction, we drop all
- * locks and pins, invalidate so->currPos, and return false.
+ * If there are no more matching records in the given direction, we invalidate
+ * so->currPos (while ensuring it retains no locks or pins), and return false.
*
* We always release the scan for a parallel scan caller, regardless of
* success or failure; we'll call _bt_parallel_release as soon as possible.
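[Editor's note: the revised header comment pins down an invariant that was previously implicit: an ordinary (seized=false) caller derives blkno and lastcurrblkno directly from its saved scan position. The following is a minimal standalone sketch of that caller contract, not PostgreSQL source; BTScanPosSketch and choose_next_read are hypothetical names that only loosely mirror the real BTScanPosData.]

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t BlockNumber;

/* Loosely mirrors the BTScanPosData fields that matter here */
typedef struct BTScanPosSketch
{
	BlockNumber currPage;	/* page whose matches were just returned */
	BlockNumber nextPage;	/* right link saved while currPage was read */
	BlockNumber prevPage;	/* left link saved while currPage was read */
} BTScanPosSketch;

/*
 * In the seized=false case described above, blkno is the saved sibling
 * link in the scan direction, and lastcurrblkno is the page that was
 * current when that link was saved (used to reason about concurrent
 * page splits/page deletions during backwards scans).
 */
static void
choose_next_read(const BTScanPosSketch *pos, bool forward,
				 BlockNumber *blkno, BlockNumber *lastcurrblkno)
{
	*blkno = forward ? pos->nextPage : pos->prevPage;
	*lastcurrblkno = pos->currPage;
}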
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1914859b2ee..47ffc0a2307 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7498,6 +7498,10 @@ CreateCheckPoint(int flags)
if (PriorRedoPtr != InvalidXLogRecPtr)
UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
+#ifdef USE_INJECTION_POINTS
+ INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
+#endif
+
/*
* Delete old log files, those no longer needed for last checkpoint to
* prevent the disk holding the xlog from growing full.
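[Editor's note: INJECTION_POINT() is PostgreSQL's test-only hook mechanism; the new call gives tests a stable stopping point between checkpoint bookkeeping and old-WAL removal. Below is a hedged sketch of the general pattern only, not the real macro (whose lookup machinery lives behind utils/injection_point.h); INJECTION_POINT_SKETCH and injection_point_run_sketch are invented names.]

#include <stdio.h>

#ifdef USE_INJECTION_POINTS
/* test builds: run whatever callback a test attached under this name */
static void
injection_point_run_sketch(const char *name, void *arg)
{
	(void) arg;
	fprintf(stderr, "injection point reached: %s\n", name);
}
#define INJECTION_POINT_SKETCH(name, arg) injection_point_run_sketch((name), (arg))
#else
/* production builds: the hook compiles to nothing */
#define INJECTION_POINT_SKETCH(name, arg) ((void) 0)
#endif

static void
create_checkpoint_tail_sketch(void)
{
	/* ...checkpoint distance estimate updated above... */
	INJECTION_POINT_SKETCH("checkpoint-before-old-wal-removal", NULL);
	/* ...old WAL segments deleted below... */
}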
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 255bd795361..b5400749353 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -144,7 +144,7 @@ execTuplesHashPrepare(int numCols,
* hashfunctions: FmgrInfos of datatype-specific hashing functions to use
* collations: collations to use in comparisons
* nbuckets: initial estimate of hashtable size
- * additionalsize: size of data stored in ->additional
+ * additionalsize: size of data that may be stored along with the hash entry
* metacxt: memory context for long-lived allocation, but not per-entry data
* tablecxt: memory context in which to store table entries
* tempcxt: short-lived context for evaluation hash and comparison functions
@@ -288,7 +288,7 @@ ResetTupleHashTable(TupleHashTable hashtable)
*
* If isnew isn't NULL, then a new entry is created if no existing entry
* matches. On return, *isnew is true if the entry is newly created,
- * false if it existed already. ->additional_data in the new entry has
+ * false if it existed already. The additional data in the new entry has
* been zeroed.
*/
TupleHashEntry
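[Editor's note: the reworded comments describe the "additional" data contract: callers may reserve additionalsize extra bytes alongside each hash entry, and that space comes back zeroed for a newly created entry. A hedged sketch of the allocate-and-zero idea follows; HashEntrySketch and entry_alloc_sketch are hypothetical names, not the simplehash-based implementation.]

#include <stdlib.h>
#include <string.h>

typedef struct HashEntrySketch
{
	unsigned int hash;			/* stand-in for the entry's fixed header */
	char		additional[];	/* caller's additionalsize bytes follow */
} HashEntrySketch;

/* Allocate one entry plus the caller-requested trailing space, zeroed. */
static HashEntrySketch *
entry_alloc_sketch(size_t additionalsize)
{
	HashEntrySketch *entry;

	entry = malloc(sizeof(HashEntrySketch) + additionalsize);
	if (entry == NULL)
		return NULL;
	memset(entry->additional, 0, additionalsize);	/* "has been zeroed" */
	return entry;
}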
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index ab2eab9596e..26f7420b64b 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -128,9 +128,11 @@ TidExprListCreate(TidRangeScanState *tidrangestate)
* TidRangeEval
*
* Compute and set node's block and offset range to scan by evaluating
- * the trss_tidexprs. Returns false if we detect the range cannot
+ * node->trss_tidexprs. Returns false if we detect the range cannot
* contain any tuples. Returns true if it's possible for the range to
- * contain tuples.
+ * contain tuples. We don't bother validating that trss_mintid is less
+ * than or equal to trss_maxtid, as the scan_set_tidrange() table AM
+ * function will handle that.
* ----------------------------------------------------------------
*/
static bool
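[Editor's note: the updated comment makes explicit that TidRangeEval does not itself validate trss_mintid <= trss_maxtid; an inverted range is simply handed to the table AM, whose scan returns no tuples. A hedged sketch of that division of labor, with hypothetical names (ItemPointerSketch, tid_range_is_empty):]

#include <stdbool.h>
#include <stdint.h>

typedef struct ItemPointerSketch
{
	uint32_t	block;
	uint16_t	offset;
} ItemPointerSketch;

static int
tid_cmp(ItemPointerSketch a, ItemPointerSketch b)
{
	if (a.block != b.block)
		return a.block < b.block ? -1 : 1;
	if (a.offset != b.offset)
		return a.offset < b.offset ? -1 : 1;
	return 0;
}

/*
 * An inverted range is neutralized inside the table AM's
 * scan_set_tidrange(): scanning an empty range just yields no tuples,
 * so the executor node needs no mintid <= maxtid check of its own.
 */
static bool
tid_range_is_empty(ItemPointerSketch mintid, ItemPointerSketch maxtid)
{
	return tid_cmp(mintid, maxtid) > 0;
}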
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 1d56d0c4ef3..f1eb798f3e9 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -29,6 +29,7 @@
#include "postgres.h"
#include "access/xact.h"
+#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "fmgr.h"
#include "miscadmin.h"
@@ -41,6 +42,7 @@
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/inval.h"
#include "utils/memutils.h"
@@ -1825,9 +1827,13 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
{
bool updated_xmin = false;
bool updated_restart = false;
+ XLogRecPtr restart_lsn pg_attribute_unused();
SpinLockAcquire(&MyReplicationSlot->mutex);
+ /* remember the old restart lsn */
+ restart_lsn = MyReplicationSlot->data.restart_lsn;
+
/*
* Prevent moving the confirmed_flush backwards, as this could lead to
* data duplication issues caused by replicating already replicated
@@ -1881,6 +1887,18 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
/* first write new xmin to disk, so we know what's up after a crash */
if (updated_xmin || updated_restart)
{
+#ifdef USE_INJECTION_POINTS
+ XLogSegNo seg1,
+ seg2;
+
+ XLByteToSeg(restart_lsn, seg1, wal_segment_size);
+ XLByteToSeg(MyReplicationSlot->data.restart_lsn, seg2, wal_segment_size);
+
+ /* trigger injection point, but only if segment changes */
+ if (seg1 != seg2)
+ INJECTION_POINT("logical-replication-slot-advance-segment", NULL);
+#endif
+
ReplicationSlotMarkDirty();
ReplicationSlotSave();
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
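[Editor's note: the injection point fires only when the flushed restart LSN moves into a different WAL segment, the only case in which the set of removable segments can change. XLByteToSeg() is plain integer division by the segment size; a standalone sketch of the same test, with invented type names:]

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t XLogRecPtrSketch;
typedef uint64_t XLogSegNoSketch;

/* Mirrors XLByteToSeg() arithmetic: segment number = LSN / segment size */
static inline XLogSegNoSketch
lsn_to_seg(XLogRecPtrSketch lsn, uint64_t wal_segment_size)
{
	return lsn / wal_segment_size;
}

/* True only when the advance crossed at least one segment boundary */
static bool
segment_changed(XLogRecPtrSketch old_restart_lsn,
				XLogRecPtrSketch new_restart_lsn,
				uint64_t wal_segment_size)
{
	return lsn_to_seg(old_restart_lsn, wal_segment_size) !=
		lsn_to_seg(new_restart_lsn, wal_segment_size);
}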
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 600b87fa9cb..c64f020742f 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -424,6 +424,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
slot->candidate_restart_valid = InvalidXLogRecPtr;
slot->candidate_restart_lsn = InvalidXLogRecPtr;
slot->last_saved_confirmed_flush = InvalidXLogRecPtr;
+ slot->last_saved_restart_lsn = InvalidXLogRecPtr;
slot->inactive_since = 0;
/*
@@ -1165,20 +1166,41 @@ ReplicationSlotsComputeRequiredLSN(void)
{
ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
XLogRecPtr restart_lsn;
+ XLogRecPtr last_saved_restart_lsn;
bool invalidated;
+ ReplicationSlotPersistency persistency;
if (!s->in_use)
continue;
SpinLockAcquire(&s->mutex);
+ persistency = s->data.persistency;
restart_lsn = s->data.restart_lsn;
invalidated = s->data.invalidated != RS_INVAL_NONE;
+ last_saved_restart_lsn = s->last_saved_restart_lsn;
SpinLockRelease(&s->mutex);
/* invalidated slots need not apply */
if (invalidated)
continue;
+ /*
+ * For a persistent slot, use last_saved_restart_lsn to compute the
+ * oldest LSN for removal of WAL segments. The segments between
+ * last_saved_restart_lsn and restart_lsn might be needed by a
+ * persistent slot in the case of a database crash. Non-persistent
+ * slots can't survive a database crash, so we don't care about
+ * last_saved_restart_lsn for them.
+ */
+ if (persistency == RS_PERSISTENT)
+ {
+ if (last_saved_restart_lsn != InvalidXLogRecPtr &&
+ restart_lsn > last_saved_restart_lsn)
+ {
+ restart_lsn = last_saved_restart_lsn;
+ }
+ }
+
if (restart_lsn != InvalidXLogRecPtr &&
(min_required == InvalidXLogRecPtr ||
restart_lsn < min_required))
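[Editor's note: this branch is the core of the fix. WAL-retention decisions for a persistent slot must be based on the restart LSN that has actually reached disk, because after a crash the slot rewinds to that flushed value. A condensed standalone sketch of the clamp, using a hypothetical helper name:]

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t XLogRecPtrSketch;
#define INVALID_LSN_SKETCH ((XLogRecPtrSketch) 0)

/*
 * Returns the LSN this slot really requires for WAL retention: for a
 * persistent slot, the in-memory restart_lsn is clamped down to the value
 * last flushed to disk, since that is what the slot will hold after a
 * crash. Non-persistent slots don't survive a crash, so their in-memory
 * value is used as-is.
 */
static XLogRecPtrSketch
slot_required_lsn(bool persistent,
				  XLogRecPtrSketch restart_lsn,
				  XLogRecPtrSketch last_saved_restart_lsn)
{
	if (persistent &&
		last_saved_restart_lsn != INVALID_LSN_SKETCH &&
		restart_lsn > last_saved_restart_lsn)
		return last_saved_restart_lsn;
	return restart_lsn;
}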
@@ -1216,7 +1238,9 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
{
ReplicationSlot *s;
XLogRecPtr restart_lsn;
+ XLogRecPtr last_saved_restart_lsn;
bool invalidated;
+ ReplicationSlotPersistency persistency;
s = &ReplicationSlotCtl->replication_slots[i];
@@ -1230,14 +1254,33 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
/* read once, it's ok if it increases while we're checking */
SpinLockAcquire(&s->mutex);
+ persistency = s->data.persistency;
restart_lsn = s->data.restart_lsn;
invalidated = s->data.invalidated != RS_INVAL_NONE;
+ last_saved_restart_lsn = s->last_saved_restart_lsn;
SpinLockRelease(&s->mutex);
/* invalidated slots need not apply */
if (invalidated)
continue;
+ /*
+ * For a persistent slot, use last_saved_restart_lsn to compute the
+ * oldest LSN for removal of WAL segments. The segments between
+ * last_saved_restart_lsn and restart_lsn might be needed by a
+ * persistent slot in the case of a database crash. Non-persistent
+ * slots can't survive a database crash, so we don't care about
+ * last_saved_restart_lsn for them.
+ */
+ if (persistency == RS_PERSISTENT)
+ {
+ if (last_saved_restart_lsn != InvalidXLogRecPtr &&
+ restart_lsn > last_saved_restart_lsn)
+ {
+ restart_lsn = last_saved_restart_lsn;
+ }
+ }
+
if (restart_lsn == InvalidXLogRecPtr)
continue;
@@ -1455,6 +1498,7 @@ ReplicationSlotReserveWal(void)
Assert(slot != NULL);
Assert(slot->data.restart_lsn == InvalidXLogRecPtr);
+ Assert(slot->last_saved_restart_lsn == InvalidXLogRecPtr);
/*
* The replication slot mechanism is used to prevent removal of required
@@ -1766,6 +1810,8 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
*/
SpinLockAcquire(&s->mutex);
+ Assert(s->data.restart_lsn >= s->last_saved_restart_lsn);
+
restart_lsn = s->data.restart_lsn;
/* we do nothing if the slot is already invalid */
@@ -1835,7 +1881,10 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
* just rely on .invalidated.
*/
if (invalidation_cause == RS_INVAL_WAL_REMOVED)
+ {
s->data.restart_lsn = InvalidXLogRecPtr;
+ s->last_saved_restart_lsn = InvalidXLogRecPtr;
+ }
/* Let caller know */
*invalidated = true;
@@ -2079,6 +2128,12 @@ CheckPointReplicationSlots(bool is_shutdown)
SaveSlotToPath(s, path, LOG);
}
LWLockRelease(ReplicationSlotAllocationLock);
+
+ /*
+ * Recompute the required LSN as SaveSlotToPath() updated
+ * last_saved_restart_lsn for slots.
+ */
+ ReplicationSlotsComputeRequiredLSN();
}
/*
@@ -2354,6 +2409,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
if (!slot->just_dirtied)
slot->dirty = false;
slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush;
+ slot->last_saved_restart_lsn = cp.slotdata.restart_lsn;
SpinLockRelease(&slot->mutex);
LWLockRelease(&slot->io_in_progress_lock);
@@ -2569,6 +2625,7 @@ RestoreSlotFromDisk(const char *name)
slot->effective_xmin = cp.slotdata.xmin;
slot->effective_catalog_xmin = cp.slotdata.catalog_xmin;
slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush;
+ slot->last_saved_restart_lsn = cp.slotdata.restart_lsn;
slot->candidate_catalog_xmin = InvalidTransactionId;
slot->candidate_xmin_lsn = InvalidXLogRecPtr;