Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2666975

Browse files
committed
Handle logical slot conflicts on standby
During WAL replay on the standby, when a conflict with a logical slot is identified, invalidate such slots. There are two sources of conflicts: 1) Using the information added in 6af1793, logical slots are invalidated if required rows are removed 2) wal_level on the primary server is reduced to below logical Uses the infrastructure introduced in the prior commit. FIXME: add commit reference. Change InvalidatePossiblyObsoleteSlot() to use a recovery conflict to interrupt use of a slot, if called in the startup process. The new recovery conflict is added to pg_stat_database_conflicts, as confl_active_logicalslot. See 6af1793 for an overall design of logical decoding on a standby. Bumps catversion for the addition of the pg_stat_database_conflicts column. Bumps PGSTAT_FILE_FORMAT_ID for the same reason. Author: "Drouvot, Bertrand" <bertranddrouvot.pg@gmail.com> Author: Andres Freund <andres@anarazel.de> Author: Amit Khandekar <amitdkhan.pg@gmail.com> (in an older version) Reviewed-by: "Drouvot, Bertrand" <bertranddrouvot.pg@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Fabrízio de Royes Mello <fabriziomello@gmail.com> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Reviewed-by: Alvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://postgr.es/m/20230407075009.igg7be27ha2htkbt@awork3.anarazel.de
1 parent be87200 commit 2666975

File tree

20 files changed

+95
-6
lines changed

20 files changed

+95
-6
lines changed

doc/src/sgml/monitoring.sgml

+11
Original file line numberDiff line numberDiff line change
@@ -4742,6 +4742,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
47424742
deadlocks
47434743
</para></entry>
47444744
</row>
4745+
4746+
<row>
4747+
<entry role="catalog_table_entry"><para role="column_definition">
4748+
<structfield>confl_active_logicalslot</structfield> <type>bigint</type>
4749+
</para>
4750+
<para>
4751+
Number of uses of logical slots in this database that have been
4752+
canceled due to old snapshots or a too low <xref linkend="guc-wal-level"/>
4753+
on the primary
4754+
</para></entry>
4755+
</row>
47454756
</tbody>
47464757
</tgroup>
47474758
</table>

src/backend/access/gist/gistxlog.c

+2
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ gistRedoDeleteRecord(XLogReaderState *record)
197197
XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
198198

199199
ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
200+
xldata->isCatalogRel,
200201
rlocator);
201202
}
202203

@@ -390,6 +391,7 @@ gistRedoPageReuse(XLogReaderState *record)
390391
*/
391392
if (InHotStandby)
392393
ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
394+
xlrec->isCatalogRel,
393395
xlrec->locator);
394396
}
395397

src/backend/access/hash/hash_xlog.c

+1
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,7 @@ hash_xlog_vacuum_one_page(XLogReaderState *record)
10031003

10041004
XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
10051005
ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
1006+
xldata->isCatalogRel,
10061007
rlocator);
10071008
}
10081009

src/backend/access/heap/heapam.c

+3
Original file line numberDiff line numberDiff line change
@@ -8769,6 +8769,7 @@ heap_xlog_prune(XLogReaderState *record)
87698769
*/
87708770
if (InHotStandby)
87718771
ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
8772+
xlrec->isCatalogRel,
87728773
rlocator);
87738774

87748775
/*
@@ -8940,6 +8941,7 @@ heap_xlog_visible(XLogReaderState *record)
89408941
*/
89418942
if (InHotStandby)
89428943
ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
8944+
xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
89438945
rlocator);
89448946

89458947
/*
@@ -9061,6 +9063,7 @@ heap_xlog_freeze_page(XLogReaderState *record)
90619063

90629064
XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
90639065
ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
9066+
xlrec->isCatalogRel,
90649067
rlocator);
90659068
}
90669069

src/backend/access/nbtree/nbtxlog.c

+2
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ btree_xlog_delete(XLogReaderState *record)
669669
XLogRecGetBlockTag(record, 0, &rlocator, NULL, NULL);
670670

671671
ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
672+
xlrec->isCatalogRel,
672673
rlocator);
673674
}
674675

@@ -1007,6 +1008,7 @@ btree_xlog_reuse_page(XLogReaderState *record)
10071008

10081009
if (InHotStandby)
10091010
ResolveRecoveryConflictWithSnapshotFullXid(xlrec->snapshotConflictHorizon,
1011+
xlrec->isCatalogRel,
10101012
xlrec->locator);
10111013
}
10121014

src/backend/access/spgist/spgxlog.c

+1
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ spgRedoVacuumRedirect(XLogReaderState *record)
879879

880880
XLogRecGetBlockTag(record, 0, &locator, NULL, NULL);
881881
ResolveRecoveryConflictWithSnapshot(xldata->snapshotConflictHorizon,
882+
xldata->isCatalogRel,
882883
locator);
883884
}
884885

src/backend/access/transam/xlog.c

+15
Original file line numberDiff line numberDiff line change
@@ -7970,6 +7970,21 @@ xlog_redo(XLogReaderState *record)
79707970
/* Update our copy of the parameters in pg_control */
79717971
memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
79727972

7973+
/*
7974+
* Invalidate logical slots if we are in hot standby and the primary
7975+
* does not have a WAL level sufficient for logical decoding. No need
7976+
* to search for potentially conflicting logically slots if standby is
7977+
* running with wal_level lower than logical, because in that case, we
7978+
* would have either disallowed creation of logical slots or
7979+
* invalidated existing ones.
7980+
*/
7981+
if (InRecovery && InHotStandby &&
7982+
xlrec.wal_level < WAL_LEVEL_LOGICAL &&
7983+
wal_level >= WAL_LEVEL_LOGICAL)
7984+
InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_LEVEL,
7985+
0, InvalidOid,
7986+
InvalidTransactionId);
7987+
79737988
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
79747989
ControlFile->MaxConnections = xlrec.MaxConnections;
79757990
ControlFile->max_worker_processes = xlrec.max_worker_processes;

src/backend/catalog/system_views.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,8 @@ CREATE VIEW pg_stat_database_conflicts AS
10691069
pg_stat_get_db_conflict_lock(D.oid) AS confl_lock,
10701070
pg_stat_get_db_conflict_snapshot(D.oid) AS confl_snapshot,
10711071
pg_stat_get_db_conflict_bufferpin(D.oid) AS confl_bufferpin,
1072-
pg_stat_get_db_conflict_startup_deadlock(D.oid) AS confl_deadlock
1072+
pg_stat_get_db_conflict_startup_deadlock(D.oid) AS confl_deadlock,
1073+
pg_stat_get_db_conflict_logicalslot(D.oid) AS confl_active_logicalslot
10731074
FROM pg_database D;
10741075

10751076
CREATE VIEW pg_stat_user_functions AS

src/backend/replication/slot.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -1442,7 +1442,13 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause,
14421442
slotname, restart_lsn,
14431443
oldestLSN, snapshotConflictHorizon);
14441444

1445-
(void) kill(active_pid, SIGTERM);
1445+
if (MyBackendType == B_STARTUP)
1446+
(void) SendProcSignal(active_pid,
1447+
PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
1448+
InvalidBackendId);
1449+
else
1450+
(void) kill(active_pid, SIGTERM);
1451+
14461452
last_signaled_pid = active_pid;
14471453
}
14481454

src/backend/storage/ipc/procsignal.c

+3
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
673673
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
674674
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
675675

676+
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT))
677+
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT);
678+
676679
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
677680
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
678681

src/backend/storage/ipc/standby.c

+19-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "access/xlogutils.h"
2525
#include "miscadmin.h"
2626
#include "pgstat.h"
27+
#include "replication/slot.h"
2728
#include "storage/bufmgr.h"
2829
#include "storage/lmgr.h"
2930
#include "storage/proc.h"
@@ -466,6 +467,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
466467
*/
467468
void
468469
ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
470+
bool isCatalogRel,
469471
RelFileLocator locator)
470472
{
471473
VirtualTransactionId *backends;
@@ -491,6 +493,16 @@ ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
491493
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
492494
WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
493495
true);
496+
497+
/*
498+
* Note that WaitExceedsMaxStandbyDelay() is not taken into account here
499+
* (as opposed to ResolveRecoveryConflictWithVirtualXIDs() above). That
500+
* seems OK, given that this kind of conflict should not normally be
501+
* reached, e.g. due to using a physical replication slot.
502+
*/
503+
if (wal_level >= WAL_LEVEL_LOGICAL && isCatalogRel)
504+
InvalidateObsoleteReplicationSlots(RS_INVAL_HORIZON, 0, locator.dbOid,
505+
snapshotConflictHorizon);
494506
}
495507

496508
/*
@@ -499,6 +511,7 @@ ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
499511
*/
500512
void
501513
ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon,
514+
bool isCatalogRel,
502515
RelFileLocator locator)
503516
{
504517
/*
@@ -517,7 +530,9 @@ ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHor
517530
TransactionId truncated;
518531

519532
truncated = XidFromFullTransactionId(snapshotConflictHorizon);
520-
ResolveRecoveryConflictWithSnapshot(truncated, locator);
533+
ResolveRecoveryConflictWithSnapshot(truncated,
534+
isCatalogRel,
535+
locator);
521536
}
522537
}
523538

@@ -1478,6 +1493,9 @@ get_recovery_conflict_desc(ProcSignalReason reason)
14781493
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
14791494
reasonDesc = _("recovery conflict on snapshot");
14801495
break;
1496+
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
1497+
reasonDesc = _("recovery conflict on replication slot");
1498+
break;
14811499
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
14821500
reasonDesc = _("recovery conflict on buffer deadlock");
14831501
break;

src/backend/tcop/postgres.c

+9
Original file line numberDiff line numberDiff line change
@@ -2526,6 +2526,9 @@ errdetail_recovery_conflict(void)
25262526
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
25272527
errdetail("User query might have needed to see row versions that must be removed.");
25282528
break;
2529+
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
2530+
errdetail("User was using a logical slot that must be invalidated.");
2531+
break;
25292532
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
25302533
errdetail("User transaction caused buffer deadlock with recovery.");
25312534
break;
@@ -3143,6 +3146,12 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
31433146
InterruptPending = true;
31443147
break;
31453148

3149+
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
3150+
RecoveryConflictPending = true;
3151+
QueryCancelPending = true;
3152+
InterruptPending = true;
3153+
break;
3154+
31463155
default:
31473156
elog(FATAL, "unrecognized conflict mode: %d",
31483157
(int) reason);

src/backend/utils/activity/pgstat_database.c

+4
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ pgstat_report_recovery_conflict(int reason)
109109
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
110110
dbentry->conflict_bufferpin++;
111111
break;
112+
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
113+
dbentry->conflict_logicalslot++;
114+
break;
112115
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
113116
dbentry->conflict_startup_deadlock++;
114117
break;
@@ -387,6 +390,7 @@ pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
387390
PGSTAT_ACCUM_DBCOUNT(conflict_tablespace);
388391
PGSTAT_ACCUM_DBCOUNT(conflict_lock);
389392
PGSTAT_ACCUM_DBCOUNT(conflict_snapshot);
393+
PGSTAT_ACCUM_DBCOUNT(conflict_logicalslot);
390394
PGSTAT_ACCUM_DBCOUNT(conflict_bufferpin);
391395
PGSTAT_ACCUM_DBCOUNT(conflict_startup_deadlock);
392396

src/backend/utils/adt/pgstatfuncs.c

+3
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,8 @@ PG_STAT_GET_DBENTRY_INT64(xact_commit)
10711071
/* pg_stat_get_db_xact_rollback */
10721072
PG_STAT_GET_DBENTRY_INT64(xact_rollback)
10731073

1074+
/* pg_stat_get_db_conflict_logicalslot */
1075+
PG_STAT_GET_DBENTRY_INT64(conflict_logicalslot)
10741076

10751077
Datum
10761078
pg_stat_get_db_stat_reset_time(PG_FUNCTION_ARGS)
@@ -1104,6 +1106,7 @@ pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS)
11041106
result = (int64) (dbentry->conflict_tablespace +
11051107
dbentry->conflict_lock +
11061108
dbentry->conflict_snapshot +
1109+
dbentry->conflict_logicalslot +
11071110
dbentry->conflict_bufferpin +
11081111
dbentry->conflict_startup_deadlock);
11091112

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202304073
60+
#define CATALOG_VERSION_NO 202304074
6161

6262
#endif

src/include/catalog/pg_proc.dat

+5
Original file line numberDiff line numberDiff line change
@@ -5611,6 +5611,11 @@
56115611
proname => 'pg_stat_get_db_conflict_snapshot', provolatile => 's',
56125612
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
56135613
prosrc => 'pg_stat_get_db_conflict_snapshot' },
5614+
{ oid => '9901',
5615+
descr => 'statistics: recovery conflicts in database caused by logical replication slot',
5616+
proname => 'pg_stat_get_db_conflict_logicalslot', provolatile => 's',
5617+
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
5618+
prosrc => 'pg_stat_get_db_conflict_logicalslot' },
56145619
{ oid => '3068',
56155620
descr => 'statistics: recovery conflicts in database caused by shared buffer pin',
56165621
proname => 'pg_stat_get_db_conflict_bufferpin', provolatile => 's',

src/include/pgstat.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ typedef struct PgStat_TableXactStatus
235235
* ------------------------------------------------------------
236236
*/
237237

238-
#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAB
238+
#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAC
239239

240240
typedef struct PgStat_ArchiverStats
241241
{
@@ -332,6 +332,7 @@ typedef struct PgStat_StatDBEntry
332332
PgStat_Counter conflict_tablespace;
333333
PgStat_Counter conflict_lock;
334334
PgStat_Counter conflict_snapshot;
335+
PgStat_Counter conflict_logicalslot;
335336
PgStat_Counter conflict_bufferpin;
336337
PgStat_Counter conflict_startup_deadlock;
337338
PgStat_Counter temp_files;

src/include/storage/procsignal.h

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ typedef enum
4242
PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
4343
PROCSIG_RECOVERY_CONFLICT_LOCK,
4444
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
45+
PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
4546
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4647
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
4748

src/include/storage/standby.h

+2
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ extern void InitRecoveryTransactionEnvironment(void);
3030
extern void ShutdownRecoveryTransactionEnvironment(void);
3131

3232
extern void ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
33+
bool isCatalogRel,
3334
RelFileLocator locator);
3435
extern void ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon,
36+
bool isCatalogRel,
3537
RelFileLocator locator);
3638
extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
3739
extern void ResolveRecoveryConflictWithDatabase(Oid dbid);

src/test/regress/expected/rules.out

+2-1
Original file line numberDiff line numberDiff line change
@@ -1870,7 +1870,8 @@ pg_stat_database_conflicts| SELECT oid AS datid,
18701870
pg_stat_get_db_conflict_lock(oid) AS confl_lock,
18711871
pg_stat_get_db_conflict_snapshot(oid) AS confl_snapshot,
18721872
pg_stat_get_db_conflict_bufferpin(oid) AS confl_bufferpin,
1873-
pg_stat_get_db_conflict_startup_deadlock(oid) AS confl_deadlock
1873+
pg_stat_get_db_conflict_startup_deadlock(oid) AS confl_deadlock,
1874+
pg_stat_get_db_conflict_logicalslot(oid) AS confl_active_logicalslot
18741875
FROM pg_database d;
18751876
pg_stat_gssapi| SELECT pid,
18761877
gss_auth AS gss_authenticated,

0 commit comments

Comments
 (0)