Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e85662d

Browse files
committed
Fix false reports in pg_visibility
Currently, pg_visibility computes its xid horizon using the GetOldestNonRemovableTransactionId(). The problem is that this horizon can sometimes go backward. That can lead to reporting false errors. In order to fix that, this commit implements a new function GetStrictOldestNonRemovableTransactionId(). This function computes the xid horizon, which would be guaranteed to be newer or equal to any xid horizon computed before. We have to do the following to achieve this. 1. Ignore processes xmin's, because they consider connection to other databases that were ignored before. 2. Ignore KnownAssignedXids, because they are not database-aware. At the same time, the primary could compute its horizons database-aware. 3. Ignore walsender xmin, because it could go backward if some replication connections don't use replication slots. As a result, we're using only currently running xids to compute the horizon. Surely these would significantly sacrifice accuracy. But we have to do so to avoid reporting false errors. Inspired by earlier patch by Daniel Shelepanov and the following discussion with Robert Haas and Tom Lane. Discussion: https://postgr.es/m/1649062270.289865713%40f403.i.mail.ru Reviewed-by: Alexander Lakhin, Dmitry Koval
1 parent cc6e64a commit e85662d

File tree

6 files changed

+129
-6
lines changed

6 files changed

+129
-6
lines changed

contrib/pg_visibility/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ DATA = pg_visibility--1.1.sql pg_visibility--1.1--1.2.sql \
1111
PGFILEDESC = "pg_visibility - page visibility information"
1212

1313
REGRESS = pg_visibility
14+
TAP_TESTS = 1
1415

1516
ifdef USE_PGXS
1617
PG_CONFIG = pg_config

contrib/pg_visibility/meson.build

+4
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,9 @@ tests += {
3232
'sql': [
3333
'pg_visibility',
3434
],
35+
'tap': {
36+
'tests': [
37+
't/001_concurrent_transaction.pl',
38+
],
3539
},
3640
}

contrib/pg_visibility/pg_visibility.c

+63-5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "funcapi.h"
2020
#include "miscadmin.h"
2121
#include "storage/bufmgr.h"
22+
#include "storage/proc.h"
2223
#include "storage/procarray.h"
2324
#include "storage/smgr.h"
2425
#include "utils/rel.h"
@@ -532,6 +533,63 @@ collect_visibility_data(Oid relid, bool include_pd)
532533
return info;
533534
}
534535

536+
/*
537+
* The "strict" version of GetOldestNonRemovableTransactionId(). The
538+
* pg_visibility check can tolerate false positives (don't report some of the
539+
* errors), but can't tolerate false negatives (report false errors). Normally,
540+
* horizons move forwards, but there are cases when it could move backward
541+
* (see comment for ComputeXidHorizons()).
542+
*
543+
* This is why we have to implement our own function for xid horizon, which
544+
* would be guaranteed to be newer or equal to any xid horizon computed before.
545+
* We have to do the following to achieve this.
546+
*
547+
* 1. Ignore processes xmin's, because they consider connection to other
548+
* databases that were ignored before.
549+
* 2. Ignore KnownAssignedXids, because they are not database-aware. At the
550+
* same time, the primary could compute its horizons database-aware.
551+
* 3. Ignore walsender xmin, because it could go backward if some replication
552+
* connections don't use replication slots.
553+
*
554+
* As a result, we're using only currently running xids to compute the horizon.
555+
* Surely these would significantly sacrifice accuracy. But we have to do so
556+
* to avoid reporting false errors.
557+
*/
558+
static TransactionId
559+
GetStrictOldestNonRemovableTransactionId(Relation rel)
560+
{
561+
RunningTransactions runningTransactions;
562+
563+
if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
564+
{
565+
/* Shared relation: take into account all running xids */
566+
runningTransactions = GetRunningTransactionData();
567+
LWLockRelease(ProcArrayLock);
568+
LWLockRelease(XidGenLock);
569+
return runningTransactions->oldestRunningXid;
570+
}
571+
else if (!RELATION_IS_LOCAL(rel))
572+
{
573+
/*
574+
* Normal relation: take into account xids running within the current
575+
* database
576+
*/
577+
runningTransactions = GetRunningTransactionData();
578+
LWLockRelease(ProcArrayLock);
579+
LWLockRelease(XidGenLock);
580+
return runningTransactions->oldestDatabaseRunningXid;
581+
}
582+
else
583+
{
584+
/*
585+
* For temporary relations, ComputeXidHorizons() uses only
586+
* TransamVariables->latestCompletedXid and MyProc->xid. These two
587+
* shouldn't go backwards. So we're fine with this horizon.
588+
*/
589+
return GetOldestNonRemovableTransactionId(rel);
590+
}
591+
}
592+
535593
/*
536594
* Returns a list of items whose visibility map information does not match
537595
* the status of the tuples on the page.
@@ -563,7 +621,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
563621
check_relation_relkind(rel);
564622

565623
if (all_visible)
566-
OldestXmin = GetOldestNonRemovableTransactionId(rel);
624+
OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
567625

568626
nblocks = RelationGetNumberOfBlocks(rel);
569627

@@ -671,11 +729,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
671729
* retake ProcArrayLock here while we're holding the buffer
672730
* exclusively locked, but it should be safe against
673731
* deadlocks, because surely
674-
* GetOldestNonRemovableTransactionId() should never take a
675-
* buffer lock. And this shouldn't happen often, so it's worth
676-
* being careful so as to avoid false positives.
732+
* GetStrictOldestNonRemovableTransactionId() should never
733+
* take a buffer lock. And this shouldn't happen often, so
734+
* it's worth being careful so as to avoid false positives.
677735
*/
678-
RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel);
736+
RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
679737

680738
if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
681739
record_corrupt_item(items, &tuple.t_self);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
3+
4+
# Check that a concurrent transaction doesn't cause false negatives in
5+
# pg_check_visible() function
6+
use strict;
7+
use warnings FATAL => 'all';
8+
use PostgreSQL::Test::Cluster;
9+
use PostgreSQL::Test::Utils;
10+
use Test::More;
11+
12+
13+
my $node = PostgreSQL::Test::Cluster->new('main');
14+
15+
$node->init;
16+
$node->start;
17+
18+
# Setup another database
19+
$node->safe_psql("postgres", "CREATE DATABASE other_database;\n");
20+
my $bsession = $node->background_psql('other_database');
21+
22+
# Run a concurrent transaction
23+
$bsession->query_safe(
24+
qq[
25+
BEGIN;
26+
SELECT txid_current();
27+
]);
28+
29+
# Create a sample table and run vacuum
30+
$node->safe_psql("postgres",
31+
"CREATE EXTENSION pg_visibility;\n"
32+
. "CREATE TABLE vacuum_test AS SELECT 42 i;\n"
33+
. "VACUUM (disable_page_skipping) vacuum_test;");
34+
35+
# Run pg_check_visible()
36+
my $result = $node->safe_psql("postgres",
37+
"SELECT * FROM pg_check_visible('vacuum_test');");
38+
39+
# There should be no false negatives
40+
ok($result eq "", "pg_check_visible() detects no errors");
41+
42+
# Shutdown
43+
$bsession->query_safe("COMMIT;");
44+
$bsession->quit;
45+
$node->stop;
46+
47+
done_testing();

src/backend/storage/ipc/procarray.c

+12-1
Original file line numberDiff line numberDiff line change
@@ -2688,6 +2688,7 @@ GetRunningTransactionData(void)
26882688
RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
26892689
TransactionId latestCompletedXid;
26902690
TransactionId oldestRunningXid;
2691+
TransactionId oldestDatabaseRunningXid;
26912692
TransactionId *xids;
26922693
int index;
26932694
int count;
@@ -2732,14 +2733,16 @@ GetRunningTransactionData(void)
27322733

27332734
latestCompletedXid =
27342735
XidFromFullTransactionId(TransamVariables->latestCompletedXid);
2735-
oldestRunningXid =
2736+
oldestDatabaseRunningXid = oldestRunningXid =
27362737
XidFromFullTransactionId(TransamVariables->nextXid);
27372738

27382739
/*
27392740
* Spin over procArray collecting all xids
27402741
*/
27412742
for (index = 0; index < arrayP->numProcs; index++)
27422743
{
2744+
int pgprocno = arrayP->pgprocnos[index];
2745+
PGPROC *proc = &allProcs[pgprocno];
27432746
TransactionId xid;
27442747

27452748
/* Fetch xid just once - see GetNewTransactionId */
@@ -2760,6 +2763,13 @@ GetRunningTransactionData(void)
27602763
if (TransactionIdPrecedes(xid, oldestRunningXid))
27612764
oldestRunningXid = xid;
27622765

2766+
/*
2767+
* Also, update the oldest running xid within the current database.
2768+
*/
2769+
if (proc->databaseId == MyDatabaseId &&
2770+
TransactionIdPrecedes(xid, oldestRunningXid))
2771+
oldestDatabaseRunningXid = xid;
2772+
27632773
if (ProcGlobal->subxidStates[index].overflowed)
27642774
suboverflowed = true;
27652775

@@ -2826,6 +2836,7 @@ GetRunningTransactionData(void)
28262836
CurrentRunningXacts->subxid_overflow = suboverflowed;
28272837
CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid);
28282838
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2839+
CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid;
28292840
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
28302841

28312842
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));

src/include/storage/standby.h

+2
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ typedef struct RunningTransactionsData
8282
bool subxid_overflow; /* snapshot overflowed, subxids missing */
8383
TransactionId nextXid; /* xid from TransamVariables->nextXid */
8484
TransactionId oldestRunningXid; /* *not* oldestXmin */
85+
TransactionId oldestDatabaseRunningXid; /* same as above, but within the
86+
* current database */
8587
TransactionId latestCompletedXid; /* so we can set xmax */
8688

8789
TransactionId *xids; /* array of (sub)xids still running */

0 commit comments

Comments
 (0)