10
10
* the passed-in buffer. The caller must hold not only a pin, but at least
11
11
* a shared buffer content lock on the buffer containing the tuple.
12
12
*
13
- * NOTE: must check TransactionIdIsInProgress (which looks in PGXACT array)
13
+ * NOTE: When using a non-MVCC snapshot, we must check
14
+ * TransactionIdIsInProgress (which looks in the PGXACT array)
14
15
* before TransactionIdDidCommit/TransactionIdDidAbort (which look in
15
16
* pg_clog). Otherwise we have a race condition: we might decide that a
16
17
* just-committed transaction crashed, because none of the tests succeed.
17
18
* xact.c is careful to record commit/abort in pg_clog before it unsets
18
- * MyPgXact->xid in PGXACT array. That fixes that problem, but it also
19
- * means there is a window where TransactionIdIsInProgress and
19
+ * MyPgXact->xid in the PGXACT array. That fixes that problem, but it
20
+ * also means there is a window where TransactionIdIsInProgress and
20
21
* TransactionIdDidCommit will both return true. If we check only
21
22
* TransactionIdDidCommit, we could consider a tuple committed when a
22
23
* later GetSnapshotData call will still think the originating transaction
26
27
* subtransactions of our own main transaction and so there can't be any
27
28
* race condition.
28
29
*
30
+ * When using an MVCC snapshot, we rely on XidInMVCCSnapshot rather than
31
+ * TransactionIdIsInProgress, but the logic is otherwise the same: do not
32
+ * check pg_clog until after deciding that the xact is no longer in progress.
33
+ *
34
+ *
29
35
* Summary of visibility functions:
30
36
*
31
37
* HeapTupleSatisfiesMVCC()
@@ -936,9 +942,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
936
942
* transactions started after the snapshot was taken
937
943
* changes made by the current command
938
944
*
939
- * (Notice, however, that the tuple status hint bits will be updated on the
940
- * basis of the true state of the transaction, even if we then pretend we
941
- * can't see it.)
945
+ * Notice that here, we will not update the tuple status hint bits if the
946
+ * inserting/deleting transaction is still running according to our snapshot,
947
+ * even if in reality it's committed or aborted by now. This is intentional.
948
+ * Checking the true transaction state would require access to high-traffic
949
+ * shared data structures, creating contention we'd rather do without, and it
950
+ * would not change the result of our visibility check anyway. The hint bits
951
+ * will be updated by the first visitor that has a snapshot new enough to see
952
+ * the inserting/deleting transaction as done. In the meantime, the cost of
953
+ * leaving the hint bits unset is basically that each HeapTupleSatisfiesMVCC
954
+ * call will need to run TransactionIdIsCurrentTransactionId in addition to
955
+ * XidInMVCCSnapshot (but it would have to do the latter anyway). In the old
956
+ * coding where we tried to set the hint bits as soon as possible, we instead
957
+ * did TransactionIdIsInProgress in each call --- to no avail, as long as the
958
+ * inserting/deleting transaction was still running --- which was more cycles
959
+ * and more contention on the PGXACT array.
942
960
*/
943
961
bool
944
962
HeapTupleSatisfiesMVCC (HeapTuple htup , Snapshot snapshot ,
@@ -961,7 +979,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
961
979
962
980
if (TransactionIdIsCurrentTransactionId (xvac ))
963
981
return false;
964
- if (!TransactionIdIsInProgress (xvac ))
982
+ if (!XidInMVCCSnapshot (xvac , snapshot ))
965
983
{
966
984
if (TransactionIdDidCommit (xvac ))
967
985
{
@@ -980,7 +998,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
980
998
981
999
if (!TransactionIdIsCurrentTransactionId (xvac ))
982
1000
{
983
- if (TransactionIdIsInProgress (xvac ))
1001
+ if (XidInMVCCSnapshot (xvac , snapshot ))
984
1002
return false;
985
1003
if (TransactionIdDidCommit (xvac ))
986
1004
SetHintBits (tuple , buffer , HEAP_XMIN_COMMITTED ,
@@ -1035,7 +1053,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
1035
1053
else
1036
1054
return false; /* deleted before scan started */
1037
1055
}
1038
- else if (TransactionIdIsInProgress (HeapTupleHeaderGetRawXmin (tuple )))
1056
+ else if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
1039
1057
return false;
1040
1058
else if (TransactionIdDidCommit (HeapTupleHeaderGetRawXmin (tuple )))
1041
1059
SetHintBits (tuple , buffer , HEAP_XMIN_COMMITTED ,
@@ -1048,14 +1066,15 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
1048
1066
return false;
1049
1067
}
1050
1068
}
1069
+ else
1070
+ {
1071
+ /* xmin is committed, but maybe not according to our snapshot */
1072
+ if (!HeapTupleHeaderXminFrozen (tuple ) &&
1073
+ XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
1074
+ return false; /* treat as still in progress */
1075
+ }
1051
1076
1052
- /*
1053
- * By here, the inserting transaction has committed - have to check
1054
- * when...
1055
- */
1056
- if (!HeapTupleHeaderXminFrozen (tuple )
1057
- && XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
1058
- return false; /* treat as still in progress */
1077
+ /* by here, the inserting transaction has committed */
1059
1078
1060
1079
if (tuple -> t_infomask & HEAP_XMAX_INVALID ) /* xid invalid or aborted */
1061
1080
return true;
@@ -1082,15 +1101,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
1082
1101
else
1083
1102
return false; /* deleted before scan started */
1084
1103
}
1085
- if (TransactionIdIsInProgress (xmax ))
1104
+ if (XidInMVCCSnapshot (xmax , snapshot ))
1086
1105
return true;
1087
1106
if (TransactionIdDidCommit (xmax ))
1088
- {
1089
- /* updating transaction committed, but when? */
1090
- if (XidInMVCCSnapshot (xmax , snapshot ))
1091
- return true; /* treat as still in progress */
1092
- return false;
1093
- }
1107
+ return false; /* updating transaction committed */
1094
1108
/* it must have aborted or crashed */
1095
1109
return true;
1096
1110
}
@@ -1105,7 +1119,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
1105
1119
return false; /* deleted before scan started */
1106
1120
}
1107
1121
1108
- if (TransactionIdIsInProgress (HeapTupleHeaderGetRawXmax (tuple )))
1122
+ if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
1109
1123
return true;
1110
1124
1111
1125
if (!TransactionIdDidCommit (HeapTupleHeaderGetRawXmax (tuple )))
@@ -1120,12 +1134,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
1120
1134
SetHintBits (tuple , buffer , HEAP_XMAX_COMMITTED ,
1121
1135
HeapTupleHeaderGetRawXmax (tuple ));
1122
1136
}
1137
+ else
1138
+ {
1139
+ /* xmax is committed, but maybe not according to our snapshot */
1140
+ if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
1141
+ return true; /* treat as still in progress */
1142
+ }
1123
1143
1124
- /*
1125
- * OK, the deleting transaction committed too ... but when?
1126
- */
1127
- if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
1128
- return true; /* treat as still in progress */
1144
+ /* xmax transaction committed */
1129
1145
1130
1146
return false;
1131
1147
}
@@ -1383,14 +1399,15 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
1383
1399
/*
1384
1400
* HeapTupleIsSurelyDead
1385
1401
*
1386
- * Determine whether a tuple is surely dead. We sometimes use this
1387
- * in lieu of HeapTupleSatisifesVacuum when the tuple has just been
1388
- * tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that
1389
- * can be set should already be set. We assume that if no hint bits
1390
- * either for xmin or xmax, the transaction is still running. This is
1391
- * therefore faster than HeapTupleSatisfiesVacuum, because we don't
1392
- * consult CLOG (and also because we don't need to give an exact answer,
1393
- * just whether or not the tuple is surely dead).
1402
+ * Cheaply determine whether a tuple is surely dead to all onlookers.
1403
+ * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
1404
+ * tuple has just been tested by another visibility routine (usually
1405
+ * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
1406
+ * should already be set. We assume that if no hint bits are set, the xmin
1407
+ * or xmax transaction is still running. This is therefore faster than
1408
+ * HeapTupleSatisfiesVacuum, because we consult neither PGXACT nor CLOG.
1409
+ * It's okay to return FALSE when in doubt, but we must return TRUE only
1410
+ * if the tuple is removable.
1394
1411
*/
1395
1412
bool
1396
1413
HeapTupleIsSurelyDead (HeapTuple htup , TransactionId OldestXmin )
@@ -1443,8 +1460,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
1443
1460
*
1444
1461
* Note: GetSnapshotData never stores either top xid or subxids of our own
1445
1462
* backend into a snapshot, so these xids will not be reported as "running"
1446
- * by this function. This is OK for current uses, because we actually only
1447
- * apply this for known-committed XIDs.
1463
+ * by this function. This is OK for current uses, because we always check
1464
+ * TransactionIdIsCurrentTransactionId first, except for known-committed
1465
+ * XIDs, which could not be ours anyway.
1448
1466
*/
1449
1467
static bool
1450
1468
XidInMVCCSnapshot (TransactionId xid , Snapshot snapshot )
@@ -1481,7 +1499,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1481
1499
*/
1482
1500
if (!snapshot -> suboverflowed )
1483
1501
{
1484
- /* full data, so search subxip */
1502
+ /* we have full data, so search subxip */
1485
1503
int32 j ;
1486
1504
1487
1505
for (j = 0 ; j < snapshot -> subxcnt ; j ++ )
@@ -1494,7 +1512,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1494
1512
}
1495
1513
else
1496
1514
{
1497
- /* overflowed, so convert xid to top-level */
1515
+ /*
1516
+ * Snapshot overflowed, so convert xid to top-level. This is safe
1517
+ * because we eliminated too-old XIDs above.
1518
+ */
1498
1519
xid = SubTransGetTopmostTransaction (xid );
1499
1520
1500
1521
/*
@@ -1525,7 +1546,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1525
1546
*/
1526
1547
if (snapshot -> suboverflowed )
1527
1548
{
1528
- /* overflowed, so convert xid to top-level */
1549
+ /*
1550
+ * Snapshot overflowed, so convert xid to top-level. This is safe
1551
+ * because we eliminated too-old XIDs above.
1552
+ */
1529
1553
xid = SubTransGetTopmostTransaction (xid );
1530
1554
1531
1555
/*
0 commit comments