25
25
#include "utils/rel.h"
26
26
27
27
28
- static void _bt_drop_lock_and_maybe_pin (IndexScanDesc scan , BTScanPos sp );
28
+ static void _bt_drop_lock_and_maybe_pin (IndexScanDesc scan , BTScanPos sp , BTScanOpaque so );
29
29
static Buffer _bt_moveright (Relation rel , Relation heaprel , BTScanInsert key ,
30
30
Buffer buf , bool forupdate , BTStack stack ,
31
31
int access );
@@ -54,6 +54,12 @@ static Buffer _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno,
54
54
static bool _bt_endpoint (IndexScanDesc scan , ScanDirection dir );
55
55
56
56
57
+ /*
58
+ * Execute index-only scan visibility checks at the index level?
59
+ * Enabled by default.
60
+ */
61
+ #define DEBUG_IOS_VISCHECKS_ENABLED true
62
+
57
63
/*
58
64
* _bt_drop_lock_and_maybe_pin()
59
65
*
@@ -64,13 +70,109 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
64
70
* See nbtree/README section on making concurrent TID recycling safe.
65
71
*/
66
72
static void
67
- _bt_drop_lock_and_maybe_pin (IndexScanDesc scan , BTScanPos sp )
73
+ _bt_drop_lock_and_maybe_pin (IndexScanDesc scan , BTScanPos sp , BTScanOpaque so )
68
74
{
69
75
_bt_unlockbuf (scan -> indexRelation , sp -> buf );
70
76
77
+ /*
78
+ * Do some visibility checks if this is an index-only scan, allowing us to
79
+ * drop the pin on this page before we have returned all tuples from this
80
+ * IOS to the executor.
81
+ */
82
+ if (scan -> xs_want_itup && DEBUG_IOS_VISCHECKS_ENABLED )
83
+ {
84
+ int initOffset = sp -> firstItem ;
85
+ int ntids = 1 + sp -> lastItem - initOffset ;
86
+
87
+ if (ntids > 0 )
88
+ {
89
+ TM_IndexVisibilityCheckOp visCheck ;
90
+ Relation heaprel = scan -> heapRelation ;
91
+ TM_VisCheck * check ;
92
+ BTScanPosItem * item ;
93
+
94
+ visCheck .checkntids = ntids ;
95
+
96
+ if (so -> vischeckcap == 0 )
97
+ {
98
+ so -> vischecksbuf = palloc_array (TM_VisCheck , ntids );
99
+ so -> vischeckcap = ntids ;
100
+ }
101
+ else if (so -> vischeckcap < visCheck .checkntids )
102
+ {
103
+ so -> vischecksbuf = repalloc_array (so -> vischecksbuf ,
104
+ TM_VisCheck , ntids );
105
+ so -> vischeckcap = ntids ;
106
+ }
107
+
108
+ visCheck .checktids = so -> vischecksbuf ;
109
+ visCheck .vmbuf = & so -> vmbuf ;
110
+
111
+ check = so -> vischecksbuf ;
112
+ item = & so -> currPos .items [initOffset ];
113
+
114
+ for (int i = 0 ; i < visCheck .checkntids ; i ++ )
115
+ {
116
+ Assert (item -> visrecheck == TMVC_Unchecked );
117
+ Assert (ItemPointerIsValid (& item -> heapTid ));
118
+
119
+ PopulateTMVischeck (check , & item -> heapTid , initOffset + i );
120
+
121
+ item ++ ;
122
+ check ++ ;
123
+ }
124
+
125
+ table_index_vischeck_tuples (heaprel , & visCheck );
126
+ check = so -> vischecksbuf ;
127
+
128
+ for (int i = 0 ; i < visCheck .checkntids ; i ++ )
129
+ {
130
+ item = & so -> currPos .items [check -> idxoffnum ];
131
+ /* We must have a valid visibility check result */
132
+ Assert (check -> vischeckresult != TMVC_Unchecked );
133
+ /* The offset number should still indicate the right item */
134
+ Assert (check -> tidblkno == ItemPointerGetBlockNumberNoCheck (& item -> heapTid ));
135
+ Assert (check -> tidoffset == ItemPointerGetOffsetNumberNoCheck (& item -> heapTid ));
136
+
137
+ /* Store the visibility check result */
138
+ item -> visrecheck = check -> vischeckresult ;
139
+ check ++ ;
140
+ }
141
+ }
142
+ }
143
+
144
+ /*
145
+ * We may need to hold a pin on the page for one of several reasons:
146
+ *
147
+ * 1.) To safely apply kill_prior_tuple, we need to know that the tuples
148
+ * were not removed from the page (and subsequently re-inserted).
149
+ * A page's LSN can also allow us to detect modifications on the page,
150
+ * which then allows us to bail out of setting the hint bits, but that
151
+ * requires the index to be WAL-logged; so unless the index is WAL-logged
152
+ * we must hold a pin on the page to apply the kill_prior_tuple
153
+ * optimization.
154
+ *
155
+ * 2.) Non-MVCC scans need pin coupling to make sure the scan covers
156
+ * exactly the whole index keyspace.
157
+ *
158
+ * 3.) For Index-Only Scans, the scan needs to check the visibility of the
159
+ * table tuple while the relevant index tuple is guaranteed to still be
160
+ * contained in the index (so that vacuum hasn't yet marked any pages that
161
+ * could contain the value as ALL_VISIBLE after reclaiming a dead tuple
162
+ * that might be buffered in the scan). A pin must therefore be held
163
+ * at least while the basic visibility of the page's tuples is being
164
+ * checked.
165
+ *
166
+ * For cases 1 and 2, we must hold the pin after we've finished processing
167
+ * the index page.
168
+ *
169
+ * For case 3, we can release the pin if we first do the visibility checks
170
+ * of to-be-returned tuples using table_index_vischeck_tuples, which we've
171
+ * done just above.
172
+ */
71
173
if (IsMVCCSnapshot (scan -> xs_snapshot ) &&
72
174
RelationNeedsWAL (scan -> indexRelation ) &&
73
- !scan -> xs_want_itup )
175
+ ( !scan -> xs_want_itup || DEBUG_IOS_VISCHECKS_ENABLED ) )
74
176
{
75
177
ReleaseBuffer (sp -> buf );
76
178
sp -> buf = InvalidBuffer ;
@@ -2007,6 +2109,8 @@ _bt_saveitem(BTScanOpaque so, int itemIndex,
2007
2109
2008
2110
currItem -> heapTid = itup -> t_tid ;
2009
2111
currItem -> indexOffset = offnum ;
2112
+ currItem -> visrecheck = TMVC_Unchecked ;
2113
+
2010
2114
if (so -> currTuples )
2011
2115
{
2012
2116
Size itupsz = IndexTupleSize (itup );
@@ -2037,6 +2141,8 @@ _bt_setuppostingitems(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
2037
2141
2038
2142
currItem -> heapTid = * heapTid ;
2039
2143
currItem -> indexOffset = offnum ;
2144
+ currItem -> visrecheck = TMVC_Unchecked ;
2145
+
2040
2146
if (so -> currTuples )
2041
2147
{
2042
2148
/* Save base IndexTuple (truncate posting list) */
@@ -2073,6 +2179,7 @@ _bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
2073
2179
2074
2180
currItem -> heapTid = * heapTid ;
2075
2181
currItem -> indexOffset = offnum ;
2182
+ currItem -> visrecheck = TMVC_Unchecked ;
2076
2183
2077
2184
/*
2078
2185
* Have index-only scans return the same base IndexTuple for every TID
@@ -2098,6 +2205,14 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
2098
2205
2099
2206
/* Return next item, per amgettuple contract */
2100
2207
scan -> xs_heaptid = currItem -> heapTid ;
2208
+
2209
+ if (scan -> xs_want_itup )
2210
+ {
2211
+ scan -> xs_visrecheck = currItem -> visrecheck ;
2212
+ Assert (currItem -> visrecheck != TMVC_Unchecked ||
2213
+ BufferIsValid (so -> currPos .buf ));
2214
+ }
2215
+
2101
2216
if (so -> currTuples )
2102
2217
scan -> xs_itup = (IndexTuple ) (so -> currTuples + currItem -> tupleOffset );
2103
2218
}
@@ -2256,7 +2371,7 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
2256
2371
* so->currPos.buf in preparation for btgettuple returning tuples.
2257
2372
*/
2258
2373
Assert (BTScanPosIsPinned (so -> currPos ));
2259
- _bt_drop_lock_and_maybe_pin (scan , & so -> currPos );
2374
+ _bt_drop_lock_and_maybe_pin (scan , & so -> currPos , so );
2260
2375
return true;
2261
2376
}
2262
2377
@@ -2413,7 +2528,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
2413
2528
*/
2414
2529
Assert (so -> currPos .currPage == blkno );
2415
2530
Assert (BTScanPosIsPinned (so -> currPos ));
2416
- _bt_drop_lock_and_maybe_pin (scan , & so -> currPos );
2531
+ _bt_drop_lock_and_maybe_pin (scan , & so -> currPos , so );
2417
2532
2418
2533
return true;
2419
2534
}
0 commit comments