@@ -204,6 +204,12 @@ typedef struct LVRelState
     int64       live_tuples;    /* # live tuples remaining */
     int64       recently_dead_tuples;   /* # dead, but not yet removable */
     int64       missed_dead_tuples; /* # removable, but not removed */
+
+    /* State maintained by heap_vac_scan_next_block() */
+    BlockNumber current_block;  /* last block returned */
+    BlockNumber next_unskippable_block; /* next unskippable block */
+    bool        next_unskippable_allvis;    /* its visibility status */
+    Buffer      next_unskippable_vmbuffer;  /* buffer containing its VM bit */
 } LVRelState;
 
 /* Struct for saving and restoring vacuum error information. */
@@ -217,10 +223,9 @@ typedef struct LVSavedErrInfo
 
 /* non-export function prototypes */
 static void lazy_scan_heap(LVRelState *vacrel);
-static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
-                                  BlockNumber next_block,
-                                  bool *next_unskippable_allvis,
-                                  bool *skipping_current_range);
+static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
+                                     bool *all_visible_according_to_vm);
+static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
                                    BlockNumber blkno, Page page,
                                    bool sharelock, Buffer vmbuffer);
@@ -803,12 +808,11 @@ lazy_scan_heap(LVRelState *vacrel)
 {
     BlockNumber rel_pages = vacrel->rel_pages,
                 blkno,
-                next_unskippable_block,
                 next_fsm_block_to_vacuum = 0;
+    bool        all_visible_according_to_vm;
+
     VacDeadItems *dead_items = vacrel->dead_items;
     Buffer      vmbuffer = InvalidBuffer;
-    bool        next_unskippable_allvis,
-                skipping_current_range;
     const int   initprog_index[] = {
         PROGRESS_VACUUM_PHASE,
         PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
@@ -822,44 +826,19 @@ lazy_scan_heap(LVRelState *vacrel)
     initprog_val[2] = dead_items->max_items;
     pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
 
-    /* Set up an initial range of skippable blocks using the visibility map */
-    next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
-                                            &next_unskippable_allvis,
-                                            &skipping_current_range);
-    for (blkno = 0; blkno < rel_pages; blkno++)
+    /* Initialize for the first heap_vac_scan_next_block() call */
+    vacrel->current_block = InvalidBlockNumber;
+    vacrel->next_unskippable_block = InvalidBlockNumber;
+    vacrel->next_unskippable_allvis = false;
+    vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+
+    while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
     {
         Buffer      buf;
         Page        page;
-        bool        all_visible_according_to_vm;
         bool        has_lpdead_items;
         bool        got_cleanup_lock = false;
 
-        if (blkno == next_unskippable_block)
-        {
-            /*
-             * Can't skip this page safely.  Must scan the page.  But
-             * determine the next skippable range after the page first.
-             */
-            all_visible_according_to_vm = next_unskippable_allvis;
-            next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
-                                                    blkno + 1,
-                                                    &next_unskippable_allvis,
-                                                    &skipping_current_range);
-
-            Assert(next_unskippable_block >= blkno + 1);
-        }
-        else
-        {
-            /* Last page always scanned (may need to set nonempty_pages) */
-            Assert(blkno < rel_pages - 1);
-
-            if (skipping_current_range)
-                continue;
-
-            /* Current range is too small to skip -- just scan the page */
-            all_visible_according_to_vm = true;
-        }
-
         vacrel->scanned_pages++;
 
         /* Report as block scanned, update error traceback information */
@@ -1077,47 +1056,146 @@ lazy_scan_heap(LVRelState *vacrel)
 }
 
 /*
- * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
+ * heap_vac_scan_next_block() -- get next block for vacuum to process
  *
- * lazy_scan_heap() calls here every time it needs to set up a new range of
- * blocks to skip via the visibility map.  Caller passes the next block in
- * line.  We return a next_unskippable_block for this range.  When there are
- * no skippable blocks we just return caller's next_block.  The all-visible
- * status of the returned block is set in *next_unskippable_allvis for caller,
- * too.  Block usually won't be all-visible (since it's unskippable), but it
- * can be during aggressive VACUUMs (as well as in certain edge cases).
+ * lazy_scan_heap() calls here every time it needs to get the next block to
+ * prune and vacuum.  The function uses the visibility map, vacuum options,
+ * and various thresholds to skip blocks which do not need to be processed and
+ * sets blkno to the next block to process.
  *
- * Sets *skipping_current_range to indicate if caller should skip this range.
- * Costs and benefits drive our decision.  Very small ranges won't be skipped.
+ * The block number and visibility status of the next block to process are set
+ * in *blkno and *all_visible_according_to_vm.  The return value is false if
+ * there are no further blocks to process.
+ *
+ * vacrel is an in/out parameter here.  Vacuum options and information about
+ * the relation are read.  vacrel->skippedallvis is set if we skip a block
+ * that's all-visible but not all-frozen, to ensure that we don't update
+ * relfrozenxid in that case.  vacrel also holds information about the next
+ * unskippable block, as bookkeeping for this function.
+ */
+static bool
+heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
+                         bool *all_visible_according_to_vm)
+{
+    BlockNumber next_block;
+
+    /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
+    next_block = vacrel->current_block + 1;
+
+    /* Have we reached the end of the relation? */
+    if (next_block >= vacrel->rel_pages)
+    {
+        if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
+        {
+            ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
+            vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+        }
+        *blkno = vacrel->rel_pages;
+        return false;
+    }
+
+    /*
+     * We must be in one of the three following states:
+     */
+    if (next_block > vacrel->next_unskippable_block ||
+        vacrel->next_unskippable_block == InvalidBlockNumber)
+    {
+        /*
+         * 1. We have just processed an unskippable block (or we're at the
+         * beginning of the scan).  Find the next unskippable block using the
+         * visibility map.
+         */
+        bool        skipsallvis;
+
+        find_next_unskippable_block(vacrel, &skipsallvis);
+
+        /*
+         * We now know the next block that we must process.  It can be the
+         * next block after the one we just processed, or something further
+         * ahead.  If it's further ahead, we can jump to it, but we choose to
+         * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
+         * pages.  Since we're reading sequentially, the OS should be doing
+         * readahead for us, so there's no gain in skipping a page now and
+         * then.  Skipping such a range might even discourage sequential
+         * detection.
+         *
+         * This test also enables more frequent relfrozenxid advancement
+         * during non-aggressive VACUUMs.  If the range has any all-visible
+         * pages then skipping makes updating relfrozenxid unsafe, which is a
+         * real downside.
+         */
+        if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
+        {
+            next_block = vacrel->next_unskippable_block;
+            if (skipsallvis)
+                vacrel->skippedallvis = true;
+        }
+    }
+
+    /* Now we must be in one of the two remaining states: */
+    if (next_block < vacrel->next_unskippable_block)
+    {
+        /*
+         * 2. We are processing a range of blocks that we could have skipped
+         * but chose not to.  We know that they are all-visible in the VM,
+         * otherwise they would've been unskippable.
+         */
+        *blkno = vacrel->current_block = next_block;
+        *all_visible_according_to_vm = true;
+        return true;
+    }
+    else
+    {
+        /*
+         * 3. We reached the next unskippable block.  Process it.  On next
+         * iteration, we will be back in state 1.
+         */
+        Assert(next_block == vacrel->next_unskippable_block);
+
+        *blkno = vacrel->current_block = next_block;
+        *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
+        return true;
+    }
+}
+
+/*
+ * Find the next unskippable block in a vacuum scan using the visibility map.
+ * The next unskippable block and its visibility information is updated in
+ * vacrel.
  *
  * Note: our opinion of which blocks can be skipped can go stale immediately.
  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
  * was concurrently cleared, though.  All that matters is that caller scan all
  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
- * older XIDs/MXIDs.  The vacrel->skippedallvis flag will be set here when the
- * choice to skip such a range is actually made, making everything safe.)
+ * older XIDs/MXIDs.  The *skippedallvis flag will be set here when the choice
+ * to skip such a range is actually made, making everything safe.)
  */
-static BlockNumber
-lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
-               bool *next_unskippable_allvis, bool *skipping_current_range)
+static void
+find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 {
-    BlockNumber rel_pages = vacrel->rel_pages,
-                next_unskippable_block = next_block,
-                nskippable_blocks = 0;
-    bool        skipsallvis = false;
+    BlockNumber rel_pages = vacrel->rel_pages;
+    BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
+    Buffer      next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
+    bool        next_unskippable_allvis;
+
+    *skipsallvis = false;
 
-    *next_unskippable_allvis = true;
-    while (next_unskippable_block < rel_pages)
+    for (;;)
     {
         uint8       mapbits = visibilitymap_get_status(vacrel->rel,
                                                        next_unskippable_block,
-                                                       vmbuffer);
+                                                       &next_unskippable_vmbuffer);
 
-        if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
+        next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
+
+        /*
+         * A block is unskippable if it is not all visible according to the
+         * visibility map.
+         */
+        if (!next_unskippable_allvis)
         {
             Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
-            *next_unskippable_allvis = false;
             break;
         }
 
@@ -1152,34 +1230,17 @@ lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
              * All-visible block is safe to skip in non-aggressive case.  But
              * remember that the final range contains such a block for later.
              */
-            skipsallvis = true;
+            *skipsallvis = true;
         }
 
         vacuum_delay_point();
         next_unskippable_block++;
-        nskippable_blocks++;
-    }
-
-    /*
-     * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
-     * pages.  Since we're reading sequentially, the OS should be doing
-     * readahead for us, so there's no gain in skipping a page now and then.
-     * Skipping such a range might even discourage sequential detection.
-     *
-     * This test also enables more frequent relfrozenxid advancement during
-     * non-aggressive VACUUMs.  If the range has any all-visible pages then
-     * skipping makes updating relfrozenxid unsafe, which is a real downside.
-     */
-    if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
-        *skipping_current_range = false;
-    else
-    {
-        *skipping_current_range = true;
-        if (skipsallvis)
-            vacrel->skippedallvis = true;
     }
 
-    return next_unskippable_block;
+    /* write the local variables back to vacrel */
+    vacrel->next_unskippable_block = next_unskippable_block;
+    vacrel->next_unskippable_allvis = next_unskippable_allvis;
+    vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
 }
 
 /*
@@ -1752,8 +1813,8 @@ lazy_scan_prune(LVRelState *vacrel,
 
     /*
      * Handle setting visibility map bit based on information from the VM (as
-     * of last lazy_scan_skip() call), and from all_visible and all_frozen
-     * variables
+     * of last heap_vac_scan_next_block() call), and from all_visible and
+     * all_frozen variables
      */
     if (!all_visible_according_to_vm && all_visible)
     {
@@ -1788,8 +1849,8 @@ lazy_scan_prune(LVRelState *vacrel,
     /*
      * As of PostgreSQL 9.2, the visibility map bit should never be set if the
      * page-level bit is clear.  However, it's possible that the bit got
-     * cleared after lazy_scan_skip() was called, so we must recheck with
-     * buffer lock before concluding that the VM is corrupt.
+     * cleared after heap_vac_scan_next_block() was called, so we must recheck
+     * with buffer lock before concluding that the VM is corrupt.
      */
     else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
              visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
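
Note: heap_vac_scan_next_block() starts from vacrel->current_block, which lazy_scan_heap() initializes to InvalidBlockNumber, and the code comments that it "relies on InvalidBlockNumber + 1 overflowing to 0 on first call". Below is a minimal standalone sketch of that wraparound; the typedef and macro are assumed stand-ins mirroring PostgreSQL's definitions, re-declared here only so the example compiles on its own.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins for PostgreSQL's BlockNumber / InvalidBlockNumber. */
typedef uint32_t BlockNumber;
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)

int
main(void)
{
    BlockNumber current_block = InvalidBlockNumber;

    /* Unsigned arithmetic wraps, so the block after the sentinel is 0. */
    BlockNumber next_block = current_block + 1;

    printf("%" PRIu32 "\n", next_block);    /* prints 0 */
    return 0;
}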
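The skip decision itself keeps its old shape: a run of all-visible blocks is only jumped over when it is at least SKIP_PAGES_THRESHOLD pages long; shorter runs are read sequentially so OS readahead stays effective. A rough, self-contained sketch of that comparison follows; choose_next_block() is a hypothetical helper and the threshold value of 32 is an assumption for illustration, not taken from this diff.

#include <stdio.h>

#define SKIP_PAGES_THRESHOLD 32 /* assumed value for illustration */

/*
 * Given the block just processed and the next unskippable block found via
 * the visibility map, decide which block to read next: jump ahead only when
 * the all-visible run in between is long enough to be worth skipping.
 */
static unsigned
choose_next_block(unsigned current_block, unsigned next_unskippable_block)
{
    unsigned    next_block = current_block + 1;

    if (next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
        return next_unskippable_block;  /* long run: skip over it */
    return next_block;                  /* short run: keep reading sequentially */
}

int
main(void)
{
    printf("%u\n", choose_next_block(10, 20));  /* 11: run of 9 is too short to skip */
    printf("%u\n", choose_next_block(10, 100)); /* 100: run of 89 is skipped */
    return 0;
}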