@@ -157,6 +157,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
157
157
bool rootdescend , bool checkunique );
158
158
static BtreeLevel bt_check_level_from_leftmost (BtreeCheckState * state ,
159
159
BtreeLevel level );
160
+ static bool bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
161
+ BlockNumber start ,
162
+ BTPageOpaque start_opaque );
160
163
static void bt_recheck_sibling_links (BtreeCheckState * state ,
161
164
BlockNumber btpo_prev_from_target ,
162
165
BlockNumber leftcurrent );
@@ -826,7 +829,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
826
829
*/
827
830
if (state -> readonly )
828
831
{
829
- if (!P_LEFTMOST ( opaque ))
832
+ if (!bt_leftmost_ignoring_half_dead ( state , current , opaque ))
830
833
ereport (ERROR ,
831
834
(errcode (ERRCODE_INDEX_CORRUPTED ),
832
835
errmsg ("block %u is not leftmost in index \"%s\"" ,
@@ -880,8 +883,16 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
880
883
*/
881
884
}
882
885
883
- /* Sibling links should be in mutual agreement */
884
- if (opaque -> btpo_prev != leftcurrent )
886
+ /*
887
+ * Sibling links should be in mutual agreement. There arises
888
+ * leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
889
+ * of the parent's low-key downlink is half-dead. (A half-dead page
890
+ * has no downlink from its parent.) Under heavyweight locking, the
891
+ * last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
892
+ * Without heavyweight locking, validation of the P_NONE case remains
893
+ * unimplemented.
894
+ */
895
+ if (opaque -> btpo_prev != leftcurrent && leftcurrent != P_NONE )
885
896
bt_recheck_sibling_links (state , opaque -> btpo_prev , leftcurrent );
886
897
887
898
/* Check level */
@@ -1117,6 +1128,66 @@ bt_entry_unique_check(BtreeCheckState *state, IndexTuple itup,
1117
1128
}
1118
1129
}
1119
1130
1131
+ /*
1132
+ * Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
1133
+ * half-dead, sibling-linked pages to the left. If a half-dead page appears
1134
+ * under state->readonly, the database exited recovery between the first-stage
1135
+ * and second-stage WAL records of a deletion.
1136
+ */
1137
+ static bool
1138
+ bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
1139
+ BlockNumber start ,
1140
+ BTPageOpaque start_opaque )
1141
+ {
1142
+ BlockNumber reached = start_opaque -> btpo_prev ,
1143
+ reached_from = start ;
1144
+ bool all_half_dead = true;
1145
+
1146
+ /*
1147
+ * To handle the !readonly case, we'd need to accept BTP_DELETED pages and
1148
+ * potentially observe nbtree/README "Page deletion and backwards scans".
1149
+ */
1150
+ Assert (state -> readonly );
1151
+
1152
+ while (reached != P_NONE && all_half_dead )
1153
+ {
1154
+ Page page = palloc_btree_page (state , reached );
1155
+ BTPageOpaque reached_opaque = BTPageGetOpaque (page );
1156
+
1157
+ CHECK_FOR_INTERRUPTS ();
1158
+
1159
+ /*
1160
+ * Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
1161
+ * writes that side-links will continue to point to the siblings.
1162
+ * Check btpo_next for that property.
1163
+ */
1164
+ all_half_dead = P_ISHALFDEAD (reached_opaque ) &&
1165
+ reached != start &&
1166
+ reached != reached_from &&
1167
+ reached_opaque -> btpo_next == reached_from ;
1168
+ if (all_half_dead )
1169
+ {
1170
+ XLogRecPtr pagelsn = PageGetLSN (page );
1171
+
1172
+ /* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
1173
+ ereport (DEBUG1 ,
1174
+ (errcode (ERRCODE_NO_DATA ),
1175
+ errmsg_internal ("harmless interrupted page deletion detected in index \"%s\"" ,
1176
+ RelationGetRelationName (state -> rel )),
1177
+ errdetail_internal ("Block=%u right block=%u page lsn=%X/%X." ,
1178
+ reached , reached_from ,
1179
+ LSN_FORMAT_ARGS (pagelsn ))));
1180
+
1181
+ reached_from = reached ;
1182
+ reached = reached_opaque -> btpo_prev ;
1183
+ }
1184
+
1185
+ pfree (page );
1186
+ }
1187
+
1188
+ return all_half_dead ;
1189
+ }
1190
+
1120
1191
/*
1121
1192
* Raise an error when target page's left link does not point back to the
1122
1193
* previous target page, called leftcurrent here. The leftcurrent page's
@@ -1157,6 +1228,9 @@ bt_recheck_sibling_links(BtreeCheckState *state,
1157
1228
BlockNumber btpo_prev_from_target ,
1158
1229
BlockNumber leftcurrent )
1159
1230
{
1231
+ /* passing metapage to BTPageGetOpaque() would give irrelevant findings */
1232
+ Assert (leftcurrent != P_NONE );
1233
+
1160
1234
if (!state -> readonly )
1161
1235
{
1162
1236
Buffer lbuf ;
@@ -2235,7 +2309,8 @@ bt_child_highkey_check(BtreeCheckState *state,
2235
2309
opaque = BTPageGetOpaque (page );
2236
2310
2237
2311
/* The first page we visit at the level should be leftmost */
2238
- if (first && !BlockNumberIsValid (state -> prevrightlink ) && !P_LEFTMOST (opaque ))
2312
+ if (first && !BlockNumberIsValid (state -> prevrightlink ) &&
2313
+ !bt_leftmost_ignoring_half_dead (state , blkno , opaque ))
2239
2314
ereport (ERROR ,
2240
2315
(errcode (ERRCODE_INDEX_CORRUPTED ),
2241
2316
errmsg ("the first child of leftmost target page is not leftmost of its level in index \"%s\"" ,
0 commit comments