Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ec42a1d

Browse files
committed
Fix BRIN summarization concurrent with extension
If a process is extending a table concurrently with some BRIN summarization process, it is possible for the latter to miss pages added by the former because the number of pages is computed ahead of time. Fix by determining a fresh relation size after inserting the placeholder tuple: any process that further extends the table concurrently will update the placeholder tuple, while previous pages will be processed by the heap scan. Reported-by: Tomas Vondra Reviewed-by: Tom Lane Author: Álvaro Herrera Discussion: https://postgr.es/m/083d996a-4a8a-0e13-800a-851dd09ad8cc@2ndquadrant.com Backpatch-to: 9.5
1 parent 7164991 commit ec42a1d

File tree

1 file changed

+61
-29
lines changed

1 file changed

+61
-29
lines changed

src/backend/access/brin/brin.c

+61-29
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
6767
BrinRevmap *revmap, BlockNumber pagesPerRange);
6868
static void terminate_brin_buildstate(BrinBuildState *state);
6969
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
70-
double *numSummarized, double *numExisting);
70+
bool include_partial, double *numSummarized, double *numExisting);
7171
static void form_and_insert_tuple(BrinBuildState *state);
7272
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
7373
BrinTuple *b);
@@ -791,7 +791,7 @@ brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
791791

792792
brin_vacuum_scan(info->index, info->strategy);
793793

794-
brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES,
794+
brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
795795
&stats->num_index_tuples, &stats->num_index_tuples);
796796

797797
heap_close(heapRel, AccessShareLock);
@@ -909,7 +909,7 @@ brin_summarize_range(PG_FUNCTION_ARGS)
909909
RelationGetRelationName(indexRel))));
910910

911911
/* OK, do it */
912-
brinsummarize(indexRel, heapRel, heapBlk, &numSummarized, NULL);
912+
brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
913913

914914
relation_close(indexRel, ShareUpdateExclusiveLock);
915915
relation_close(heapRel, ShareUpdateExclusiveLock);
@@ -1129,7 +1129,8 @@ terminate_brin_buildstate(BrinBuildState *state)
11291129
}
11301130

11311131
/*
1132-
* Summarize the given page range of the given index.
1132+
* On the given BRIN index, summarize the heap page range that corresponds
1133+
* to the heap block number given.
11331134
*
11341135
* This routine can run in parallel with insertions into the heap. To avoid
11351136
* missing those values from the summary tuple, we first insert a placeholder
@@ -1139,6 +1140,12 @@ terminate_brin_buildstate(BrinBuildState *state)
11391140
* update of the index value happens in a loop, so that if somebody updates
11401141
* the placeholder tuple after we read it, we detect the case and try again.
11411142
* This ensures that the concurrently inserted tuples are not lost.
1143+
*
1144+
* A further corner case is this routine being asked to summarize the partial
1145+
* range at the end of the table. heapNumBlocks is the (possibly outdated)
1146+
* table size; if we notice that the requested range lies beyond that size,
1147+
* we re-compute the table size after inserting the placeholder tuple, to
1148+
* avoid missing pages that were appended recently.
11421149
*/
11431150
static void
11441151
summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
@@ -1159,6 +1166,33 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
11591166
state->bs_rmAccess, &phbuf,
11601167
heapBlk, phtup, phsz);
11611168

1169+
/*
1170+
* Compute range end. We hold ShareUpdateExclusive lock on table, so it
1171+
* cannot shrink concurrently (but it can grow).
1172+
*/
1173+
Assert(heapBlk % state->bs_pagesPerRange == 0);
1174+
if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1175+
{
1176+
/*
1177+
* If we're asked to scan what we believe to be the final range on the
1178+
* table (i.e. a range that might be partial) we need to recompute our
1179+
* idea of what the latest page is after inserting the placeholder
1180+
* tuple. Anyone that grows the table later will update the
1181+
* placeholder tuple, so it doesn't matter that we won't scan these
1182+
* pages ourselves. Careful: the table might have been extended
1183+
* beyond the current range, so clamp our result.
1184+
*
1185+
* Fortunately, this should occur infrequently.
1186+
*/
1187+
scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1188+
state->bs_pagesPerRange);
1189+
}
1190+
else
1191+
{
1192+
/* Easy case: range is known to be complete */
1193+
scanNumBlks = state->bs_pagesPerRange;
1194+
}
1195+
11621196
/*
11631197
* Execute the partial heap scan covering the heap blocks in the specified
11641198
* page range, summarizing the heap tuples in it. This scan stops just
@@ -1169,8 +1203,6 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
11691203
* by transactions that are still in progress, among other corner cases.
11701204
*/
11711205
state->bs_currRangeStart = heapBlk;
1172-
scanNumBlks = heapBlk + state->bs_pagesPerRange <= heapNumBlks ?
1173-
state->bs_pagesPerRange : heapNumBlks - heapBlk;
11741206
IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
11751207
heapBlk, scanNumBlks,
11761208
brinbuildCallback, (void *) state);
@@ -1234,63 +1266,63 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
12341266
* Summarize page ranges that are not already summarized. If pageRange is
12351267
* BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
12361268
* page range containing the given heap page number is scanned.
1269+
* If include_partial is true, then the partial range at the end of the table
1270+
* is summarized, otherwise not.
12371271
*
12381272
* For each new index tuple inserted, *numSummarized (if not NULL) is
12391273
* incremented; for each existing tuple, *numExisting (if not NULL) is
12401274
* incremented.
12411275
*/
12421276
static void
12431277
brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
1244-
double *numSummarized, double *numExisting)
1278+
bool include_partial, double *numSummarized, double *numExisting)
12451279
{
12461280
BrinRevmap *revmap;
12471281
BrinBuildState *state = NULL;
12481282
IndexInfo *indexInfo = NULL;
12491283
BlockNumber heapNumBlocks;
1250-
BlockNumber heapBlk;
12511284
BlockNumber pagesPerRange;
12521285
Buffer buf;
12531286
BlockNumber startBlk;
1254-
BlockNumber endBlk;
1255-
1256-
/* determine range of pages to process; nothing to do for an empty table */
1257-
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1258-
if (heapNumBlocks == 0)
1259-
return;
12601287

12611288
revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
12621289

1290+
/* determine range of pages to process */
1291+
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
12631292
if (pageRange == BRIN_ALL_BLOCKRANGES)
1264-
{
12651293
startBlk = 0;
1266-
endBlk = heapNumBlocks;
1267-
}
12681294
else
1269-
{
12701295
startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1296+
if (startBlk >= heapNumBlocks)
1297+
{
12711298
/* Nothing to do if start point is beyond end of table */
1272-
if (startBlk > heapNumBlocks)
1273-
{
1274-
brinRevmapTerminate(revmap);
1275-
return;
1276-
}
1277-
endBlk = startBlk + pagesPerRange;
1278-
if (endBlk > heapNumBlocks)
1279-
endBlk = heapNumBlocks;
1299+
brinRevmapTerminate(revmap);
1300+
return;
12801301
}
12811302

12821303
/*
12831304
* Scan the revmap to find unsummarized items.
12841305
*/
12851306
buf = InvalidBuffer;
1286-
for (heapBlk = startBlk; heapBlk < endBlk; heapBlk += pagesPerRange)
1307+
for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
12871308
{
12881309
BrinTuple *tup;
12891310
OffsetNumber off;
12901311

1312+
/*
1313+
* Unless requested to summarize even a partial range, go away now if
1314+
* we think the next range is partial. Caller would pass true when
1315+
* it is typically run once bulk data loading is done
1316+
* (brin_summarize_new_values), and false when it is typically the
1317+
* result of arbitrarily-scheduled maintenance command (vacuuming).
1318+
*/
1319+
if (!include_partial &&
1320+
(startBlk + pagesPerRange > heapNumBlocks))
1321+
break;
1322+
12911323
CHECK_FOR_INTERRUPTS();
12921324

1293-
tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
1325+
tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
12941326
BUFFER_LOCK_SHARE, NULL);
12951327
if (tup == NULL)
12961328
{
@@ -1303,7 +1335,7 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
13031335
pagesPerRange);
13041336
indexInfo = BuildIndexInfo(index);
13051337
}
1306-
summarize_range(indexInfo, state, heapRel, heapBlk, heapNumBlocks);
1338+
summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
13071339

13081340
/* and re-initialize state for the next range */
13091341
brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);

0 commit comments

Comments
 (0)