Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 37a8565

Browse files
committed
Fix BRIN summarization concurrent with extension
If a process is extending a table concurrently with some BRIN summarization process, it is possible for the latter to miss pages added by the former because the number of pages is computed ahead of time. Fix by determining a fresh relation size after inserting the placeholder tuple: any process that further extends the table concurrently will update the placeholder tuple, while previous pages will be processed by the heap scan. Reported-by: Tomas Vondra Reviewed-by: Tom Lane Author: Álvaro Herrera Discussion: https://postgr.es/m/083d996a-4a8a-0e13-800a-851dd09ad8cc@2ndquadrant.com Backpatch-to: 9.5
1 parent 0f6bd53 commit 37a8565

File tree

1 file changed

+61
-29
lines changed

1 file changed

+61
-29
lines changed

src/backend/access/brin/brin.c

Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
6767
BrinRevmap *revmap, BlockNumber pagesPerRange);
6868
static void terminate_brin_buildstate(BrinBuildState *state);
6969
static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
70-
double *numSummarized, double *numExisting);
70+
bool include_partial, double *numSummarized, double *numExisting);
7171
static void form_and_insert_tuple(BrinBuildState *state);
7272
static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
7373
BrinTuple *b);
@@ -789,7 +789,7 @@ brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
789789

790790
brin_vacuum_scan(info->index, info->strategy);
791791

792-
brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES,
792+
brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
793793
&stats->num_index_tuples, &stats->num_index_tuples);
794794

795795
heap_close(heapRel, AccessShareLock);
@@ -907,7 +907,7 @@ brin_summarize_range(PG_FUNCTION_ARGS)
907907
RelationGetRelationName(indexRel))));
908908

909909
/* OK, do it */
910-
brinsummarize(indexRel, heapRel, heapBlk, &numSummarized, NULL);
910+
brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
911911

912912
relation_close(indexRel, ShareUpdateExclusiveLock);
913913
relation_close(heapRel, ShareUpdateExclusiveLock);
@@ -1127,7 +1127,8 @@ terminate_brin_buildstate(BrinBuildState *state)
11271127
}
11281128

11291129
/*
1130-
* Summarize the given page range of the given index.
1130+
* On the given BRIN index, summarize the heap page range that corresponds
1131+
* to the heap block number given.
11311132
*
11321133
* This routine can run in parallel with insertions into the heap. To avoid
11331134
* missing those values from the summary tuple, we first insert a placeholder
@@ -1137,6 +1138,12 @@ terminate_brin_buildstate(BrinBuildState *state)
11371138
* update of the index value happens in a loop, so that if somebody updates
11381139
* the placeholder tuple after we read it, we detect the case and try again.
11391140
* This ensures that the concurrently inserted tuples are not lost.
1141+
*
1142+
* A further corner case is this routine being asked to summarize the partial
1143+
* range at the end of the table. heapNumBlocks is the (possibly outdated)
1144+
* table size; if we notice that the requested range lies beyond that size,
1145+
* we re-compute the table size after inserting the placeholder tuple, to
1146+
* avoid missing pages that were appended recently.
11401147
*/
11411148
static void
11421149
summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
@@ -1157,6 +1164,33 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
11571164
state->bs_rmAccess, &phbuf,
11581165
heapBlk, phtup, phsz);
11591166

1167+
/*
1168+
* Compute range end. We hold ShareUpdateExclusive lock on table, so it
1169+
* cannot shrink concurrently (but it can grow).
1170+
*/
1171+
Assert(heapBlk % state->bs_pagesPerRange == 0);
1172+
if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
1173+
{
1174+
/*
1175+
* If we're asked to scan what we believe to be the final range on the
1176+
* table (i.e. a range that might be partial) we need to recompute our
1177+
* idea of what the latest page is after inserting the placeholder
1178+
* tuple. Anyone that grows the table later will update the
1179+
* placeholder tuple, so it doesn't matter that we won't scan these
1180+
* pages ourselves. Careful: the table might have been extended
1181+
* beyond the current range, so clamp our result.
1182+
*
1183+
* Fortunately, this should occur infrequently.
1184+
*/
1185+
scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
1186+
state->bs_pagesPerRange);
1187+
}
1188+
else
1189+
{
1190+
/* Easy case: range is known to be complete */
1191+
scanNumBlks = state->bs_pagesPerRange;
1192+
}
1193+
11601194
/*
11611195
* Execute the partial heap scan covering the heap blocks in the specified
11621196
* page range, summarizing the heap tuples in it. This scan stops just
@@ -1167,8 +1201,6 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
11671201
* by transactions that are still in progress, among other corner cases.
11681202
*/
11691203
state->bs_currRangeStart = heapBlk;
1170-
scanNumBlks = heapBlk + state->bs_pagesPerRange <= heapNumBlks ?
1171-
state->bs_pagesPerRange : heapNumBlks - heapBlk;
11721204
IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
11731205
heapBlk, scanNumBlks,
11741206
brinbuildCallback, (void *) state);
@@ -1232,63 +1264,63 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
12321264
* Summarize page ranges that are not already summarized. If pageRange is
12331265
* BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
12341266
* page range containing the given heap page number is scanned.
1267+
* If include_partial is true, then the partial range at the end of the table
1268+
* is summarized, otherwise not.
12351269
*
12361270
* For each new index tuple inserted, *numSummarized (if not NULL) is
12371271
* incremented; for each existing tuple, *numExisting (if not NULL) is
12381272
* incremented.
12391273
*/
12401274
static void
12411275
brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
1242-
double *numSummarized, double *numExisting)
1276+
bool include_partial, double *numSummarized, double *numExisting)
12431277
{
12441278
BrinRevmap *revmap;
12451279
BrinBuildState *state = NULL;
12461280
IndexInfo *indexInfo = NULL;
12471281
BlockNumber heapNumBlocks;
1248-
BlockNumber heapBlk;
12491282
BlockNumber pagesPerRange;
12501283
Buffer buf;
12511284
BlockNumber startBlk;
1252-
BlockNumber endBlk;
1253-
1254-
/* determine range of pages to process; nothing to do for an empty table */
1255-
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
1256-
if (heapNumBlocks == 0)
1257-
return;
12581285

12591286
revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
12601287

1288+
/* determine range of pages to process */
1289+
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
12611290
if (pageRange == BRIN_ALL_BLOCKRANGES)
1262-
{
12631291
startBlk = 0;
1264-
endBlk = heapNumBlocks;
1265-
}
12661292
else
1267-
{
12681293
startBlk = (pageRange / pagesPerRange) * pagesPerRange;
1294+
if (startBlk >= heapNumBlocks)
1295+
{
12691296
/* Nothing to do if start point is beyond end of table */
1270-
if (startBlk > heapNumBlocks)
1271-
{
1272-
brinRevmapTerminate(revmap);
1273-
return;
1274-
}
1275-
endBlk = startBlk + pagesPerRange;
1276-
if (endBlk > heapNumBlocks)
1277-
endBlk = heapNumBlocks;
1297+
brinRevmapTerminate(revmap);
1298+
return;
12781299
}
12791300

12801301
/*
12811302
* Scan the revmap to find unsummarized items.
12821303
*/
12831304
buf = InvalidBuffer;
1284-
for (heapBlk = startBlk; heapBlk < endBlk; heapBlk += pagesPerRange)
1305+
for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
12851306
{
12861307
BrinTuple *tup;
12871308
OffsetNumber off;
12881309

1310+
/*
1311+
* Unless requested to summarize even a partial range, go away now if
1312+
* we think the next range is partial. Caller would pass true when
1313+
* it is typically run once bulk data loading is done
1314+
* (brin_summarize_new_values), and false when it is typically the
1315+
* result of arbitrarily-scheduled maintenance command (vacuuming).
1316+
*/
1317+
if (!include_partial &&
1318+
(startBlk + pagesPerRange > heapNumBlocks))
1319+
break;
1320+
12891321
CHECK_FOR_INTERRUPTS();
12901322

1291-
tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
1323+
tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
12921324
BUFFER_LOCK_SHARE, NULL);
12931325
if (tup == NULL)
12941326
{
@@ -1301,7 +1333,7 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
13011333
pagesPerRange);
13021334
indexInfo = BuildIndexInfo(index);
13031335
}
1304-
summarize_range(indexInfo, state, heapRel, heapBlk, heapNumBlocks);
1336+
summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
13051337

13061338
/* and re-initialize state for the next range */
13071339
brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);

0 commit comments

Comments
 (0)