Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3ed3683

Browse files
committed
Fix unfairness in all-cached parallel seq scan.
Commit b5a9b18 introduced block streaming infrastructure with a special fast path for all-cached scans, and commit b7b0f3f connected the infrastructure up to sequential scans. One of the fast path micro-optimizations had an unintended consequence: it interfered with parallel sequential scan's block range allocator (from commit 56788d2), which has its own ramp-up and ramp-down algorithm when handing out groups of pages to workers. A scan of an all-cached table could give extra blocks to one worker, when others had finished. In some plans (probably already very bad plans, such as the one reported by Alexander), the unfairness could be magnified. An internal buffer of 16 block numbers is removed, keeping just a single block buffer for technical reasons. Back-patch to 17. Reported-by: Alexander Lakhin <exclusion@gmail.com> Discussion: https://postgr.es/m/63a63690-dd92-c809-0b47-af05459e95d1%40gmail.com
1 parent 34226d4 commit 3ed3683

File tree

1 file changed

+23
-58
lines changed

1 file changed

+23
-58
lines changed

src/backend/storage/aio/read_stream.c

+23-58
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,10 @@ struct ReadStream
117117
bool advice_enabled;
118118

119119
/*
120-
* Small buffer of block numbers, useful for 'ungetting' to resolve flow
121-
* control problems when I/Os are split. Also useful for batch-loading
122-
* block numbers in the fast path.
120+
* One-block buffer to support 'ungetting' a block number, to resolve flow
121+
* control problems when I/Os are split.
123122
*/
124-
BlockNumber blocknums[16];
125-
int16 blocknums_count;
126-
int16 blocknums_next;
123+
BlockNumber buffered_blocknum;
127124

128125
/*
129126
* The callback that will tell us which block numbers to read, and an
@@ -167,23 +164,23 @@ get_per_buffer_data(ReadStream *stream, int16 buffer_index)
167164
}
168165

169166
/*
170-
* Ask the callback which block it would like us to read next, with a small
171-
* buffer in front to allow read_stream_unget_block() to work and to allow the
172-
* fast path to skip this function and work directly from the array.
167+
* Ask the callback which block it would like us to read next, with a one block
168+
* buffer in front to allow read_stream_unget_block() to work.
173169
*/
174170
static inline BlockNumber
175171
read_stream_get_block(ReadStream *stream, void *per_buffer_data)
176172
{
177-
if (stream->blocknums_next < stream->blocknums_count)
178-
return stream->blocknums[stream->blocknums_next++];
173+
BlockNumber blocknum;
179174

180-
/*
181-
* We only bother to fetch one at a time here (but see the fast path which
182-
* uses more).
183-
*/
184-
return stream->callback(stream,
185-
stream->callback_private_data,
186-
per_buffer_data);
175+
blocknum = stream->buffered_blocknum;
176+
if (blocknum != InvalidBlockNumber)
177+
stream->buffered_blocknum = InvalidBlockNumber;
178+
else
179+
blocknum = stream->callback(stream,
180+
stream->callback_private_data,
181+
per_buffer_data);
182+
183+
return blocknum;
187184
}
188185

189186
/*
@@ -193,42 +190,12 @@ read_stream_get_block(ReadStream *stream, void *per_buffer_data)
193190
static inline void
194191
read_stream_unget_block(ReadStream *stream, BlockNumber blocknum)
195192
{
196-
if (stream->blocknums_next == stream->blocknums_count)
197-
{
198-
/* Never initialized or entirely consumed. Re-initialize. */
199-
stream->blocknums[0] = blocknum;
200-
stream->blocknums_count = 1;
201-
stream->blocknums_next = 0;
202-
}
203-
else
204-
{
205-
/* Must be the last value return from blocknums array. */
206-
Assert(stream->blocknums_next > 0);
207-
stream->blocknums_next--;
208-
Assert(stream->blocknums[stream->blocknums_next] == blocknum);
209-
}
193+
/* We shouldn't ever unget more than one block. */
194+
Assert(stream->buffered_blocknum == InvalidBlockNumber);
195+
Assert(blocknum != InvalidBlockNumber);
196+
stream->buffered_blocknum = blocknum;
210197
}
211198

212-
#ifndef READ_STREAM_DISABLE_FAST_PATH
213-
static void
214-
read_stream_fill_blocknums(ReadStream *stream)
215-
{
216-
BlockNumber blocknum;
217-
int i = 0;
218-
219-
do
220-
{
221-
blocknum = stream->callback(stream,
222-
stream->callback_private_data,
223-
NULL);
224-
stream->blocknums[i++] = blocknum;
225-
} while (i < lengthof(stream->blocknums) &&
226-
blocknum != InvalidBlockNumber);
227-
stream->blocknums_count = i;
228-
stream->blocknums_next = 0;
229-
}
230-
#endif
231-
232199
static void
233200
read_stream_start_pending_read(ReadStream *stream, bool suppress_advice)
234201
{
@@ -531,6 +498,7 @@ read_stream_begin_relation(int flags,
531498
stream->queue_size = queue_size;
532499
stream->callback = callback;
533500
stream->callback_private_data = callback_private_data;
501+
stream->buffered_blocknum = InvalidBlockNumber;
534502

535503
/*
536504
* Skip the initial ramp-up phase if the caller says we're going to be
@@ -601,9 +569,7 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
601569
Assert(buffer != InvalidBuffer);
602570

603571
/* Choose the next block to pin. */
604-
if (unlikely(stream->blocknums_next == stream->blocknums_count))
605-
read_stream_fill_blocknums(stream);
606-
next_blocknum = stream->blocknums[stream->blocknums_next++];
572+
next_blocknum = read_stream_get_block(stream, NULL);
607573

608574
if (likely(next_blocknum != InvalidBlockNumber))
609575
{
@@ -779,9 +745,8 @@ read_stream_reset(ReadStream *stream)
779745
/* Stop looking ahead. */
780746
stream->distance = 0;
781747

782-
/* Forget buffered block numbers and fast path state. */
783-
stream->blocknums_next = 0;
784-
stream->blocknums_count = 0;
748+
/* Forget buffered block number and fast path state. */
749+
stream->buffered_blocknum = InvalidBlockNumber;
785750
stream->fast_path = false;
786751

787752
/* Unpin anything that wasn't consumed. */

0 commit comments

Comments
 (0)