Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e0c91a7

Browse files
committed
Improve some O(N^2) behavior in window function evaluation.
Repositioning the tuplestore seek pointer in window_gettupleslot() turns out to be a very significant expense when the window frame is sizable and the frame end can move. To fix, introduce a tuplestore function for skipping an arbitrary number of tuples in one call, parallel to the one we introduced for tuplesort objects in commit 8d65da1. This reduces the cost of window_gettupleslot() to O(1) if the tuplestore has not spilled to disk. As in the previous commit, I didn't try to do any real optimization of tuplestore_skiptuples for the case where the tuplestore has spilled to disk. There is probably no practical way to get the cost to less than O(N) anyway, but perhaps someone can think of something later. Also fix PersistHoldablePortal() to make use of this API now that we have it. Based on a suggestion by Dean Rasheed, though this turns out not to look much like his patch.
1 parent 46a60ab commit e0c91a7

File tree

4 files changed

+129
-32
lines changed

4 files changed

+129
-32
lines changed

src/backend/commands/portalcmds.c

+11-16
Original file line numberDiff line numberDiff line change
@@ -389,38 +389,33 @@ PersistHoldablePortal(Portal portal)
389389
FreeQueryDesc(queryDesc);
390390

391391
/*
392-
* Set the position in the result set: ideally, this could be
393-
* implemented by just skipping straight to the tuple # that we need
394-
* to be at, but the tuplestore API doesn't support that. So we start
395-
* at the beginning of the tuplestore and iterate through it until we
396-
* reach where we need to be. FIXME someday? (Fortunately, the
397-
* typical case is that we're supposed to be at or near the start of
398-
* the result set, so this isn't as bad as it sounds.)
392+
* Set the position in the result set.
399393
*/
400394
MemoryContextSwitchTo(portal->holdContext);
401395

402396
if (portal->atEnd)
403397
{
404-
/* we can handle this case even if posOverflow */
405-
while (tuplestore_advance(portal->holdStore, true))
398+
/*
399+
* We can handle this case even if posOverflow: just force the
400+
* tuplestore forward to its end. The size of the skip request
401+
* here is arbitrary.
402+
*/
403+
while (tuplestore_skiptuples(portal->holdStore, 1000000, true))
406404
/* continue */ ;
407405
}
408406
else
409407
{
410-
long store_pos;
411-
412408
if (portal->posOverflow) /* oops, cannot trust portalPos */
413409
ereport(ERROR,
414410
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
415411
errmsg("could not reposition held cursor")));
416412

417413
tuplestore_rescan(portal->holdStore);
418414

419-
for (store_pos = 0; store_pos < portal->portalPos; store_pos++)
420-
{
421-
if (!tuplestore_advance(portal->holdStore, true))
422-
elog(ERROR, "unexpected end of tuple stream");
423-
}
415+
if (!tuplestore_skiptuples(portal->holdStore,
416+
portal->portalPos,
417+
true))
418+
elog(ERROR, "unexpected end of tuple stream");
424419
}
425420
}
426421
PG_CATCH();

src/backend/executor/nodeWindowAgg.c

+43-14
Original file line numberDiff line numberDiff line change
@@ -2371,31 +2371,56 @@ window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
23712371
tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
23722372

23732373
/*
2374-
* There's no API to refetch the tuple at the current position. We have to
2375-
* move one tuple forward, and then one backward. (We don't do it the
2376-
* other way because we might try to fetch the row before our mark, which
2377-
* isn't allowed.) XXX this case could stand to be optimized.
2374+
* Advance or rewind until we are within one tuple of the one we want.
23782375
*/
2379-
if (winobj->seekpos == pos)
2376+
if (winobj->seekpos < pos - 1)
23802377
{
2378+
if (!tuplestore_skiptuples(winstate->buffer,
2379+
pos - 1 - winobj->seekpos,
2380+
true))
2381+
elog(ERROR, "unexpected end of tuplestore");
2382+
winobj->seekpos = pos - 1;
2383+
}
2384+
else if (winobj->seekpos > pos + 1)
2385+
{
2386+
if (!tuplestore_skiptuples(winstate->buffer,
2387+
winobj->seekpos - (pos + 1),
2388+
false))
2389+
elog(ERROR, "unexpected end of tuplestore");
2390+
winobj->seekpos = pos + 1;
2391+
}
2392+
else if (winobj->seekpos == pos)
2393+
{
2394+
/*
2395+
* There's no API to refetch the tuple at the current position. We
2396+
* have to move one tuple forward, and then one backward. (We don't
2397+
* do it the other way because we might try to fetch the row before
2398+
* our mark, which isn't allowed.) XXX this case could stand to be
2399+
* optimized.
2400+
*/
23812401
tuplestore_advance(winstate->buffer, true);
23822402
winobj->seekpos++;
23832403
}
23842404

2385-
while (winobj->seekpos > pos)
2405+
/*
2406+
* Now we should be on the tuple immediately before or after the one we
2407+
* want, so just fetch forwards or backwards as appropriate.
2408+
*/
2409+
if (winobj->seekpos > pos)
23862410
{
23872411
if (!tuplestore_gettupleslot(winstate->buffer, false, true, slot))
23882412
elog(ERROR, "unexpected end of tuplestore");
23892413
winobj->seekpos--;
23902414
}
2391-
2392-
while (winobj->seekpos < pos)
2415+
else
23932416
{
23942417
if (!tuplestore_gettupleslot(winstate->buffer, true, true, slot))
23952418
elog(ERROR, "unexpected end of tuplestore");
23962419
winobj->seekpos++;
23972420
}
23982421

2422+
Assert(winobj->seekpos == pos);
2423+
23992424
MemoryContextSwitchTo(oldcontext);
24002425

24012426
return true;
@@ -2478,16 +2503,20 @@ WinSetMarkPosition(WindowObject winobj, int64 markpos)
24782503
if (markpos < winobj->markpos)
24792504
elog(ERROR, "cannot move WindowObject's mark position backward");
24802505
tuplestore_select_read_pointer(winstate->buffer, winobj->markptr);
2481-
while (markpos > winobj->markpos)
2506+
if (markpos > winobj->markpos)
24822507
{
2483-
tuplestore_advance(winstate->buffer, true);
2484-
winobj->markpos++;
2508+
tuplestore_skiptuples(winstate->buffer,
2509+
markpos - winobj->markpos,
2510+
true);
2511+
winobj->markpos = markpos;
24852512
}
24862513
tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
2487-
while (markpos > winobj->seekpos)
2514+
if (markpos > winobj->seekpos)
24882515
{
2489-
tuplestore_advance(winstate->buffer, true);
2490-
winobj->seekpos++;
2516+
tuplestore_skiptuples(winstate->buffer,
2517+
markpos - winobj->seekpos,
2518+
true);
2519+
winobj->seekpos = markpos;
24912520
}
24922521
}
24932522

src/backend/utils/sort/tuplestore.c

+71-2
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
#include "access/htup_details.h"
5858
#include "commands/tablespace.h"
5959
#include "executor/executor.h"
60+
#include "miscadmin.h"
6061
#include "storage/buffile.h"
6162
#include "utils/memutils.h"
6263
#include "utils/resowner.h"
@@ -1065,8 +1066,7 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
10651066
* tuplestore_advance - exported function to adjust position without fetching
10661067
*
10671068
* We could optimize this case to avoid palloc/pfree overhead, but for the
1068-
* moment it doesn't seem worthwhile. (XXX this probably needs to be
1069-
* reconsidered given the needs of window functions.)
1069+
* moment it doesn't seem worthwhile.
10701070
*/
10711071
bool
10721072
tuplestore_advance(Tuplestorestate *state, bool forward)
@@ -1088,6 +1088,75 @@ tuplestore_advance(Tuplestorestate *state, bool forward)
10881088
}
10891089
}
10901090

1091+
/*
1092+
* Advance over N tuples in either forward or back direction,
1093+
* without returning any data. N<=0 is a no-op.
1094+
* Returns TRUE if successful, FALSE if ran out of tuples.
1095+
*/
1096+
bool
1097+
tuplestore_skiptuples(Tuplestorestate *state, int64 ntuples, bool forward)
1098+
{
1099+
TSReadPointer *readptr = &state->readptrs[state->activeptr];
1100+
1101+
Assert(forward || (readptr->eflags & EXEC_FLAG_BACKWARD));
1102+
1103+
if (ntuples <= 0)
1104+
return true;
1105+
1106+
switch (state->status)
1107+
{
1108+
case TSS_INMEM:
1109+
if (forward)
1110+
{
1111+
if (readptr->eof_reached)
1112+
return false;
1113+
if (state->memtupcount - readptr->current >= ntuples)
1114+
{
1115+
readptr->current += ntuples;
1116+
return true;
1117+
}
1118+
readptr->current = state->memtupcount;
1119+
readptr->eof_reached = true;
1120+
return false;
1121+
}
1122+
else
1123+
{
1124+
if (readptr->eof_reached)
1125+
{
1126+
readptr->current = state->memtupcount;
1127+
readptr->eof_reached = false;
1128+
ntuples--;
1129+
}
1130+
if (readptr->current - state->memtupdeleted > ntuples)
1131+
{
1132+
readptr->current -= ntuples;
1133+
return true;
1134+
}
1135+
Assert(!state->truncated);
1136+
readptr->current = state->memtupdeleted;
1137+
return false;
1138+
}
1139+
break;
1140+
1141+
default:
1142+
/* We don't currently try hard to optimize other cases */
1143+
while (ntuples-- > 0)
1144+
{
1145+
void *tuple;
1146+
bool should_free;
1147+
1148+
tuple = tuplestore_gettuple(state, forward, &should_free);
1149+
1150+
if (tuple == NULL)
1151+
return false;
1152+
if (should_free)
1153+
pfree(tuple);
1154+
CHECK_FOR_INTERRUPTS();
1155+
}
1156+
return true;
1157+
}
1158+
}
1159+
10911160
/*
10921161
* dumptuples - remove tuples from memory and write to tape
10931162
*

src/include/utils/tuplestore.h

+4
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,12 @@ extern bool tuplestore_in_memory(Tuplestorestate *state);
7272

7373
extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
7474
bool copy, TupleTableSlot *slot);
75+
7576
extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
7677

78+
extern bool tuplestore_skiptuples(Tuplestorestate *state,
79+
int64 ntuples, bool forward);
80+
7781
extern bool tuplestore_ateof(Tuplestorestate *state);
7882

7983
extern void tuplestore_rescan(Tuplestorestate *state);

0 commit comments

Comments
 (0)