Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d24d75f

Browse files
committed
Small performance improvement for hash joins and hash aggregation:
when the plan is ReScanned, we don't have to rebuild the hash table if there is no parameter change for its child node. This idea has long been used in the Sort and Material nodes, but had not been applied to the hash code until now.
1 parent 776d530 commit d24d75f

File tree

2 files changed

+76
-40
lines changed

2 files changed

+76
-40
lines changed

src/backend/executor/nodeAgg.c

+31-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
* Portions Copyright (c) 1994, Regents of the University of California
4646
*
4747
* IDENTIFICATION
48-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.104 2003/02/09 00:30:39 tgl Exp $
48+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.105 2003/05/30 20:23:10 tgl Exp $
4949
*
5050
*-------------------------------------------------------------------------
5151
*/
@@ -1374,6 +1374,31 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
13741374
ExprContext *econtext = node->ss.ps.ps_ExprContext;
13751375
int aggno;
13761376

1377+
node->agg_done = false;
1378+
1379+
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1380+
{
1381+
/*
1382+
* In the hashed case, if we haven't yet built the hash table
1383+
* then we can just return; nothing done yet, so nothing to undo.
1384+
* If subnode's chgParam is not NULL then it will be re-scanned by
1385+
* ExecProcNode, else no reason to re-scan it at all.
1386+
*/
1387+
if (!node->table_filled)
1388+
return;
1389+
1390+
/*
1391+
* If we do have the hash table and the subplan does not have any
1392+
* parameter changes, then we can just rescan the existing hash
1393+
* table; no need to build it again.
1394+
*/
1395+
if (((PlanState *) node)->lefttree->chgParam == NULL)
1396+
{
1397+
ResetTupleHashIterator(&node->hashiter);
1398+
return;
1399+
}
1400+
}
1401+
13771402
/* Make sure we have closed any open tuplesorts */
13781403
for (aggno = 0; aggno < node->numaggs; aggno++)
13791404
{
@@ -1384,19 +1409,23 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
13841409
peraggstate->sortstate = NULL;
13851410
}
13861411

1387-
node->agg_done = false;
1412+
/* Release first tuple of group, if we have made a copy */
13881413
if (node->grp_firstTuple != NULL)
13891414
{
13901415
heap_freetuple(node->grp_firstTuple);
13911416
node->grp_firstTuple = NULL;
13921417
}
1418+
1419+
/* Forget current agg values */
13931420
MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
13941421
MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
13951422

1423+
/* Release all temp storage */
13961424
MemoryContextReset(node->aggcontext);
13971425

13981426
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
13991427
{
1428+
/* Rebuild an empty hash table */
14001429
build_hash_table(node);
14011430
node->table_filled = false;
14021431
}

src/backend/executor/nodeHashjoin.c

+45-38
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.50 2003/05/05 17:57:47 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.51 2003/05/30 20:23:10 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -56,9 +56,7 @@ ExecHashJoin(HashJoinState *node)
5656
HashJoinTable hashtable;
5757
HeapTuple curtuple;
5858
TupleTableSlot *outerTupleSlot;
59-
TupleTableSlot *innerTupleSlot;
6059
int i;
61-
bool hashPhaseDone;
6260

6361
/*
6462
* get information from HashJoin node
@@ -69,7 +67,6 @@ ExecHashJoin(HashJoinState *node)
6967
otherqual = node->js.ps.qual;
7068
hashNode = (HashState *) innerPlanState(node);
7169
outerNode = outerPlanState(node);
72-
hashPhaseDone = node->hj_hashdone;
7370
dir = estate->es_direction;
7471

7572
/*
@@ -114,34 +111,30 @@ ExecHashJoin(HashJoinState *node)
114111
/*
115112
* if this is the first call, build the hash table for inner relation
116113
*/
117-
if (!hashPhaseDone)
118-
{ /* if the hash phase not completed */
119-
if (hashtable == NULL)
120-
{ /* if the hash table has not been created */
121-
122-
/*
123-
* create the hash table
124-
*/
125-
hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
126-
node->hj_HashTable = hashtable;
114+
if (!node->hj_hashdone)
115+
{
116+
/*
117+
* create the hash table
118+
*/
119+
Assert(hashtable == NULL);
120+
hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
121+
node->hj_HashTable = hashtable;
127122

128-
/*
129-
* execute the Hash node, to build the hash table
130-
*/
131-
hashNode->hashtable = hashtable;
132-
innerTupleSlot = ExecProcNode((PlanState *) hashNode);
133-
}
134-
node->hj_hashdone = true;
123+
/*
124+
* execute the Hash node, to build the hash table
125+
*/
126+
hashNode->hashtable = hashtable;
127+
(void) ExecProcNode((PlanState *) hashNode);
135128

136129
/*
137130
* Open temp files for outer batches, if needed. Note that file
138131
* buffers are palloc'd in regular executor context.
139132
*/
140133
for (i = 0; i < hashtable->nbatch; i++)
141134
hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
135+
136+
node->hj_hashdone = true;
142137
}
143-
else if (hashtable == NULL)
144-
return NULL;
145138

146139
/*
147140
* Now get an outer tuple and probe into the hash table for matches
@@ -159,11 +152,7 @@ ExecHashJoin(HashJoinState *node)
159152
node);
160153
if (TupIsNull(outerTupleSlot))
161154
{
162-
/*
163-
* when the last batch runs out, clean up and exit
164-
*/
165-
ExecHashTableDestroy(hashtable);
166-
node->hj_HashTable = NULL;
155+
/* end of join */
167156
return NULL;
168157
}
169158

@@ -410,8 +399,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
410399
*/
411400

412401
hjstate->hj_hashdone = false;
413-
414402
hjstate->hj_HashTable = (HashJoinTable) NULL;
403+
415404
hjstate->hj_CurBucketNo = 0;
416405
hjstate->hj_CurTuple = (HashJoinTuple) NULL;
417406

@@ -461,7 +450,7 @@ void
461450
ExecEndHashJoin(HashJoinState *node)
462451
{
463452
/*
464-
* free hash table in case we end plan before all tuples are retrieved
453+
* Free hash table
465454
*/
466455
if (node->hj_HashTable)
467456
{
@@ -682,21 +671,41 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple,
682671
void
683672
ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
684673
{
674+
/*
675+
* If we haven't yet built the hash table then we can just return;
676+
* nothing done yet, so nothing to undo.
677+
*/
685678
if (!node->hj_hashdone)
686679
return;
687-
688-
node->hj_hashdone = false;
680+
Assert(node->hj_HashTable != NULL);
689681

690682
/*
691-
* Unfortunately, currently we have to destroy hashtable in all
692-
* cases...
683+
* In a multi-batch join, we currently have to do rescans the hard way,
684+
* primarily because batch temp files may have already been released.
685+
* But if it's a single-batch join, and there is no parameter change
686+
* for the inner subnode, then we can just re-use the existing hash
687+
* table without rebuilding it.
693688
*/
694-
if (node->hj_HashTable)
689+
if (node->hj_HashTable->nbatch == 0 &&
690+
((PlanState *) node)->righttree->chgParam == NULL)
691+
{
692+
/* okay to reuse the hash table; needn't rescan inner, either */
693+
}
694+
else
695695
{
696+
/* must destroy and rebuild hash table */
697+
node->hj_hashdone = false;
696698
ExecHashTableDestroy(node->hj_HashTable);
697699
node->hj_HashTable = NULL;
700+
/*
701+
* if chgParam of subnode is not null then plan will be re-scanned
702+
* by first ExecProcNode.
703+
*/
704+
if (((PlanState *) node)->righttree->chgParam == NULL)
705+
ExecReScan(((PlanState *) node)->righttree, exprCtxt);
698706
}
699707

708+
/* Always reset intra-tuple state */
700709
node->hj_CurBucketNo = 0;
701710
node->hj_CurTuple = (HashJoinTuple) NULL;
702711

@@ -706,11 +715,9 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
706715
node->hj_MatchedOuter = false;
707716

708717
/*
709-
* if chgParam of subnodes is not null then plans will be re-scanned
718+
* if chgParam of subnode is not null then plan will be re-scanned
710719
* by first ExecProcNode.
711720
*/
712721
if (((PlanState *) node)->lefttree->chgParam == NULL)
713722
ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
714-
if (((PlanState *) node)->righttree->chgParam == NULL)
715-
ExecReScan(((PlanState *) node)->righttree, exprCtxt);
716723
}

0 commit comments

Comments
 (0)