Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f38fbf3

Browse files
committed
If we expect a hash join to be performed in multiple batches, suppress
the "physical tlist" optimization on the outer relation (i.e., force a projection step to occur in its scan). This avoids storing useless column values when the outer relation's tuples are written to temporary batch files. Modified version of a patch by Michael Henderson and Ramon Lawrence.
1 parent ee4c187 commit f38fbf3

File tree

5 files changed

+25
-6
lines changed

5 files changed

+25
-6
lines changed

src/backend/nodes/outfuncs.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.355 2009/03/21 00:04:39 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.356 2009/03/26 17:15:34 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1448,6 +1448,7 @@ _outHashPath(StringInfo str, HashPath *node)
14481448
_outJoinPathInfo(str, (JoinPath *) node);
14491449

14501450
WRITE_NODE_FIELD(path_hashclauses);
1451+
WRITE_INT_FIELD(num_batches);
14511452
}
14521453

14531454
static void

src/backend/optimizer/path/costsize.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
* Portions Copyright (c) 1994, Regents of the University of California
5555
*
5656
* IDENTIFICATION
57-
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.205 2009/03/21 00:04:39 tgl Exp $
57+
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.206 2009/03/26 17:15:35 tgl Exp $
5858
*
5959
*-------------------------------------------------------------------------
6060
*/
@@ -1880,6 +1880,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
18801880
&numbatches,
18811881
&num_skew_mcvs);
18821882
virtualbuckets = (double) numbuckets *(double) numbatches;
1883+
/* mark the path with estimated # of batches */
1884+
path->num_batches = numbatches;
18831885

18841886
/*
18851887
* Determine bucketsize fraction for inner relation. We use the smallest

src/backend/optimizer/plan/createplan.c

+5-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.256 2009/03/21 00:04:39 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.257 2009/03/26 17:15:35 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -1910,6 +1910,10 @@ create_hashjoin_plan(PlannerInfo *root,
19101910
/* We don't want any excess columns in the hashed tuples */
19111911
disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath);
19121912

1913+
/* If we expect batching, suppress excess columns in outer tuples too */
1914+
if (best_path->num_batches > 1)
1915+
disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath);
1916+
19131917
/*
19141918
* If there is a single join clause and we can identify the outer
19151919
* variable as a simple column reference, supply its identity for

src/backend/optimizer/util/pathnode.c

+13-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.150 2009/02/27 00:06:27 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.151 2009/03/26 17:15:35 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1480,9 +1480,20 @@ create_hashjoin_path(PlannerInfo *root,
14801480
pathnode->jpath.outerjoinpath = outer_path;
14811481
pathnode->jpath.innerjoinpath = inner_path;
14821482
pathnode->jpath.joinrestrictinfo = restrict_clauses;
1483-
/* A hashjoin never has pathkeys, since its ordering is unpredictable */
1483+
/*
1484+
* A hashjoin never has pathkeys, since its output ordering is
1485+
* unpredictable due to possible batching. XXX If the inner relation is
1486+
* small enough, we could instruct the executor that it must not batch,
1487+
* and then we could assume that the output inherits the outer relation's
1488+
* ordering, which might save a sort step. However there is considerable
1489+
* downside if our estimate of the inner relation size is badly off.
1490+
* For the moment we don't risk it. (Note also that if we wanted to take
1491+
* this seriously, joinpath.c would have to consider many more paths for
1492+
* the outer rel than it does now.)
1493+
*/
14841494
pathnode->jpath.path.pathkeys = NIL;
14851495
pathnode->path_hashclauses = hashclauses;
1496+
/* cost_hashjoin will fill in pathnode->num_batches */
14861497

14871498
cost_hashjoin(pathnode, root, sjinfo);
14881499

src/include/nodes/relation.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.170 2009/03/05 23:06:45 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.171 2009/03/26 17:15:35 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -845,6 +845,7 @@ typedef struct HashPath
845845
{
846846
JoinPath jpath;
847847
List *path_hashclauses; /* join clauses used for hashing */
848+
int num_batches; /* number of batches expected */
848849
} HashPath;
849850

850851
/*

0 commit comments

Comments
 (0)