Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit b39e915

Browse files
committed
Improve hash join to discard input tuples immediately if they can't match
because they contain a null join key (and the join operator is known to be
strict). This improves performance significantly when the inner relation
contains a lot of nulls, as per bug #2930.
1 parent 28c480e commit b39e915

File tree

4 files changed

+70
-25
lines changed

4 files changed

+70
-25
lines changed

src/backend/executor/nodeHash.c

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
9292
slot = ExecProcNode(outerNode);
9393
if (TupIsNull(slot))
9494
break;
95-
hashtable->totalTuples += 1;
9695
/* We have to compute the hash value */
9796
econtext->ecxt_innertuple = slot;
98-
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
99-
ExecHashTableInsert(hashtable, slot, hashvalue);
97+
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
98+
&hashvalue))
99+
{
100+
ExecHashTableInsert(hashtable, slot, hashvalue);
101+
hashtable->totalTuples += 1;
102+
}
100103
}
101104

102105
/* must provide our own instrumentation support */
@@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
261264

262265
/*
263266
* Get info about the hash functions to be used for each hash key.
267+
* Also remember whether the join operators are strict.
264268
*/
265269
nkeys = list_length(hashOperators);
266270
hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
271+
hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
267272
i = 0;
268273
foreach(ho, hashOperators)
269274
{
275+
Oid hashop = lfirst_oid(ho);
270276
Oid hashfn;
271277

272-
hashfn = get_op_hash_function(lfirst_oid(ho));
278+
hashfn = get_op_hash_function(hashop);
273279
if (!OidIsValid(hashfn))
274280
elog(ERROR, "could not find hash function for hash operator %u",
275-
lfirst_oid(ho));
281+
hashop);
276282
fmgr_info(hashfn, &hashtable->hashfunctions[i]);
283+
hashtable->hashStrict[i] = op_strict(hashop);
277284
i++;
278285
}
279286

@@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
657664
* The tuple to be tested must be in either econtext->ecxt_outertuple or
658665
* econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
659666
* either OUTER or INNER.
667+
*
668+
* A TRUE result means the tuple's hash value has been successfully computed
669+
* and stored at *hashvalue. A FALSE result means the tuple cannot match
670+
* because it contains a null attribute, and hence it should be discarded
671+
* immediately. (If keep_nulls is true then FALSE is never returned.)
660672
*/
661-
uint32
673+
bool
662674
ExecHashGetHashValue(HashJoinTable hashtable,
663675
ExprContext *econtext,
664-
List *hashkeys)
676+
List *hashkeys,
677+
bool keep_nulls,
678+
uint32 *hashvalue)
665679
{
666680
uint32 hashkey = 0;
667681
ListCell *hk;
@@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
691705
keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
692706

693707
/*
694-
* Compute the hash function
708+
* If the attribute is NULL, and the join operator is strict, then
709+
* this tuple cannot pass the join qual so we can reject it
710+
* immediately (unless we're scanning the outer side of an outer join,
711+
* in which case we must not reject it). Otherwise we act like the
712+
* hashcode of NULL is zero (this will support operators that act like
713+
* IS NOT DISTINCT, though not any more-random behavior). We treat
714+
* the hash support function as strict even if the operator is not.
715+
*
716+
* Note: currently, all hashjoinable operators must be strict since
717+
* the hash index AM assumes that. However, it takes so little
718+
* extra code here to allow non-strict that we may as well do it.
695719
*/
696-
if (!isNull) /* treat nulls as having hash key 0 */
720+
if (isNull)
721+
{
722+
if (hashtable->hashStrict[i] && !keep_nulls)
723+
return false; /* cannot match */
724+
/* else, leave hashkey unmodified, equivalent to hashcode 0 */
725+
}
726+
else
697727
{
728+
/* Compute the hash function */
698729
uint32 hkey;
699730

700731
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
@@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
707738

708739
MemoryContextSwitchTo(oldContext);
709740

710-
return hashkey;
741+
*hashvalue = hashkey;
742+
return true;
711743
}
712744

713745
/*

src/backend/executor/nodeHashjoin.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
547547
int curbatch = hashtable->curbatch;
548548
TupleTableSlot *slot;
549549

550-
if (curbatch == 0)
551-
{ /* if it is the first pass */
552-
550+
if (curbatch == 0) /* if it is the first pass */
551+
{
553552
/*
554553
* Check to see if first outer tuple was already fetched by
555554
* ExecHashJoin() and not used yet.
@@ -559,21 +558,31 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
559558
hjstate->hj_FirstOuterTupleSlot = NULL;
560559
else
561560
slot = ExecProcNode(outerNode);
562-
if (!TupIsNull(slot))
561+
562+
while (!TupIsNull(slot))
563563
{
564564
/*
565565
* We have to compute the tuple's hash value.
566566
*/
567567
ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
568568

569569
econtext->ecxt_outertuple = slot;
570-
*hashvalue = ExecHashGetHashValue(hashtable, econtext,
571-
hjstate->hj_OuterHashKeys);
570+
if (ExecHashGetHashValue(hashtable, econtext,
571+
hjstate->hj_OuterHashKeys,
572+
(hjstate->js.jointype == JOIN_LEFT),
573+
hashvalue))
574+
{
575+
/* remember outer relation is not empty for possible rescan */
576+
hjstate->hj_OuterNotEmpty = true;
572577

573-
/* remember outer relation is not empty for possible rescan */
574-
hjstate->hj_OuterNotEmpty = true;
578+
return slot;
579+
}
575580

576-
return slot;
581+
/*
582+
* That tuple couldn't match because of a NULL, so discard it
583+
* and continue with the next one.
584+
*/
585+
slot = ExecProcNode(outerNode);
577586
}
578587

579588
/*

src/include/executor/hashjoin.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -108,6 +108,8 @@ typedef struct HashJoinTableData
108108
*/
109109
FmgrInfo *hashfunctions; /* lookup data for hash functions */
110110

111+
bool *hashStrict; /* is each hash join operator strict? */
112+
111113
Size spaceUsed; /* memory space currently used by tuples */
112114
Size spaceAllowed; /* upper limit for space used */
113115

src/include/executor/nodeHash.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable);
2828
extern void ExecHashTableInsert(HashJoinTable hashtable,
2929
TupleTableSlot *slot,
3030
uint32 hashvalue);
31-
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
31+
extern bool ExecHashGetHashValue(HashJoinTable hashtable,
3232
ExprContext *econtext,
33-
List *hashkeys);
33+
List *hashkeys,
34+
bool keep_nulls,
35+
uint32 *hashvalue);
3436
extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
3537
uint32 hashvalue,
3638
int *bucketno,

0 commit comments

Comments
 (0)