Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit f4e4b32

Browse files
committed
Support RIGHT and FULL OUTER JOIN in hash joins.
This is advantageous first because it allows us to hash the smaller table regardless of the outer-join type, and second because hash join can be more flexible than merge join in dealing with arbitrary join quals in a FULL join. For merge join all the join quals have to be mergejoinable, but hash join will work so long as there's at least one hashjoinable qual --- the others can be any condition. (This is true essentially because we don't keep per-inner-tuple match flags in merge join, while hash join can do so.) To do this, we need a has-it-been-matched flag for each tuple in the hashtable, not just one for the current outer tuple. The key idea that makes this practical is that we can store the match flag in the tuple's infomask, since there are lots of bits there that are of no interest for a MinimalTuple. So we aren't increasing the size of the hashtable at all for the feature. To write this without turning the hash code into even more of a pile of spaghetti than it already was, I rewrote ExecHashJoin in a state-machine style, similar to ExecMergeJoin. Other than that decision, it was pretty straightforward.
1 parent 17cb9e8 commit f4e4b32

File tree

10 files changed

+596
-363
lines changed

10 files changed

+596
-363
lines changed

src/backend/executor/nodeHash.c

+146-5
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ MultiExecHash(HashState *node)
105105
break;
106106
/* We have to compute the hash value */
107107
econtext->ecxt_innertuple = slot;
108-
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
108+
if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
109+
false, hashtable->keepNulls,
109110
&hashvalue))
110111
{
111112
int bucketNumber;
@@ -231,7 +232,7 @@ ExecEndHash(HashState *node)
231232
* ----------------------------------------------------------------
232233
*/
233234
HashJoinTable
234-
ExecHashTableCreate(Hash *node, List *hashOperators)
235+
ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
235236
{
236237
HashJoinTable hashtable;
237238
Plan *outerNode;
@@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
273274
hashtable->nbuckets = nbuckets;
274275
hashtable->log2_nbuckets = log2_nbuckets;
275276
hashtable->buckets = NULL;
277+
hashtable->keepNulls = keepNulls;
276278
hashtable->skewEnabled = false;
277279
hashtable->skewBucket = NULL;
278280
hashtable->skewBucketLen = 0;
@@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable,
712714
HashJoinTuple hashTuple;
713715
int hashTupleSize;
714716

717+
/* Create the HashJoinTuple */
715718
hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
716719
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
717720
hashTupleSize);
718721
hashTuple->hashvalue = hashvalue;
719722
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
723+
724+
/*
725+
* We always reset the tuple-matched flag on insertion. This is okay
726+
* even when reloading a tuple from a batch file, since the tuple
727+
* could not possibly have been matched to an outer tuple before it
728+
* went into the batch file.
729+
*/
730+
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
731+
732+
/* Push it onto the front of the bucket's list */
720733
hashTuple->next = hashtable->buckets[bucketno];
721734
hashtable->buckets[bucketno] = hashTuple;
735+
736+
/* Account for space used, and back off if we've used too much */
722737
hashtable->spaceUsed += hashTupleSize;
723738
if (hashtable->spaceUsed > hashtable->spacePeak)
724739
hashtable->spacePeak = hashtable->spaceUsed;
@@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
878893
* scan a hash bucket for matches to the current outer tuple
879894
*
880895
* The current outer tuple must be stored in econtext->ecxt_outertuple.
896+
*
897+
* On success, the inner tuple is stored into hjstate->hj_CurTuple and
898+
* econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
899+
* for the latter.
881900
*/
882-
HashJoinTuple
901+
bool
883902
ExecScanHashBucket(HashJoinState *hjstate,
884903
ExprContext *econtext)
885904
{
@@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
920939
if (ExecQual(hjclauses, econtext, false))
921940
{
922941
hjstate->hj_CurTuple = hashTuple;
923-
return hashTuple;
942+
return true;
924943
}
925944
}
926945

@@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate,
930949
/*
931950
* no match
932951
*/
933-
return NULL;
952+
return false;
953+
}
954+
955+
/*
956+
* ExecPrepHashTableForUnmatched
957+
* set up for a series of ExecScanHashTableForUnmatched calls
958+
*/
959+
void
960+
ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
961+
{
962+
/*
963+
*----------
964+
* During this scan we use the HashJoinState fields as follows:
965+
*
966+
* hj_CurBucketNo: next regular bucket to scan
967+
* hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
968+
* hj_CurTuple: last tuple returned, or NULL to start next bucket
969+
*----------
970+
*/
971+
hjstate->hj_CurBucketNo = 0;
972+
hjstate->hj_CurSkewBucketNo = 0;
973+
hjstate->hj_CurTuple = NULL;
974+
}
975+
976+
/*
977+
* ExecScanHashTableForUnmatched
978+
* scan the hash table for unmatched inner tuples
979+
*
980+
* On success, the inner tuple is stored into hjstate->hj_CurTuple and
981+
* econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
982+
* for the latter.
983+
*/
984+
bool
985+
ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
986+
{
987+
HashJoinTable hashtable = hjstate->hj_HashTable;
988+
HashJoinTuple hashTuple = hjstate->hj_CurTuple;
989+
990+
for (;;)
991+
{
992+
/*
993+
* hj_CurTuple is the address of the tuple last returned from the
994+
* current bucket, or NULL if it's time to start scanning a new
995+
* bucket.
996+
*/
997+
if (hashTuple != NULL)
998+
hashTuple = hashTuple->next;
999+
else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
1000+
{
1001+
hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
1002+
hjstate->hj_CurBucketNo++;
1003+
}
1004+
else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
1005+
{
1006+
int j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
1007+
1008+
hashTuple = hashtable->skewBucket[j]->tuples;
1009+
hjstate->hj_CurSkewBucketNo++;
1010+
}
1011+
else
1012+
break; /* finished all buckets */
1013+
1014+
while (hashTuple != NULL)
1015+
{
1016+
if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
1017+
{
1018+
TupleTableSlot *inntuple;
1019+
1020+
/* insert hashtable's tuple into exec slot */
1021+
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
1022+
hjstate->hj_HashTupleSlot,
1023+
false); /* do not pfree */
1024+
econtext->ecxt_innertuple = inntuple;
1025+
1026+
/*
1027+
* Reset temp memory each time; although this function doesn't
1028+
* do any qual eval, the caller will, so let's keep it
1029+
* parallel to ExecScanHashBucket.
1030+
*/
1031+
ResetExprContext(econtext);
1032+
1033+
hjstate->hj_CurTuple = hashTuple;
1034+
return true;
1035+
}
1036+
1037+
hashTuple = hashTuple->next;
1038+
}
1039+
}
1040+
1041+
/*
1042+
* no more unmatched tuples
1043+
*/
1044+
return false;
9341045
}
9351046

9361047
/*
@@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable)
9601071
MemoryContextSwitchTo(oldcxt);
9611072
}
9621073

1074+
/*
1075+
* ExecHashTableResetMatchFlags
1076+
* Clear all the HeapTupleHeaderHasMatch flags in the table
1077+
*/
1078+
void
1079+
ExecHashTableResetMatchFlags(HashJoinTable hashtable)
1080+
{
1081+
HashJoinTuple tuple;
1082+
int i;
1083+
1084+
/* Reset all flags in the main table ... */
1085+
for (i = 0; i < hashtable->nbuckets; i++)
1086+
{
1087+
for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next)
1088+
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
1089+
}
1090+
1091+
/* ... and the same for the skew buckets, if any */
1092+
for (i = 0; i < hashtable->nSkewBuckets; i++)
1093+
{
1094+
int j = hashtable->skewBucketNums[i];
1095+
HashSkewBucket *skewBucket = hashtable->skewBucket[j];
1096+
1097+
for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next)
1098+
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
1099+
}
1100+
}
1101+
1102+
9631103
void
9641104
ExecReScanHash(HashState *node)
9651105
{
@@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
12031343
hashTupleSize);
12041344
hashTuple->hashvalue = hashvalue;
12051345
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
1346+
HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
12061347

12071348
/* Push it onto the front of the skew bucket's list */
12081349
hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples;

0 commit comments

Comments
 (0)