postgresql-cfbot
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
Lines changed: 10 additions & 10 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
Lines changed: 55 additions & 11 deletions
@@ -4282,25 +4282,25 @@ ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops,
  * 'hash_exprs'.  When multiple expressions are present, the hash values
  * returned by each hash function are combined to produce a single hash value.
  *
+ * If any hash_expr yields NULL and the corresponding hash function is strict,
+ * the created ExprState will return NULL.
+ *
  * desc: tuple descriptor for the to-be-hashed expressions
  * ops: TupleTableSlotOps for the TupleDesc
  * hashfunc_oids: Oid for each hash function to call, one for each 'hash_expr'
- * collations: collation to use when calling the hash function.
- * hash_expr: list of expressions to hash the value of
- * opstrict: array corresponding to the 'hashfunc_oids' to store op_strict()
+ * collations: collation to use when calling the hash function
+ * hash_exprs: list of expressions to hash the value of
+ * opstrict: strictness flag for each hash function
  * parent: PlanState node that the 'hash_exprs' will be evaluated at
  * init_value: Normally 0, but can be set to other values to seed the hash
  * with some other value.  Using non-zero is slightly less efficient but can
  * be useful.
- * keep_nulls: if true, evaluation of the returned ExprState will abort early
- * returning NULL if the given hash function is strict and the Datum to hash
- * is null.  When set to false, any NULL input Datums are skipped.
  */
 ExprState *
 ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
 					const Oid *hashfunc_oids, const List *collations,
 					const List *hash_exprs, const bool *opstrict,
-					PlanState *parent, uint32 init_value, bool keep_nulls)
+					PlanState *parent, uint32 init_value)
 {
 	ExprState  *state = makeNode(ExprState);
 	ExprEvalStep scratch = {0};
@@ -4377,8 +4377,8 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
 		fmgr_info(funcid, finfo);
 
 		/*
-		 * Build the steps to evaluate the hash function's argument have it so
-		 * the value of that is stored in the 0th argument of the hash func.
+		 * Build the steps to evaluate the hash function's argument, placing
+		 * the value in the 0th argument of the hash func.
 		 */
 		ExecInitExprRec(expr,
 						state,
@@ -4413,7 +4413,7 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
 		scratch.d.hashdatum.fcinfo_data = fcinfo;
 		scratch.d.hashdatum.fn_addr = finfo->fn_addr;
 
-		scratch.opcode = opstrict[i] && !keep_nulls ? strict_opcode : opcode;
+		scratch.opcode = opstrict[i] ? strict_opcode : opcode;
 		scratch.d.hashdatum.jumpdone = -1;
 
 		ExprEvalPushStep(state, &scratch);
 
@@ -154,8 +154,11 @@ MultiExecPrivateHash(HashState *node)
 	econtext = node->ps.ps_ExprContext;
 
 	/*
-	 * Get all tuples from the node below the Hash node and insert into the
-	 * hash table (or temp files).
+	 * Get all tuples from the node below the Hash node and insert the
+	 * potentially-matchable ones into the hash table (or temp files).  Tuples
+	 * that can't possibly match because they have null join keys are dumped
+	 * into a separate tuplestore, or just summarily discarded if we don't
+	 * need to emit them with null-extension.
 	 */
 	for (;;)
 	{
@@ -175,6 +178,7 @@ MultiExecPrivateHash(HashState *node)
 
 		if (!isnull)
 		{
+			/* normal case with a non-null join key */
 			uint32		hashvalue = DatumGetUInt32(hashdatum);
 			int			bucketNumber;
 
@@ -193,6 +197,14 @@ MultiExecPrivateHash(HashState *node)
 			}
 			hashtable->totalTuples += 1;
 		}
+		else if (node->keep_null_tuples)
+		{
+			/* null join key, but we must save tuple to be emitted later */
+			if (node->null_tuple_store == NULL)
+				node->null_tuple_store = ExecHashBuildNullTupleStore(hashtable);
+			tuplestore_puttupleslot(node->null_tuple_store, slot);
+		}
+		/* else we can discard the tuple immediately */
 	}
 
 	/* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */
@@ -223,7 +235,6 @@ MultiExecParallelHash(HashState *node)
 	HashJoinTable hashtable;
 	TupleTableSlot *slot;
 	ExprContext *econtext;
-	uint32		hashvalue;
 	Barrier    *build_barrier;
 	int			i;
 
@@ -283,6 +294,7 @@ MultiExecParallelHash(HashState *node)
 			for (;;)
 			{
 				bool		isnull;
+				uint32		hashvalue;
 
 				slot = ExecProcNode(outerNode);
 				if (TupIsNull(slot))
@@ -296,8 +308,19 @@ MultiExecParallelHash(HashState *node)
 																	 &isnull));
 
 				if (!isnull)
+				{
+					/* normal case with a non-null join key */
 					ExecParallelHashTableInsert(hashtable, slot, hashvalue);
-				hashtable->partialTuples++;
+					hashtable->partialTuples++;
+				}
+				else if (node->keep_null_tuples)
+				{
+					/* null join key, but save tuple to be emitted later */
+					if (node->null_tuple_store == NULL)
+						node->null_tuple_store = ExecHashBuildNullTupleStore(hashtable);
+					tuplestore_puttupleslot(node->null_tuple_store, slot);
+				}
+				/* else we can discard the tuple immediately */
 			}
 
 			/*
@@ -405,14 +428,10 @@ ExecInitHash(Hash *node, EState *estate, int eflags)
 
 	Assert(node->plan.qual == NIL);
 
-	/*
-	 * Delay initialization of hash_expr until ExecInitHashJoin().  We cannot
-	 * build the ExprState here as we don't yet know the join type we're going
-	 * to be hashing values for and we need to know that before calling
-	 * ExecBuildHash32Expr as the keep_nulls parameter depends on the join
-	 * type.
-	 */
+	/* these fields will be filled by ExecInitHashJoin() */
 	hashstate->hash_expr = NULL;
+	hashstate->null_tuple_store = NULL;
+	hashstate->keep_null_tuples = false;
 
 	return hashstate;
 }
@@ -2748,6 +2767,31 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
 	}
 }
 
+/*
+ * Create an empty tuplestore for input tuples whose join keys are null.
+ * (The same routine serves for both outer-side and inner-side tuples.)
+ *
+ * In a parallel hash join, each worker keeps its own tuplestore(s) of this
+ * kind, always in private storage; what to do with them never has to be
+ * coordinated with the other workers.
+ */
+Tuplestorestate *
+ExecHashBuildNullTupleStore(HashJoinTable hashtable)
+{
+	MemoryContext callerCxt;
+	Tuplestorestate *nullStore;
+
+	/*
+	 * Allocate in hashCxt so that the tuplestore won't go away too soon.
+	 * It is sized at work_mem/16 so that it doesn't bloat the node's space
+	 * consumption too much.
+	 */
+	callerCxt = MemoryContextSwitchTo(hashtable->hashCxt);
+	nullStore = tuplestore_begin_heap(false, false, work_mem / 16);
+	MemoryContextSwitchTo(callerCxt);
+	return nullStore;
+}
+
 /*
  * Reserve space in the DSM segment for instrumentation data.
  */