diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index f1569879b529..2cd5eb079858 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -4282,25 +4282,25 @@ ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, * 'hash_exprs'. When multiple expressions are present, the hash values * returned by each hash function are combined to produce a single hash value. * + * If any hash_expr yields NULL and the corresponding hash function is strict, + * the created ExprState will return NULL. + * * desc: tuple descriptor for the to-be-hashed expressions * ops: TupleTableSlotOps for the TupleDesc * hashfunc_oids: Oid for each hash function to call, one for each 'hash_expr' - * collations: collation to use when calling the hash function. - * hash_expr: list of expressions to hash the value of - * opstrict: array corresponding to the 'hashfunc_oids' to store op_strict() + * collations: collation to use when calling the hash function + * hash_exprs: list of expressions to hash the value of + * opstrict: strictness flag for each hash function * parent: PlanState node that the 'hash_exprs' will be evaluated at * init_value: Normally 0, but can be set to other values to seed the hash * with some other value. Using non-zero is slightly less efficient but can * be useful. - * keep_nulls: if true, evaluation of the returned ExprState will abort early - * returning NULL if the given hash function is strict and the Datum to hash - * is null. When set to false, any NULL input Datums are skipped. */ ExprState * ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, const Oid *hashfunc_oids, const List *collations, const List *hash_exprs, const bool *opstrict, - PlanState *parent, uint32 init_value, bool keep_nulls) + PlanState *parent, uint32 init_value) { ExprState *state = makeNode(ExprState); ExprEvalStep scratch = {0}; @@ -4377,8 +4377,8 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, fmgr_info(funcid, finfo); /* - * Build the steps to evaluate the hash function's argument have it so - * the value of that is stored in the 0th argument of the hash func. + * Build the steps to evaluate the hash function's argument, placing + * the value in the 0th argument of the hash func. */ ExecInitExprRec(expr, state, @@ -4413,7 +4413,7 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, scratch.d.hashdatum.fcinfo_data = fcinfo; scratch.d.hashdatum.fn_addr = finfo->fn_addr; - scratch.opcode = opstrict[i] && !keep_nulls ? strict_opcode : opcode; + scratch.opcode = opstrict[i] ? strict_opcode : opcode; scratch.d.hashdatum.jumpdone = -1; ExprEvalPushStep(state, &scratch); diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8d2201ab67fa..003814a4d311 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -154,8 +154,11 @@ MultiExecPrivateHash(HashState *node) econtext = node->ps.ps_ExprContext; /* - * Get all tuples from the node below the Hash node and insert into the - * hash table (or temp files). + * Get all tuples from the node below the Hash node and insert the + * potentially-matchable ones into the hash table (or temp files). Tuples + * that can't possibly match because they have null join keys are dumped + * into a separate tuplestore, or just summarily discarded if we don't + * need to emit them with null-extension. 
*/ for (;;) { @@ -175,6 +178,7 @@ MultiExecPrivateHash(HashState *node) if (!isnull) { + /* normal case with a non-null join key */ uint32 hashvalue = DatumGetUInt32(hashdatum); int bucketNumber; @@ -193,6 +197,14 @@ MultiExecPrivateHash(HashState *node) } hashtable->totalTuples += 1; } + else if (node->keep_null_tuples) + { + /* null join key, but we must save tuple to be emitted later */ + if (node->null_tuple_store == NULL) + node->null_tuple_store = ExecHashBuildNullTupleStore(hashtable); + tuplestore_puttupleslot(node->null_tuple_store, slot); + } + /* else we can discard the tuple immediately */ } /* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */ @@ -223,7 +235,6 @@ MultiExecParallelHash(HashState *node) HashJoinTable hashtable; TupleTableSlot *slot; ExprContext *econtext; - uint32 hashvalue; Barrier *build_barrier; int i; @@ -283,6 +294,7 @@ MultiExecParallelHash(HashState *node) for (;;) { bool isnull; + uint32 hashvalue; slot = ExecProcNode(outerNode); if (TupIsNull(slot)) @@ -296,8 +308,19 @@ MultiExecParallelHash(HashState *node) &isnull)); if (!isnull) + { + /* normal case with a non-null join key */ ExecParallelHashTableInsert(hashtable, slot, hashvalue); - hashtable->partialTuples++; + hashtable->partialTuples++; + } + else if (node->keep_null_tuples) + { + /* null join key, but save tuple to be emitted later */ + if (node->null_tuple_store == NULL) + node->null_tuple_store = ExecHashBuildNullTupleStore(hashtable); + tuplestore_puttupleslot(node->null_tuple_store, slot); + } + /* else we can discard the tuple immediately */ } /* @@ -405,14 +428,10 @@ ExecInitHash(Hash *node, EState *estate, int eflags) Assert(node->plan.qual == NIL); - /* - * Delay initialization of hash_expr until ExecInitHashJoin(). We cannot - * build the ExprState here as we don't yet know the join type we're going - * to be hashing values for and we need to know that before calling - * ExecBuildHash32Expr as the keep_nulls parameter depends on the join - * type. - */ + /* these fields will be filled by ExecInitHashJoin() */ hashstate->hash_expr = NULL; + hashstate->null_tuple_store = NULL; + hashstate->keep_null_tuples = false; return hashstate; } @@ -2748,6 +2767,31 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable) } } +/* + * Build a tuplestore suitable for holding null-keyed input tuples. + * (This function doesn't care whether it's for outer or inner tuples.) + * + * Note that in a parallel hash join, each worker has its own tuplestore(s) + * for these. There's no need to interact with other workers to decide + * what to do with them. So they're always in private storage. + */ +Tuplestorestate * +ExecHashBuildNullTupleStore(HashJoinTable hashtable) +{ + Tuplestorestate *tstore; + MemoryContext oldcxt; + + /* + * We keep the tuplestore in the hashCxt to ensure it won't go away too + * soon. Size it at work_mem/16 so that it doesn't bloat the node's space + * consumption too much. + */ + oldcxt = MemoryContextSwitchTo(hashtable->hashCxt); + tstore = tuplestore_begin_heap(false, false, work_mem / 16); + MemoryContextSwitchTo(oldcxt); + return tstore; +} + /* * Reserve space in the DSM segment for instrumentation data. 
*/ diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 5661ad768300..6a42041c9275 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -182,7 +182,9 @@ #define HJ_SCAN_BUCKET 3 #define HJ_FILL_OUTER_TUPLE 4 #define HJ_FILL_INNER_TUPLES 5 -#define HJ_NEED_NEW_BATCH 6 +#define HJ_FILL_OUTER_NULL_TUPLES 6 +#define HJ_FILL_INNER_NULL_TUPLES 7 +#define HJ_NEED_NEW_BATCH 8 /* Returns true if doing null-fill on outer relation */ #define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL) @@ -346,9 +348,16 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) /* * If the inner relation is completely empty, and we're not * doing a left outer join, we can quit without scanning the - * outer relation. + * outer relation. (If the inner relation contains only + * null-keyed tuples that we need to emit, we'll fall through + * and do the outer-relation scan. In principle we could go + * emit those tuples then quit, but it would complicate the + * state machine logic. The case seems rare enough to not be + * worth optimizing.) */ - if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node)) + if (hashtable->totalTuples == 0 && + hashNode->null_tuple_store == NULL && + !HJ_FILL_OUTER(node)) { if (parallel) { @@ -440,12 +449,17 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) if (parallel) { /* - * Only one process is currently allow to handle + * Only one process is currently allowed to handle * each batch's unmatched tuples, in a parallel - * join. + * join. However, each process must deal with any + * null-keyed tuples it found. */ if (ExecParallelPrepHashTableForUnmatched(node)) node->hj_JoinState = HJ_FILL_INNER_TUPLES; + else if (node->hj_NullOuterTupleStore) + node->hj_JoinState = HJ_FILL_OUTER_NULL_TUPLES; + else if (hashNode->null_tuple_store) + node->hj_JoinState = HJ_FILL_INNER_NULL_TUPLES; else node->hj_JoinState = HJ_NEED_NEW_BATCH; } @@ -456,7 +470,14 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) } } else - node->hj_JoinState = HJ_NEED_NEW_BATCH; + { + /* might have outer null-keyed tuples to fill */ + Assert(hashNode->null_tuple_store == NULL); + if (node->hj_NullOuterTupleStore) + node->hj_JoinState = HJ_FILL_OUTER_NULL_TUPLES; + else + node->hj_JoinState = HJ_NEED_NEW_BATCH; + } continue; } @@ -632,8 +653,13 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) if (!(parallel ? ExecParallelScanHashTableForUnmatched(node, econtext) : ExecScanHashTableForUnmatched(node, econtext))) { - /* no more unmatched tuples */ - node->hj_JoinState = HJ_NEED_NEW_BATCH; + /* no more unmatched tuples, but maybe there are nulls */ + if (node->hj_NullOuterTupleStore) + node->hj_JoinState = HJ_FILL_OUTER_NULL_TUPLES; + else if (hashNode->null_tuple_store) + node->hj_JoinState = HJ_FILL_INNER_NULL_TUPLES; + else + node->hj_JoinState = HJ_NEED_NEW_BATCH; continue; } @@ -649,6 +675,93 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) InstrCountFiltered2(node, 1); break; + case HJ_FILL_OUTER_NULL_TUPLES: + + /* + * We have finished a batch, but we are doing left/full join, + * so any null-keyed outer tuples have to be emitted before we + * continue to the next batch. + * + * (We could delay this till the end of the join, but there + * seems little percentage in that.) + * + * We have to use tuplestore_gettupleslot_force because + * hj_OuterTupleSlot may not be able to store a MinimalTuple. 
+ */ + while (tuplestore_gettupleslot_force(node->hj_NullOuterTupleStore, + true, false, + node->hj_OuterTupleSlot)) + { + /* + * Generate a fake join tuple with nulls for the inner + * tuple, and return it if it passes the non-join quals. + */ + econtext->ecxt_outertuple = node->hj_OuterTupleSlot; + econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot; + + if (otherqual == NULL || ExecQual(otherqual, econtext)) + return ExecProject(node->js.ps.ps_ProjInfo); + else + InstrCountFiltered2(node, 1); + + ResetExprContext(econtext); + + /* allow this loop to be cancellable */ + CHECK_FOR_INTERRUPTS(); + } + + /* We don't need the tuplestore any more, so discard it. */ + tuplestore_end(node->hj_NullOuterTupleStore); + node->hj_NullOuterTupleStore = NULL; + + /* Fill inner tuples too if it's a full join, else advance. */ + if (hashNode->null_tuple_store) + node->hj_JoinState = HJ_FILL_INNER_NULL_TUPLES; + else + node->hj_JoinState = HJ_NEED_NEW_BATCH; + break; + + case HJ_FILL_INNER_NULL_TUPLES: + + /* + * We have finished a batch, but we are doing + * right/right-anti/full join, so any null-keyed inner tuples + * have to be emitted before we continue to the next batch. + * + * (We could delay this till the end of the join, but there + * seems little percentage in that.) + */ + while (tuplestore_gettupleslot(hashNode->null_tuple_store, + true, false, + node->hj_HashTupleSlot)) + { + /* + * Generate a fake join tuple with nulls for the outer + * tuple, and return it if it passes the non-join quals. + */ + econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot; + econtext->ecxt_innertuple = node->hj_HashTupleSlot; + + if (otherqual == NULL || ExecQual(otherqual, econtext)) + return ExecProject(node->js.ps.ps_ProjInfo); + else + InstrCountFiltered2(node, 1); + + ResetExprContext(econtext); + + /* allow this loop to be cancellable */ + CHECK_FOR_INTERRUPTS(); + } + + /* + * Ideally we'd discard the tuplestore now, but we can't + * because we might need it for rescans. + */ + + /* Now we can advance to the next batch. */ + node->hj_JoinState = HJ_NEED_NEW_BATCH; + break; + case HJ_NEED_NEW_BATCH: /* @@ -831,10 +944,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* * Build ExprStates to obtain hash values for either side of the join. - * This must be done here as ExecBuildHash32Expr needs to know how to - * handle NULL inputs and the required handling of that depends on the - * jointype. We don't know the join type in ExecInitHash() and we - * must build the ExprStates before ExecHashTableCreate() so we + * Note: must build the ExprStates before ExecHashTableCreate() so we * properly attribute any SubPlans that exist in the hash expressions * to the correct PlanState. */ @@ -846,7 +956,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* * Determine the hash function for each side of the join for the given - * hash operator. + * join operator, and detect whether the join operator is strict. */ foreach(lc, node->hashoperators) { @@ -864,11 +974,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* * Build an ExprState to generate the hash value for the expressions - * on the outer of the join. This ExprState must finish generating - * the hash value when HJ_FILL_OUTER() is true. Otherwise, - * ExecBuildHash32Expr will set up the ExprState to abort early if it - * finds a NULL. In these cases, we don't need to store these tuples - * in the hash table as the jointype does not require it. + * on the outer side of the join. 
*/ hjstate->hj_OuterHash = ExecBuildHash32Expr(hjstate->js.ps.ps_ResultTupleDesc, @@ -878,8 +984,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) node->hashkeys, hash_strict, &hjstate->js.ps, - 0, - HJ_FILL_OUTER(hjstate)); + 0); /* As above, but for the inner side of the join */ hashstate->hash_expr = @@ -890,8 +995,11 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) hash->hashkeys, hash_strict, &hashstate->ps, - 0, - HJ_FILL_INNER(hjstate)); + 0); + + /* Remember whether we need to save tuples with null join keys */ + hjstate->hj_KeepNullTuples = HJ_FILL_OUTER(hjstate); + hashstate->keep_null_tuples = HJ_FILL_INNER(hjstate); /* * Set up the skew table hash function while we have a record of the @@ -924,6 +1032,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) * initialize hash-specific info */ hjstate->hj_HashTable = NULL; + hjstate->hj_NullOuterTupleStore = NULL; hjstate->hj_FirstOuterTupleSlot = NULL; hjstate->hj_CurHashValue = 0; @@ -947,6 +1056,23 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) void ExecEndHashJoin(HashJoinState *node) { + HashState *hashNode = castNode(HashState, innerPlanState(node)); + + /* + * Free tuple stores if we made them (must do this before + * ExecHashTableDestroy deletes hashCxt) + */ + if (node->hj_NullOuterTupleStore) + { + tuplestore_end(node->hj_NullOuterTupleStore); + node->hj_NullOuterTupleStore = NULL; + } + if (hashNode->null_tuple_store) + { + tuplestore_end(hashNode->null_tuple_store); + hashNode->null_tuple_store = NULL; + } + /* * Free hash table */ @@ -1015,11 +1141,19 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode, if (!isnull) { + /* normal case with a non-null join key */ /* remember outer relation is not empty for possible rescan */ hjstate->hj_OuterNotEmpty = true; return slot; } + else if (hjstate->hj_KeepNullTuples) + { + /* null join key, but we must save tuple to be emitted later */ + if (hjstate->hj_NullOuterTupleStore == NULL) + hjstate->hj_NullOuterTupleStore = ExecHashBuildNullTupleStore(hashtable); + tuplestore_puttupleslot(hjstate->hj_NullOuterTupleStore, slot); + } /* * That tuple couldn't match because of a NULL, so discard it and @@ -1087,7 +1221,17 @@ ExecParallelHashJoinOuterGetTuple(PlanState *outerNode, &isnull)); if (!isnull) + { + /* normal case with a non-null join key */ return slot; + } + else if (hjstate->hj_KeepNullTuples) + { + /* null join key, but we must save tuple to be emitted later */ + if (hjstate->hj_NullOuterTupleStore == NULL) + hjstate->hj_NullOuterTupleStore = ExecHashBuildNullTupleStore(hashtable); + tuplestore_puttupleslot(hjstate->hj_NullOuterTupleStore, slot); + } /* * That tuple couldn't match because of a NULL, so discard it and @@ -1496,6 +1640,17 @@ ExecReScanHashJoin(HashJoinState *node) PlanState *outerPlan = outerPlanState(node); PlanState *innerPlan = innerPlanState(node); + /* + * We're always going to rescan the outer rel, so drop the associated + * null-keys tuplestore; we'll rebuild it during the rescan. (Must do + * this before ExecHashTableDestroy deletes hashCxt.) + */ + if (node->hj_NullOuterTupleStore) + { + tuplestore_end(node->hj_NullOuterTupleStore); + node->hj_NullOuterTupleStore = NULL; + } + /* * In a multi-batch join, we currently have to do rescans the hard way, * primarily because batch temp files may have already been released. 
But @@ -1505,6 +1660,10 @@ ExecReScanHashJoin(HashJoinState *node) */ if (node->hj_HashTable != NULL) { + HashState *hashNode = castNode(HashState, innerPlan); + + Assert(hashNode->hashtable == node->hj_HashTable); + if (node->hj_HashTable->nbatch == 1 && innerPlan->chgParam == NULL) { @@ -1529,15 +1688,20 @@ ExecReScanHashJoin(HashJoinState *node) */ node->hj_OuterNotEmpty = false; + /* + * Also, rewind inner null-key tuplestore so that we can return + * those tuples again. + */ + if (hashNode->null_tuple_store) + tuplestore_rescan(hashNode->null_tuple_store); + /* ExecHashJoin can skip the BUILD_HASHTABLE step */ node->hj_JoinState = HJ_NEED_NEW_OUTER; } else { /* must destroy and rebuild hash table */ - HashState *hashNode = castNode(HashState, innerPlan); - Assert(hashNode->hashtable == node->hj_HashTable); /* accumulate stats from old hash table, if wanted */ /* (this should match ExecShutdownHash) */ if (hashNode->ps.instrument && !hashNode->hinstrument) @@ -1546,6 +1710,14 @@ ExecReScanHashJoin(HashJoinState *node) if (hashNode->hinstrument) ExecHashAccumInstrumentation(hashNode->hinstrument, hashNode->hashtable); + + /* free inner null-key tuplestore before ExecHashTableDestroy */ + if (hashNode->null_tuple_store) + { + tuplestore_end(hashNode->null_tuple_store); + hashNode->null_tuple_store = NULL; + } + /* for safety, be sure to clear child plan node's pointer too */ hashNode->hashtable = NULL; @@ -1601,7 +1773,6 @@ ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate) ExprContext *econtext = hjstate->js.ps.ps_ExprContext; HashJoinTable hashtable = hjstate->hj_HashTable; TupleTableSlot *slot; - uint32 hashvalue; int i; Assert(hjstate->hj_FirstOuterTupleSlot == NULL); @@ -1610,6 +1781,7 @@ ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate) for (;;) { bool isnull; + uint32 hashvalue; slot = ExecProcNode(outerState); if (TupIsNull(slot)) @@ -1624,6 +1796,7 @@ ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate) if (!isnull) { + /* normal case with a non-null join key */ int batchno; int bucketno; bool shouldFree; @@ -1637,6 +1810,15 @@ ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate) if (shouldFree) heap_free_minimal_tuple(mintup); } + else if (hjstate->hj_KeepNullTuples) + { + /* null join key, but we must save tuple to be emitted later */ + if (hjstate->hj_NullOuterTupleStore == NULL) + hjstate->hj_NullOuterTupleStore = ExecHashBuildNullTupleStore(hashtable); + tuplestore_puttupleslot(hjstate->hj_NullOuterTupleStore, slot); + } + /* else we can just discard the tuple immediately */ + CHECK_FOR_INTERRUPTS(); } @@ -1715,6 +1897,7 @@ ExecHashJoinReInitializeDSM(HashJoinState *state, ParallelContext *pcxt) { int plan_node_id = state->js.ps.plan->plan_node_id; ParallelHashJoinState *pstate; + HashState *hashNode; /* Nothing to do if we failed to create a DSM segment. */ if (pcxt->seg == NULL) @@ -1744,6 +1927,20 @@ ExecHashJoinReInitializeDSM(HashJoinState *state, ParallelContext *pcxt) /* Clear any shared batch files. */ SharedFileSetDeleteAll(&pstate->fileset); + /* We'd better clear our local null-key tuplestores, too. */ + if (state->hj_NullOuterTupleStore) + { + tuplestore_end(state->hj_NullOuterTupleStore); + state->hj_NullOuterTupleStore = NULL; + } + hashNode = (HashState *) innerPlanState(state); + if (hashNode->null_tuple_store) + { + tuplestore_end(hashNode->null_tuple_store); + hashNode->null_tuple_store = NULL; + } + + /* Reset build_barrier to PHJ_BUILD_ELECT so we can go around again. 
*/ BarrierInit(&pstate->build_barrier, 0); } diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index c9aecab8d66c..0e19ecc2f8d6 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -1152,6 +1152,38 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward, } } +/* + * tuplestore_gettupleslot_force - exported function to fetch a tuple + * + * This is identical to tuplestore_gettupleslot except the given slot can be + * any kind of slot; it need not be one that will accept a MinimalTuple. + */ +bool +tuplestore_gettupleslot_force(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot) +{ + MinimalTuple tuple; + bool should_free; + + tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free); + + if (tuple) + { + if (copy && !should_free) + { + tuple = heap_copy_minimal_tuple(tuple, 0); + should_free = true; + } + ExecForceStoreMinimalTuple(tuple, slot, should_free); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + /* * tuplestore_advance - exported function to adjust position without fetching * diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 104b059544dd..2a926c0dc354 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -338,7 +338,7 @@ extern ExprState *ExecBuildHash32Expr(TupleDesc desc, const List *collations, const List *hash_exprs, const bool *opstrict, PlanState *parent, - uint32 init_value, bool keep_nulls); + uint32 init_value); extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, const TupleTableSlotOps *lops, const TupleTableSlotOps *rops, int numCols, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index ecff4842fd38..5f59b61f671f 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -68,6 +68,15 @@ * inner batch file. Subsequently, while reading either inner or outer batch * files, we might find tuples that no longer belong to the current batch; * if so, we just dump them out to the correct batch file. + * + * If an input tuple has a null join key, then it cannot match anything from + * the other side of the join. Normally we can just discard such a tuple + * immediately, but if it comes from the outer side of an outer join then we + * must emit it with null-extension of the other side. For various reasons + * it's not convenient to do that immediately on seeing the tuple, so we dump + * the tuple into a tuplestore and emit it later. (In the unlikely but + * supported case of a non-strict join operator, we treat null keys as normal + * data.) 
* ---------------------------------------------------------------- */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 3c1a09415aa1..55b89febd1a7 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -64,6 +64,7 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, int *numbatches, int *num_skew_mcvs); extern int ExecHashGetSkewBucket(HashJoinTable hashtable, uint32 hashvalue); +extern Tuplestorestate *ExecHashBuildNullTupleStore(HashJoinTable hashtable); extern void ExecHashEstimate(HashState *node, ParallelContext *pcxt); extern void ExecHashInitializeDSM(HashState *node, ParallelContext *pcxt); extern void ExecHashInitializeWorker(HashState *node, ParallelWorkerContext *pwcxt); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2492282213ff..97cd872842b4 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2236,8 +2236,11 @@ typedef struct MergeJoinState * hj_NullOuterTupleSlot prepared null tuple for right/right-anti/full * outer joins * hj_NullInnerTupleSlot prepared null tuple for left/full outer joins + * hj_NullOuterTupleStore tuplestore holding outer tuples that have + * null join keys (but must be emitted anyway) * hj_FirstOuterTupleSlot first tuple retrieved from outer plan * hj_JoinState current state of ExecHashJoin state machine + * hj_KeepNullTuples true to keep outer tuples with null join keys * hj_MatchedOuter true if found a join match for current outer * hj_OuterNotEmpty true if outer relation known not empty * ---------------- @@ -2261,8 +2264,10 @@ typedef struct HashJoinState TupleTableSlot *hj_HashTupleSlot; TupleTableSlot *hj_NullOuterTupleSlot; TupleTableSlot *hj_NullInnerTupleSlot; + Tuplestorestate *hj_NullOuterTupleStore; TupleTableSlot *hj_FirstOuterTupleSlot; int hj_JoinState; + bool hj_KeepNullTuples; bool hj_MatchedOuter; bool hj_OuterNotEmpty; } HashJoinState; @@ -2812,6 +2817,9 @@ typedef struct HashState FmgrInfo *skew_hashfunction; /* lookup data for skew hash function */ Oid skew_collation; /* collation to call skew_hashfunction with */ + Tuplestorestate *null_tuple_store; /* where to put null-keyed tuples */ + bool keep_null_tuples; /* do we need to save such tuples? 
*/ + /* * In a parallelized hash join, the leader retains a pointer to the * shared-memory stats area in its shared_info field, and then copies the diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h index 865ba7b82655..b9e152c17017 100644 --- a/src/include/utils/tuplestore.h +++ b/src/include/utils/tuplestore.h @@ -73,6 +73,9 @@ extern bool tuplestore_in_memory(Tuplestorestate *state); extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot); +extern bool tuplestore_gettupleslot_force(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot); + extern bool tuplestore_advance(Tuplestorestate *state, bool forward); extern bool tuplestore_skiptuples(Tuplestorestate *state, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index f35a0b18c37c..334d38b1052a 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4590,7 +4590,7 @@ order by fault; explain (costs off) select * from (values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys) -left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x +left join (values (1, 10), (2, 20), (2, null)) as v2(v2x,v2y) on v2x = v1x left join unnest(v1ys) as u1(u1y) on u1y = v2y; QUERY PLAN ------------------------------------------------------------- @@ -4606,13 +4606,14 @@ left join unnest(v1ys) as u1(u1y) on u1y = v2y; select * from (values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys) -left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x +left join (values (1, 10), (2, 20), (2, null)) as v2(v2x,v2y) on v2x = v1x left join unnest(v1ys) as u1(u1y) on u1y = v2y; v1x | v1ys | v2x | v2y | u1y -----+---------+-----+-----+----- 1 | {10,20} | 1 | 10 | 10 2 | {20,30} | 2 | 20 | 20 -(2 rows) + 2 | {20,30} | 2 | | +(3 rows) -- -- test handling of potential equivalence clauses above outer joins diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out index 4fc34a0e72ab..3df9f653d351 100644 --- a/src/test/regress/expected/join_hash.out +++ b/src/test/regress/expected/join_hash.out @@ -53,6 +53,7 @@ $$; -- estimated size. create table simple as select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +insert into simple values (null, null); alter table simple set (parallel_workers = 2); analyze simple; -- Make a relation whose size we will under-estimate. 
We want stats @@ -308,7 +309,7 @@ $$); select count(*) from simple r full outer join simple s using (id); count ------- - 20000 + 20002 (1 row) rollback to settings; @@ -786,7 +787,7 @@ explain (costs off) select count(*) from simple r full outer join simple s using (id); count ------- - 20000 + 20002 (1 row) rollback to settings; @@ -809,7 +810,7 @@ explain (costs off) select count(*) from simple r full outer join simple s using (id); count ------- - 20000 + 20002 (1 row) rollback to settings; @@ -834,7 +835,7 @@ explain (costs off) select count(*) from simple r full outer join simple s using (id); count ------- - 20000 + 20002 (1 row) rollback to settings; @@ -857,7 +858,7 @@ explain (costs off) select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); count ------- - 40000 + 40002 (1 row) rollback to settings; @@ -880,7 +881,7 @@ explain (costs off) select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); count ------- - 40000 + 40002 (1 row) rollback to settings; @@ -905,7 +906,7 @@ explain (costs off) select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); count ------- - 40000 + 40002 (1 row) rollback to settings; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index cc5128add4df..c4946d39e77a 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1554,12 +1554,12 @@ order by fault; explain (costs off) select * from (values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys) -left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x +left join (values (1, 10), (2, 20), (2, null)) as v2(v2x,v2y) on v2x = v1x left join unnest(v1ys) as u1(u1y) on u1y = v2y; select * from (values (1, array[10,20]), (2, array[20,30])) as v1(v1x,v1ys) -left join (values (1, 10), (2, 20)) as v2(v2x,v2y) on v2x = v1x +left join (values (1, 10), (2, 20), (2, null)) as v2(v2x,v2y) on v2x = v1x left join unnest(v1ys) as u1(u1y) on u1y = v2y; -- diff --git a/src/test/regress/sql/join_hash.sql b/src/test/regress/sql/join_hash.sql index 6b0688ab0a61..11e3a164c764 100644 --- a/src/test/regress/sql/join_hash.sql +++ b/src/test/regress/sql/join_hash.sql @@ -57,6 +57,7 @@ $$; -- estimated size. create table simple as select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +insert into simple values (null, null); alter table simple set (parallel_workers = 2); analyze simple;
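
A minimal standalone illustration of the two new state-machine states (not part of the patch's test suite; the enable_* settings are only there to coax the planner into a hash join). Note the patch changes the mechanism, not the results: these queries return the same rows before and after, but with the patch the NULL-keyed rows bypass the hash table and are replayed from a tuplestore in HJ_FILL_OUTER_NULL_TUPLES / HJ_FILL_INNER_NULL_TUPLES.

set enable_mergejoin = off;
set enable_nestloop = off;
-- outer side: the NULL-keyed row of o matches nothing, but a left join
-- must still emit it null-extended (HJ_FILL_OUTER_NULL_TUPLES)
select * from (values (1), (2), (null)) as o(x)
  left join (values (1), (null)) as i(y) on o.x = i.y;
-- rows: (1,1), (2,NULL), (NULL,NULL)
-- both sides: a full join additionally emits i's NULL-keyed row
-- (HJ_FILL_INNER_NULL_TUPLES)
select * from (values (1), (2), (null)) as o(x)
  full join (values (1), (null)) as i(y) on o.x = i.y;
-- rows: (1,1), (2,NULL), (NULL,NULL), (NULL,NULL)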
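
The updated join_hash expected outputs can be cross-checked from the join type alone: after the added insert, simple has 20001 rows, and a self-join sees one NULL-keyed row on each side. A sketch of the arithmetic (the inner- and left-join queries are illustrative additions, not test changes):

select count(*) from simple r join simple s using (id);
-- inner join: neither hj_KeepNullTuples nor keep_null_tuples is set, so
-- both NULL-keyed rows are discarded on input: 20000
select count(*) from simple r left join simple s using (id);
-- left join: only HJ_FILL_OUTER applies; r's NULL row is null-extended:
-- 20000 + 1 = 20001
select count(*) from simple r full outer join simple s using (id);
-- full join: both sides fill, so each NULL row is emitted once:
-- 20000 + 2 = 20002, as in the updated expected output

Since ExecHashBuildNullTupleStore is only called on the first NULL-keyed tuple encountered, the common all-non-NULL case never builds a tuplestore at all.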
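
Rescans deserve a look too, since ExecReScanHashJoin treats the two stores asymmetrically: the outer-side store is always dropped and rebuilt during the rescan, while in the single-batch fast path the inner-side store is kept and merely rewound with tuplestore_rescan(). A hypothetical way to exercise that path (not from the patch; assumes enable_material = off so that no Material node caches the join output, and that the join fits in one batch):

set enable_material = off;
select g.i, t.n
from generate_series(1, 3) as g(i),
     lateral (select count(*) + g.i as n
              from simple r full join simple s using (id)) as t;
-- the correlated reference to g.i forces the lateral subquery to be
-- re-executed for each outer row; every execution must replay the
-- NULL-keyed rows again, giving n = 20002 + g.i each time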