Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 29f45e2

Browse files
committed
Use a hash table to speed up NOT IN(values)
Similar to 50e17ad, which allowed hash tables to be used for IN clauses with a set of constants, here we add the same feature for NOT IN clauses. NOT IN evaluates the same as: WHERE a <> v1 AND a <> v2 AND a <> v3. Obviously, if we're using a hash table we must be exactly equivalent to that and return the same result taking into account that either side of the condition could contain a NULL. This requires a little bit of special handling to make work with the hash table version. When processing NOT IN, the ScalarArrayOpExpr's operator will be the <> operator. To be able to build and lookup a hash table we must use the <>'s negator operator. The planner checks if that exists and is hashable and sets the relevant fields in ScalarArrayOpExpr to instruct the executor to use hashing. Author: David Rowley, James Coleman Reviewed-by: James Coleman, Zhihong Yu Discussion: https://postgr.es/m/CAApHDvoF1mum_FRk6D621edcB6KSHBi2+GAgWmioj5AhOu2vwQ@mail.gmail.com
1 parent d854720 commit 29f45e2

File tree

16 files changed

+240
-27
lines changed

16 files changed

+240
-27
lines changed

src/backend/executor/execExpr.c

+20-4
Original file line numberDiff line numberDiff line change
@@ -1205,19 +1205,34 @@ ExecInitExprRec(Expr *node, ExprState *state,
12051205
AclResult aclresult;
12061206
FmgrInfo *hash_finfo;
12071207
FunctionCallInfo hash_fcinfo;
1208+
Oid cmpfuncid;
1209+
1210+
/*
1211+
* Select the correct comparison function. When we do hashed
1212+
* NOT IN clauses, the opfuncid will be the inequality
1213+
* comparison function and negfuncid will be set to equality.
1214+
* We need to use the equality function for hash probes.
1215+
*/
1216+
if (OidIsValid(opexpr->negfuncid))
1217+
{
1218+
Assert(OidIsValid(opexpr->hashfuncid));
1219+
cmpfuncid = opexpr->negfuncid;
1220+
}
1221+
else
1222+
cmpfuncid = opexpr->opfuncid;
12081223

12091224
Assert(list_length(opexpr->args) == 2);
12101225
scalararg = (Expr *) linitial(opexpr->args);
12111226
arrayarg = (Expr *) lsecond(opexpr->args);
12121227

12131228
/* Check permission to call function */
1214-
aclresult = pg_proc_aclcheck(opexpr->opfuncid,
1229+
aclresult = pg_proc_aclcheck(cmpfuncid,
12151230
GetUserId(),
12161231
ACL_EXECUTE);
12171232
if (aclresult != ACLCHECK_OK)
12181233
aclcheck_error(aclresult, OBJECT_FUNCTION,
1219-
get_func_name(opexpr->opfuncid));
1220-
InvokeFunctionExecuteHook(opexpr->opfuncid);
1234+
get_func_name(cmpfuncid));
1235+
InvokeFunctionExecuteHook(cmpfuncid);
12211236

12221237
if (OidIsValid(opexpr->hashfuncid))
12231238
{
@@ -1233,7 +1248,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
12331248
/* Set up the primary fmgr lookup information */
12341249
finfo = palloc0(sizeof(FmgrInfo));
12351250
fcinfo = palloc0(SizeForFunctionCallInfo(2));
1236-
fmgr_info(opexpr->opfuncid, finfo);
1251+
fmgr_info(cmpfuncid, finfo);
12371252
fmgr_info_set_expr((Node *) node, finfo);
12381253
InitFunctionCallInfoData(*fcinfo, finfo, 2,
12391254
opexpr->inputcollid, NULL, NULL);
@@ -1274,6 +1289,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
12741289

12751290
/* And perform the operation */
12761291
scratch.opcode = EEOP_HASHED_SCALARARRAYOP;
1292+
scratch.d.hashedscalararrayop.inclause = opexpr->useOr;
12771293
scratch.d.hashedscalararrayop.finfo = finfo;
12781294
scratch.d.hashedscalararrayop.fcinfo_data = fcinfo;
12791295
scratch.d.hashedscalararrayop.fn_addr = finfo->fn_addr;

src/backend/executor/execExprInterp.c

+15-2
Original file line numberDiff line numberDiff line change
@@ -3493,6 +3493,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
34933493
{
34943494
ScalarArrayOpExprHashTable *elements_tab = op->d.hashedscalararrayop.elements_tab;
34953495
FunctionCallInfo fcinfo = op->d.hashedscalararrayop.fcinfo_data;
3496+
bool inclause = op->d.hashedscalararrayop.inclause;
34963497
bool strictfunc = op->d.hashedscalararrayop.finfo->fn_strict;
34973498
Datum scalar = fcinfo->args[0].value;
34983499
bool scalar_isnull = fcinfo->args[0].isnull;
@@ -3596,7 +3597,12 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
35963597
/* Check the hash to see if we have a match. */
35973598
hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar);
35983599

3599-
result = BoolGetDatum(hashfound);
3600+
/* the result depends on if the clause is an IN or NOT IN clause */
3601+
if (inclause)
3602+
result = BoolGetDatum(hashfound); /* IN */
3603+
else
3604+
result = BoolGetDatum(!hashfound); /* NOT IN */
3605+
36003606
resultnull = false;
36013607

36023608
/*
@@ -3605,7 +3611,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
36053611
* hashtable, but instead marked if we found any when building the table
36063612
* in has_nulls.
36073613
*/
3608-
if (!DatumGetBool(result) && op->d.hashedscalararrayop.has_nulls)
3614+
if (!hashfound && op->d.hashedscalararrayop.has_nulls)
36093615
{
36103616
if (strictfunc)
36113617
{
@@ -3633,6 +3639,13 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
36333639

36343640
result = op->d.hashedscalararrayop.fn_addr(fcinfo);
36353641
resultnull = fcinfo->isnull;
3642+
3643+
/*
3644+
* Reverse the result for NOT IN clauses since the above function
3645+
* is the equality function and we need not-equals.
3646+
*/
3647+
if (!inclause)
3648+
result = !result;
36363649
}
36373650
}
36383651

src/backend/nodes/copyfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -1718,6 +1718,7 @@ _copyScalarArrayOpExpr(const ScalarArrayOpExpr *from)
17181718
COPY_SCALAR_FIELD(opno);
17191719
COPY_SCALAR_FIELD(opfuncid);
17201720
COPY_SCALAR_FIELD(hashfuncid);
1721+
COPY_SCALAR_FIELD(negfuncid);
17211722
COPY_SCALAR_FIELD(useOr);
17221723
COPY_SCALAR_FIELD(inputcollid);
17231724
COPY_NODE_FIELD(args);

src/backend/nodes/equalfuncs.c

+6
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,12 @@ _equalScalarArrayOpExpr(const ScalarArrayOpExpr *a, const ScalarArrayOpExpr *b)
414414
b->hashfuncid != 0)
415415
return false;
416416

417+
/* Likewise for the negfuncid */
418+
if (a->negfuncid != b->negfuncid &&
419+
a->negfuncid != 0 &&
420+
b->negfuncid != 0)
421+
return false;
422+
417423
COMPARE_SCALAR_FIELD(useOr);
418424
COMPARE_SCALAR_FIELD(inputcollid);
419425
COMPARE_NODE_FIELD(args);

src/backend/nodes/outfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,7 @@ _outScalarArrayOpExpr(StringInfo str, const ScalarArrayOpExpr *node)
13111311
WRITE_OID_FIELD(opno);
13121312
WRITE_OID_FIELD(opfuncid);
13131313
WRITE_OID_FIELD(hashfuncid);
1314+
WRITE_OID_FIELD(negfuncid);
13141315
WRITE_BOOL_FIELD(useOr);
13151316
WRITE_OID_FIELD(inputcollid);
13161317
WRITE_NODE_FIELD(args);

src/backend/nodes/readfuncs.c

+1
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,7 @@ _readScalarArrayOpExpr(void)
832832
READ_OID_FIELD(opno);
833833
READ_OID_FIELD(opfuncid);
834834
READ_OID_FIELD(hashfuncid);
835+
READ_OID_FIELD(negfuncid);
835836
READ_BOOL_FIELD(useOr);
836837
READ_OID_FIELD(inputcollid);
837838
READ_NODE_FIELD(args);

src/backend/optimizer/plan/setrefs.c

+3
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,9 @@ fix_expr_common(PlannerInfo *root, Node *node)
16871687

16881688
if (!OidIsValid(saop->hashfuncid))
16891689
record_plan_function_dependency(root, saop->hashfuncid);
1690+
1691+
if (!OidIsValid(saop->negfuncid))
1692+
record_plan_function_dependency(root, saop->negfuncid);
16901693
}
16911694
else if (IsA(node, Const))
16921695
{

src/backend/optimizer/prep/prepqual.c

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ negate_clause(Node *node)
128128
newopexpr->opno = negator;
129129
newopexpr->opfuncid = InvalidOid;
130130
newopexpr->hashfuncid = InvalidOid;
131+
newopexpr->negfuncid = InvalidOid;
131132
newopexpr->useOr = !saopexpr->useOr;
132133
newopexpr->inputcollid = saopexpr->inputcollid;
133134
newopexpr->args = saopexpr->args;

src/backend/optimizer/util/clauses.c

+60-16
Original file line numberDiff line numberDiff line change
@@ -2140,27 +2140,71 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
21402140
Oid lefthashfunc;
21412141
Oid righthashfunc;
21422142

2143-
if (saop->useOr && arrayarg && IsA(arrayarg, Const) &&
2144-
!((Const *) arrayarg)->constisnull &&
2145-
get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
2146-
lefthashfunc == righthashfunc)
2143+
if (arrayarg && IsA(arrayarg, Const) &&
2144+
!((Const *) arrayarg)->constisnull)
21472145
{
2148-
Datum arrdatum = ((Const *) arrayarg)->constvalue;
2149-
ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
2150-
int nitems;
2146+
if (saop->useOr)
2147+
{
2148+
if (get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
2149+
lefthashfunc == righthashfunc)
2150+
{
2151+
Datum arrdatum = ((Const *) arrayarg)->constvalue;
2152+
ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
2153+
int nitems;
21512154

2152-
/*
2153-
* Only fill in the hash functions if the array looks large enough
2154-
* for it to be worth hashing instead of doing a linear search.
2155-
*/
2156-
nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
2155+
/*
2156+
* Only fill in the hash functions if the array looks
2157+
* large enough for it to be worth hashing instead of
2158+
* doing a linear search.
2159+
*/
2160+
nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
21572161

2158-
if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
2162+
if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
2163+
{
2164+
/* Looks good. Fill in the hash functions */
2165+
saop->hashfuncid = lefthashfunc;
2166+
}
2167+
return true;
2168+
}
2169+
}
2170+
else /* !saop->useOr */
21592171
{
2160-
/* Looks good. Fill in the hash functions */
2161-
saop->hashfuncid = lefthashfunc;
2172+
Oid negator = get_negator(saop->opno);
2173+
2174+
/*
2175+
* Check if this is a NOT IN using an operator whose negator
2176+
* is hashable. If so we can still build a hash table and
2177+
* just ensure the lookup items are not in the hash table.
2178+
*/
2179+
if (OidIsValid(negator) &&
2180+
get_op_hash_functions(negator, &lefthashfunc, &righthashfunc) &&
2181+
lefthashfunc == righthashfunc)
2182+
{
2183+
Datum arrdatum = ((Const *) arrayarg)->constvalue;
2184+
ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum);
2185+
int nitems;
2186+
2187+
/*
2188+
* Only fill in the hash functions if the array looks
2189+
* large enough for it to be worth hashing instead of
2190+
* doing a linear search.
2191+
*/
2192+
nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
2193+
2194+
if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP)
2195+
{
2196+
/* Looks good. Fill in the hash functions */
2197+
saop->hashfuncid = lefthashfunc;
2198+
2199+
/*
2200+
* Also set the negfuncid. The executor will need
2201+
* that to perform hashtable lookups.
2202+
*/
2203+
saop->negfuncid = get_opcode(negator);
2204+
}
2205+
return true;
2206+
}
21622207
}
2163-
return true;
21642208
}
21652209
}
21662210

src/backend/parser/parse_oper.c

+1
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,7 @@ make_scalar_array_op(ParseState *pstate, List *opname,
895895
result->opno = oprid(tup);
896896
result->opfuncid = opform->oprcode;
897897
result->hashfuncid = InvalidOid;
898+
result->negfuncid = InvalidOid;
898899
result->useOr = useOr;
899900
/* inputcollid will be set by parse_collate.c */
900901
result->args = args;

src/backend/partitioning/partbounds.c

+1
Original file line numberDiff line numberDiff line change
@@ -3878,6 +3878,7 @@ make_partition_op_expr(PartitionKey key, int keynum,
38783878
saopexpr->opno = operoid;
38793879
saopexpr->opfuncid = get_opcode(operoid);
38803880
saopexpr->hashfuncid = InvalidOid;
3881+
saopexpr->negfuncid = InvalidOid;
38813882
saopexpr->useOr = true;
38823883
saopexpr->inputcollid = key->partcollation[keynum];
38833884
saopexpr->args = list_make2(arg1, arrexpr);

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 202106151
56+
#define CATALOG_VERSION_NO 202107071
5757

5858
#endif

src/include/executor/execExpr.h

+1
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ typedef struct ExprEvalStep
574574
struct
575575
{
576576
bool has_nulls;
577+
bool inclause; /* true for IN and false for NOT IN */
577578
struct ScalarArrayOpExprHashTable *elements_tab;
578579
FmgrInfo *finfo; /* function's lookup data */
579580
FunctionCallInfo fcinfo_data; /* arguments etc */

src/include/nodes/primnodes.h

+14-4
Original file line numberDiff line numberDiff line change
@@ -580,17 +580,27 @@ typedef OpExpr NullIfExpr;
580580
* the result type (or the collation) because it must be boolean.
581581
*
582582
* A ScalarArrayOpExpr with a valid hashfuncid is evaluated during execution
583-
* by building a hash table containing the Const values from the rhs arg.
584-
* This table is probed during expression evaluation. Only useOr=true
585-
* ScalarArrayOpExpr with Const arrays on the rhs can have the hashfuncid
586-
* field set. See convert_saop_to_hashed_saop().
583+
* by building a hash table containing the Const values from the RHS arg.
584+
* This table is probed during expression evaluation. The planner will set
585+
* hashfuncid to the hash function which must be used to build and probe the
586+
* hash table. The executor determines if it should use hash-based checks or
587+
* the more traditional means based on if the hashfuncid is set or not.
588+
*
589+
* When performing hashed NOT IN, the negfuncid will also be set to the
590+
* equality function which the hash table must use to build and probe the hash
591+
* table. opno and opfuncid will remain set to the <> operator and its
592+
* corresponding function and won't be used during execution. For
593+
* non-hashtable based NOT INs, negfuncid will be set to InvalidOid. See
594+
* convert_saop_to_hashed_saop().
587595
*/
588596
typedef struct ScalarArrayOpExpr
589597
{
590598
Expr xpr;
591599
Oid opno; /* PG_OPERATOR OID of the operator */
592600
Oid opfuncid; /* PG_PROC OID of comparison function */
593601
Oid hashfuncid; /* PG_PROC OID of hash func or InvalidOid */
602+
Oid negfuncid; /* PG_PROC OID of negator of opfuncid function
603+
* or InvalidOid. See above */
594604
bool useOr; /* true for ANY, false for ALL */
595605
Oid inputcollid; /* OID of collation that operator should use */
596606
List *args; /* the scalar and array operands */

0 commit comments

Comments
 (0)