Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit d4378c0

Browse files
committed
Transform OR-clauses to SAOP's during index matching
This commit makes match_clause_to_indexcol() match "(indexkey op C1) OR (indexkey op C2) ... (indexkey op CN)" expression to the index while transforming it into "indexkey op ANY(ARRAY[C1, C2, ...])" (ScalarArrayOpExpr node). This transformation allows handling long OR-clauses with single IndexScan avoiding diving them into a slower BitmapOr. We currently restrict Ci to be either Const or Param to apply this transformation only when it's clearly beneficial. However, in the future, we might switch to a liberal understanding of constants, as it is in other cases. Discussion: https://postgr.es/m/567ED6CA.2040504%40sigaev.ru Author: Alena Rybakina, Andrey Lepikhov, Alexander Korotkov Reviewed-by: Peter Geoghegan, Ranier Vilela, Alexander Korotkov, Robert Haas Reviewed-by: Jian He, Tom Lane, Nikolay Shaplov
1 parent 869ee4f commit d4378c0

File tree

11 files changed

+732
-23
lines changed

11 files changed

+732
-23
lines changed

src/backend/optimizer/path/indxpath.c

+280-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "access/stratnum.h"
2121
#include "access/sysattr.h"
2222
#include "catalog/pg_am.h"
23+
#include "catalog/pg_amop.h"
2324
#include "catalog/pg_operator.h"
2425
#include "catalog/pg_opfamily.h"
2526
#include "catalog/pg_type.h"
@@ -32,8 +33,10 @@
3233
#include "optimizer/paths.h"
3334
#include "optimizer/prep.h"
3435
#include "optimizer/restrictinfo.h"
36+
#include "utils/array.h"
3537
#include "utils/lsyscache.h"
3638
#include "utils/selfuncs.h"
39+
#include "utils/syscache.h"
3740

3841

3942
/* XXX see PartCollMatchesExprColl */
@@ -177,6 +180,10 @@ static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root,
177180
RestrictInfo *rinfo,
178181
int indexcol,
179182
IndexOptInfo *index);
183+
static IndexClause *match_orclause_to_indexcol(PlannerInfo *root,
184+
RestrictInfo *rinfo,
185+
int indexcol,
186+
IndexOptInfo *index);
180187
static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root,
181188
RestrictInfo *rinfo,
182189
int indexcol,
@@ -2149,7 +2156,10 @@ match_clause_to_index(PlannerInfo *root,
21492156
* (3) must match the collation of the index, if collation is relevant.
21502157
*
21512158
* Our definition of "const" is exceedingly liberal: we allow anything that
2152-
* doesn't involve a volatile function or a Var of the index's relation.
2159+
* doesn't involve a volatile function or a Var of the index's relation
2160+
* except for a boolean OR expression input: due to a trade-off between the
2161+
* expected execution speedup and planning complexity, we limit or->saop
2162+
* transformation by obvious cases when an index scan can get a profit.
21532163
* In particular, Vars belonging to other relations of the query are
21542164
* accepted here, since a clause of that form can be used in a
21552165
* parameterized indexscan. It's the responsibility of higher code levels
@@ -2179,6 +2189,10 @@ match_clause_to_index(PlannerInfo *root,
21792189
* It is also possible to match ScalarArrayOpExpr clauses to indexes, when
21802190
* the clause is of the form "indexkey op ANY (arrayconst)".
21812191
*
2192+
* It is also possible to match a list of OR clauses if it might be
2193+
* transformed into a single ScalarArrayOpExpr clause. On success,
2194+
* the returning index clause will contain a trasformed clause.
2195+
*
21822196
* For boolean indexes, it is also possible to match the clause directly
21832197
* to the indexkey; or perhaps the clause is (NOT indexkey).
21842198
*
@@ -2228,9 +2242,9 @@ match_clause_to_indexcol(PlannerInfo *root,
22282242
}
22292243

22302244
/*
2231-
* Clause must be an opclause, funcclause, ScalarArrayOpExpr, or
2232-
* RowCompareExpr. Or, if the index supports it, we can handle IS
2233-
* NULL/NOT NULL clauses.
2245+
* Clause must be an opclause, funcclause, ScalarArrayOpExpr,
2246+
* RowCompareExpr, or OR-clause that could be converted to SAOP. Or, if
2247+
* the index supports it, we can handle IS NULL/NOT NULL clauses.
22342248
*/
22352249
if (IsA(clause, OpExpr))
22362250
{
@@ -2248,6 +2262,10 @@ match_clause_to_indexcol(PlannerInfo *root,
22482262
{
22492263
return match_rowcompare_to_indexcol(root, rinfo, indexcol, index);
22502264
}
2265+
else if (restriction_is_or_clause(rinfo))
2266+
{
2267+
return match_orclause_to_indexcol(root, rinfo, indexcol, index);
2268+
}
22512269
else if (index->amsearchnulls && IsA(clause, NullTest))
22522270
{
22532271
NullTest *nt = (NullTest *) clause;
@@ -2771,6 +2789,264 @@ match_rowcompare_to_indexcol(PlannerInfo *root,
27712789
return NULL;
27722790
}
27732791

2792+
/*
2793+
* match_orclause_to_indexcol()
2794+
* Handles the OR-expr case for match_clause_to_indexcol() in the case
2795+
* when it could be transformed to ScalarArrayOpExpr.
2796+
*
2797+
* In this routine, we attempt to transform a list of OR-clause args into a
2798+
* single SAOP expression matching the target index column. On success,
2799+
* return an IndexClause, containing the transformed expression or NULL,
2800+
* if failed.
2801+
*/
2802+
static IndexClause *
2803+
match_orclause_to_indexcol(PlannerInfo *root,
2804+
RestrictInfo *rinfo,
2805+
int indexcol,
2806+
IndexOptInfo *index)
2807+
{
2808+
ListCell *lc;
2809+
BoolExpr *orclause = (BoolExpr *) rinfo->orclause;
2810+
Node *indexExpr = NULL;
2811+
List *consts = NIL;
2812+
Node *arrayNode = NULL;
2813+
ScalarArrayOpExpr *saopexpr = NULL;
2814+
Oid matchOpno = InvalidOid;
2815+
IndexClause *iclause;
2816+
Oid consttype = InvalidOid;
2817+
Oid arraytype = InvalidOid;
2818+
Oid inputcollid = InvalidOid;
2819+
bool firstTime = true;
2820+
bool haveParam = false;
2821+
2822+
Assert(IsA(orclause, BoolExpr));
2823+
Assert(orclause->boolop == OR_EXPR);
2824+
2825+
/*
2826+
* Try to convert a list of OR-clauses to a single SAOP expression. Each
2827+
* OR entry must be in the form: (indexkey operator constant) or (constant
2828+
* operator indexkey). Operators of all the entries must match. Constant
2829+
* might be either Const or Param. To be effective, give up on the first
2830+
* non-matching entry. Exit is implemented as a break from the loop,
2831+
* which is catched afterwards.
2832+
*/
2833+
foreach(lc, orclause->args)
2834+
{
2835+
RestrictInfo *subRinfo;
2836+
OpExpr *subClause;
2837+
Oid opno;
2838+
Node *leftop,
2839+
*rightop;
2840+
Node *constExpr;
2841+
2842+
if (!IsA(lfirst(lc), RestrictInfo))
2843+
break;
2844+
2845+
subRinfo = (RestrictInfo *) lfirst(lc);
2846+
2847+
/* Only operator clauses can match */
2848+
if (!IsA(subRinfo->clause, OpExpr))
2849+
break;
2850+
2851+
subClause = (OpExpr *) subRinfo->clause;
2852+
opno = subClause->opno;
2853+
2854+
/* Only binary operators can match */
2855+
if (list_length(subClause->args) != 2)
2856+
break;
2857+
2858+
/*
2859+
* The parameters below must match between sub-rinfo and its parent as
2860+
* make_restrictinfo() fills them with the same values, and further
2861+
* modifications are also the same for the whole subtree. However,
2862+
* still make a sanity check.
2863+
*/
2864+
Assert(subRinfo->is_pushed_down == rinfo->is_pushed_down);
2865+
Assert(subRinfo->is_clone == rinfo->is_clone);
2866+
Assert(subRinfo->security_level == rinfo->security_level);
2867+
Assert(bms_equal(subRinfo->incompatible_relids, rinfo->incompatible_relids));
2868+
Assert(bms_equal(subRinfo->outer_relids, rinfo->outer_relids));
2869+
2870+
/*
2871+
* Also, check that required_relids in sub-rinfo is subset of parent's
2872+
* required_relids.
2873+
*/
2874+
Assert(bms_is_subset(subRinfo->required_relids, rinfo->required_relids));
2875+
2876+
/* Only the operator returning a boolean suit the transformation. */
2877+
if (get_op_rettype(opno) != BOOLOID)
2878+
break;
2879+
2880+
/*
2881+
* Check for clauses of the form: (indexkey operator constant) or
2882+
* (constant operator indexkey). Determine indexkey side first, check
2883+
* the constant later.
2884+
*/
2885+
leftop = (Node *) linitial(subClause->args);
2886+
rightop = (Node *) lsecond(subClause->args);
2887+
if (match_index_to_operand(leftop, indexcol, index))
2888+
{
2889+
indexExpr = leftop;
2890+
constExpr = rightop;
2891+
}
2892+
else if (match_index_to_operand(rightop, indexcol, index))
2893+
{
2894+
opno = get_commutator(opno);
2895+
if (!OidIsValid(opno))
2896+
{
2897+
/* commutator doesn't exist, we can't reverse the order */
2898+
break;
2899+
}
2900+
indexExpr = rightop;
2901+
constExpr = leftop;
2902+
}
2903+
else
2904+
{
2905+
break;
2906+
}
2907+
2908+
/*
2909+
* Ignore any RelabelType node above the operands. This is needed to
2910+
* be able to apply indexscanning in binary-compatible-operator cases.
2911+
* Note: we can assume there is at most one RelabelType node;
2912+
* eval_const_expressions() will have simplified if more than one.
2913+
*/
2914+
if (IsA(constExpr, RelabelType))
2915+
constExpr = (Node *) ((RelabelType *) constExpr)->arg;
2916+
if (IsA(indexExpr, RelabelType))
2917+
indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;
2918+
2919+
/* We allow constant to be Const or Param */
2920+
if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
2921+
break;
2922+
2923+
/* Forbid transformation for composite types, records. */
2924+
if (type_is_rowtype(exprType(constExpr)) ||
2925+
type_is_rowtype(exprType(indexExpr)))
2926+
break;
2927+
2928+
/*
2929+
* Save information about the operator, type, and collation for the
2930+
* first matching qual. Then, check that subsequent quals match the
2931+
* first.
2932+
*/
2933+
if (firstTime)
2934+
{
2935+
matchOpno = opno;
2936+
consttype = exprType(constExpr);
2937+
arraytype = get_array_type(consttype);
2938+
inputcollid = subClause->inputcollid;
2939+
2940+
/*
2941+
* Check that the operator is presented in the opfamily and that
2942+
* the expression collation matches the index collation. Also,
2943+
* there must be an array type to construct an array later.
2944+
*/
2945+
if (!IndexCollMatchesExprColl(index->indexcollations[indexcol], inputcollid) ||
2946+
!op_in_opfamily(matchOpno, index->opfamily[indexcol]) ||
2947+
!OidIsValid(arraytype))
2948+
break;
2949+
firstTime = false;
2950+
}
2951+
else
2952+
{
2953+
if (opno != matchOpno ||
2954+
inputcollid != subClause->inputcollid ||
2955+
consttype != exprType(constExpr))
2956+
break;
2957+
}
2958+
2959+
if (IsA(constExpr, Param))
2960+
haveParam = true;
2961+
consts = lappend(consts, constExpr);
2962+
}
2963+
2964+
/*
2965+
* Catch the break from the loop above. Normally, a foreach() loop ends
2966+
* up with a NULL list cell. A non-NULL list cell indicates a break from
2967+
* the foreach() loop. Free the consts list and return NULL then.
2968+
*/
2969+
if (lc != NULL)
2970+
{
2971+
list_free(consts);
2972+
return NULL;
2973+
}
2974+
2975+
/*
2976+
* Assemble an array from the list of constants. It seems more profitable
2977+
* to build a const array. But in the presence of parameters, we don't
2978+
* have a specific value here and must employ an ArrayExpr instead.
2979+
*/
2980+
if (haveParam)
2981+
{
2982+
ArrayExpr *arrayExpr = makeNode(ArrayExpr);
2983+
2984+
/* array_collid will be set by parse_collate.c */
2985+
arrayExpr->element_typeid = consttype;
2986+
arrayExpr->array_typeid = arraytype;
2987+
arrayExpr->multidims = false;
2988+
arrayExpr->elements = consts;
2989+
arrayExpr->location = -1;
2990+
2991+
arrayNode = (Node *) arrayExpr;
2992+
}
2993+
else
2994+
{
2995+
int16 typlen;
2996+
bool typbyval;
2997+
char typalign;
2998+
Datum *elems;
2999+
int i = 0;
3000+
ArrayType *arrayConst;
3001+
3002+
get_typlenbyvalalign(consttype, &typlen, &typbyval, &typalign);
3003+
3004+
elems = (Datum *) palloc(sizeof(Datum) * list_length(consts));
3005+
foreach_node(Const, value, consts)
3006+
{
3007+
Assert(!value->constisnull && value->constvalue);
3008+
3009+
elems[i++] = value->constvalue;
3010+
}
3011+
3012+
arrayConst = construct_array(elems, i, consttype,
3013+
typlen, typbyval, typalign);
3014+
arrayNode = (Node *) makeConst(arraytype, -1, inputcollid,
3015+
-1, PointerGetDatum(arrayConst),
3016+
false, false);
3017+
3018+
pfree(elems);
3019+
list_free(consts);
3020+
}
3021+
3022+
/* Build the SAOP expression node */
3023+
saopexpr = makeNode(ScalarArrayOpExpr);
3024+
saopexpr->opno = matchOpno;
3025+
saopexpr->opfuncid = get_opcode(matchOpno);
3026+
saopexpr->hashfuncid = InvalidOid;
3027+
saopexpr->negfuncid = InvalidOid;
3028+
saopexpr->useOr = true;
3029+
saopexpr->inputcollid = inputcollid;
3030+
saopexpr->args = list_make2(indexExpr, arrayNode);
3031+
saopexpr->location = -1;
3032+
3033+
/*
3034+
* Finally, build an IndexClause based on the SAOP node. Use
3035+
* make_simple_restrictinfo() to get RestrictInfo with clean selectivity
3036+
* estimations, because they may differ from the estimation made for an OR
3037+
* clause. Although it is not a lossy expression, keep the original rinfo
3038+
* in iclause->rinfo as prescribed.
3039+
*/
3040+
iclause = makeNode(IndexClause);
3041+
iclause->rinfo = rinfo;
3042+
iclause->indexquals = list_make1(make_simple_restrictinfo(root,
3043+
&saopexpr->xpr));
3044+
iclause->lossy = false;
3045+
iclause->indexcol = indexcol;
3046+
iclause->indexcols = NIL;
3047+
return iclause;
3048+
}
3049+
27743050
/*
27753051
* expand_indexqual_rowcompare --- expand a single indexqual condition
27763052
* that is a RowCompareExpr

0 commit comments

Comments
 (0)