Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 36d22dd

Browse files
committed
Don't generate EEOP_*_FETCHSOME operations for slots known to be virtual.
That avoids unnecessary work during both interpreted execution, and JIT compiled expression evaluation. Both benefit from fewer expression steps needing to be processed, and for interpreted execution there now is a fastpath dedicated to just fetching a value from a virtual slot. That's e.g. beneficial for hashjoins over nodes that perform projections, as the hashed columns are currently fetched individually. Author: Soumyadeep Chakraborty, Andres Freund Discussion: https://postgr.es/m/CAE-ML+9OKSN71+mHtfMD-L24oDp8dGTfaVjDU6U+j+FNAW5kRQ@mail.gmail.com
1 parent 34c9c53 commit 36d22dd

File tree

3 files changed

+160
-22
lines changed

3 files changed

+160
-22
lines changed

src/backend/executor/execExpr.c

+28-15
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ static void ExecInitFunc(ExprEvalStep *scratch, Expr *node, List *args,
6565
static void ExecInitExprSlots(ExprState *state, Node *node);
6666
static void ExecPushExprSlots(ExprState *state, LastAttnumInfo *info);
6767
static bool get_last_attnums_walker(Node *node, LastAttnumInfo *info);
68-
static void ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
68+
static bool ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op);
6969
static void ExecInitWholeRowVar(ExprEvalStep *scratch, Var *variable,
7070
ExprState *state);
7171
static void ExecInitSubscriptingRef(ExprEvalStep *scratch,
@@ -2285,8 +2285,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
22852285
scratch.d.fetch.fixed = false;
22862286
scratch.d.fetch.kind = NULL;
22872287
scratch.d.fetch.known_desc = NULL;
2288-
ExecComputeSlotInfo(state, &scratch);
2289-
ExprEvalPushStep(state, &scratch);
2288+
if (ExecComputeSlotInfo(state, &scratch))
2289+
ExprEvalPushStep(state, &scratch);
22902290
}
22912291
if (info->last_outer > 0)
22922292
{
@@ -2295,8 +2295,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
22952295
scratch.d.fetch.fixed = false;
22962296
scratch.d.fetch.kind = NULL;
22972297
scratch.d.fetch.known_desc = NULL;
2298-
ExecComputeSlotInfo(state, &scratch);
2299-
ExprEvalPushStep(state, &scratch);
2298+
if (ExecComputeSlotInfo(state, &scratch))
2299+
ExprEvalPushStep(state, &scratch);
23002300
}
23012301
if (info->last_scan > 0)
23022302
{
@@ -2305,8 +2305,8 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
23052305
scratch.d.fetch.fixed = false;
23062306
scratch.d.fetch.kind = NULL;
23072307
scratch.d.fetch.known_desc = NULL;
2308-
ExecComputeSlotInfo(state, &scratch);
2309-
ExprEvalPushStep(state, &scratch);
2308+
if (ExecComputeSlotInfo(state, &scratch))
2309+
ExprEvalPushStep(state, &scratch);
23102310
}
23112311
}
23122312

@@ -2364,14 +2364,21 @@ get_last_attnums_walker(Node *node, LastAttnumInfo *info)
23642364
* The goal is to determine whether a slot is 'fixed', that is, every
23652365
* evaluation of the expression will have the same type of slot, with an
23662366
* equivalent descriptor.
2367+
*
2368+
* Returns true if the deforming step is required, false otherwise.
23672369
*/
2368-
static void
2370+
static bool
23692371
ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
23702372
{
23712373
PlanState *parent = state->parent;
23722374
TupleDesc desc = NULL;
23732375
const TupleTableSlotOps *tts_ops = NULL;
23742376
bool isfixed = false;
2377+
ExprEvalOp opcode = op->opcode;
2378+
2379+
Assert(opcode == EEOP_INNER_FETCHSOME ||
2380+
opcode == EEOP_OUTER_FETCHSOME ||
2381+
opcode == EEOP_SCAN_FETCHSOME);
23752382

23762383
if (op->d.fetch.known_desc != NULL)
23772384
{
@@ -2383,7 +2390,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
23832390
{
23842391
isfixed = false;
23852392
}
2386-
else if (op->opcode == EEOP_INNER_FETCHSOME)
2393+
else if (opcode == EEOP_INNER_FETCHSOME)
23872394
{
23882395
PlanState *is = innerPlanState(parent);
23892396

@@ -2403,7 +2410,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
24032410
desc = ExecGetResultType(is);
24042411
}
24052412
}
2406-
else if (op->opcode == EEOP_OUTER_FETCHSOME)
2413+
else if (opcode == EEOP_OUTER_FETCHSOME)
24072414
{
24082415
PlanState *os = outerPlanState(parent);
24092416

@@ -2423,7 +2430,7 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
24232430
desc = ExecGetResultType(os);
24242431
}
24252432
}
2426-
else if (op->opcode == EEOP_SCAN_FETCHSOME)
2433+
else if (opcode == EEOP_SCAN_FETCHSOME)
24272434
{
24282435
desc = parent->scandesc;
24292436

@@ -2446,6 +2453,12 @@ ExecComputeSlotInfo(ExprState *state, ExprEvalStep *op)
24462453
op->d.fetch.kind = NULL;
24472454
op->d.fetch.known_desc = NULL;
24482455
}
2456+
2457+
/* if the slot is known to always be virtual we never need to deform */
2458+
if (op->d.fetch.fixed && op->d.fetch.kind == &TTSOpsVirtual)
2459+
return false;
2460+
2461+
return true;
24492462
}
24502463

24512464
/*
@@ -3360,16 +3373,16 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
33603373
scratch.d.fetch.fixed = false;
33613374
scratch.d.fetch.known_desc = ldesc;
33623375
scratch.d.fetch.kind = lops;
3363-
ExecComputeSlotInfo(state, &scratch);
3364-
ExprEvalPushStep(state, &scratch);
3376+
if (ExecComputeSlotInfo(state, &scratch))
3377+
ExprEvalPushStep(state, &scratch);
33653378

33663379
scratch.opcode = EEOP_OUTER_FETCHSOME;
33673380
scratch.d.fetch.last_var = maxatt;
33683381
scratch.d.fetch.fixed = false;
33693382
scratch.d.fetch.known_desc = rdesc;
33703383
scratch.d.fetch.kind = rops;
3371-
ExecComputeSlotInfo(state, &scratch);
3372-
ExprEvalPushStep(state, &scratch);
3384+
if (ExecComputeSlotInfo(state, &scratch))
3385+
ExprEvalPushStep(state, &scratch);
33733386

33743387
/*
33753388
* Start comparing at the last field (least significant sort key). That's

src/backend/executor/execExprInterp.c

+129-4
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ static Datum ExecJustAssignOuterVar(ExprState *state, ExprContext *econtext, boo
160160
static Datum ExecJustAssignScanVar(ExprState *state, ExprContext *econtext, bool *isnull);
161161
static Datum ExecJustApplyFuncToCase(ExprState *state, ExprContext *econtext, bool *isnull);
162162
static Datum ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull);
163+
static Datum ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
164+
static Datum ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
165+
static Datum ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
166+
static Datum ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
167+
static Datum ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
168+
static Datum ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull);
163169

164170

165171
/*
@@ -255,11 +261,45 @@ ExecReadyInterpretedExpr(ExprState *state)
255261
return;
256262
}
257263
}
258-
else if (state->steps_len == 2 &&
259-
state->steps[0].opcode == EEOP_CONST)
264+
else if (state->steps_len == 2)
260265
{
261-
state->evalfunc_private = (void *) ExecJustConst;
262-
return;
266+
ExprEvalOp step0 = state->steps[0].opcode;
267+
268+
if (step0 == EEOP_CONST)
269+
{
270+
state->evalfunc_private = (void *) ExecJustConst;
271+
return;
272+
}
273+
else if (step0 == EEOP_INNER_VAR)
274+
{
275+
state->evalfunc_private = (void *) ExecJustInnerVarVirt;
276+
return;
277+
}
278+
else if (step0 == EEOP_OUTER_VAR)
279+
{
280+
state->evalfunc_private = (void *) ExecJustOuterVarVirt;
281+
return;
282+
}
283+
else if (step0 == EEOP_SCAN_VAR)
284+
{
285+
state->evalfunc_private = (void *) ExecJustScanVarVirt;
286+
return;
287+
}
288+
else if (step0 == EEOP_ASSIGN_INNER_VAR)
289+
{
290+
state->evalfunc_private = (void *) ExecJustAssignInnerVarVirt;
291+
return;
292+
}
293+
else if (step0 == EEOP_ASSIGN_OUTER_VAR)
294+
{
295+
state->evalfunc_private = (void *) ExecJustAssignOuterVarVirt;
296+
return;
297+
}
298+
else if (step0 == EEOP_ASSIGN_SCAN_VAR)
299+
{
300+
state->evalfunc_private = (void *) ExecJustAssignScanVarVirt;
301+
return;
302+
}
263303
}
264304

265305
#if defined(EEO_USE_COMPUTED_GOTO)
@@ -2096,6 +2136,91 @@ ExecJustConst(ExprState *state, ExprContext *econtext, bool *isnull)
20962136
return op->d.constval.value;
20972137
}
20982138

2139+
/* implementation of ExecJust(Inner|Outer|Scan)VarVirt */
2140+
static pg_attribute_always_inline Datum
2141+
ExecJustVarVirtImpl(ExprState *state, TupleTableSlot *slot, bool *isnull)
2142+
{
2143+
ExprEvalStep *op = &state->steps[0];
2144+
int attnum = op->d.var.attnum;
2145+
2146+
/*
2147+
* As it is guaranteed that a virtual slot is used, there never is a need
2148+
* to perform tuple deforming (nor would it be possible). Therefore
2149+
* execExpr.c has not emitted an EEOP_*_FETCHSOME step. Verify, as much as
2150+
* possible, that that determination was accurate.
2151+
*/
2152+
Assert(TTS_IS_VIRTUAL(slot));
2153+
Assert(TTS_FIXED(slot));
2154+
Assert(attnum >= 0 && attnum < slot->tts_nvalid);
2155+
2156+
*isnull = slot->tts_isnull[attnum];
2157+
2158+
return slot->tts_values[attnum];
2159+
}
2160+
2161+
/* Like ExecJustInnerVar, optimized for virtual slots */
2162+
static Datum
2163+
ExecJustInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
2164+
{
2165+
return ExecJustVarVirtImpl(state, econtext->ecxt_innertuple, isnull);
2166+
}
2167+
2168+
/* Like ExecJustOuterVar, optimized for virtual slots */
2169+
static Datum
2170+
ExecJustOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
2171+
{
2172+
return ExecJustVarVirtImpl(state, econtext->ecxt_outertuple, isnull);
2173+
}
2174+
2175+
/* Like ExecJustScanVar, optimized for virtual slots */
2176+
static Datum
2177+
ExecJustScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
2178+
{
2179+
return ExecJustVarVirtImpl(state, econtext->ecxt_scantuple, isnull);
2180+
}
2181+
2182+
/* implementation of ExecJustAssign(Inner|Outer|Scan)VarVirt */
2183+
static pg_attribute_always_inline Datum
2184+
ExecJustAssignVarVirtImpl(ExprState *state, TupleTableSlot *inslot, bool *isnull)
2185+
{
2186+
ExprEvalStep *op = &state->steps[0];
2187+
int attnum = op->d.assign_var.attnum;
2188+
int resultnum = op->d.assign_var.resultnum;
2189+
TupleTableSlot *outslot = state->resultslot;
2190+
2191+
/* see ExecJustVarVirtImpl for comments */
2192+
2193+
Assert(TTS_IS_VIRTUAL(inslot));
2194+
Assert(TTS_FIXED(inslot));
2195+
Assert(attnum >= 0 && attnum < inslot->tts_nvalid);
2196+
2197+
outslot->tts_values[resultnum] = inslot->tts_values[attnum];
2198+
outslot->tts_isnull[resultnum] = inslot->tts_isnull[attnum];
2199+
2200+
return 0;
2201+
}
2202+
2203+
/* Like ExecJustAssignInnerVar, optimized for virtual slots */
2204+
static Datum
2205+
ExecJustAssignInnerVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
2206+
{
2207+
return ExecJustAssignVarVirtImpl(state, econtext->ecxt_innertuple, isnull);
2208+
}
2209+
2210+
/* Like ExecJustAssignOuterVar, optimized for virtual slots */
2211+
static Datum
2212+
ExecJustAssignOuterVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
2213+
{
2214+
return ExecJustAssignVarVirtImpl(state, econtext->ecxt_outertuple, isnull);
2215+
}
2216+
2217+
/* Like ExecJustAssignScanVar, optimized for virtual slots */
2218+
static Datum
2219+
ExecJustAssignScanVarVirt(ExprState *state, ExprContext *econtext, bool *isnull)
2220+
{
2221+
return ExecJustAssignVarVirtImpl(state, econtext->ecxt_scantuple, isnull);
2222+
}
2223+
20992224
#if defined(EEO_USE_COMPUTED_GOTO)
21002225
/*
21012226
* Comparator used when building address->opcode lookup table for

src/backend/jit/llvm/llvmjit_expr.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,9 @@ llvm_compile_expr(ExprState *state)
287287
if (op->d.fetch.fixed)
288288
tts_ops = op->d.fetch.kind;
289289

290+
/* step should not have been generated */
291+
Assert(tts_ops != &TTSOpsVirtual);
292+
290293
if (opcode == EEOP_INNER_FETCHSOME)
291294
v_slot = v_innerslot;
292295
else if (opcode == EEOP_OUTER_FETCHSOME)
@@ -297,9 +300,6 @@ llvm_compile_expr(ExprState *state)
297300
/*
298301
* Check if all required attributes are available, or
299302
* whether deforming is required.
300-
*
301-
* TODO: skip nvalid check if slot is fixed and known to
302-
* be a virtual slot.
303303
*/
304304
v_nvalid =
305305
l_load_struct_gep(b, v_slot,

0 commit comments

Comments
 (0)