Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 069d0ff

Browse files
author
Richard Guo
committed
Check lateral references within PHVs for memoize cache keys
If we intend to generate a Memoize node on top of a path, we need cache keys of some sort. Currently we search for the cache keys in the parameterized clauses of the path as well as the lateral_vars of its parent. However, it turns out that this is not sufficient because there might be lateral references derived from PlaceHolderVars, which we fail to take into consideration.

This oversight can cause us to miss opportunities to utilize the Memoize node. Moreover, in some plans, failing to recognize all the cache keys could result in performance regressions. This is because without identifying all the cache keys, we would need to purge the entire cache every time we get a new outer tuple during execution.

This patch fixes this issue by extracting lateral Vars from within PlaceHolderVars and subsequently including them in the cache keys.

In passing, this patch also includes a comment clarifying that Memoize nodes are currently not added on top of join relation paths. This explains why this patch only considers PlaceHolderVars that are due to be evaluated at baserels.

Author: Richard Guo
Reviewed-by: Tom Lane, David Rowley, Andrei Lepikhov
Discussion: https://postgr.es/m/CAMbWs48jLxn0pAPZpJ50EThZ569Xrw+=4Ac3QvkpQvNszbeoNg@mail.gmail.com
1 parent f96c2c7 commit 069d0ff

File tree

4 files changed

+245
-9
lines changed

4 files changed

+245
-9
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

+8-4
Original file line numberDiff line numberDiff line change
@@ -3774,15 +3774,19 @@ ORDER BY ref_0."C 1";
37743774
-> Index Scan using t1_pkey on "S 1"."T 1" ref_0
37753775
Output: ref_0."C 1", ref_0.c2, ref_0.c3, ref_0.c4, ref_0.c5, ref_0.c6, ref_0.c7, ref_0.c8
37763776
Index Cond: (ref_0."C 1" < 10)
3777-
-> Foreign Scan on public.ft1 ref_1
3778-
Output: ref_1.c3, ref_0.c2
3779-
Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'))
3777+
-> Memoize
3778+
Output: ref_1.c3, (ref_0.c2)
3779+
Cache Key: ref_0.c2
3780+
Cache Mode: binary
3781+
-> Foreign Scan on public.ft1 ref_1
3782+
Output: ref_1.c3, ref_0.c2
3783+
Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'))
37803784
-> Materialize
37813785
Output: ref_3.c3
37823786
-> Foreign Scan on public.ft2 ref_3
37833787
Output: ref_3.c3
37843788
Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'))
3785-
(15 rows)
3789+
(19 rows)
37863790

37873791
SELECT ref_0.c2, subq_1.*
37883792
FROM

src/backend/optimizer/path/joinpath.c

+109-5
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "optimizer/optimizer.h"
2424
#include "optimizer/pathnode.h"
2525
#include "optimizer/paths.h"
26+
#include "optimizer/placeholder.h"
2627
#include "optimizer/planmain.h"
2728
#include "utils/typcache.h"
2829

@@ -425,7 +426,7 @@ have_unsafe_outer_join_ref(PlannerInfo *root,
425426

426427
/*
427428
* paraminfo_get_equal_hashops
428-
* Determine if the clauses in param_info and innerrel's lateral_vars
429+
* Determine if the clauses in param_info and innerrel's lateral vars
429430
* can be hashed.
430431
* Returns true if hashing is possible, otherwise false.
431432
*
@@ -438,10 +439,11 @@ have_unsafe_outer_join_ref(PlannerInfo *root,
438439
static bool
439440
paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
440441
RelOptInfo *outerrel, RelOptInfo *innerrel,
441-
List **param_exprs, List **operators,
442-
bool *binary_mode)
442+
List *ph_lateral_vars, List **param_exprs,
443+
List **operators, bool *binary_mode)
443444

444445
{
446+
List *lateral_vars;
445447
ListCell *lc;
446448

447449
*param_exprs = NIL;
@@ -521,7 +523,8 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
521523
}
522524

523525
/* Now add any lateral vars to the cache key too */
524-
foreach(lc, innerrel->lateral_vars)
526+
lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
527+
foreach(lc, lateral_vars)
525528
{
526529
Node *expr = (Node *) lfirst(lc);
527530
TypeCacheEntry *typentry;
@@ -572,10 +575,101 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
572575
return true;
573576
}
574577

578+
/*
579+
* extract_lateral_vars_from_PHVs
580+
* Extract lateral references within PlaceHolderVars that are due to be
581+
* evaluated at 'innerrelids'.
582+
*/
583+
static List *
584+
extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
585+
{
586+
List *ph_lateral_vars = NIL;
587+
ListCell *lc;
588+
589+
/* Nothing would be found if the query contains no LATERAL RTEs */
590+
if (!root->hasLateralRTEs)
591+
return NIL;
592+
593+
/*
594+
* No need to consider PHVs that are due to be evaluated at joinrels,
595+
* since we do not add Memoize nodes on top of joinrel paths.
596+
*/
597+
if (bms_membership(innerrelids) == BMS_MULTIPLE)
598+
return NIL;
599+
600+
foreach(lc, root->placeholder_list)
601+
{
602+
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
603+
List *vars;
604+
ListCell *cell;
605+
606+
/* PHV is uninteresting if no lateral refs */
607+
if (phinfo->ph_lateral == NULL)
608+
continue;
609+
610+
/* PHV is uninteresting if not due to be evaluated at innerrelids */
611+
if (!bms_equal(phinfo->ph_eval_at, innerrelids))
612+
continue;
613+
614+
/*
615+
* If the PHV does not reference any rels in innerrelids, use its
616+
* contained expression as a cache key rather than extracting the
617+
* Vars/PHVs from it and using those. This can be beneficial in cases
618+
* where the expression results in fewer distinct values to cache
619+
* tuples for.
620+
*/
621+
if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
622+
innerrelids))
623+
{
624+
ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
625+
continue;
626+
}
627+
628+
/* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
629+
vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
630+
foreach(cell, vars)
631+
{
632+
Node *node = (Node *) lfirst(cell);
633+
634+
if (IsA(node, Var))
635+
{
636+
Var *var = (Var *) node;
637+
638+
Assert(var->varlevelsup == 0);
639+
640+
if (bms_is_member(var->varno, phinfo->ph_lateral))
641+
ph_lateral_vars = lappend(ph_lateral_vars, node);
642+
}
643+
else if (IsA(node, PlaceHolderVar))
644+
{
645+
PlaceHolderVar *phv = (PlaceHolderVar *) node;
646+
647+
Assert(phv->phlevelsup == 0);
648+
649+
if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
650+
phinfo->ph_lateral))
651+
ph_lateral_vars = lappend(ph_lateral_vars, node);
652+
}
653+
else
654+
Assert(false);
655+
}
656+
657+
list_free(vars);
658+
}
659+
660+
return ph_lateral_vars;
661+
}
662+
575663
/*
576664
* get_memoize_path
577665
* If possible, make and return a Memoize path atop of 'inner_path'.
578666
* Otherwise return NULL.
667+
*
668+
* Note that currently we do not add Memoize nodes on top of join relation
669+
* paths. This is because the ParamPathInfos for join relation paths do not
670+
* maintain ppi_clauses, as the set of relevant clauses varies depending on how
671+
* the join is formed. In addition, joinrels do not maintain lateral_vars. So
672+
* we do not have a way to extract cache keys from joinrels.
579673
*/
580674
static Path *
581675
get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
@@ -587,6 +681,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
587681
List *hash_operators;
588682
ListCell *lc;
589683
bool binary_mode;
684+
List *ph_lateral_vars;
590685

591686
/* Obviously not if it's disabled */
592687
if (!enable_memoize)
@@ -601,14 +696,22 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
601696
if (outer_path->parent->rows < 2)
602697
return NULL;
603698

699+
/*
700+
* Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
701+
* evaluated at innerrel. These lateral Vars/PHVs could be used as
702+
* memoize cache keys.
703+
*/
704+
ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
705+
604706
/*
605707
* We can only have a memoize node when there's some kind of cache key,
606708
* either parameterized path clauses or lateral Vars. No cache key sounds
607709
* more like something a Materialize node might be more useful for.
608710
*/
609711
if ((inner_path->param_info == NULL ||
610712
inner_path->param_info->ppi_clauses == NIL) &&
611-
innerrel->lateral_vars == NIL)
713+
innerrel->lateral_vars == NIL &&
714+
ph_lateral_vars == NIL)
612715
return NULL;
613716

614717
/*
@@ -695,6 +798,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
695798
outerrel->top_parent ?
696799
outerrel->top_parent : outerrel,
697800
innerrel,
801+
ph_lateral_vars,
698802
&param_exprs,
699803
&hash_operators,
700804
&binary_mode))

src/test/regress/expected/memoize.out

+93
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,99 @@ WHERE t1.unique1 < 10;
129129
20 | 0.50000000000000000000
130130
(1 row)
131131

132+
-- Try with LATERAL references within PlaceHolderVars
133+
SELECT explain_memoize('
134+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
135+
LATERAL (SELECT t1.two+1 AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
136+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
137+
explain_memoize
138+
-------------------------------------------------------------------------------------------
139+
Aggregate (actual rows=1 loops=N)
140+
-> Nested Loop (actual rows=1000 loops=N)
141+
-> Seq Scan on tenk1 t1 (actual rows=1000 loops=N)
142+
Filter: (unique1 < 1000)
143+
Rows Removed by Filter: 9000
144+
-> Memoize (actual rows=1 loops=N)
145+
Cache Key: (t1.two + 1)
146+
Cache Mode: binary
147+
Hits: 998 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB
148+
-> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1 loops=N)
149+
Filter: ((t1.two + 1) = unique1)
150+
Rows Removed by Filter: 9999
151+
Heap Fetches: N
152+
(13 rows)
153+
154+
-- And check we get the expected results.
155+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
156+
LATERAL (SELECT t1.two+1 AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
157+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
158+
count | avg
159+
-------+--------------------
160+
1000 | 9.5000000000000000
161+
(1 row)
162+
163+
-- Try with LATERAL references within PlaceHolderVars
164+
SELECT explain_memoize('
165+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
166+
LATERAL (SELECT t1.two+t2.two AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
167+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
168+
explain_memoize
169+
--------------------------------------------------------------------------------------
170+
Aggregate (actual rows=1 loops=N)
171+
-> Nested Loop (actual rows=1000 loops=N)
172+
-> Seq Scan on tenk1 t1 (actual rows=1000 loops=N)
173+
Filter: (unique1 < 1000)
174+
Rows Removed by Filter: 9000
175+
-> Memoize (actual rows=1 loops=N)
176+
Cache Key: t1.two
177+
Cache Mode: binary
178+
Hits: 998 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB
179+
-> Seq Scan on tenk1 t2 (actual rows=1 loops=N)
180+
Filter: ((t1.two + two) = unique1)
181+
Rows Removed by Filter: 9999
182+
(12 rows)
183+
184+
-- And check we get the expected results.
185+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
186+
LATERAL (SELECT t1.two+t2.two AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
187+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
188+
count | avg
189+
-------+--------------------
190+
1000 | 9.0000000000000000
191+
(1 row)
192+
193+
-- Ensure we do not omit the cache keys from PlaceHolderVars
194+
SELECT explain_memoize('
195+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
196+
LATERAL (SELECT t1.twenty AS c1, t2.unique1 AS c2, t2.two FROM tenk1 t2) s
197+
ON t1.two = s.two
198+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
199+
explain_memoize
200+
---------------------------------------------------------------------------------------
201+
Aggregate (actual rows=1 loops=N)
202+
-> Nested Loop (actual rows=1000 loops=N)
203+
-> Seq Scan on tenk1 t1 (actual rows=1000 loops=N)
204+
Filter: (unique1 < 1000)
205+
Rows Removed by Filter: 9000
206+
-> Memoize (actual rows=1 loops=N)
207+
Cache Key: t1.two, t1.twenty
208+
Cache Mode: binary
209+
Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB
210+
-> Seq Scan on tenk1 t2 (actual rows=1 loops=N)
211+
Filter: ((t1.twenty = unique1) AND (t1.two = two))
212+
Rows Removed by Filter: 9999
213+
(12 rows)
214+
215+
-- And check we get the expected results.
216+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
217+
LATERAL (SELECT t1.twenty AS c1, t2.unique1 AS c2, t2.two FROM tenk1 t2) s
218+
ON t1.two = s.two
219+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
220+
count | avg
221+
-------+--------------------
222+
1000 | 9.5000000000000000
223+
(1 row)
224+
132225
SET enable_mergejoin TO off;
133226
-- Test for varlena datatype with expr evaluation
134227
CREATE TABLE expr_key (x numeric, t text);

src/test/regress/sql/memoize.sql

+35
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,41 @@ LATERAL (
7474
ON t1.two = t2.two
7575
WHERE t1.unique1 < 10;
7676

77+
-- Try with LATERAL references within PlaceHolderVars
78+
SELECT explain_memoize('
79+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
80+
LATERAL (SELECT t1.two+1 AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
81+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
82+
83+
-- And check we get the expected results.
84+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
85+
LATERAL (SELECT t1.two+1 AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
86+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
87+
88+
-- Try with LATERAL references within PlaceHolderVars
89+
SELECT explain_memoize('
90+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
91+
LATERAL (SELECT t1.two+t2.two AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
92+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
93+
94+
-- And check we get the expected results.
95+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
96+
LATERAL (SELECT t1.two+t2.two AS c1, t2.unique1 AS c2 FROM tenk1 t2) s ON TRUE
97+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
98+
99+
-- Ensure we do not omit the cache keys from PlaceHolderVars
100+
SELECT explain_memoize('
101+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
102+
LATERAL (SELECT t1.twenty AS c1, t2.unique1 AS c2, t2.two FROM tenk1 t2) s
103+
ON t1.two = s.two
104+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;', false);
105+
106+
-- And check we get the expected results.
107+
SELECT COUNT(*), AVG(t1.twenty) FROM tenk1 t1 LEFT JOIN
108+
LATERAL (SELECT t1.twenty AS c1, t2.unique1 AS c2, t2.two FROM tenk1 t2) s
109+
ON t1.two = s.two
110+
WHERE s.c1 = s.c2 AND t1.unique1 < 1000;
111+
77112
SET enable_mergejoin TO off;
78113

79114
-- Test for varlena datatype with expr evaluation

0 commit comments

Comments
 (0)