Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 4058c72

Browse files
committed
Consider the number of columns when estimating the Sort's cost.
During the multicolumn sort, we apply a comparison operator to each column one by one. So, even keeping aside the cost of a specific operator, we should take into account the number of its calls. This code impacts the cost model of GatherMerge, IncrementalSort, Sort, and MergeAppend. It also alters the balance between ordered and hashed operations, which should be benchmarked and tuned carefully.
1 parent b8aa44f commit 4058c72

File tree

6 files changed

+78
-66
lines changed

6 files changed

+78
-66
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

+9-6
Original file line numberDiff line numberDiff line change
@@ -9984,13 +9984,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J
99849984
-- left outer join + nullable clause
99859985
EXPLAIN (VERBOSE, COSTS OFF)
99869986
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
9987-
QUERY PLAN
9988-
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
9989-
Foreign Scan
9987+
QUERY PLAN
9988+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
9989+
Sort
99909990
Output: t1.a, fprt2.b, fprt2.c
9991-
Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
9992-
Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST
9993-
(4 rows)
9991+
Sort Key: t1.a, fprt2.b, fprt2.c
9992+
-> Foreign Scan
9993+
Output: t1.a, fprt2.b, fprt2.c
9994+
Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
9995+
Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10))
9996+
(7 rows)
99949997

99959998
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
99969999
a | b | c

src/backend/optimizer/path/costsize.c

+15-7
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,8 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
481481
Cost comparison_cost;
482482
double N;
483483
double logN;
484+
int npathkeys = list_length(((Path *) path)->pathkeys);
485+
int cmpMultiplier = (npathkeys == 0) ? 2.0 : npathkeys + 1.0;
484486

485487
/* Mark the path with the correct row estimate */
486488
if (rows)
@@ -503,7 +505,7 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
503505
logN = LOG2(N);
504506

505507
/* Assumed cost per tuple comparison */
506-
comparison_cost = 2.0 * cpu_operator_cost;
508+
comparison_cost = cmpMultiplier * cpu_operator_cost;
507509

508510
/* Heap creation cost */
509511
startup_cost += comparison_cost * N * logN;
@@ -1861,7 +1863,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
18611863
*/
18621864
static void
18631865
cost_tuplesort(Cost *startup_cost, Cost *run_cost,
1864-
double tuples, int width,
1866+
double tuples, int width, int cmpMultiplier,
18651867
Cost comparison_cost, int sort_mem,
18661868
double limit_tuples)
18671869
{
@@ -1878,7 +1880,7 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
18781880
tuples = 2.0;
18791881

18801882
/* Include the default cost-per-comparison */
1881-
comparison_cost += 2.0 * cpu_operator_cost;
1883+
comparison_cost += cmpMultiplier * cpu_operator_cost;
18821884

18831885
/* Do we have a useful LIMIT? */
18841886
if (limit_tuples > 0 && limit_tuples < tuples)
@@ -2050,7 +2052,9 @@ cost_incremental_sort(Path *path,
20502052
* are equal.
20512053
*/
20522054
cost_tuplesort(&group_startup_cost, &group_run_cost,
2053-
group_tuples, width, comparison_cost, sort_mem,
2055+
group_tuples, width,
2056+
list_length(pathkeys) + 1,
2057+
comparison_cost, sort_mem,
20542058
limit_tuples);
20552059

20562060
/*
@@ -2074,7 +2078,7 @@ cost_incremental_sort(Path *path,
20742078
* detect the sort groups. This is roughly equal to one extra copy and
20752079
* comparison per tuple.
20762080
*/
2077-
run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples;
2081+
run_cost += (cpu_tuple_cost + (presorted_keys + 1) * comparison_cost) * input_tuples;
20782082

20792083
/*
20802084
* Additionally, we charge double cpu_tuple_cost for each input group to
@@ -2108,9 +2112,11 @@ cost_sort(Path *path, PlannerInfo *root,
21082112
{
21092113
Cost startup_cost;
21102114
Cost run_cost;
2115+
int cmpMultiplier =
2116+
(pathkeys == NIL) ? 2.0 : list_length(pathkeys) + 1.0;
21112117

21122118
cost_tuplesort(&startup_cost, &run_cost,
2113-
tuples, width,
2119+
tuples, width, cmpMultiplier,
21142120
comparison_cost, sort_mem,
21152121
limit_tuples);
21162122

@@ -2390,6 +2396,8 @@ cost_merge_append(Path *path, PlannerInfo *root,
23902396
Cost comparison_cost;
23912397
double N;
23922398
double logN;
2399+
int cmpMultiplier =
2400+
(pathkeys == NIL) ? 2.0 : list_length(pathkeys) + 1.0;
23932401

23942402
/*
23952403
* Avoid log(0)...
@@ -2398,7 +2406,7 @@ cost_merge_append(Path *path, PlannerInfo *root,
23982406
logN = LOG2(N);
23992407

24002408
/* Assumed cost per tuple comparison */
2401-
comparison_cost = 2.0 * cpu_operator_cost;
2409+
comparison_cost = cmpMultiplier * cpu_operator_cost;
24022410

24032411
/* Heap creation cost */
24042412
startup_cost += comparison_cost * N * logN;

src/test/regress/expected/aggregates.out

+7-6
Original file line numberDiff line numberDiff line change
@@ -3001,17 +3001,18 @@ ANALYZE agg_sort_order;
30013001
EXPLAIN (COSTS OFF)
30023002
SELECT array_agg(c1 ORDER BY c2),c2
30033003
FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
3004-
QUERY PLAN
3005-
----------------------------------------------------------------------------
3004+
QUERY PLAN
3005+
--------------------------------------------------------------------------
30063006
Sort
30073007
Sort Key: c2
30083008
-> GroupAggregate
30093009
Group Key: c1
3010-
-> Sort
3010+
-> Incremental Sort
30113011
Sort Key: c1, c2
3012-
-> Index Scan using agg_sort_order_c2_idx on agg_sort_order
3013-
Index Cond: (c2 < 100)
3014-
(8 rows)
3012+
Presorted Key: c1
3013+
-> Index Scan using agg_sort_order_pkey on agg_sort_order
3014+
Filter: (c2 < 100)
3015+
(9 rows)
30153016

30163017
DROP TABLE agg_sort_order CASCADE;
30173018
DROP TABLE btg;

src/test/regress/expected/join.out

+11-9
Original file line numberDiff line numberDiff line change
@@ -5726,18 +5726,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
57265726
explain (costs off)
57275727
select d.* from d left join (select distinct * from b) s
57285728
on d.a = s.id;
5729-
QUERY PLAN
5730-
--------------------------------------
5731-
Merge Right Join
5732-
Merge Cond: (b.id = d.a)
5733-
-> Unique
5734-
-> Sort
5735-
Sort Key: b.id, b.c_id
5736-
-> Seq Scan on b
5729+
QUERY PLAN
5730+
---------------------------------------------
5731+
Merge Left Join
5732+
Merge Cond: (d.a = s.id)
57375733
-> Sort
57385734
Sort Key: d.a
57395735
-> Seq Scan on d
5740-
(9 rows)
5736+
-> Sort
5737+
Sort Key: s.id
5738+
-> Subquery Scan on s
5739+
-> HashAggregate
5740+
Group Key: b.id, b.c_id
5741+
-> Seq Scan on b
5742+
(11 rows)
57415743

57425744
-- join removal is not possible here
57435745
explain (costs off)

src/test/regress/expected/partition_join.out

+5-3
Original file line numberDiff line numberDiff line change
@@ -1235,9 +1235,11 @@ EXPLAIN (COSTS OFF)
12351235
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
12361236
QUERY PLAN
12371237
----------------------------------------------------------------------------
1238-
Sort
1238+
Incremental Sort
12391239
Sort Key: t1.a, t2.b, ((t3.a + t3.b))
1240-
-> Append
1240+
Presorted Key: t1.a
1241+
-> Merge Append
1242+
Sort Key: t1.a
12411243
-> Merge Left Join
12421244
Merge Cond: (t1_1.a = t2_1.b)
12431245
-> Sort
@@ -1286,7 +1288,7 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
12861288
-> Sort
12871289
Sort Key: t2_3.b
12881290
-> Seq Scan on prt2_p3 t2_3
1289-
(51 rows)
1291+
(53 rows)
12901292

12911293
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
12921294
a | c | b | c | ?column? | c

src/test/regress/expected/union.out

+31-35
Original file line numberDiff line numberDiff line change
@@ -1224,18 +1224,17 @@ SELECT * FROM
12241224
SELECT 2 AS t, 4 AS x) ss
12251225
WHERE x < 4
12261226
ORDER BY x;
1227-
QUERY PLAN
1228-
--------------------------------------------------
1227+
QUERY PLAN
1228+
--------------------------------------------
12291229
Sort
12301230
Sort Key: (2)
1231-
-> Unique
1232-
-> Sort
1233-
Sort Key: (1), (2)
1234-
-> Append
1235-
-> Result
1236-
-> Result
1237-
One-Time Filter: false
1238-
(9 rows)
1231+
-> HashAggregate
1232+
Group Key: (1), (2)
1233+
-> Append
1234+
-> Result
1235+
-> Result
1236+
One-Time Filter: false
1237+
(8 rows)
12391238

12401239
SELECT * FROM
12411240
(SELECT 1 AS t, 2 AS x
@@ -1289,19 +1288,18 @@ SELECT * FROM
12891288
SELECT 2 AS t, 4 AS x) ss
12901289
WHERE x > 3
12911290
ORDER BY x;
1292-
QUERY PLAN
1293-
------------------------------------------------------------------------------------
1291+
QUERY PLAN
1292+
-------------------------------------------------------------------------------
12941293
Sort
12951294
Sort Key: ss.x
12961295
-> Subquery Scan on ss
12971296
Filter: (ss.x > 3)
1298-
-> Unique
1299-
-> Sort
1300-
Sort Key: (1), (((random() * '3'::double precision))::integer)
1301-
-> Append
1302-
-> Result
1303-
-> Result
1304-
(10 rows)
1297+
-> HashAggregate
1298+
Group Key: (1), (((random() * '3'::double precision))::integer)
1299+
-> Append
1300+
-> Result
1301+
-> Result
1302+
(9 rows)
13051303

13061304
SELECT * FROM
13071305
(SELECT 1 AS t, (random()*3)::int AS x
@@ -1322,24 +1320,22 @@ select distinct q1 from
13221320
union all
13231321
select distinct * from int8_tbl i82) ss
13241322
where q2 = q2;
1325-
QUERY PLAN
1326-
----------------------------------------------------------
1327-
Unique
1328-
-> Merge Append
1329-
Sort Key: "*SELECT* 1".q1
1323+
QUERY PLAN
1324+
----------------------------------------------------
1325+
HashAggregate
1326+
Group Key: "*SELECT* 1".q1
1327+
-> Append
13301328
-> Subquery Scan on "*SELECT* 1"
1331-
-> Unique
1332-
-> Sort
1333-
Sort Key: i81.q1, i81.q2
1334-
-> Seq Scan on int8_tbl i81
1335-
Filter: (q2 IS NOT NULL)
1329+
-> HashAggregate
1330+
Group Key: i81.q1, i81.q2
1331+
-> Seq Scan on int8_tbl i81
1332+
Filter: (q2 IS NOT NULL)
13361333
-> Subquery Scan on "*SELECT* 2"
1337-
-> Unique
1338-
-> Sort
1339-
Sort Key: i82.q1, i82.q2
1340-
-> Seq Scan on int8_tbl i82
1341-
Filter: (q2 IS NOT NULL)
1342-
(15 rows)
1334+
-> HashAggregate
1335+
Group Key: i82.q1, i82.q2
1336+
-> Seq Scan on int8_tbl i82
1337+
Filter: (q2 IS NOT NULL)
1338+
(13 rows)
13431339

13441340
select distinct q1 from
13451341
(select distinct * from int8_tbl i81

0 commit comments

Comments
 (0)