Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit aaa6761

Browse files
committed
Apply all available functional dependencies
When considering functional dependencies during selectivity estimation, it's not necessary to select the best extended statistics object and then use only the dependencies from it. We can simply consider all applicable functional dependencies at once. This means we need to deserialize all (applicable) dependencies before applying them to the clauses. This is a bit more expensive than picking the best statistics object and deserializing only its dependencies. To minimize the additional cost, we ignore statistics objects that are not applicable. Author: Tomas Vondra Reviewed-by: Mark Dilger Discussion: https://postgr.es/m/20191028152048.jc6pqv5hb7j77ocp@development
1 parent 652686a commit aaa6761

File tree

3 files changed

+169
-39
lines changed

3 files changed

+169
-39
lines changed

src/backend/statistics/dependencies.c

+77-39
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency,
7777
AttrNumber attnum);
7878
static bool dependency_is_compatible_clause(Node *clause, Index relid,
7979
AttrNumber *attnum);
80-
static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
81-
MVDependencies *dependencies,
80+
static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
81+
int ndependencies,
8282
Bitmapset *attnums);
8383

8484
static void
@@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
862862
* (see the comment in dependencies_clauselist_selectivity).
863863
*/
864864
static MVDependency *
865-
find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
865+
find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
866866
Bitmapset *attnums)
867867
{
868-
int i;
868+
int i, j;
869869
MVDependency *strongest = NULL;
870870

871871
/* number of attnums in clauses */
@@ -876,36 +876,39 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
876876
* fully-matched dependencies. We do the cheap checks first, before
877877
* matching it against the attnums.
878878
*/
879-
for (i = 0; i < dependencies->ndeps; i++)
879+
for (i = 0; i < ndependencies; i++)
880880
{
881-
MVDependency *dependency = dependencies->deps[i];
882-
883-
/*
884-
* Skip dependencies referencing more attributes than available
885-
* clauses, as those can't be fully matched.
886-
*/
887-
if (dependency->nattributes > nattnums)
888-
continue;
889-
890-
if (strongest)
881+
for (j = 0; j < dependencies[i]->ndeps; j++)
891882
{
892-
/* skip dependencies on fewer attributes than the strongest. */
893-
if (dependency->nattributes < strongest->nattributes)
894-
continue;
883+
MVDependency *dependency = dependencies[i]->deps[j];
895884

896-
/* also skip weaker dependencies when attribute count matches */
897-
if (strongest->nattributes == dependency->nattributes &&
898-
strongest->degree > dependency->degree)
885+
/*
886+
* Skip dependencies referencing more attributes than available
887+
* clauses, as those can't be fully matched.
888+
*/
889+
if (dependency->nattributes > nattnums)
899890
continue;
900-
}
901891

902-
/*
903-
* this dependency is stronger, but we must still check that it's
904-
* fully matched to these attnums. We perform this check last as it's
905-
* slightly more expensive than the previous checks.
906-
*/
907-
if (dependency_is_fully_matched(dependency, attnums))
908-
strongest = dependency; /* save new best match */
892+
if (strongest)
893+
{
894+
/* skip dependencies on fewer attributes than the strongest. */
895+
if (dependency->nattributes < strongest->nattributes)
896+
continue;
897+
898+
/* also skip weaker dependencies when attribute count matches */
899+
if (strongest->nattributes == dependency->nattributes &&
900+
strongest->degree > dependency->degree)
901+
continue;
902+
}
903+
904+
/*
905+
* this dependency is stronger, but we must still check that it's
906+
* fully matched to these attnums. We perform this check last as it's
907+
* slightly more expensive than the previous checks.
908+
*/
909+
if (dependency_is_fully_matched(dependency, attnums))
910+
strongest = dependency; /* save new best match */
911+
}
909912
}
910913

911914
return strongest;
@@ -949,10 +952,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
949952
Selectivity s1 = 1.0;
950953
ListCell *l;
951954
Bitmapset *clauses_attnums = NULL;
952-
StatisticExtInfo *stat;
953-
MVDependencies *dependencies;
954955
Bitmapset **list_attnums;
955956
int listidx;
957+
MVDependencies **dependencies = NULL;
958+
int ndependencies = 0;
959+
int i;
956960

957961
/* check if there's any stats that might be useful for us. */
958962
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
@@ -1001,20 +1005,50 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10011005
return 1.0;
10021006
}
10031007

1004-
/* find the best suited statistics object for these attnums */
1005-
stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES,
1006-
list_attnums, list_length(clauses));
1008+
/*
1009+
* Load all functional dependencies matching at least two parameters. We
1010+
* can simply consider all dependencies at once, without having to search
1011+
* for the best statistics object.
1012+
*
1013+
* To not waste cycles and memory, we deserialize dependencies only for
1014+
* statistics that match at least two attributes. The array is allocated
1015+
* with the assumption that all objects match - we could grow the array
1016+
* to make it just the right size, but it's likely wasteful anyway thanks
1017+
* to moving the freed chunks to freelists etc.
1018+
*/
1019+
ndependencies = 0;
1020+
dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1021+
list_length(rel->statlist));
1022+
1023+
foreach(l,rel->statlist)
1024+
{
1025+
StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1026+
Bitmapset *matched;
1027+
int num_matched;
1028+
1029+
/* skip statistics that are not of the correct type */
1030+
if (stat->kind != STATS_EXT_DEPENDENCIES)
1031+
continue;
1032+
1033+
matched = bms_intersect(clauses_attnums, stat->keys);
1034+
num_matched = bms_num_members(matched);
1035+
bms_free(matched);
1036+
1037+
/* skip objects matching fewer than two attributes from clauses */
1038+
if (num_matched < 2)
1039+
continue;
1040+
1041+
dependencies[ndependencies++]
1042+
= statext_dependencies_load(stat->statOid);
1043+
}
10071044

10081045
/* if no matching stats could be found then we've nothing to do */
1009-
if (!stat)
1046+
if (!ndependencies)
10101047
{
10111048
pfree(list_attnums);
10121049
return 1.0;
10131050
}
10141051

1015-
/* load the dependency items stored in the statistics object */
1016-
dependencies = statext_dependencies_load(stat->statOid);
1017-
10181052
/*
10191053
* Apply the dependencies recursively, starting with the widest/strongest
10201054
* ones, and proceeding to the smaller/weaker ones. At the end of each
@@ -1027,7 +1061,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10271061
MVDependency *dependency;
10281062

10291063
/* the widest/strongest dependency, fully matched by clauses */
1030-
dependency = find_strongest_dependency(stat, dependencies,
1064+
dependency = find_strongest_dependency(dependencies, ndependencies,
10311065
clauses_attnums);
10321066

10331067
/* if no suitable dependency was found, we're done */
@@ -1097,6 +1131,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10971131
s1 *= (dependency->degree + (1 - dependency->degree) * s2);
10981132
}
10991133

1134+
/* free deserialized functional dependencies (and then the array) */
1135+
for (i = 0; i < ndependencies; i++)
1136+
pfree(dependencies[i]);
1137+
11001138
pfree(dependencies);
11011139
pfree(list_attnums);
11021140

src/test/regress/expected/stats_ext.out

+57
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
451451
50 | 50
452452
(1 row)
453453

454+
-- check the ability to use multiple functional dependencies
455+
CREATE TABLE functional_dependencies_multi (
456+
a INTEGER,
457+
b INTEGER,
458+
c INTEGER,
459+
d INTEGER
460+
);
461+
INSERT INTO functional_dependencies_multi (a, b, c, d)
462+
SELECT
463+
mod(i,7),
464+
mod(i,7),
465+
mod(i,11),
466+
mod(i,11)
467+
FROM generate_series(1,5000) s(i);
468+
ANALYZE functional_dependencies_multi;
469+
-- estimates without any functional dependencies
470+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
471+
estimated | actual
472+
-----------+--------
473+
102 | 714
474+
(1 row)
475+
476+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
477+
estimated | actual
478+
-----------+--------
479+
41 | 454
480+
(1 row)
481+
482+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
483+
estimated | actual
484+
-----------+--------
485+
1 | 64
486+
(1 row)
487+
488+
-- create separate functional dependencies
489+
CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
490+
CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
491+
ANALYZE functional_dependencies_multi;
492+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
493+
estimated | actual
494+
-----------+--------
495+
714 | 714
496+
(1 row)
497+
498+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
499+
estimated | actual
500+
-----------+--------
501+
454 | 454
502+
(1 row)
503+
504+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
505+
estimated | actual
506+
-----------+--------
507+
65 | 64
508+
(1 row)
509+
510+
DROP TABLE functional_dependencies_multi;
454511
-- MCV lists
455512
CREATE TABLE mcv_lists (
456513
filler1 TEXT,

src/test/regress/sql/stats_ext.sql

+35
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,41 @@ ANALYZE functional_dependencies;
291291

292292
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
293293

294+
-- check the ability to use multiple functional dependencies
295+
CREATE TABLE functional_dependencies_multi (
296+
a INTEGER,
297+
b INTEGER,
298+
c INTEGER,
299+
d INTEGER
300+
);
301+
302+
INSERT INTO functional_dependencies_multi (a, b, c, d)
303+
SELECT
304+
mod(i,7),
305+
mod(i,7),
306+
mod(i,11),
307+
mod(i,11)
308+
FROM generate_series(1,5000) s(i);
309+
310+
ANALYZE functional_dependencies_multi;
311+
312+
-- estimates without any functional dependencies
313+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
314+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
315+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
316+
317+
-- create separate functional dependencies
318+
CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi;
319+
CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi;
320+
321+
ANALYZE functional_dependencies_multi;
322+
323+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0');
324+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0');
325+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
326+
327+
DROP TABLE functional_dependencies_multi;
328+
294329
-- MCV lists
295330
CREATE TABLE mcv_lists (
296331
filler1 TEXT,

0 commit comments

Comments
 (0)