@@ -1148,9 +1148,13 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
1148
1148
* statext_mcv_clauselist_selectivity
1149
1149
* Estimate clauses using the best multi-column statistics.
1150
1150
*
1151
- * Selects the best extended (multi-column) statistic on a table (measured by
1152
- * the number of attributes extracted from the clauses and covered by it), and
1153
- * computes the selectivity for the supplied clauses.
1151
+ * Applies available extended (multi-column) statistics on a table. There may
1152
+ * be multiple applicable statistics (with respect to the clauses), in which
1153
+ * case we use a greedy approach. In each round we select the best statistic on
1154
+ * a table (measured by the number of attributes extracted from the clauses
1155
+ * and covered by it), and compute the selectivity for the supplied clauses.
1156
+ * We repeat this process with the remaining clauses (if any), until none of
1157
+ * the available statistics can be used.
1154
1158
*
1155
1159
* One of the main challenges with using MCV lists is how to extrapolate the
1156
1160
* estimate to the data not covered by the MCV list. To do that, we compute
@@ -1194,11 +1198,6 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
1194
1198
* 'estimatedclauses' is an input/output parameter. We set bits for the
1195
1199
* 0-based 'clauses' indexes we estimate for and also skip clause items that
1196
1200
* already have a bit set.
1197
- *
1198
- * XXX If we were to use multiple statistics, this is where it would happen.
1199
- * We would simply repeat this on a loop on the "remaining" clauses, possibly
1200
- * using the already estimated clauses as conditions (and combining the values
1201
- * using conditional probability formula).
1202
1201
*/
1203
1202
static Selectivity
1204
1203
statext_mcv_clauselist_selectivity (PlannerInfo * root , List * clauses , int varRelid ,
@@ -1208,14 +1207,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
1208
1207
ListCell * l ;
1209
1208
Bitmapset * * list_attnums ;
1210
1209
int listidx ;
1211
- StatisticExtInfo * stat ;
1212
- List * stat_clauses ;
1213
- Selectivity simple_sel ,
1214
- mcv_sel ,
1215
- mcv_basesel ,
1216
- mcv_totalsel ,
1217
- other_sel ,
1218
- sel ;
1210
+ Selectivity sel = 1.0 ;
1219
1211
1220
1212
/* check if there's any stats that might be useful for us. */
1221
1213
if (!has_stats_of_kind (rel -> statlist , STATS_EXT_MCV ))
@@ -1250,65 +1242,84 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
1250
1242
listidx ++ ;
1251
1243
}
1252
1244
1253
- /* find the best suited statistics object for these attnums */
1254
- stat = choose_best_statistics (rel -> statlist , STATS_EXT_MCV ,
1255
- list_attnums , list_length (clauses ));
1256
-
1257
- /* if no matching stats could be found then we've nothing to do */
1258
- if (!stat )
1259
- return 1.0 ;
1245
+ /* apply as many extended statistics as possible */
1246
+ while (true)
1247
+ {
1248
+ StatisticExtInfo * stat ;
1249
+ List * stat_clauses ;
1250
+ Selectivity simple_sel ,
1251
+ mcv_sel ,
1252
+ mcv_basesel ,
1253
+ mcv_totalsel ,
1254
+ other_sel ,
1255
+ stat_sel ;
1256
+
1257
+ /* find the best suited statistics object for these attnums */
1258
+ stat = choose_best_statistics (rel -> statlist , STATS_EXT_MCV ,
1259
+ list_attnums , list_length (clauses ));
1260
+
1261
+ /* if no (additional) matching stats could be found then we've nothing to do */
1262
+ if (!stat )
1263
+ break ;
1260
1264
1261
- /* Ensure choose_best_statistics produced an expected stats type. */
1262
- Assert (stat -> kind == STATS_EXT_MCV );
1265
+ /* Ensure choose_best_statistics produced an expected stats type. */
1266
+ Assert (stat -> kind == STATS_EXT_MCV );
1263
1267
1264
- /* now filter the clauses to be estimated using the selected MCV */
1265
- stat_clauses = NIL ;
1268
+ /* now filter the clauses to be estimated using the selected MCV */
1269
+ stat_clauses = NIL ;
1266
1270
1267
- listidx = 0 ;
1268
- foreach (l , clauses )
1269
- {
1270
- /*
1271
- * If the clause is compatible with the selected statistics, mark it
1272
- * as estimated and add it to the list to estimate.
1273
- */
1274
- if (list_attnums [listidx ] != NULL &&
1275
- bms_is_subset (list_attnums [listidx ], stat -> keys ))
1271
+ listidx = 0 ;
1272
+ foreach (l , clauses )
1276
1273
{
1277
- stat_clauses = lappend (stat_clauses , (Node * ) lfirst (l ));
1278
- * estimatedclauses = bms_add_member (* estimatedclauses , listidx );
1274
+ /*
1275
+ * If the clause is compatible with the selected statistics, mark it
1276
+ * as estimated and add it to the list to estimate.
1277
+ */
1278
+ if (list_attnums [listidx ] != NULL &&
1279
+ bms_is_subset (list_attnums [listidx ], stat -> keys ))
1280
+ {
1281
+ stat_clauses = lappend (stat_clauses , (Node * ) lfirst (l ));
1282
+ * estimatedclauses = bms_add_member (* estimatedclauses , listidx );
1283
+
1284
+ bms_free (list_attnums [listidx ]);
1285
+ list_attnums [listidx ] = NULL ;
1286
+ }
1287
+
1288
+ listidx ++ ;
1279
1289
}
1280
1290
1281
- listidx ++ ;
1282
- }
1291
+ /*
1292
+ * First compute "simple" selectivity, i.e. without the extended
1293
+ * statistics, and essentially assuming independence of the
1294
+ * columns/clauses. We'll then use the various selectivities computed from
1295
+ * MCV list to improve it.
1296
+ */
1297
+ simple_sel = clauselist_selectivity_simple (root , stat_clauses , varRelid ,
1298
+ jointype , sjinfo , NULL );
1283
1299
1284
- /*
1285
- * First compute "simple" selectivity, i.e. without the extended
1286
- * statistics, and essentially assuming independence of the
1287
- * columns/clauses. We'll then use the various selectivities computed from
1288
- * MCV list to improve it.
1289
- */
1290
- simple_sel = clauselist_selectivity_simple (root , stat_clauses , varRelid ,
1291
- jointype , sjinfo , NULL );
1300
+ /*
1301
+ * Now compute the multi-column estimate from the MCV list, along with the
1302
+ * other selectivities (base & total selectivity).
1303
+ */
1304
+ mcv_sel = mcv_clauselist_selectivity (root , stat , stat_clauses , varRelid ,
1305
+ jointype , sjinfo , rel ,
1306
+ & mcv_basesel , & mcv_totalsel );
1292
1307
1293
- /*
1294
- * Now compute the multi-column estimate from the MCV list, along with the
1295
- * other selectivities (base & total selectivity).
1296
- */
1297
- mcv_sel = mcv_clauselist_selectivity (root , stat , stat_clauses , varRelid ,
1298
- jointype , sjinfo , rel ,
1299
- & mcv_basesel , & mcv_totalsel );
1308
+ /* Estimated selectivity of values not covered by MCV matches */
1309
+ other_sel = simple_sel - mcv_basesel ;
1310
+ CLAMP_PROBABILITY (other_sel );
1300
1311
1301
- /* Estimated selectivity of values not covered by MCV matches */
1302
- other_sel = simple_sel - mcv_basesel ;
1303
- CLAMP_PROBABILITY ( other_sel ) ;
1312
+ /* The non-MCV selectivity can't exceed 1 - mcv_totalsel. */
1313
+ if ( other_sel > 1.0 - mcv_totalsel )
1314
+ other_sel = 1.0 - mcv_totalsel ;
1304
1315
1305
- /* The non-MCV selectivity can't exceed the 1 - mcv_totalsel . */
1306
- if ( other_sel > 1.0 - mcv_totalsel )
1307
- other_sel = 1.0 - mcv_totalsel ;
1316
+ /* Overall selectivity is the combination of MCV and non-MCV estimates . */
1317
+ stat_sel = mcv_sel + other_sel ;
1318
+ CLAMP_PROBABILITY ( stat_sel ) ;
1308
1319
1309
- /* Overall selectivity is the combination of MCV and non-MCV estimates . */
1310
- sel = mcv_sel + other_sel ;
1311
- CLAMP_PROBABILITY ( sel );
1320
+ /* Factor the estimate from this MCV into the overall estimate . */
1321
+ sel *= stat_sel ;
1322
+ }
1312
1323
1313
1324
return sel ;
1314
1325
}
0 commit comments