@@ -78,6 +78,9 @@ static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
78
78
static SortItem * build_distinct_groups (int numrows , SortItem * items ,
79
79
MultiSortSupport mss , int * ndistinct );
80
80
81
+ static SortItem * * build_column_frequencies (SortItem * groups , int ngroups ,
82
+ MultiSortSupport mss , int * ncounts );
83
+
81
84
static int count_distinct_groups (int numrows , SortItem * items ,
82
85
MultiSortSupport mss );
83
86
@@ -242,6 +245,20 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
242
245
if (nitems > 0 )
243
246
{
244
247
int j ;
248
+ SortItem key ;
249
+ MultiSortSupport tmp ;
250
+
251
+ /* frequencies for values in each attribute */
252
+ SortItem * * freqs ;
253
+ int * nfreqs ;
254
+
255
+ /* used to search values */
256
+ tmp = (MultiSortSupport ) palloc (offsetof(MultiSortSupportData , ssup )
257
+ + sizeof (SortSupportData ));
258
+
259
+ /* compute frequencies for values in each column */
260
+ nfreqs = (int * ) palloc0 (sizeof (int ) * numattrs );
261
+ freqs = build_column_frequencies (groups , ngroups , mss , nfreqs );
245
262
246
263
/*
247
264
* Allocate the MCV list structure, set the global parameters.
@@ -281,18 +298,26 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
281
298
item -> base_frequency = 1.0 ;
282
299
for (j = 0 ; j < numattrs ; j ++ )
283
300
{
284
- int count = 0 ;
285
- int k ;
301
+ SortItem * freq ;
286
302
287
- for (k = 0 ; k < ngroups ; k ++ )
288
- {
289
- if (multi_sort_compare_dim (j , & groups [i ], & groups [k ], mss ) == 0 )
290
- count += groups [k ].count ;
291
- }
303
+ /* single dimension */
304
+ tmp -> ndims = 1 ;
305
+ tmp -> ssup [0 ] = mss -> ssup [j ];
292
306
293
- item -> base_frequency *= (double ) count / numrows ;
307
+ /* fill search key */
308
+ key .values = & groups [i ].values [j ];
309
+ key .isnull = & groups [i ].isnull [j ];
310
+
311
+ freq = (SortItem * ) bsearch_arg (& key , freqs [j ], nfreqs [j ],
312
+ sizeof (SortItem ),
313
+ multi_sort_compare , tmp );
314
+
315
+ item -> base_frequency *= ((double ) freq -> count ) / numrows ;
294
316
}
295
317
}
318
+
319
+ pfree (nfreqs );
320
+ pfree (freqs );
296
321
}
297
322
298
323
pfree (items );
@@ -419,6 +444,95 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
419
444
return groups ;
420
445
}
421
446
447
+ /* compare sort items (single dimension) */
448
+ static int
449
+ sort_item_compare (const void * a , const void * b , void * arg )
450
+ {
451
+ SortSupport ssup = (SortSupport ) arg ;
452
+ SortItem * ia = (SortItem * ) a ;
453
+ SortItem * ib = (SortItem * ) b ;
454
+
455
+ return ApplySortComparator (ia -> values [0 ], ia -> isnull [0 ],
456
+ ib -> values [0 ], ib -> isnull [0 ],
457
+ ssup );
458
+ }
459
+
460
+ /*
461
+ * build_column_frequencies
462
+ * compute frequencies of values in each column
463
+ *
464
+ * This returns an array of SortItems for each attibute the MCV is built
465
+ * on, with a frequency (number of occurrences) for each value. This is
466
+ * then used to compute "base" frequency of MCV items.
467
+ *
468
+ * All the memory is allocated in a single chunk, so that a single pfree
469
+ * is enough to release it. We do not allocate space for values/isnull
470
+ * arrays in the SortItems, because we can simply point into the input
471
+ * groups directly.
472
+ */
473
+ static SortItem * *
474
+ build_column_frequencies (SortItem * groups , int ngroups ,
475
+ MultiSortSupport mss , int * ncounts )
476
+ {
477
+ int i ,
478
+ dim ;
479
+ SortItem * * result ;
480
+ char * ptr ;
481
+
482
+ Assert (groups );
483
+ Assert (ncounts );
484
+
485
+ /* allocate arrays for all columns as a single chunk */
486
+ ptr = palloc (MAXALIGN (sizeof (SortItem * ) * mss -> ndims ) +
487
+ mss -> ndims * MAXALIGN (sizeof (SortItem ) * ngroups ));
488
+
489
+ /* initial array of pointers */
490
+ result = (SortItem * * ) ptr ;
491
+ ptr += MAXALIGN (sizeof (SortItem * ) * mss -> ndims );
492
+
493
+ for (dim = 0 ; dim < mss -> ndims ; dim ++ )
494
+ {
495
+ SortSupport ssup = & mss -> ssup [dim ];
496
+
497
+ /* array of values for a single column */
498
+ result [dim ] = (SortItem * ) ptr ;
499
+ ptr += MAXALIGN (sizeof (SortItem ) * ngroups );
500
+
501
+ /* extract data for the dimension */
502
+ for (i = 0 ; i < ngroups ; i ++ )
503
+ {
504
+ /* point into the input groups */
505
+ result [dim ][i ].values = & groups [i ].values [dim ];
506
+ result [dim ][i ].isnull = & groups [i ].isnull [dim ];
507
+ result [dim ][i ].count = groups [i ].count ;
508
+ }
509
+
510
+ /* sort the values, deduplicate */
511
+ qsort_arg ((void * ) result [dim ], ngroups , sizeof (SortItem ),
512
+ sort_item_compare , ssup );
513
+
514
+ /*
515
+ * Identify distinct values, compute frequency (there might be
516
+ * multiple MCV items containing this value, so we need to sum
517
+ * counts from all of them.
518
+ */
519
+ ncounts [dim ] = 1 ;
520
+ for (i = 1 ; i < ngroups ; i ++ )
521
+ {
522
+ if (sort_item_compare (& result [dim ][i - 1 ], & result [dim ][i ], ssup ) == 0 )
523
+ {
524
+ result [dim ][ncounts [dim ]- 1 ].count += result [dim ][i ].count ;
525
+ continue ;
526
+ }
527
+
528
+ result [dim ][ncounts [dim ]] = result [dim ][i ];
529
+
530
+ ncounts [dim ]++ ;
531
+ }
532
+ }
533
+
534
+ return result ;
535
+ }
422
536
423
537
/*
424
538
* statext_mcv_load
0 commit comments