@@ -259,6 +259,48 @@ typedef struct BloomFilter
259
259
char data [FLEXIBLE_ARRAY_MEMBER ];
260
260
} BloomFilter ;
261
261
262
+ /*
263
+ * bloom_filter_size
264
+ * Calculate Bloom filter parameters (nbits, nbytes, nhashes).
265
+ *
266
+ * Given expected number of distinct values and desired false positive rate,
267
+ * calculates the optimal parameters of the Bloom filter.
268
+ *
269
+ * The resulting parameters are returned through nbytesp (number of bytes),
270
+ * nbitsp (number of bits) and nhashesp (number of hash functions). If a
271
+ * pointer is NULL, the parameter is not returned.
272
+ */
273
+ static void
274
+ bloom_filter_size (int ndistinct , double false_positive_rate ,
275
+ int * nbytesp , int * nbitsp , int * nhashesp )
276
+ {
277
+ double k ;
278
+ int nbits ,
279
+ nbytes ;
280
+
281
+ /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
282
+ nbits = ceil (- (ndistinct * log (false_positive_rate )) / pow (log (2.0 ), 2 ));
283
+
284
+ /* round m to whole bytes */
285
+ nbytes = ((nbits + 7 ) / 8 );
286
+ nbits = nbytes * 8 ;
287
+
288
+ /*
289
+ * round(log(2.0) * m / ndistinct), but assume round() may not be
290
+ * available on Windows
291
+ */
292
+ k = log (2.0 ) * nbits / ndistinct ;
293
+ k = (k - floor (k ) >= 0.5 ) ? ceil (k ) : floor (k );
294
+
295
+ if (nbytesp )
296
+ * nbytesp = nbytes ;
297
+
298
+ if (nbitsp )
299
+ * nbitsp = nbits ;
300
+
301
+ if (nhashesp )
302
+ * nhashesp = (int ) k ;
303
+ }
262
304
263
305
/*
264
306
* bloom_init
@@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
275
317
276
318
int nbits ; /* size of filter / number of bits */
277
319
int nbytes ; /* size of filter / number of bytes */
278
-
279
- double k ; /* number of hash functions */
320
+ int nhashes ; /* number of hash functions */
280
321
281
322
Assert (ndistinct > 0 );
282
323
Assert ((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE ) &&
283
324
(false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE ));
284
325
285
- /* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
286
- nbits = ceil (- (ndistinct * log (false_positive_rate )) / pow (log (2.0 ), 2 ));
287
-
288
- /* round m to whole bytes */
289
- nbytes = ((nbits + 7 ) / 8 );
290
- nbits = nbytes * 8 ;
326
+ /* calculate bloom filter size / parameters */
327
+ bloom_filter_size (ndistinct , false_positive_rate ,
328
+ & nbytes , & nbits , & nhashes );
291
329
292
330
/*
293
331
* Reject filters that are obviously too large to store on a page.
@@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
310
348
elog (ERROR , "the bloom filter is too large (%d > %zu)" , nbytes ,
311
349
BloomMaxFilterSize );
312
350
313
- /*
314
- * round(log(2.0) * m / ndistinct), but assume round() may not be
315
- * available on Windows
316
- */
317
- k = log (2.0 ) * nbits / ndistinct ;
318
- k = (k - floor (k ) >= 0.5 ) ? ceil (k ) : floor (k );
319
-
320
351
/*
321
352
* We allocate the whole filter. Most of it is going to be 0 bits, so the
322
353
* varlena is easy to compress.
@@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
326
357
filter = (BloomFilter * ) palloc0 (len );
327
358
328
359
filter -> flags = 0 ;
329
- filter -> nhashes = ( int ) k ;
360
+ filter -> nhashes = nhashes ;
330
361
filter -> nbits = nbits ;
331
362
332
363
SET_VARSIZE (filter , len );
0 commit comments