Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2b8b285

Browse files
committed
Introduce bloom_filter_size for BRIN bloom opclass
Move the calculation of Bloom filter parameters (for BRIN indexes) into a separate function to make reuse easier. At the moment we only call it from one place, but that may change and it's easier to read anyway. Reviewed-by: Heikki Linnakangas Discussion: https://postgr.es/m/0e1f3350-c9cf-ab62-43a5-5dae314de89c%40enterprisedb.com
1 parent 28d03fe commit 2b8b285

File tree

1 file changed

+47
-16
lines changed

1 file changed

+47
-16
lines changed

src/backend/access/brin/brin_bloom.c

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,48 @@ typedef struct BloomFilter
259259
char data[FLEXIBLE_ARRAY_MEMBER];
260260
} BloomFilter;
261261

262+
/*
263+
* bloom_filter_size
264+
* Calculate Bloom filter parameters (nbits, nbytes, nhashes).
265+
*
266+
* Given expected number of distinct values and desired false positive rate,
267+
* calculates the optimal parameters of the Bloom filter.
268+
*
269+
* The resulting parameters are returned through nbytesp (number of bytes),
270+
* nbitsp (number of bits) and nhashesp (number of hash functions). If a
271+
* pointer is NULL, the parameter is not returned.
272+
*/
273+
static void
274+
bloom_filter_size(int ndistinct, double false_positive_rate,
275+
int *nbytesp, int *nbitsp, int *nhashesp)
276+
{
277+
double k;
278+
int nbits,
279+
nbytes;
280+
281+
/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
282+
nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
283+
284+
/* round m to whole bytes */
285+
nbytes = ((nbits + 7) / 8);
286+
nbits = nbytes * 8;
287+
288+
/*
289+
* round(log(2.0) * m / ndistinct), but assume round() may not be
290+
* available on Windows
291+
*/
292+
k = log(2.0) * nbits / ndistinct;
293+
k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
294+
295+
if (nbytesp)
296+
*nbytesp = nbytes;
297+
298+
if (nbitsp)
299+
*nbitsp = nbits;
300+
301+
if (nhashesp)
302+
*nhashesp = (int) k;
303+
}
262304

263305
/*
264306
* bloom_init
@@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
275317

276318
int nbits; /* size of filter / number of bits */
277319
int nbytes; /* size of filter / number of bytes */
278-
279-
double k; /* number of hash functions */
320+
int nhashes; /* number of hash functions */
280321

281322
Assert(ndistinct > 0);
282323
Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) &&
283324
(false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE));
284325

285-
/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
286-
nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
287-
288-
/* round m to whole bytes */
289-
nbytes = ((nbits + 7) / 8);
290-
nbits = nbytes * 8;
326+
/* calculate bloom filter size / parameters */
327+
bloom_filter_size(ndistinct, false_positive_rate,
328+
&nbytes, &nbits, &nhashes);
291329

292330
/*
293331
* Reject filters that are obviously too large to store on a page.
@@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
310348
elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
311349
BloomMaxFilterSize);
312350

313-
/*
314-
* round(log(2.0) * m / ndistinct), but assume round() may not be
315-
* available on Windows
316-
*/
317-
k = log(2.0) * nbits / ndistinct;
318-
k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
319-
320351
/*
321352
* We allocate the whole filter. Most of it is going to be 0 bits, so the
322353
* varlena is easy to compress.
@@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
326357
filter = (BloomFilter *) palloc0(len);
327358

328359
filter->flags = 0;
329-
filter->nhashes = (int) k;
360+
filter->nhashes = nhashes;
330361
filter->nbits = nbits;
331362

332363
SET_VARSIZE(filter, len);

0 commit comments

Comments
 (0)