|
| 1 | +/*------------------------------------------------------------------------- |
| 2 | + * |
| 3 | + * bloomfilter.c |
| 4 | + * Space-efficient set membership testing |
| 5 | + * |
| 6 | + * A Bloom filter is a probabilistic data structure that is used to test an |
| 7 | + * element's membership of a set. False positives are possible, but false |
| 8 | + * negatives are not; a test of membership of the set returns either "possibly |
| 9 | + * in set" or "definitely not in set". This is typically very space efficient, |
| 10 | + * which can be a decisive advantage. |
| 11 | + * |
| 12 | + * Elements can be added to the set, but not removed. The more elements that |
| 13 | + * are added, the larger the probability of false positives. Caller must hint |
| 14 | + * an estimated total size of the set when the Bloom filter is initialized. |
| 15 | + * This is used to balance the use of memory against the final false positive |
| 16 | + * rate. |
| 17 | + * |
| 18 | + * The implementation is well suited to data synchronization problems between |
| 19 | + * unordered sets, especially where predictable performance is important and |
| 20 | + * some false positives are acceptable. It's also well suited to cache |
| 21 | + * filtering problems where a relatively small and/or low cardinality set is |
| 22 | + * fingerprinted, especially when many subsequent membership tests end up |
| 23 | + * indicating that values of interest are not present. That should save the |
| 24 | + * caller many authoritative lookups, such as expensive probes of a much larger |
| 25 | + * on-disk structure. |
| 26 | + * |
| 27 | + * Copyright (c) 2018, PostgreSQL Global Development Group |
| 28 | + * |
| 29 | + * IDENTIFICATION |
| 30 | + * src/backend/lib/bloomfilter.c |
| 31 | + * |
| 32 | + *------------------------------------------------------------------------- |
| 33 | + */ |
| 34 | +#include "postgres.h" |
| 35 | + |
| 36 | +#include <math.h> |
| 37 | + |
| 38 | +#include "access/hash.h" |
| 39 | +#include "lib/bloomfilter.h" |
| 40 | + |
| 41 | +#define MAX_HASH_FUNCS 10 |
| 42 | + |
/*
 * Bloom filter state.  Opaque to callers; created by bloom_create() and
 * released by bloom_free().
 */
struct bloom_filter
{
	/* K hash functions are used, seeded by caller's seed */
	int			k_hash_funcs;
	/* caller-supplied seed, mixed into every hash (varies false positives) */
	uint64		seed;
	/* m is bitset size, in bits.  Must be a power of two <= 2^32. */
	uint64		m;
	/* the bitset proper; allocated inline as a flexible array member */
	unsigned char bitset[FLEXIBLE_ARRAY_MEMBER];
};
| 52 | + |
| 53 | +static int my_bloom_power(uint64 target_bitset_bits); |
| 54 | +static int optimal_k(uint64 bitset_bits, int64 total_elems); |
| 55 | +static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, |
| 56 | + size_t len); |
| 57 | +static inline uint32 mod_m(uint32 a, uint64 m); |
| 58 | + |
| 59 | +/* |
| 60 | + * Create Bloom filter in caller's memory context. We aim for a false positive |
| 61 | + * rate of between 1% and 2% when bitset size is not constrained by memory |
| 62 | + * availability. |
| 63 | + * |
| 64 | + * total_elems is an estimate of the final size of the set. It should be |
| 65 | + * approximately correct, but the implementation can cope well with it being |
| 66 | + * off by perhaps a factor of five or more. See "Bloom Filters in |
| 67 | + * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why |
| 68 | + * this is the case. |
| 69 | + * |
| 70 | + * bloom_work_mem is sized in KB, in line with the general work_mem convention. |
| 71 | + * This determines the size of the underlying bitset (trivial bookkeeping space |
| 72 | + * isn't counted). The bitset is always sized as a power of two number of |
| 73 | + * bits, and the largest possible bitset is 512MB (2^32 bits). The |
| 74 | + * implementation allocates only enough memory to target its standard false |
| 75 | + * positive rate, using a simple formula with caller's total_elems estimate as |
| 76 | + * an input. The bitset might be as small as 1MB, even when bloom_work_mem is |
| 77 | + * much higher. |
| 78 | + * |
| 79 | + * The Bloom filter is seeded using a value provided by the caller. Using a |
| 80 | + * distinct seed value on every call makes it unlikely that the same false |
| 81 | + * positives will reoccur when the same set is fingerprinted a second time. |
| 82 | + * Callers that don't care about this pass a constant as their seed, typically |
| 83 | + * 0. Callers can use a pseudo-random seed in the range of 0 - INT_MAX by |
| 84 | + * calling random(). |
| 85 | + */ |
| 86 | +bloom_filter * |
| 87 | +bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed) |
| 88 | +{ |
| 89 | + bloom_filter *filter; |
| 90 | + int bloom_power; |
| 91 | + uint64 bitset_bytes; |
| 92 | + uint64 bitset_bits; |
| 93 | + |
| 94 | + /* |
| 95 | + * Aim for two bytes per element; this is sufficient to get a false |
| 96 | + * positive rate below 1%, independent of the size of the bitset or total |
| 97 | + * number of elements. Also, if rounding down the size of the bitset to |
| 98 | + * the next lowest power of two turns out to be a significant drop, the |
| 99 | + * false positive rate still won't exceed 2% in almost all cases. |
| 100 | + */ |
| 101 | + bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2); |
| 102 | + bitset_bytes = Max(1024 * 1024, bitset_bytes); |
| 103 | + |
| 104 | + /* |
| 105 | + * Size in bits should be the highest power of two <= target. bitset_bits |
| 106 | + * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32 |
| 107 | + */ |
| 108 | + bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE); |
| 109 | + bitset_bits = UINT64CONST(1) << bloom_power; |
| 110 | + bitset_bytes = bitset_bits / BITS_PER_BYTE; |
| 111 | + |
| 112 | + /* Allocate bloom filter with unset bitset */ |
| 113 | + filter = palloc0(offsetof(bloom_filter, bitset) + |
| 114 | + sizeof(unsigned char) * bitset_bytes); |
| 115 | + filter->k_hash_funcs = optimal_k(bitset_bits, total_elems); |
| 116 | + filter->seed = seed; |
| 117 | + filter->m = bitset_bits; |
| 118 | + |
| 119 | + return filter; |
| 120 | +} |
| 121 | + |
| 122 | +/* |
| 123 | + * Free Bloom filter |
| 124 | + */ |
| 125 | +void |
| 126 | +bloom_free(bloom_filter *filter) |
| 127 | +{ |
| 128 | + pfree(filter); |
| 129 | +} |
| 130 | + |
| 131 | +/* |
| 132 | + * Add element to Bloom filter |
| 133 | + */ |
| 134 | +void |
| 135 | +bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len) |
| 136 | +{ |
| 137 | + uint32 hashes[MAX_HASH_FUNCS]; |
| 138 | + int i; |
| 139 | + |
| 140 | + k_hashes(filter, hashes, elem, len); |
| 141 | + |
| 142 | + /* Map a bit-wise address to a byte-wise address + bit offset */ |
| 143 | + for (i = 0; i < filter->k_hash_funcs; i++) |
| 144 | + { |
| 145 | + filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7); |
| 146 | + } |
| 147 | +} |
| 148 | + |
| 149 | +/* |
| 150 | + * Test if Bloom filter definitely lacks element. |
| 151 | + * |
| 152 | + * Returns true if the element is definitely not in the set of elements |
| 153 | + * observed by bloom_add_element(). Otherwise, returns false, indicating that |
| 154 | + * element is probably present in set. |
| 155 | + */ |
| 156 | +bool |
| 157 | +bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len) |
| 158 | +{ |
| 159 | + uint32 hashes[MAX_HASH_FUNCS]; |
| 160 | + int i; |
| 161 | + |
| 162 | + k_hashes(filter, hashes, elem, len); |
| 163 | + |
| 164 | + /* Map a bit-wise address to a byte-wise address + bit offset */ |
| 165 | + for (i = 0; i < filter->k_hash_funcs; i++) |
| 166 | + { |
| 167 | + if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7)))) |
| 168 | + return true; |
| 169 | + } |
| 170 | + |
| 171 | + return false; |
| 172 | +} |
| 173 | + |
| 174 | +/* |
| 175 | + * What proportion of bits are currently set? |
| 176 | + * |
| 177 | + * Returns proportion, expressed as a multiplier of filter size. That should |
| 178 | + * generally be close to 0.5, even when we have more than enough memory to |
| 179 | + * ensure a false positive rate within target 1% to 2% band, since more hash |
| 180 | + * functions are used as more memory is available per element. |
| 181 | + * |
| 182 | + * This is the only instrumentation that is low overhead enough to appear in |
| 183 | + * debug traces. When debugging Bloom filter code, it's likely to be far more |
| 184 | + * interesting to directly test the false positive rate. |
| 185 | + */ |
| 186 | +double |
| 187 | +bloom_prop_bits_set(bloom_filter *filter) |
| 188 | +{ |
| 189 | + int bitset_bytes = filter->m / BITS_PER_BYTE; |
| 190 | + uint64 bits_set = 0; |
| 191 | + int i; |
| 192 | + |
| 193 | + for (i = 0; i < bitset_bytes; i++) |
| 194 | + { |
| 195 | + unsigned char byte = filter->bitset[i]; |
| 196 | + |
| 197 | + while (byte) |
| 198 | + { |
| 199 | + bits_set++; |
| 200 | + byte &= (byte - 1); |
| 201 | + } |
| 202 | + } |
| 203 | + |
| 204 | + return bits_set / (double) filter->m; |
| 205 | +} |
| 206 | + |
| 207 | +/* |
| 208 | + * Which element in the sequence of powers of two is less than or equal to |
| 209 | + * target_bitset_bits? |
| 210 | + * |
| 211 | + * Value returned here must be generally safe as the basis for actual bitset |
| 212 | + * size. |
| 213 | + * |
| 214 | + * Bitset is never allowed to exceed 2 ^ 32 bits (512MB). This is sufficient |
| 215 | + * for the needs of all current callers, and allows us to use 32-bit hash |
| 216 | + * functions. It also makes it easy to stay under the MaxAllocSize restriction |
| 217 | + * (caller needs to leave room for non-bitset fields that appear before |
| 218 | + * flexible array member, so a 1GB bitset would use an allocation that just |
| 219 | + * exceeds MaxAllocSize). |
| 220 | + */ |
| 221 | +static int |
| 222 | +my_bloom_power(uint64 target_bitset_bits) |
| 223 | +{ |
| 224 | + int bloom_power = -1; |
| 225 | + |
| 226 | + while (target_bitset_bits > 0 && bloom_power < 32) |
| 227 | + { |
| 228 | + bloom_power++; |
| 229 | + target_bitset_bits >>= 1; |
| 230 | + } |
| 231 | + |
| 232 | + return bloom_power; |
| 233 | +} |
| 234 | + |
| 235 | +/* |
| 236 | + * Determine optimal number of hash functions based on size of filter in bits, |
| 237 | + * and projected total number of elements. The optimal number is the number |
| 238 | + * that minimizes the false positive rate. |
| 239 | + */ |
| 240 | +static int |
| 241 | +optimal_k(uint64 bitset_bits, int64 total_elems) |
| 242 | +{ |
| 243 | + int k = round(log(2.0) * bitset_bits / total_elems); |
| 244 | + |
| 245 | + return Max(1, Min(k, MAX_HASH_FUNCS)); |
| 246 | +} |
| 247 | + |
| 248 | +/* |
| 249 | + * Generate k hash values for element. |
| 250 | + * |
| 251 | + * Caller passes array, which is filled-in with k values determined by hashing |
| 252 | + * caller's element. |
| 253 | + * |
| 254 | + * Only 2 real independent hash functions are actually used to support an |
| 255 | + * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is |
| 256 | + * used to make this work. The main reason we prefer enhanced double hashing |
| 257 | + * to classic double hashing is that the latter has an issue with collisions |
| 258 | + * when using power of two sized bitsets. See Dillinger & Manolios for full |
| 259 | + * details. |
| 260 | + */ |
| 261 | +static void |
| 262 | +k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len) |
| 263 | +{ |
| 264 | + uint64 hash; |
| 265 | + uint32 x, y; |
| 266 | + uint64 m; |
| 267 | + int i; |
| 268 | + |
| 269 | + /* Use 64-bit hashing to get two independent 32-bit hashes */ |
| 270 | + hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed)); |
| 271 | + x = (uint32) hash; |
| 272 | + y = (uint32) (hash >> 32); |
| 273 | + m = filter->m; |
| 274 | + |
| 275 | + x = mod_m(x, m); |
| 276 | + y = mod_m(y, m); |
| 277 | + |
| 278 | + /* Accumulate hashes */ |
| 279 | + hashes[0] = x; |
| 280 | + for (i = 1; i < filter->k_hash_funcs; i++) |
| 281 | + { |
| 282 | + x = mod_m(x + y, m); |
| 283 | + y = mod_m(y + i, m); |
| 284 | + |
| 285 | + hashes[i] = x; |
| 286 | + } |
| 287 | +} |
| 288 | + |
| 289 | +/* |
| 290 | + * Calculate "val MOD m" inexpensively. |
| 291 | + * |
| 292 | + * Assumes that m (which is bitset size) is a power of two. |
| 293 | + * |
| 294 | + * Using a power of two number of bits for bitset size allows us to use bitwise |
| 295 | + * AND operations to calculate the modulo of a hash value. It's also a simple |
| 296 | + * way of avoiding the modulo bias effect. |
| 297 | + */ |
| 298 | +static inline uint32 |
| 299 | +mod_m(uint32 val, uint64 m) |
| 300 | +{ |
| 301 | + Assert(m <= PG_UINT32_MAX + UINT64CONST(1)); |
| 302 | + Assert(((m - 1) & m) == 0); |
| 303 | + |
| 304 | + return val & (m - 1); |
| 305 | +} |
0 commit comments