Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit ee4af34

Browse files
committed
Measure Bloom index signature-length reloption in bits, not words.
Per discussion, this is a more understandable and future-proof way of exposing the setting to users. On-disk, we can still store it in words, so as to not break on-disk compatibility with beta1. Along the way, clean up the code associated with Bloom reloptions. Provide explicit macros for default and maximum lengths rather than having magic numbers buried in multiple places in the code. Drop the adjustBloomOptions() code altogether: it was useless in view of the fact that reloptions.c already performed default-substitution and range checking for the options. Rename a couple of macros and types for more clarity. Discussion: <23767.1464926580@sss.pgh.pa.us>
1 parent fdfaccf commit ee4af34

File tree

4 files changed

+86
-93
lines changed

4 files changed

+86
-93
lines changed

contrib/bloom/bloom.h

+22-11
Original file line numberDiff line numberDiff line change
@@ -79,18 +79,31 @@ typedef BloomPageOpaqueData *BloomPageOpaque;
7979
#define BLOOM_HEAD_BLKNO (1) /* first data page */
8080

8181
/*
82-
* Maximum of bloom signature length in uint16. Actual value
83-
* is 512 bytes
82+
* We store Bloom signatures as arrays of uint16 words.
8483
*/
85-
#define MAX_BLOOM_LENGTH (256)
84+
typedef uint16 BloomSignatureWord;
85+
86+
#define SIGNWORDBITS ((int) (BITS_PER_BYTE * sizeof(BloomSignatureWord)))
87+
88+
/*
89+
* Default and maximum Bloom signature length in bits.
90+
*/
91+
#define DEFAULT_BLOOM_LENGTH (5 * SIGNWORDBITS)
92+
#define MAX_BLOOM_LENGTH (256 * SIGNWORDBITS)
93+
94+
/*
95+
* Default and maximum signature bits generated per index key.
96+
*/
97+
#define DEFAULT_BLOOM_BITS 2
98+
#define MAX_BLOOM_BITS (MAX_BLOOM_LENGTH - 1)
8699

87100
/* Bloom index options */
88101
typedef struct BloomOptions
89102
{
90103
int32 vl_len_; /* varlena header (do not touch directly!) */
91-
int bloomLength; /* length of signature in uint16 */
92-
int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
93-
* key */
104+
int bloomLength; /* length of signature in words (not bits!) */
105+
int bitSize[INDEX_MAX_KEYS]; /* # of bits generated for each
106+
* index key */
94107
} BloomOptions;
95108

96109
/*
@@ -143,20 +156,18 @@ typedef struct BloomState
143156
/*
144157
* Tuples are very different from all other relations
145158
*/
146-
typedef uint16 SignType;
147-
148159
typedef struct BloomTuple
149160
{
150161
ItemPointerData heapPtr;
151-
SignType sign[FLEXIBLE_ARRAY_MEMBER];
162+
BloomSignatureWord sign[FLEXIBLE_ARRAY_MEMBER];
152163
} BloomTuple;
153164

154165
#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
155166

156167
/* Opaque data structure for bloom index scan */
157168
typedef struct BloomScanOpaqueData
158169
{
159-
SignType *sign; /* Scan signature */
170+
BloomSignatureWord *sign; /* Scan signature */
160171
BloomState state;
161172
} BloomScanOpaqueData;
162173

@@ -170,7 +181,7 @@ extern void BloomFillMetapage(Relation index, Page metaPage);
170181
extern void BloomInitMetapage(Relation index);
171182
extern void BloomInitPage(Page page, uint16 flags);
172183
extern Buffer BloomNewBuffer(Relation index);
173-
extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
184+
extern void signValue(BloomState * state, BloomSignatureWord * sign, Datum value, int attno);
174185
extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
175186
extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
176187

contrib/bloom/blscan.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
9393
/* New search: have to calculate search signature */
9494
ScanKey skey = scan->keyData;
9595

96-
so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);
96+
so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);
9797

9898
for (i = 0; i < scan->numberOfKeys; i++)
9999
{

contrib/bloom/blutils.c

+53-72
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,26 @@
2727

2828
#include "bloom.h"
2929

30-
/* Signature dealing macros */
31-
#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
32-
#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
33-
#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
34-
#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
35-
#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
30+
/* Signature dealing macros - note i is assumed to be of type int */
31+
#define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
32+
#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) )
33+
#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) )
34+
#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
3635

3736
PG_FUNCTION_INFO_V1(blhandler);
3837

39-
/* Kind of relation optioms for bloom index */
38+
/* Kind of relation options for bloom index */
4039
static relopt_kind bl_relopt_kind;
40+
/* parse table for fillRelOptions */
41+
static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
4142

4243
static int32 myRand(void);
4344
static void mySrand(uint32 seed);
4445

4546
/*
46-
* Module initialize function: initilized relation options.
47+
* Module initialize function: initialize info about Bloom relation options.
48+
*
49+
* Note: keep this in sync with makeDefaultBloomOptions().
4750
*/
4851
void
4952
_PG_init(void)
@@ -53,17 +56,46 @@ _PG_init(void)
5356

5457
bl_relopt_kind = add_reloption_kind();
5558

59+
/* Option for length of signature */
5660
add_int_reloption(bl_relopt_kind, "length",
57-
"Length of signature in uint16 type", 5, 1, 256);
61+
"Length of signature in bits",
62+
DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
63+
bl_relopt_tab[0].optname = "length";
64+
bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
65+
bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
5866

67+
/* Number of bits for each possible index column: col1, col2, ... */
5968
for (i = 0; i < INDEX_MAX_KEYS; i++)
6069
{
61-
snprintf(buf, 16, "col%d", i + 1);
70+
snprintf(buf, sizeof(buf), "col%d", i + 1);
6271
add_int_reloption(bl_relopt_kind, buf,
63-
"Number of bits for corresponding column", 2, 1, 2048);
72+
"Number of bits generated for each index column",
73+
DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
74+
bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
75+
buf);
76+
bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
77+
bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
6478
}
6579
}
6680

81+
/*
82+
* Construct a default set of Bloom options.
83+
*/
84+
static BloomOptions *
85+
makeDefaultBloomOptions(void)
86+
{
87+
BloomOptions *opts;
88+
int i;
89+
90+
opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
91+
/* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
92+
opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
93+
for (i = 0; i < INDEX_MAX_KEYS; i++)
94+
opts->bitSize[i] = DEFAULT_BLOOM_BITS;
95+
SET_VARSIZE(opts, sizeof(BloomOptions));
96+
return opts;
97+
}
98+
6799
/*
68100
* Bloom handler function: return IndexAmRoutine with access method parameters
69101
* and callbacks.
@@ -157,7 +189,7 @@ initBloomState(BloomState *state, Relation index)
157189

158190
memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
159191
state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
160-
sizeof(SignType) * state->opts.bloomLength;
192+
sizeof(BloomSignatureWord) * state->opts.bloomLength;
161193
}
162194

163195
/*
@@ -208,7 +240,7 @@ mySrand(uint32 seed)
208240
* Add bits of given value to the signature.
209241
*/
210242
void
211-
signValue(BloomState *state, SignType *sign, Datum value, int attno)
243+
signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
212244
{
213245
uint32 hashVal;
214246
int nBit,
@@ -231,8 +263,8 @@ signValue(BloomState *state, SignType *sign, Datum value, int attno)
231263

232264
for (j = 0; j < state->opts.bitSize[attno]; j++)
233265
{
234-
/* prevent mutiple evaluation */
235-
nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
266+
/* prevent multiple evaluation in SETBIT macro */
267+
nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
236268
SETBIT(sign, nBit);
237269
}
238270
}
@@ -361,39 +393,6 @@ BloomInitPage(Page page, uint16 flags)
361393
opaque->bloom_page_id = BLOOM_PAGE_ID;
362394
}
363395

364-
/*
365-
* Adjust options of bloom index.
366-
*
367-
* This must produce default options when *opts is initially all-zero.
368-
*/
369-
static void
370-
adjustBloomOptions(BloomOptions *opts)
371-
{
372-
int i;
373-
374-
/* Default length of bloom filter is 5 of 16-bit integers */
375-
if (opts->bloomLength <= 0)
376-
opts->bloomLength = 5;
377-
else if (opts->bloomLength > MAX_BLOOM_LENGTH)
378-
ereport(ERROR,
379-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
380-
errmsg("length of bloom signature (%d) is greater than maximum %d",
381-
opts->bloomLength, MAX_BLOOM_LENGTH)));
382-
383-
/* Check signature length */
384-
for (i = 0; i < INDEX_MAX_KEYS; i++)
385-
{
386-
/*
387-
* Zero and negative number of bits is meaningless. Also setting
388-
* more bits than signature have seems useless. Replace both cases
389-
* with 2 bits default.
390-
*/
391-
if (opts->bitSize[i] <= 0
392-
|| opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
393-
opts->bitSize[i] = 2;
394-
}
395-
}
396-
397396
/*
398397
* Fill in metapage for bloom index.
399398
*/
@@ -405,14 +404,11 @@ BloomFillMetapage(Relation index, Page metaPage)
405404

406405
/*
407406
* Choose the index's options. If reloptions have been assigned, use
408-
* those, otherwise create default options by applying adjustBloomOptions
409-
* to a zeroed chunk of memory. We apply adjustBloomOptions to existing
410-
* reloptions too, just out of paranoia; they should be valid already.
407+
* those, otherwise create default options.
411408
*/
412409
opts = (BloomOptions *) index->rd_options;
413410
if (!opts)
414-
opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
415-
adjustBloomOptions(opts);
411+
opts = makeDefaultBloomOptions();
416412

417413
/*
418414
* Initialize contents of meta page, including a copy of the options,
@@ -462,30 +458,15 @@ bloptions(Datum reloptions, bool validate)
462458
relopt_value *options;
463459
int numoptions;
464460
BloomOptions *rdopts;
465-
relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
466-
int i;
467-
char buf[16];
468-
469-
/* Option for length of signature */
470-
tab[0].optname = "length";
471-
tab[0].opttype = RELOPT_TYPE_INT;
472-
tab[0].offset = offsetof(BloomOptions, bloomLength);
473-
474-
/* Number of bits for each of possible columns: col1, col2, ... */
475-
for (i = 0; i < INDEX_MAX_KEYS; i++)
476-
{
477-
snprintf(buf, sizeof(buf), "col%d", i + 1);
478-
tab[i + 1].optname = pstrdup(buf);
479-
tab[i + 1].opttype = RELOPT_TYPE_INT;
480-
tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
481-
}
482461

462+
/* Parse the user-given reloptions */
483463
options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
484464
rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
485465
fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
486-
validate, tab, INDEX_MAX_KEYS + 1);
466+
validate, bl_relopt_tab, lengthof(bl_relopt_tab));
487467

488-
adjustBloomOptions(rdopts);
468+
/* Convert signature length from # of bits to # to words, rounding up */
469+
rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
489470

490471
return (bytea *) rdopts;
491472
}

doc/src/sgml/bloom.sgml

+10-9
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
</indexterm>
99

1010
<para>
11-
<literal>bloom</> is a module which implements an index access method. It comes
12-
as an example of custom access methods and generic WAL records usage. But it
11+
<literal>bloom</> is a module that implements an index access method. It comes
12+
as an example of custom access methods and generic WAL record usage. But it
1313
is also useful in itself.
1414
</para>
1515

@@ -22,8 +22,9 @@
2222
allows fast exclusion of non-candidate tuples via signatures.
2323
Since a signature is a lossy representation of all indexed attributes,
2424
search results must be rechecked using heap information.
25-
The user can specify signature length (in uint16, default is 5) and the
26-
number of bits, which can be set per attribute (1 < colN < 2048).
25+
The user can specify signature length in bits (default 80, maximum 4096)
26+
and the number of bits generated for each index column (default 2,
27+
maximum 4095).
2728
</para>
2829

2930
<para>
@@ -51,17 +52,17 @@
5152
<term><literal>length</></term>
5253
<listitem>
5354
<para>
54-
Length of signature in uint16 type values
55+
Length of signature in bits
5556
</para>
5657
</listitem>
5758
</varlistentry>
5859
</variablelist>
5960
<variablelist>
6061
<varlistentry>
61-
<term><literal>col1 &mdash; col16</></term>
62+
<term><literal>col1 &mdash; col32</></term>
6263
<listitem>
6364
<para>
64-
Number of bits for corresponding column
65+
Number of bits generated for each index column
6566
</para>
6667
</listitem>
6768
</varlistentry>
@@ -77,12 +78,12 @@
7778

7879
<programlisting>
7980
CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
80-
WITH (length=5, col1=2, col2=2, col3=4);
81+
WITH (length=80, col1=2, col2=2, col3=4);
8182
</programlisting>
8283

8384
<para>
8485
Here, we created a bloom index with a signature length of 80 bits,
85-
and attributes i1 and i2 mapped to 2 bits, and attribute i3 to 4 bits.
86+
and attributes i1 and i2 mapped to 2 bits, and attribute i3 mapped to 4 bits.
8687
</para>
8788

8889
<para>

0 commit comments

Comments
 (0)