Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 8cf739d

Browse files
committed
Fix building of large (bigger than shared_buffers) hash indexes.
When the index is predicted to need more than NBuffers buckets, CREATE INDEX attempts to sort the index entries by hash key before insertion, so as to reduce thrashing. This code path got broken by commit 9f03ca9, which overlooked that _hash_form_tuple() is not just an alias for index_form_tuple(). The index got built anyway, but with garbage data, so that searches for pre-existing tuples always failed. Fix by refactoring to separate construction of the indexable data from calling index_form_tuple(). Per bug #14210 from Daniel Newman. Back-patch to 9.5 where the bug was introduced. Report: <20160623162507.17237.39471@wrigleys.postgresql.org>
1 parent 9e9c38e commit 8cf739d

File tree

3 files changed

+48
-35
lines changed

3 files changed

+48
-35
lines changed

src/backend/access/hash/hash.c

+18-16
Original file line numberDiff line numberDiff line change
@@ -176,19 +176,25 @@ hashbuildCallback(Relation index,
176176
void *state)
177177
{
178178
HashBuildState *buildstate = (HashBuildState *) state;
179+
Datum index_values[1];
180+
bool index_isnull[1];
179181
IndexTuple itup;
180182

181-
/* Hash indexes don't index nulls, see notes in hashinsert */
182-
if (isnull[0])
183+
/* convert data to a hash key; on failure, do not insert anything */
184+
if (!_hash_convert_tuple(index,
185+
values, isnull,
186+
index_values, index_isnull))
183187
return;
184188

185189
/* Either spool the tuple for sorting, or just put it into the index */
186190
if (buildstate->spool)
187-
_h_spool(buildstate->spool, &htup->t_self, values, isnull);
191+
_h_spool(buildstate->spool, &htup->t_self,
192+
index_values, index_isnull);
188193
else
189194
{
190195
/* form an index tuple and point it at the heap tuple */
191-
itup = _hash_form_tuple(index, values, isnull);
196+
itup = index_form_tuple(RelationGetDescr(index),
197+
index_values, index_isnull);
192198
itup->t_tid = htup->t_self;
193199
_hash_doinsert(index, itup);
194200
pfree(itup);
@@ -208,22 +214,18 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
208214
ItemPointer ht_ctid, Relation heapRel,
209215
IndexUniqueCheck checkUnique)
210216
{
217+
Datum index_values[1];
218+
bool index_isnull[1];
211219
IndexTuple itup;
212220

213-
/*
214-
* If the single index key is null, we don't insert it into the index.
215-
* Hash tables support scans on '='. Relational algebra says that A = B
216-
* returns null if either A or B is null. This means that no
217-
* qualification used in an index scan could ever return true on a null
218-
* attribute. It also means that indices can't be used by ISNULL or
219-
* NOTNULL scans, but that's an artifact of the strategy map architecture
220-
* chosen in 1986, not of the way nulls are handled here.
221-
*/
222-
if (isnull[0])
221+
/* convert data to a hash key; on failure, do not insert anything */
222+
if (!_hash_convert_tuple(rel,
223+
values, isnull,
224+
index_values, index_isnull))
223225
return false;
224226

225-
/* generate an index tuple */
226-
itup = _hash_form_tuple(rel, values, isnull);
227+
/* form an index tuple and point it at the heap tuple */
228+
itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
227229
itup->t_tid = *ht_ctid;
228230

229231
_hash_doinsert(rel, itup);

src/backend/access/hash/hashutil.c

+27-17
Original file line numberDiff line numberDiff line change
@@ -240,27 +240,37 @@ _hash_get_indextuple_hashkey(IndexTuple itup)
240240
}
241241

242242
/*
243-
* _hash_form_tuple - form an index tuple containing hash code only
243+
* _hash_convert_tuple - convert raw index data to hash key
244+
*
245+
* Inputs: values and isnull arrays for the user data column(s)
246+
* Outputs: values and isnull arrays for the index tuple, suitable for
247+
* passing to index_form_tuple().
248+
*
249+
* Returns true if successful, false if not (because there are null values).
250+
* On a false result, the given data need not be indexed.
251+
*
252+
* Note: callers know that the index-column arrays are always of length 1.
253+
* In principle, there could be more than one input column, though we do not
254+
* currently support that.
244255
*/
245-
IndexTuple
246-
_hash_form_tuple(Relation index, Datum *values, bool *isnull)
256+
bool
257+
_hash_convert_tuple(Relation index,
258+
Datum *user_values, bool *user_isnull,
259+
Datum *index_values, bool *index_isnull)
247260
{
248-
IndexTuple itup;
249261
uint32 hashkey;
250-
Datum hashkeydatum;
251-
TupleDesc hashdesc;
252262

253-
if (isnull[0])
254-
hashkeydatum = (Datum) 0;
255-
else
256-
{
257-
hashkey = _hash_datum2hashkey(index, values[0]);
258-
hashkeydatum = UInt32GetDatum(hashkey);
259-
}
260-
hashdesc = RelationGetDescr(index);
261-
Assert(hashdesc->natts == 1);
262-
itup = index_form_tuple(hashdesc, &hashkeydatum, isnull);
263-
return itup;
263+
/*
264+
* We do not insert null values into hash indexes. This is okay because
265+
* the only supported search operator is '=', and we assume it is strict.
266+
*/
267+
if (user_isnull[0])
268+
return false;
269+
270+
hashkey = _hash_datum2hashkey(index, user_values[0]);
271+
index_values[0] = UInt32GetDatum(hashkey);
272+
index_isnull[0] = false;
273+
return true;
264274
}
265275

266276
/*

src/include/access/hash.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,9 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
359359
extern uint32 _hash_log2(uint32 num);
360360
extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
361361
extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup);
362-
extern IndexTuple _hash_form_tuple(Relation index,
363-
Datum *values, bool *isnull);
362+
extern bool _hash_convert_tuple(Relation index,
363+
Datum *user_values, bool *user_isnull,
364+
Datum *index_values, bool *index_isnull);
364365
extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
365366
extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
366367

0 commit comments

Comments
 (0)