@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.52 2006/03/31 23:32:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.53 2006/11/19 21:33:22 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -20,7 +20,7 @@
 #include "access/hash.h"
 
 
-static BlockNumber _hash_getovflpage(Relation rel, Buffer metabuf);
+static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
 static uint32 _hash_firstfreebit(uint32 map);
 
 
@@ -99,18 +99,14 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
 Buffer
 _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 {
-	BlockNumber ovflblkno;
 	Buffer		ovflbuf;
 	Page		page;
 	Page		ovflpage;
 	HashPageOpaque pageopaque;
 	HashPageOpaque ovflopaque;
 
-	/* allocate an empty overflow page */
-	ovflblkno = _hash_getovflpage(rel, metabuf);
-
-	/* lock the overflow page */
-	ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+	/* allocate and lock an empty overflow page */
+	ovflbuf = _hash_getovflpage(rel, metabuf);
 	ovflpage = BufferGetPage(ovflbuf);
 
 	/*
@@ -150,7 +146,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 	MarkBufferDirty(ovflbuf);
 
 	/* logically chain overflow page to previous page */
-	pageopaque->hasho_nextblkno = ovflblkno;
+	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
 	_hash_wrtbuf(rel, buf);
 
 	return ovflbuf;
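
The two hunks above are driven by the allocator's new contract: because _hash_getovflpage() now obtains the new page via P_NEW (see the later hunks), the block number is not known until the buffer manager hands back a buffer, so the function must return the buffer itself, and callers recover the block number with BufferGetBlockNumber() only where they need it. A condensed sketch of the resulting caller pattern (illustrative only; the name chain_new_overflow_page is hypothetical, and it abbreviates the patched _hash_addovflpage()):

    #include "postgres.h"
    #include "access/hash.h"

    static void
    chain_new_overflow_page(Relation rel, Buffer metabuf, Buffer buf)
    {
        Buffer      ovflbuf;
        Page        page = BufferGetPage(buf);
        HashPageOpaque pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);

        /* one call now allocates, pins, and write-locks the overflow page */
        ovflbuf = _hash_getovflpage(rel, metabuf);

        /* the block number comes from the buffer, not a stashed variable */
        pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
        _hash_wrtbuf(rel, buf);
    }
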
@@ -159,16 +155,18 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 /*
  *	_hash_getovflpage()
  *
- *	Find an available overflow page and return its block number.
+ *	Find an available overflow page and return it.  The returned buffer
+ *	is pinned and write-locked, but its contents are not initialized.
  *
  * The caller must hold a pin, but no lock, on the metapage buffer.
- * The buffer is returned in the same state.
+ * That buffer is left in the same state at exit.
  */
-static BlockNumber
+static Buffer
 _hash_getovflpage(Relation rel, Buffer metabuf)
 {
 	HashMetaPage metap;
 	Buffer		mapbuf = 0;
+	Buffer		newbuf;
 	BlockNumber blkno;
 	uint32		orig_firstfree;
 	uint32		splitnum;
@@ -243,11 +241,10 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
 	}
 
-	/* No Free Page Found - have to allocate a new page */
-	bit = metap->hashm_spares[splitnum];
-	metap->hashm_spares[splitnum]++;
-
-	/* Check if we need to allocate a new bitmap page */
+	/*
+	 * No free pages --- have to extend the relation to add an overflow page.
+	 * First, check to see if we have to add a new bitmap page too.
+	 */
 	if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
 	{
 		/*
@@ -258,22 +255,39 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		 * marked "in use".  Subsequent pages do not exist yet, but it is
 		 * convenient to pre-mark them as "in use" too.
 		 */
-		_hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
-
 		bit = metap->hashm_spares[splitnum];
+		_hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
 		metap->hashm_spares[splitnum]++;
 	}
 	else
 	{
 		/*
-		 * Nothing to do here; since the page was past the last used page, we
-		 * know its bitmap bit was preinitialized to "in use".
+		 * Nothing to do here; since the page will be past the last used page,
+		 * we know its bitmap bit was preinitialized to "in use".
 		 */
 	}
 
 	/* Calculate address of the new overflow page */
+	bit = metap->hashm_spares[splitnum];
 	blkno = bitno_to_blkno(metap, bit);
 
+	/*
+	 * We have to fetch the page with P_NEW to ensure smgr's idea of the
+	 * relation length stays in sync with ours.  XXX It's annoying to do this
+	 * with metapage write lock held; would be better to use a lock that
+	 * doesn't block incoming searches.  Best way to fix it would be to stop
+	 * maintaining hashm_spares[hashm_ovflpoint] and rely entirely on the
+	 * smgr relation length to track where new overflow pages come from;
+	 * then we could release the metapage before we do the smgrextend.
+	 * FIXME later (not in beta...)
+	 */
+	newbuf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+	if (BufferGetBlockNumber(newbuf) != blkno)
+		elog(ERROR, "unexpected hash relation size: %u, should be %u",
+			 BufferGetBlockNumber(newbuf), blkno);
+
+	metap->hashm_spares[splitnum]++;
+
 	/*
 	 * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
 	 * changing it if someone moved it while we were searching bitmap pages.
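
This hunk is the heart of the fix. The page is now physically allocated with P_NEW, so the storage manager's end-of-relation advances through the buffer manager rather than through hash-internal arithmetic, and the address computed from hashm_spares[] is cross-checked against the block actually handed out. The same idiom reappears in _hash_initbitmap() below; distilled as a hypothetical helper (the name extend_to_expected_block is mine, not the patch's):

    #include "postgres.h"
    #include "access/hash.h"

    /*
     * Extend the index by one page via P_NEW and verify that smgr's idea
     * of the relation length matches the block number we derived from
     * hashm_spares[] and the bitmap pages.
     */
    static Buffer
    extend_to_expected_block(Relation rel, BlockNumber expected)
    {
        Buffer      newbuf = _hash_getbuf(rel, P_NEW, HASH_WRITE);

        if (BufferGetBlockNumber(newbuf) != expected)
            elog(ERROR, "unexpected hash relation size: %u, should be %u",
                 BufferGetBlockNumber(newbuf), expected);
        return newbuf;
    }
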
@@ -284,7 +298,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 	/* Write updated metapage and release lock, but not pin */
 	_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
 
-	return blkno;
+	return newbuf;
 
 found:
 	/* convert bit to bit number within page */
@@ -300,7 +314,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 	/* convert bit to absolute bit number */
 	bit += (i << BMPG_SHIFT(metap));
 
-	/* Calculate address of the new overflow page */
+	/* Calculate address of the recycled overflow page */
 	blkno = bitno_to_blkno(metap, bit);
 
 	/*
@@ -320,7 +334,8 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
 	}
 
-	return blkno;
+	/* Fetch and return the recycled page */
+	return _hash_getbuf(rel, blkno, HASH_WRITE);
 }
 
 /*
@@ -388,7 +403,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	prevblkno = ovflopaque->hasho_prevblkno;
 	bucket = ovflopaque->hasho_bucket;
 
-	/* Zero the page for debugging's sake; then write and release it */
+	/*
+	 * Zero the page for debugging's sake; then write and release it.
+	 * (Note: if we failed to zero the page here, we'd have problems
+	 * with the Assert in _hash_pageinit() when the page is reused.)
+	 */
 	MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
 	_hash_wrtbuf(rel, ovflbuf);
 
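
The new parenthetical refers to the initialization assertion in hashpage.c. For context, a sketch of roughly what that function checks (assumed from the standard bufpage conventions; see hashpage.c for the real definition): a page of all zeroes has pd_upper == 0, which is exactly the PageIsNew() condition, so the MemSet above is what keeps the Assert from firing when a freed overflow page is later handed out again by _hash_getovflpage() and reinitialized by its caller.

    /* Sketch only; the real _hash_pageinit() lives in hashpage.c. */
    static void
    hash_pageinit_sketch(Page page, Size size)
    {
        Assert(PageIsNew(page));    /* ((PageHeader) page)->pd_upper == 0 */
        PageInit(page, size, sizeof(HashPageOpaqueData));
    }
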
@@ -488,12 +507,19 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
 	/*
 	 * It is okay to write-lock the new bitmap page while holding metapage
 	 * write lock, because no one else could be contending for the new page.
+	 * Also, the metapage lock makes it safe to extend the index using P_NEW,
+	 * which we want to do to ensure the smgr's idea of the relation size
+	 * stays in step with ours.
 	 *
 	 * There is some loss of concurrency in possibly doing I/O for the new
 	 * page while holding the metapage lock, but this path is taken so seldom
 	 * that it's not worth worrying about.
 	 */
-	buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+	buf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+	if (BufferGetBlockNumber(buf) != blkno)
+		elog(ERROR, "unexpected hash relation size: %u, should be %u",
+			 BufferGetBlockNumber(buf), blkno);
+
 	pg = BufferGetPage(buf);
 
 	/* initialize the page */