|
9 | 9 | *
|
10 | 10 | *
|
11 | 11 | * IDENTIFICATION
|
12 |
| - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.69 2003/08/08 21:41:27 momjian Exp $ |
| 12 | + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.70 2003/08/10 19:48:08 tgl Exp $ |
13 | 13 | *
|
14 | 14 | * NOTES
|
15 | 15 | * Postgres btree pages look like ordinary relation pages. The opaque
|
@@ -409,23 +409,47 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
|
409 | 409 | * that the page is still free. (For example, an already-free
|
410 | 410 | * page could have been re-used between the time the last VACUUM
|
411 | 411 | * scanned it and the time the VACUUM made its FSM updates.)
|
| 412 | + * |
| 413 | + * In fact, it's worse than that: we can't even assume that it's |
| 414 | + * safe to take a lock on the reported page. If somebody else |
| 415 | + * has a lock on it, or, even worse, our own caller does, we could |
| 416 | + * deadlock. (The own-caller scenario is actually not improbable. |
| 417 | + * Consider an index on a serial or timestamp column. Nearly all |
| 418 | + * splits will be at the rightmost page, so it's entirely likely |
| 419 | + * that _bt_split will call us while holding a lock on the page most |
| 420 | + * recently acquired from FSM. A VACUUM running concurrently with |
| 421 | + * the previous split could well have placed that page back in FSM.) |
| 422 | + * |
| 423 | + * To get around that, we ask for only a conditional lock on the |
| 424 | + * reported page. If we fail, then someone else is using the page, |
| 425 | + * and we may reasonably assume it's not free. (If we happen to be |
| 426 | + * wrong, the worst consequence is that the page will be unavailable |
| 427 | + * for reuse until the next VACUUM, which is no big problem.) |
412 | 428 | */
|
413 | 429 | for (;;)
|
414 | 430 | {
|
415 | 431 | blkno = GetFreeIndexPage(&rel->rd_node);
|
416 | 432 | if (blkno == InvalidBlockNumber)
|
417 | 433 | break;
|
418 | 434 | buf = ReadBuffer(rel, blkno);
|
419 |
| - LockBuffer(buf, access); |
420 |
| - page = BufferGetPage(buf); |
421 |
| - if (_bt_page_recyclable(page)) |
| 435 | + if (ConditionalLockBuffer(buf)) |
422 | 436 | {
|
423 |
| - /* Okay to use page. Re-initialize and return it */ |
424 |
| - _bt_pageinit(page, BufferGetPageSize(buf)); |
425 |
| - return buf; |
| 437 | + page = BufferGetPage(buf); |
| 438 | + if (_bt_page_recyclable(page)) |
| 439 | + { |
| 440 | + /* Okay to use page. Re-initialize and return it */ |
| 441 | + _bt_pageinit(page, BufferGetPageSize(buf)); |
| 442 | + return buf; |
| 443 | + } |
| 444 | + elog(DEBUG2, "FSM returned nonrecyclable page"); |
| 445 | + _bt_relbuf(rel, buf); |
| 446 | + } |
| 447 | + else |
| 448 | + { |
| 449 | + elog(DEBUG2, "FSM returned nonlockable page"); |
| 450 | + /* couldn't get lock, so just drop pin */ |
| 451 | + ReleaseBuffer(buf); |
426 | 452 | }
|
427 |
| - elog(DEBUG2, "FSM returned nonrecyclable page"); |
428 |
| - _bt_relbuf(rel, buf); |
429 | 453 | }
|
430 | 454 |
|
431 | 455 | /*
|
|
0 commit comments