src/backend/access/hash/hash.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.65 2003/08/04 02:39:57 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.66 2003/09/02 02:18:38 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -449,40 +449,178 @@ hashbulkdelete(PG_FUNCTION_ARGS)
 	BlockNumber num_pages;
 	double		tuples_removed;
 	double		num_index_tuples;
-	IndexScanDesc iscan;
+	uint32		deleted_tuples;
+	uint32		tuples_remaining;
+	uint32		orig_ntuples;
+	Bucket		orig_maxbucket;
+	Bucket		cur_maxbucket;
+	Bucket		cur_bucket;
+	Buffer		metabuf;
+	HashMetaPage metap;
+	HashMetaPageData local_metapage;
 
+	/*
+	 * keep track of counts in both float form (to return) and integer form
+	 * (to update hashm_ntuples).  It'd be better to make hashm_ntuples a
+	 * double, but that will have to wait for an initdb.
+	 */
 	tuples_removed = 0;
 	num_index_tuples = 0;
+	deleted_tuples = 0;
+	tuples_remaining = 0;
 
 	/*
-	 * XXX generic implementation --- should be improved!
+	 * Read the metapage to fetch original bucket and tuple counts.  Also,
+	 * we keep a copy of the last-seen metapage so that we can use its
+	 * hashm_spares[] values to compute bucket page addresses.  This is a
+	 * bit hokey but perfectly safe, since the interesting entries in the
+	 * spares array cannot change under us; and it beats rereading the
+	 * metapage for each bucket.
 	 */
+	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+	metap = (HashMetaPage) BufferGetPage(metabuf);
+	_hash_checkpage((Page) metap, LH_META_PAGE);
+	orig_maxbucket = metap->hashm_maxbucket;
+	orig_ntuples = metap->hashm_ntuples;
+	memcpy(&local_metapage, metap, sizeof(local_metapage));
+	_hash_relbuf(rel, metabuf, HASH_READ);
+
+	/* Scan the buckets that we know exist */
+	cur_bucket = 0;
+	cur_maxbucket = orig_maxbucket;
+
+loop_top:
+	while (cur_bucket <= cur_maxbucket)
+	{
+		BlockNumber bucket_blkno;
+		BlockNumber blkno;
+		bool		bucket_dirty = false;
 
-	/* walk through the entire index */
-	iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL);
-	/* including killed tuples */
-	iscan->ignore_killed_tuples = false;
+		/* Get address of bucket's start page */
+		bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket);
 
-	while (index_getnext_indexitem(iscan, ForwardScanDirection))
-	{
-		if (callback(&iscan->xs_ctup.t_self, callback_state))
+		/* XXX lock bucket here */
+
+		/* Scan each page in bucket */
+		blkno = bucket_blkno;
+		while (BlockNumberIsValid(blkno))
 		{
-			ItemPointerData indextup = iscan->currentItemData;
+			Buffer		buf;
+			Page		page;
+			HashPageOpaque opaque;
+			OffsetNumber offno;
+			OffsetNumber maxoffno;
+			bool		page_dirty = false;
+
+			buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+			page = BufferGetPage(buf);
+			_hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+			opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+			Assert(opaque->hasho_bucket == cur_bucket);
+
+			/* Scan each tuple in page */
+			offno = FirstOffsetNumber;
+			maxoffno = PageGetMaxOffsetNumber(page);
+			while (offno <= maxoffno)
+			{
+				HashItem	hitem;
+				ItemPointer htup;
+
+				hitem = (HashItem) PageGetItem(page,
+											   PageGetItemId(page, offno));
+				htup = &(hitem->hash_itup.t_tid);
+				if (callback(htup, callback_state))
+				{
+					ItemPointerData indextup;
+
+					/* adjust any active scans that will be affected */
+					/* (this should be unnecessary) */
+					ItemPointerSet(&indextup, blkno, offno);
+					_hash_adjscans(rel, &indextup);
+
+					/* delete the item from the page */
+					PageIndexTupleDelete(page, offno);
+					bucket_dirty = page_dirty = true;
+
+					/* don't increment offno, instead decrement maxoffno */
+					maxoffno = OffsetNumberPrev(maxoffno);
+
+					tuples_removed += 1;
+					deleted_tuples += 1;
+				}
+				else
+				{
+					offno = OffsetNumberNext(offno);
+
+					num_index_tuples += 1;
+					tuples_remaining += 1;
+				}
+			}
 
-			/* adjust any active scans that will be affected by deletion */
-			/* (namely, my own scan) */
-			_hash_adjscans(rel, &indextup);
+			/*
+			 * Write or free page if needed, advance to next page.  We want
+			 * to preserve the invariant that overflow pages are nonempty.
+			 */
+			blkno = opaque->hasho_nextblkno;
+
+			if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
+				_hash_freeovflpage(rel, buf);
+			else if (page_dirty)
+				_hash_wrtbuf(rel, buf);
+			else
+				_hash_relbuf(rel, buf, HASH_WRITE);
+		}
 
-			/* delete the data from the page */
-			_hash_pagedel(rel, &indextup);
+		/* If we deleted anything, try to compact free space */
+		if (bucket_dirty)
+			_hash_squeezebucket(rel, cur_bucket, bucket_blkno);
 
-			tuples_removed += 1;
-		}
+		/* XXX unlock bucket here */
+
+		/* Advance to next bucket */
+		cur_bucket++;
+	}
+
+	/* Write-lock metapage and check for split since we started */
+	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+	metap = (HashMetaPage) BufferGetPage(metabuf);
+	_hash_checkpage((Page) metap, LH_META_PAGE);
+
+	if (cur_maxbucket != metap->hashm_maxbucket)
+	{
+		/* There's been a split, so process the additional bucket(s) */
+		cur_maxbucket = metap->hashm_maxbucket;
+		memcpy(&local_metapage, metap, sizeof(local_metapage));
+		_hash_relbuf(rel, metabuf, HASH_WRITE);
+		goto loop_top;
+	}
+
+	/* Okay, we're really done.  Update tuple count in metapage. */
+
+	if (orig_maxbucket == metap->hashm_maxbucket &&
+		orig_ntuples == metap->hashm_ntuples)
+	{
+		/*
+		 * No one has split or inserted anything since start of scan,
+		 * so believe our count as gospel.
+		 */
+		metap->hashm_ntuples = tuples_remaining;
+	}
+	else
+	{
+		/*
+		 * Otherwise, our count is untrustworthy since we may have
+		 * double-scanned tuples in split buckets.  Proceed by
+		 * dead-reckoning.
+		 */
+		if (metap->hashm_ntuples > deleted_tuples)
+			metap->hashm_ntuples -= deleted_tuples;
 		else
-			num_index_tuples += 1;
+			metap->hashm_ntuples = 0;
+		num_index_tuples = metap->hashm_ntuples;
 	}
 
-	index_endscan(iscan);
+	_hash_wrtbuf(rel, metabuf);
 
 	/* return statistics */
 	num_pages = RelationGetNumberOfBlocks(rel);
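
Note on the bucket addressing the new scan relies on: BUCKET_TO_BLKNO maps a bucket number to the block number of the bucket's primary page, using the hashm_spares[] counts cached in local_metapage (the number of overflow pages allocated before each split point). That is why the copied metapage is safe to use here: spares entries at or below already-allocated split points never change. A minimal sketch of the mapping, written against the contemporaneous access/hash.h definitions (_hash_log2() and the field names are real; treat the exact expression as illustrative rather than authoritative):

#include "postgres.h"
#include "access/hash.h"		/* Bucket, HashMetaPageData, _hash_log2() */

/*
 * Sketch of BUCKET_TO_BLKNO: bucket pages and overflow pages share one
 * block-number space.  A bucket's primary page lands at its bucket
 * number, shifted up by the count of overflow pages allocated before
 * the bucket's split point, plus one because block 0 is the metapage.
 */
static BlockNumber
bucket_to_blkno(HashMetaPageData *metap, Bucket bucket)
{
	BlockNumber ovfl_before = 0;

	if (bucket > 0)
		ovfl_before = metap->hashm_spares[_hash_log2(bucket + 1) - 1];

	return (BlockNumber) (bucket + ovfl_before + 1);
}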
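
For context on the callback(htup, callback_state) test in the per-tuple loop: hashbulkdelete() is handed an IndexBulkDeleteCallback by VACUUM, which returns true for each heap TID that is known dead, and the index entry is deleted exactly when the callback says so. A sketch of a callback satisfying that contract follows; DeadTidState and the linear search are hypothetical stand-ins for VACUUM's real dead-TID array lookup:

#include "postgres.h"
#include "storage/itemptr.h"

/* Hypothetical callback state: an array of dead heap-tuple TIDs. */
typedef struct DeadTidState
{
	ItemPointerData *dead_tids;
	int			num_dead;
} DeadTidState;

/*
 * IndexBulkDeleteCallback contract: return true iff the heap tuple at
 * 'itemptr' is dead, so the index entry pointing at it may be removed.
 */
static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
	DeadTidState *dts = (DeadTidState *) state;
	int			i;

	for (i = 0; i < dts->num_dead; i++)
	{
		if (ItemPointerEquals(itemptr, &dts->dead_tids[i]))
			return true;
	}
	return false;
}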