17
17
#include "access/gin_private.h"
18
18
#include "access/xloginsert.h"
19
19
#include "miscadmin.h"
20
+ #include "utils/memutils.h"
20
21
#include "utils/rel.h"
21
22
22
23
static void ginFindParents (GinBtree btree , GinBtreeStack * stack );
@@ -312,27 +313,45 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
312
313
* Insert a new item to a page.
313
314
*
314
315
* Returns true if the insertion was finished. On false, the page was split and
315
- * the parent needs to be updated. (a root split returns true as it doesn't
316
- * need any further action by the caller to complete)
316
+ * the parent needs to be updated. (A root split returns true as it doesn't
317
+ * need any further action by the caller to complete.)
317
318
*
318
319
* When inserting a downlink to an internal page, 'childbuf' contains the
319
320
* child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
320
- * atomically with the insert. Also, the existing item at the given location
321
- * is updated to point to ' updateblkno' .
321
+ * atomically with the insert. Also, the existing item at offset stack->off
322
+ * in the target page is updated to point to updateblkno.
322
323
*
323
324
* stack->buffer is locked on entry, and is kept locked.
325
+ * Likewise for childbuf, if given.
324
326
*/
325
327
static bool
326
328
ginPlaceToPage (GinBtree btree , GinBtreeStack * stack ,
327
329
void * insertdata , BlockNumber updateblkno ,
328
330
Buffer childbuf , GinStatsData * buildStats )
329
331
{
330
332
Page page = BufferGetPage (stack -> buffer );
333
+ bool result ;
331
334
GinPlaceToPageRC rc ;
332
335
uint16 xlflags = 0 ;
333
336
Page childpage = NULL ;
334
337
Page newlpage = NULL ,
335
338
newrpage = NULL ;
339
+ void * ptp_workspace = NULL ;
340
+ MemoryContext tmpCxt ;
341
+ MemoryContext oldCxt ;
342
+
343
+ /*
344
+ * We do all the work of this function and its subfunctions in a temporary
345
+ * memory context. This avoids leakages and simplifies APIs, since some
346
+ * subfunctions allocate storage that has to survive until we've finished
347
+ * the WAL insertion.
348
+ */
349
+ tmpCxt = AllocSetContextCreate (CurrentMemoryContext ,
350
+ "ginPlaceToPage temporary context" ,
351
+ ALLOCSET_DEFAULT_MINSIZE ,
352
+ ALLOCSET_DEFAULT_INITSIZE ,
353
+ ALLOCSET_DEFAULT_MAXSIZE );
354
+ oldCxt = MemoryContextSwitchTo (tmpCxt );
336
355
337
356
if (GinPageIsData (page ))
338
357
xlflags |= GIN_INSERT_ISDATA ;
@@ -350,40 +369,42 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
350
369
}
351
370
352
371
/*
353
- * Try to put the incoming tuple on the page. placeToPage will decide if
354
- * the page needs to be split.
355
- *
356
- * WAL-logging this operation is a bit funny:
357
- *
358
- * We're responsible for calling XLogBeginInsert() and XLogInsert().
359
- * XLogBeginInsert() must be called before placeToPage, because
360
- * placeToPage can register some data to the WAL record.
361
- *
362
- * If placeToPage returns INSERTED, placeToPage has already called
363
- * START_CRIT_SECTION() and XLogBeginInsert(), and registered any data
364
- * required to replay the operation, in block index 0. We're responsible
365
- * for filling in the main data portion of the WAL record, calling
366
- * XLogInsert(), and END_CRIT_SECTION.
367
- *
368
- * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
369
- * Splits happen infrequently, so we just make a full-page image of all
370
- * the pages involved.
372
+ * See if the incoming tuple will fit on the page. beginPlaceToPage will
373
+ * decide if the page needs to be split, and will compute the split
374
+ * contents if so. See comments for beginPlaceToPage and execPlaceToPage
375
+ * functions for more details of the API here.
371
376
*/
372
- rc = btree -> placeToPage (btree , stack -> buffer , stack ,
373
- insertdata , updateblkno ,
374
- & newlpage , & newrpage );
375
- if (rc == UNMODIFIED )
377
+ rc = btree -> beginPlaceToPage (btree , stack -> buffer , stack ,
378
+ insertdata , updateblkno ,
379
+ & ptp_workspace ,
380
+ & newlpage , & newrpage );
381
+
382
+ if (rc == GPTP_NO_WORK )
376
383
{
377
- XLogResetInsertion ();
378
- return true;
384
+ /* Nothing to do */
385
+ result = true;
379
386
}
380
- else if (rc == INSERTED )
387
+ else if (rc == GPTP_INSERT )
381
388
{
382
- /* placeToPage did START_CRIT_SECTION() */
389
+ /* It will fit, perform the insertion */
390
+ START_CRIT_SECTION ();
391
+
392
+ if (RelationNeedsWAL (btree -> index ))
393
+ {
394
+ XLogBeginInsert ();
395
+ XLogRegisterBuffer (0 , stack -> buffer , REGBUF_STANDARD );
396
+ if (BufferIsValid (childbuf ))
397
+ XLogRegisterBuffer (1 , childbuf , REGBUF_STANDARD );
398
+ }
399
+
400
+ /* Perform the page update, and register any extra WAL data */
401
+ btree -> execPlaceToPage (btree , stack -> buffer , stack ,
402
+ insertdata , updateblkno , ptp_workspace );
403
+
383
404
MarkBufferDirty (stack -> buffer );
384
405
385
406
/* An insert to an internal page finishes the split of the child. */
386
- if (childbuf != InvalidBuffer )
407
+ if (BufferIsValid ( childbuf ) )
387
408
{
388
409
GinPageGetOpaque (childpage )-> flags &= ~GIN_INCOMPLETE_SPLIT ;
389
410
MarkBufferDirty (childbuf );
@@ -395,21 +416,15 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
395
416
ginxlogInsert xlrec ;
396
417
BlockIdData childblknos [2 ];
397
418
398
- /*
399
- * placetopage already registered stack->buffer as block 0.
400
- */
401
419
xlrec .flags = xlflags ;
402
420
403
- if (childbuf != InvalidBuffer )
404
- XLogRegisterBuffer (1 , childbuf , REGBUF_STANDARD );
405
-
406
421
XLogRegisterData ((char * ) & xlrec , sizeof (ginxlogInsert ));
407
422
408
423
/*
409
424
* Log information about child if this was an insertion of a
410
425
* downlink.
411
426
*/
412
- if (childbuf != InvalidBuffer )
427
+ if (BufferIsValid ( childbuf ) )
413
428
{
414
429
BlockIdSet (& childblknos [0 ], BufferGetBlockNumber (childbuf ));
415
430
BlockIdSet (& childblknos [1 ], GinPageGetOpaque (childpage )-> rightlink );
@@ -419,23 +434,29 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
419
434
420
435
recptr = XLogInsert (RM_GIN_ID , XLOG_GIN_INSERT );
421
436
PageSetLSN (page , recptr );
422
- if (childbuf != InvalidBuffer )
437
+ if (BufferIsValid ( childbuf ) )
423
438
PageSetLSN (childpage , recptr );
424
439
}
425
440
426
441
END_CRIT_SECTION ();
427
442
428
- return true;
443
+ /* Insertion is complete. */
444
+ result = true;
429
445
}
430
- else if (rc == SPLIT )
446
+ else if (rc == GPTP_SPLIT )
431
447
{
432
- /* Didn't fit, had to split */
448
+ /*
449
+ * Didn't fit, need to split. The split has been computed in newlpage
450
+ * and newrpage, which are pointers to palloc'd pages, not associated
451
+ * with buffers. stack->buffer is not touched yet.
452
+ */
433
453
Buffer rbuffer ;
434
454
BlockNumber savedRightLink ;
435
455
ginxlogSplit data ;
436
456
Buffer lbuffer = InvalidBuffer ;
437
457
Page newrootpg = NULL ;
438
458
459
+ /* Get a new index page to become the right page */
439
460
rbuffer = GinNewBuffer (btree -> index );
440
461
441
462
/* During index build, count the new page */
@@ -449,19 +470,11 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
449
470
450
471
savedRightLink = GinPageGetOpaque (page )-> rightlink ;
451
472
452
- /*
453
- * newlpage and newrpage are pointers to memory pages, not associated
454
- * with buffers. stack->buffer is not touched yet.
455
- */
456
-
473
+ /* Begin setting up WAL record */
457
474
data .node = btree -> index -> rd_node ;
458
475
data .flags = xlflags ;
459
- if (childbuf != InvalidBuffer )
476
+ if (BufferIsValid ( childbuf ) )
460
477
{
461
- Page childpage = BufferGetPage (childbuf );
462
-
463
- GinPageGetOpaque (childpage )-> flags &= ~GIN_INCOMPLETE_SPLIT ;
464
-
465
478
data .leftChildBlkno = BufferGetBlockNumber (childbuf );
466
479
data .rightChildBlkno = GinPageGetOpaque (childpage )-> rightlink ;
467
480
}
@@ -471,12 +484,12 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
471
484
if (stack -> parent == NULL )
472
485
{
473
486
/*
474
- * split root, so we need to allocate new left page and place
475
- * pointer on root to left and right page
487
+ * splitting the root, so we need to allocate new left page and
488
+ * place pointers to left and right page on root page.
476
489
*/
477
490
lbuffer = GinNewBuffer (btree -> index );
478
491
479
- /* During index build, count the newly-added root page */
492
+ /* During index build, count the new left page */
480
493
if (buildStats )
481
494
{
482
495
if (btree -> isData )
@@ -493,9 +506,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
493
506
494
507
/*
495
508
* Construct a new root page containing downlinks to the new left
496
- * and right pages. (do this in a temporary copy first rather than
497
- * overwriting the original page directly, so that we can still
498
- * abort gracefully if this fails .)
509
+ * and right pages. (Do this in a temporary copy rather than
510
+ * overwriting the original page directly, since we're not in the
511
+ * critical section yet.)
499
512
*/
500
513
newrootpg = PageGetTempPage (newrpage );
501
514
GinInitPage (newrootpg , GinPageGetOpaque (newlpage )-> flags & ~(GIN_LEAF | GIN_COMPRESSED ), BLCKSZ );
@@ -506,7 +519,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
506
519
}
507
520
else
508
521
{
509
- /* split non-root page */
522
+ /* splitting a non-root page */
510
523
data .rrlink = savedRightLink ;
511
524
512
525
GinPageGetOpaque (newrpage )-> rightlink = savedRightLink ;
@@ -515,41 +528,44 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
515
528
}
516
529
517
530
/*
518
- * Ok , we have the new contents of the left page in a temporary copy
519
- * now (newlpage), and the newly-allocated right block has been filled
520
- * in . The original page is still unchanged.
531
+ * OK, we have the new contents of the left page in a temporary copy
532
+ * now (newlpage), and likewise for the new contents of the
533
+ * newly-allocated right block. The original page is still unchanged.
521
534
*
522
535
* If this is a root split, we also have a temporary page containing
523
- * the new contents of the root. Copy the new left page to a
524
- * newly-allocated block, and initialize the (original) root page the
525
- * new copy. Otherwise, copy over the temporary copy of the new left
526
- * page over the old left page.
536
+ * the new contents of the root.
527
537
*/
528
538
529
539
START_CRIT_SECTION ();
530
540
531
541
MarkBufferDirty (rbuffer );
532
542
MarkBufferDirty (stack -> buffer );
533
- if (BufferIsValid (childbuf ))
534
- MarkBufferDirty (childbuf );
535
543
536
544
/*
537
- * Restore the temporary copies over the real buffers. But don't free
538
- * the temporary copies yet, WAL record data points to them.
545
+ * Restore the temporary copies over the real buffers.
539
546
*/
540
547
if (stack -> parent == NULL )
541
548
{
549
+ /* Splitting the root, three pages to update */
542
550
MarkBufferDirty (lbuffer );
543
- memcpy (BufferGetPage ( stack -> buffer ) , newrootpg , BLCKSZ );
551
+ memcpy (page , newrootpg , BLCKSZ );
544
552
memcpy (BufferGetPage (lbuffer ), newlpage , BLCKSZ );
545
553
memcpy (BufferGetPage (rbuffer ), newrpage , BLCKSZ );
546
554
}
547
555
else
548
556
{
549
- memcpy (BufferGetPage (stack -> buffer ), newlpage , BLCKSZ );
557
+ /* Normal split, only two pages to update */
558
+ memcpy (page , newlpage , BLCKSZ );
550
559
memcpy (BufferGetPage (rbuffer ), newrpage , BLCKSZ );
551
560
}
552
561
562
+ /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
563
+ if (BufferIsValid (childbuf ))
564
+ {
565
+ GinPageGetOpaque (childpage )-> flags &= ~GIN_INCOMPLETE_SPLIT ;
566
+ MarkBufferDirty (childbuf );
567
+ }
568
+
553
569
/* write WAL record */
554
570
if (RelationNeedsWAL (btree -> index ))
555
571
{
@@ -574,12 +590,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
574
590
XLogRegisterBuffer (1 , rbuffer , REGBUF_FORCE_IMAGE | REGBUF_STANDARD );
575
591
}
576
592
if (BufferIsValid (childbuf ))
577
- XLogRegisterBuffer (3 , childbuf , 0 );
593
+ XLogRegisterBuffer (3 , childbuf , REGBUF_STANDARD );
578
594
579
595
XLogRegisterData ((char * ) & data , sizeof (ginxlogSplit ));
580
596
581
597
recptr = XLogInsert (RM_GIN_ID , XLOG_GIN_SPLIT );
582
- PageSetLSN (BufferGetPage (stack -> buffer ), recptr );
598
+
599
+ PageSetLSN (page , recptr );
583
600
PageSetLSN (BufferGetPage (rbuffer ), recptr );
584
601
if (stack -> parent == NULL )
585
602
PageSetLSN (BufferGetPage (lbuffer ), recptr );
@@ -589,33 +606,31 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
589
606
END_CRIT_SECTION ();
590
607
591
608
/*
592
- * We can release the lock on the right page now, but keep the
593
- * original buffer locked.
609
+ * We can release the locks/pins on the new pages now, but keep
610
+ * stack->buffer locked. childbuf doesn't get unlocked either.
594
611
*/
595
612
UnlockReleaseBuffer (rbuffer );
596
613
if (stack -> parent == NULL )
597
614
UnlockReleaseBuffer (lbuffer );
598
615
599
- pfree (newlpage );
600
- pfree (newrpage );
601
- if (newrootpg )
602
- pfree (newrootpg );
603
-
604
616
/*
605
617
* If we split the root, we're done. Otherwise the split is not
606
618
* complete until the downlink for the new page has been inserted to
607
619
* the parent.
608
620
*/
609
- if (stack -> parent == NULL )
610
- return true;
611
- else
612
- return false;
621
+ result = (stack -> parent == NULL );
613
622
}
614
623
else
615
624
{
616
- elog (ERROR , "unknown return code from GIN placeToPage method: %d" , rc );
617
- return false; /* keep compiler quiet */
625
+ elog (ERROR , "invalid return code from GIN placeToPage method: %d" , rc );
626
+ result = false; /* keep compiler quiet */
618
627
}
628
+
629
+ /* Clean up temp context */
630
+ MemoryContextSwitchTo (oldCxt );
631
+ MemoryContextDelete (tmpCxt );
632
+
633
+ return result ;
619
634
}
620
635
621
636
/*
0 commit comments