Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9c808f8

Browse files
committed
Refactor XLogInsert a bit. The rdata entries for backup blocks are now
constructed before acquiring WALInsertLock, which slightly reduces the time the lock is held. Although I could not measure any benefit in benchmarks, the code is more readable this way.
1 parent 26e89e7 commit 9c808f8

File tree

1 file changed

+91
-111
lines changed
  • src/backend/access/transam

1 file changed

+91
-111
lines changed

src/backend/access/transam/xlog.c

Lines changed: 91 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
694694
uint32 freespace;
695695
int curridx;
696696
XLogRecData *rdt;
697+
XLogRecData *rdt_lastnormal;
697698
Buffer dtbuf[XLR_MAX_BKP_BLOCKS];
698699
bool dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
699700
BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
@@ -708,6 +709,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
708709
bool updrqst;
709710
bool doPageWrites;
710711
bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
712+
uint8 info_orig = info;
711713

712714
/* cross-check on whether we should be here or not */
713715
if (!XLogInsertAllowed())
@@ -731,23 +733,18 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
731733
}
732734

733735
/*
734-
* Here we scan the rdata chain, determine which buffers must be backed
735-
* up, and compute the CRC values for the data. Note that the record
736-
* header isn't added into the CRC initially since we don't know the final
737-
* length or info bits quite yet. Thus, the CRC will represent the CRC of
738-
* the whole record in the order "rdata, then backup blocks, then record
739-
* header".
736+
* Here we scan the rdata chain, to determine which buffers must be backed
737+
* up.
740738
*
741739
* We may have to loop back to here if a race condition is detected below.
742740
* We could prevent the race by doing all this work while holding the
743741
* insert lock, but it seems better to avoid doing CRC calculations while
744-
* holding the lock. This means we have to be careful about modifying the
745-
* rdata chain until we know we aren't going to loop back again. The only
746-
* change we allow ourselves to make earlier is to set rdt->data = NULL in
747-
* chain items we have decided we will have to back up the whole buffer
748-
* for. This is OK because we will certainly decide the same thing again
749-
* for those items if we do it over; doing it here saves an extra pass
750-
* over the chain later.
742+
* holding the lock.
743+
*
744+
* We add entries for backup blocks to the chain, so that they don't
745+
* need any special treatment in the critical section where the chunks are
746+
* copied into the WAL buffers. Those entries have to be unlinked from the
747+
* chain if we have to loop back here.
751748
*/
752749
begin:;
753750
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
@@ -764,15 +761,13 @@ begin:;
764761
*/
765762
doPageWrites = fullPageWrites || Insert->forcePageWrites;
766763

767-
INIT_CRC32(rdata_crc);
768764
len = 0;
769765
for (rdt = rdata;;)
770766
{
771767
if (rdt->buffer == InvalidBuffer)
772768
{
773769
/* Simple data, just include it */
774770
len += rdt->len;
775-
COMP_CRC32(rdata_crc, rdt->data, rdt->len);
776771
}
777772
else
778773
{
@@ -783,12 +778,12 @@ begin:;
783778
{
784779
/* Buffer already referenced by earlier chain item */
785780
if (dtbuf_bkp[i])
781+
{
786782
rdt->data = NULL;
783+
rdt->len = 0;
784+
}
787785
else if (rdt->data)
788-
{
789786
len += rdt->len;
790-
COMP_CRC32(rdata_crc, rdt->data, rdt->len);
791-
}
792787
break;
793788
}
794789
if (dtbuf[i] == InvalidBuffer)
@@ -800,12 +795,10 @@ begin:;
800795
{
801796
dtbuf_bkp[i] = true;
802797
rdt->data = NULL;
798+
rdt->len = 0;
803799
}
804800
else if (rdt->data)
805-
{
806801
len += rdt->len;
807-
COMP_CRC32(rdata_crc, rdt->data, rdt->len);
808-
}
809802
break;
810803
}
811804
}
@@ -819,39 +812,6 @@ begin:;
819812
rdt = rdt->next;
820813
}
821814

822-
/*
823-
* Now add the backup block headers and data into the CRC
824-
*/
825-
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
826-
{
827-
if (dtbuf_bkp[i])
828-
{
829-
BkpBlock *bkpb = &(dtbuf_xlg[i]);
830-
char *page;
831-
832-
COMP_CRC32(rdata_crc,
833-
(char *) bkpb,
834-
sizeof(BkpBlock));
835-
page = (char *) BufferGetBlock(dtbuf[i]);
836-
if (bkpb->hole_length == 0)
837-
{
838-
COMP_CRC32(rdata_crc,
839-
page,
840-
BLCKSZ);
841-
}
842-
else
843-
{
844-
/* must skip the hole */
845-
COMP_CRC32(rdata_crc,
846-
page,
847-
bkpb->hole_offset);
848-
COMP_CRC32(rdata_crc,
849-
page + (bkpb->hole_offset + bkpb->hole_length),
850-
BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
851-
}
852-
}
853-
}
854-
855815
/*
856816
* NOTE: We disallow len == 0 because it provides a useful bit of extra
857817
* error checking in ReadRecord. This means that all callers of
@@ -862,70 +822,20 @@ begin:;
862822
if (len == 0 && !isLogSwitch)
863823
elog(PANIC, "invalid xlog record length %u", len);
864824

865-
START_CRIT_SECTION();
866-
867-
/* Now wait to get insert lock */
868-
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
869-
870-
/*
871-
* Check to see if my RedoRecPtr is out of date. If so, may have to go
872-
* back and recompute everything. This can only happen just after a
873-
* checkpoint, so it's better to be slow in this case and fast otherwise.
874-
*
875-
* If we aren't doing full-page writes then RedoRecPtr doesn't actually
876-
* affect the contents of the XLOG record, so we'll update our local copy
877-
* but not force a recomputation.
878-
*/
879-
if (!XLByteEQ(RedoRecPtr, Insert->RedoRecPtr))
880-
{
881-
Assert(XLByteLT(RedoRecPtr, Insert->RedoRecPtr));
882-
RedoRecPtr = Insert->RedoRecPtr;
883-
884-
if (doPageWrites)
885-
{
886-
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
887-
{
888-
if (dtbuf[i] == InvalidBuffer)
889-
continue;
890-
if (dtbuf_bkp[i] == false &&
891-
XLByteLE(dtbuf_lsn[i], RedoRecPtr))
892-
{
893-
/*
894-
* Oops, this buffer now needs to be backed up, but we
895-
* didn't think so above. Start over.
896-
*/
897-
LWLockRelease(WALInsertLock);
898-
END_CRIT_SECTION();
899-
goto begin;
900-
}
901-
}
902-
}
903-
}
904-
905-
/*
906-
* Also check to see if forcePageWrites was just turned on; if we weren't
907-
* already doing full-page writes then go back and recompute. (If it was
908-
* just turned off, we could recompute the record without full pages, but
909-
* we choose not to bother.)
910-
*/
911-
if (Insert->forcePageWrites && !doPageWrites)
912-
{
913-
/* Oops, must redo it with full-page data */
914-
LWLockRelease(WALInsertLock);
915-
END_CRIT_SECTION();
916-
goto begin;
917-
}
918-
919825
/*
920826
* Make additional rdata chain entries for the backup blocks, so that we
921-
* don't need to special-case them in the write loop. Note that we have
922-
* now irrevocably changed the input rdata chain. At the exit of this
923-
* loop, write_len includes the backup block data.
827+
* don't need to special-case them in the write loop. This modifies the
828+
* original rdata chain, but we keep a pointer to the last regular entry,
829+
* rdt_lastnormal, so that we can undo this if we have to loop back to the
830+
* beginning.
831+
*
832+
* At the exit of this loop, write_len includes the backup block data.
924833
*
925834
* Also set the appropriate info bits to show which buffers were backed
926835
* up. The i'th XLR_SET_BKP_BLOCK bit corresponds to the i'th distinct
927836
* buffer value (ignoring InvalidBuffer) appearing in the rdata chain.
928837
*/
838+
rdt_lastnormal = rdt;
929839
write_len = len;
930840
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
931841
{
@@ -974,6 +884,76 @@ begin:;
974884
}
975885
}
976886

887+
/*
888+
* Calculate CRC of the data, including all the backup blocks
889+
*
890+
* Note that the record header isn't added into the CRC initially since
891+
* we don't know the prev-link yet. Thus, the CRC will represent the CRC
892+
* of the whole record in the order: rdata, then backup blocks, then
893+
* record header.
894+
*/
895+
INIT_CRC32(rdata_crc);
896+
for (rdt = rdata; rdt != NULL; rdt = rdt->next)
897+
COMP_CRC32(rdata_crc, rdt->data, rdt->len);
898+
899+
START_CRIT_SECTION();
900+
901+
/* Now wait to get insert lock */
902+
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
903+
904+
/*
905+
* Check to see if my RedoRecPtr is out of date. If so, may have to go
906+
* back and recompute everything. This can only happen just after a
907+
* checkpoint, so it's better to be slow in this case and fast otherwise.
908+
*
909+
* If we aren't doing full-page writes then RedoRecPtr doesn't actually
910+
* affect the contents of the XLOG record, so we'll update our local copy
911+
* but not force a recomputation.
912+
*/
913+
if (!XLByteEQ(RedoRecPtr, Insert->RedoRecPtr))
914+
{
915+
Assert(XLByteLT(RedoRecPtr, Insert->RedoRecPtr));
916+
RedoRecPtr = Insert->RedoRecPtr;
917+
918+
if (doPageWrites)
919+
{
920+
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
921+
{
922+
if (dtbuf[i] == InvalidBuffer)
923+
continue;
924+
if (dtbuf_bkp[i] == false &&
925+
XLByteLE(dtbuf_lsn[i], RedoRecPtr))
926+
{
927+
/*
928+
* Oops, this buffer now needs to be backed up, but we
929+
* didn't think so above. Start over.
930+
*/
931+
LWLockRelease(WALInsertLock);
932+
END_CRIT_SECTION();
933+
rdt_lastnormal->next = NULL;
934+
info = info_orig;
935+
goto begin;
936+
}
937+
}
938+
}
939+
}
940+
941+
/*
942+
* Also check to see if forcePageWrites was just turned on; if we weren't
943+
* already doing full-page writes then go back and recompute. (If it was
944+
* just turned off, we could recompute the record without full pages, but
945+
* we choose not to bother.)
946+
*/
947+
if (Insert->forcePageWrites && !doPageWrites)
948+
{
949+
/* Oops, must redo it with full-page data. */
950+
LWLockRelease(WALInsertLock);
951+
END_CRIT_SECTION();
952+
rdt_lastnormal->next = NULL;
953+
info = info_orig;
954+
goto begin;
955+
}
956+
977957
/*
978958
* If there isn't enough space on the current XLOG page for a record
979959
* header, advance to the next page (leaving the unused space as zeroes).

0 commit comments

Comments
 (0)