@@ -694,6 +694,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
694
694
uint32 freespace ;
695
695
int curridx ;
696
696
XLogRecData * rdt ;
697
+ XLogRecData * rdt_lastnormal ;
697
698
Buffer dtbuf [XLR_MAX_BKP_BLOCKS ];
698
699
bool dtbuf_bkp [XLR_MAX_BKP_BLOCKS ];
699
700
BkpBlock dtbuf_xlg [XLR_MAX_BKP_BLOCKS ];
@@ -708,6 +709,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
708
709
bool updrqst ;
709
710
bool doPageWrites ;
710
711
bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH );
712
+ uint8 info_orig = info ;
711
713
712
714
/* cross-check on whether we should be here or not */
713
715
if (!XLogInsertAllowed ())
@@ -731,23 +733,18 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
731
733
}
732
734
733
735
/*
734
- * Here we scan the rdata chain, determine which buffers must be backed
735
- * up, and compute the CRC values for the data. Note that the record
736
- * header isn't added into the CRC initially since we don't know the final
737
- * length or info bits quite yet. Thus, the CRC will represent the CRC of
738
- * the whole record in the order "rdata, then backup blocks, then record
739
- * header".
736
+ * Here we scan the rdata chain to determine which buffers must be backed
737
+ * up.
740
738
*
741
739
* We may have to loop back to here if a race condition is detected below.
742
740
* We could prevent the race by doing all this work while holding the
743
741
* insert lock, but it seems better to avoid doing CRC calculations while
744
- * holding the lock. This means we have to be careful about modifying the
745
- * rdata chain until we know we aren't going to loop back again. The only
746
- * change we allow ourselves to make earlier is to set rdt->data = NULL in
747
- * chain items we have decided we will have to back up the whole buffer
748
- * for. This is OK because we will certainly decide the same thing again
749
- * for those items if we do it over; doing it here saves an extra pass
750
- * over the chain later.
742
+ * holding the lock.
743
+ *
744
+ * We add entries for backup blocks to the chain, so that they don't
745
+ * need any special treatment in the critical section where the chunks are
746
+ * copied into the WAL buffers. Those entries have to be unlinked from the
747
+ * chain if we have to loop back here.
751
748
*/
752
749
begin :;
753
750
for (i = 0 ; i < XLR_MAX_BKP_BLOCKS ; i ++ )
@@ -764,15 +761,13 @@ begin:;
764
761
*/
765
762
doPageWrites = fullPageWrites || Insert -> forcePageWrites ;
766
763
767
- INIT_CRC32 (rdata_crc );
768
764
len = 0 ;
769
765
for (rdt = rdata ;;)
770
766
{
771
767
if (rdt -> buffer == InvalidBuffer )
772
768
{
773
769
/* Simple data, just include it */
774
770
len += rdt -> len ;
775
- COMP_CRC32 (rdata_crc , rdt -> data , rdt -> len );
776
771
}
777
772
else
778
773
{
@@ -783,12 +778,12 @@ begin:;
783
778
{
784
779
/* Buffer already referenced by earlier chain item */
785
780
if (dtbuf_bkp [i ])
781
+ {
786
782
rdt -> data = NULL ;
783
+ rdt -> len = 0 ;
784
+ }
787
785
else if (rdt -> data )
788
- {
789
786
len += rdt -> len ;
790
- COMP_CRC32 (rdata_crc , rdt -> data , rdt -> len );
791
- }
792
787
break ;
793
788
}
794
789
if (dtbuf [i ] == InvalidBuffer )
@@ -800,12 +795,10 @@ begin:;
800
795
{
801
796
dtbuf_bkp [i ] = true;
802
797
rdt -> data = NULL ;
798
+ rdt -> len = 0 ;
803
799
}
804
800
else if (rdt -> data )
805
- {
806
801
len += rdt -> len ;
807
- COMP_CRC32 (rdata_crc , rdt -> data , rdt -> len );
808
- }
809
802
break ;
810
803
}
811
804
}
@@ -819,39 +812,6 @@ begin:;
819
812
rdt = rdt -> next ;
820
813
}
821
814
822
- /*
823
- * Now add the backup block headers and data into the CRC
824
- */
825
- for (i = 0 ; i < XLR_MAX_BKP_BLOCKS ; i ++ )
826
- {
827
- if (dtbuf_bkp [i ])
828
- {
829
- BkpBlock * bkpb = & (dtbuf_xlg [i ]);
830
- char * page ;
831
-
832
- COMP_CRC32 (rdata_crc ,
833
- (char * ) bkpb ,
834
- sizeof (BkpBlock ));
835
- page = (char * ) BufferGetBlock (dtbuf [i ]);
836
- if (bkpb -> hole_length == 0 )
837
- {
838
- COMP_CRC32 (rdata_crc ,
839
- page ,
840
- BLCKSZ );
841
- }
842
- else
843
- {
844
- /* must skip the hole */
845
- COMP_CRC32 (rdata_crc ,
846
- page ,
847
- bkpb -> hole_offset );
848
- COMP_CRC32 (rdata_crc ,
849
- page + (bkpb -> hole_offset + bkpb -> hole_length ),
850
- BLCKSZ - (bkpb -> hole_offset + bkpb -> hole_length ));
851
- }
852
- }
853
- }
854
-
855
815
/*
856
816
* NOTE: We disallow len == 0 because it provides a useful bit of extra
857
817
* error checking in ReadRecord. This means that all callers of
@@ -862,70 +822,20 @@ begin:;
862
822
if (len == 0 && !isLogSwitch )
863
823
elog (PANIC , "invalid xlog record length %u" , len );
864
824
865
- START_CRIT_SECTION ();
866
-
867
- /* Now wait to get insert lock */
868
- LWLockAcquire (WALInsertLock , LW_EXCLUSIVE );
869
-
870
- /*
871
- * Check to see if my RedoRecPtr is out of date. If so, may have to go
872
- * back and recompute everything. This can only happen just after a
873
- * checkpoint, so it's better to be slow in this case and fast otherwise.
874
- *
875
- * If we aren't doing full-page writes then RedoRecPtr doesn't actually
876
- * affect the contents of the XLOG record, so we'll update our local copy
877
- * but not force a recomputation.
878
- */
879
- if (!XLByteEQ (RedoRecPtr , Insert -> RedoRecPtr ))
880
- {
881
- Assert (XLByteLT (RedoRecPtr , Insert -> RedoRecPtr ));
882
- RedoRecPtr = Insert -> RedoRecPtr ;
883
-
884
- if (doPageWrites )
885
- {
886
- for (i = 0 ; i < XLR_MAX_BKP_BLOCKS ; i ++ )
887
- {
888
- if (dtbuf [i ] == InvalidBuffer )
889
- continue ;
890
- if (dtbuf_bkp [i ] == false &&
891
- XLByteLE (dtbuf_lsn [i ], RedoRecPtr ))
892
- {
893
- /*
894
- * Oops, this buffer now needs to be backed up, but we
895
- * didn't think so above. Start over.
896
- */
897
- LWLockRelease (WALInsertLock );
898
- END_CRIT_SECTION ();
899
- goto begin ;
900
- }
901
- }
902
- }
903
- }
904
-
905
- /*
906
- * Also check to see if forcePageWrites was just turned on; if we weren't
907
- * already doing full-page writes then go back and recompute. (If it was
908
- * just turned off, we could recompute the record without full pages, but
909
- * we choose not to bother.)
910
- */
911
- if (Insert -> forcePageWrites && !doPageWrites )
912
- {
913
- /* Oops, must redo it with full-page data */
914
- LWLockRelease (WALInsertLock );
915
- END_CRIT_SECTION ();
916
- goto begin ;
917
- }
918
-
919
825
/*
920
826
* Make additional rdata chain entries for the backup blocks, so that we
921
- * don't need to special-case them in the write loop. Note that we have
922
- * now irrevocably changed the input rdata chain. At the exit of this
923
- * loop, write_len includes the backup block data.
827
+ * don't need to special-case them in the write loop. This modifies the
828
+ * original rdata chain, but we keep a pointer to the last regular entry,
829
+ * rdt_lastnormal, so that we can undo this if we have to loop back to the
830
+ * beginning.
831
+ *
832
+ * At the exit of this loop, write_len includes the backup block data.
924
833
*
925
834
* Also set the appropriate info bits to show which buffers were backed
926
835
* up. The i'th XLR_SET_BKP_BLOCK bit corresponds to the i'th distinct
927
836
* buffer value (ignoring InvalidBuffer) appearing in the rdata chain.
928
837
*/
838
+ rdt_lastnormal = rdt ;
929
839
write_len = len ;
930
840
for (i = 0 ; i < XLR_MAX_BKP_BLOCKS ; i ++ )
931
841
{
@@ -974,6 +884,76 @@ begin:;
974
884
}
975
885
}
976
886
887
+ /*
888
+ * Calculate CRC of the data, including all the backup blocks
889
+ *
890
+ * Note that the record header isn't added into the CRC initially since
891
+ * we don't know the prev-link yet. Thus, the CRC will represent the CRC
892
+ * of the whole record in the order: rdata, then backup blocks, then
893
+ * record header.
894
+ */
895
+ INIT_CRC32 (rdata_crc );
896
+ for (rdt = rdata ; rdt != NULL ; rdt = rdt -> next )
897
+ COMP_CRC32 (rdata_crc , rdt -> data , rdt -> len );
898
+
899
+ START_CRIT_SECTION ();
900
+
901
+ /* Now wait to get insert lock */
902
+ LWLockAcquire (WALInsertLock , LW_EXCLUSIVE );
903
+
904
+ /*
905
+ * Check to see if my RedoRecPtr is out of date. If so, may have to go
906
+ * back and recompute everything. This can only happen just after a
907
+ * checkpoint, so it's better to be slow in this case and fast otherwise.
908
+ *
909
+ * If we aren't doing full-page writes then RedoRecPtr doesn't actually
910
+ * affect the contents of the XLOG record, so we'll update our local copy
911
+ * but not force a recomputation.
912
+ */
913
+ if (!XLByteEQ (RedoRecPtr , Insert -> RedoRecPtr ))
914
+ {
915
+ Assert (XLByteLT (RedoRecPtr , Insert -> RedoRecPtr ));
916
+ RedoRecPtr = Insert -> RedoRecPtr ;
917
+
918
+ if (doPageWrites )
919
+ {
920
+ for (i = 0 ; i < XLR_MAX_BKP_BLOCKS ; i ++ )
921
+ {
922
+ if (dtbuf [i ] == InvalidBuffer )
923
+ continue ;
924
+ if (dtbuf_bkp [i ] == false &&
925
+ XLByteLE (dtbuf_lsn [i ], RedoRecPtr ))
926
+ {
927
+ /*
928
+ * Oops, this buffer now needs to be backed up, but we
929
+ * didn't think so above. Start over.
930
+ */
931
+ LWLockRelease (WALInsertLock );
932
+ END_CRIT_SECTION ();
933
+ rdt_lastnormal -> next = NULL ;
934
+ info = info_orig ;
935
+ goto begin ;
936
+ }
937
+ }
938
+ }
939
+ }
940
+
941
+ /*
942
+ * Also check to see if forcePageWrites was just turned on; if we weren't
943
+ * already doing full-page writes then go back and recompute. (If it was
944
+ * just turned off, we could recompute the record without full pages, but
945
+ * we choose not to bother.)
946
+ */
947
+ if (Insert -> forcePageWrites && !doPageWrites )
948
+ {
949
+ /* Oops, must redo it with full-page data. */
950
+ LWLockRelease (WALInsertLock );
951
+ END_CRIT_SECTION ();
952
+ rdt_lastnormal -> next = NULL ;
953
+ info = info_orig ;
954
+ goto begin ;
955
+ }
956
+
977
957
/*
978
958
* If there isn't enough space on the current XLOG page for a record
979
959
* header, advance to the next page (leaving the unused space as zeroes).
0 commit comments