8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.65 2000/04/09 04:43:20 tgl Exp $
11
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.66 2000/04/10 23:41:51 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
48
48
typedef struct _MdfdVec
49
49
{
50
50
int mdfd_vfd ; /* fd number in vfd pool */
51
- int mdfd_flags ; /* free, temporary */
51
+ int mdfd_flags ; /* fd status flags */
52
52
53
53
/* these are the assigned bits in mdfd_flags: */
54
54
#define MDFD_FREE (1 << 0)/* unused entry */
55
- #define MDFD_TEMP (1 << 1)/* close this entry at transaction end */
56
55
57
56
int mdfd_lstbcnt ; /* most recent block count */
58
57
int mdfd_nextFree ; /* next free vector */
@@ -72,8 +71,8 @@ static void mdclose_fd(int fd);
72
71
static int _mdfd_getrelnfd (Relation reln );
73
72
static MdfdVec * _mdfd_openseg (Relation reln , int segno , int oflags );
74
73
static MdfdVec * _mdfd_getseg (Relation reln , int blkno );
75
- static MdfdVec * _mdfd_blind_getseg (char * dbname , char * relname ,
76
- Oid dbid , Oid relid , int blkno );
74
+ static int _mdfd_blind_getseg (char * dbname , char * relname ,
75
+ Oid dbid , Oid relid , int blkno );
77
76
static int _fdvec_alloc (void );
78
77
static void _fdvec_free (int );
79
78
static BlockNumber _mdnblocks (File file , Size blcksz );
@@ -572,23 +571,25 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
572
571
*
573
572
* We have to be able to do this using only the name and OID of
574
573
* the database and relation in which the block belongs. Otherwise
575
- * this is just like mdwrite().
574
+ * this is much like mdwrite(). If dofsync is TRUE, then we fsync
575
+ * the file, making it more like mdflush().
576
576
*/
577
577
int
578
578
mdblindwrt (char * dbname ,
579
579
char * relname ,
580
580
Oid dbid ,
581
581
Oid relid ,
582
582
BlockNumber blkno ,
583
- char * buffer )
583
+ char * buffer ,
584
+ bool dofsync )
584
585
{
585
586
int status ;
586
587
long seekpos ;
587
- MdfdVec * v ;
588
+ int fd ;
588
589
589
- v = _mdfd_blind_getseg (dbname , relname , dbid , relid , blkno );
590
+ fd = _mdfd_blind_getseg (dbname , relname , dbid , relid , blkno );
590
591
591
- if (v == NULL )
592
+ if (fd < 0 )
592
593
return SM_FAIL ;
593
594
594
595
#ifndef LET_OS_MANAGE_FILESIZE
@@ -601,11 +602,22 @@ mdblindwrt(char *dbname,
601
602
seekpos = (long ) (BLCKSZ * (blkno ));
602
603
#endif
603
604
604
- if (FileSeek (v -> mdfd_vfd , seekpos , SEEK_SET ) != seekpos )
605
+ if (lseek (fd , seekpos , SEEK_SET ) != seekpos )
606
+ {
607
+ close (fd );
605
608
return SM_FAIL ;
609
+ }
606
610
607
611
status = SM_SUCCESS ;
608
- if (FileWrite (v -> mdfd_vfd , buffer , BLCKSZ ) != BLCKSZ )
612
+
613
+ /* write and optionally sync the block */
614
+ if (write (fd , buffer , BLCKSZ ) != BLCKSZ )
615
+ status = SM_FAIL ;
616
+ else if (dofsync &&
617
+ pg_fsync (fd ) < 0 )
618
+ status = SM_FAIL ;
619
+
620
+ if (close (fd ) < 0 )
609
621
status = SM_FAIL ;
610
622
611
623
return status ;
@@ -633,7 +645,8 @@ mdmarkdirty(Relation reln, BlockNumber blkno)
633
645
*
634
646
* We have to be able to do this using only the name and OID of
635
647
* the database and relation in which the block belongs. Otherwise
636
- * this is just like mdmarkdirty().
648
+ * this is much like mdmarkdirty(). However, we do the fsync immediately
649
+ * rather than building md/fd data structures to postpone it until later.
637
650
*/
638
651
int
639
652
mdblindmarkdirty (char * dbname ,
@@ -642,16 +655,23 @@ mdblindmarkdirty(char *dbname,
642
655
Oid relid ,
643
656
BlockNumber blkno )
644
657
{
645
- MdfdVec * v ;
658
+ int status ;
659
+ int fd ;
646
660
647
- v = _mdfd_blind_getseg (dbname , relname , dbid , relid , blkno );
661
+ fd = _mdfd_blind_getseg (dbname , relname , dbid , relid , blkno );
648
662
649
- if (v == NULL )
663
+ if (fd < 0 )
650
664
return SM_FAIL ;
651
665
652
- FileMarkDirty ( v -> mdfd_vfd ) ;
666
+ status = SM_SUCCESS ;
653
667
654
- return SM_SUCCESS ;
668
+ if (pg_fsync (fd ) < 0 )
669
+ status = SM_FAIL ;
670
+
671
+ if (close (fd ) < 0 )
672
+ status = SM_FAIL ;
673
+
674
+ return status ;
655
675
}
656
676
657
677
/*
@@ -820,24 +840,15 @@ mdcommit()
820
840
v = & Md_fdvec [i ];
821
841
if (v -> mdfd_flags & MDFD_FREE )
822
842
continue ;
823
- if (v -> mdfd_flags & MDFD_TEMP )
824
- {
825
- /* Sync and close the file */
826
- mdclose_fd (i );
827
- }
828
- else
829
- {
830
- /* Sync, but keep the file entry */
831
-
843
+ /* Sync the file entry */
832
844
#ifndef LET_OS_MANAGE_FILESIZE
833
- for ( ; v != (MdfdVec * ) NULL ; v = v -> mdfd_chain )
845
+ for ( ; v != (MdfdVec * ) NULL ; v = v -> mdfd_chain )
834
846
#else
835
- if (v != (MdfdVec * ) NULL )
847
+ if (v != (MdfdVec * ) NULL )
836
848
#endif
837
- {
838
- if (FileSync (v -> mdfd_vfd ) < 0 )
839
- return SM_FAIL ;
840
- }
849
+ {
850
+ if (FileSync (v -> mdfd_vfd ) < 0 )
851
+ return SM_FAIL ;
841
852
}
842
853
}
843
854
@@ -854,21 +865,9 @@ mdcommit()
854
865
int
855
866
mdabort ()
856
867
{
857
- int i ;
858
- MdfdVec * v ;
859
-
860
- for (i = 0 ; i < CurFd ; i ++ )
861
- {
862
- v = & Md_fdvec [i ];
863
- if (v -> mdfd_flags & MDFD_FREE )
864
- continue ;
865
- if (v -> mdfd_flags & MDFD_TEMP )
866
- {
867
- /* Close the file */
868
- mdclose_fd (i );
869
- }
870
- }
871
-
868
+ /* We don't actually have to do anything here. fd.c will discard
869
+ * fsync-needed bits in its AtEOXact_Files() routine.
870
+ */
872
871
return SM_SUCCESS ;
873
872
}
874
873
@@ -1057,102 +1056,52 @@ _mdfd_getseg(Relation reln, int blkno)
1057
1056
return v ;
1058
1057
}
1059
1058
1060
- /* Find the segment of the relation holding the specified block.
1061
- * This is the same as _mdfd_getseg() except that we must work
1062
- * "blind" with no Relation struct.
1059
+ /*
1060
+ * Find the segment of the relation holding the specified block.
1063
1061
*
1064
- * NOTE: we have no easy way to tell whether a FD already exists for the
1065
- * target relation, so we always make a new one. This should probably
1066
- * be improved somehow, but I doubt it's a significant performance issue
1067
- * under normal circumstances. The FD is marked to be closed at end of xact
1068
- * so that we don't accumulate a lot of dead FDs.
1062
+ * This performs the same work as _mdfd_getseg() except that we must work
1063
+ * "blind" with no Relation struct. We assume that we are not likely to
1064
+ * touch the same relation again soon, so we do not create an FD entry for
1065
+ * the relation --- we just open a kernel file descriptor which will be
1066
+ * used and promptly closed. The return value is the kernel descriptor,
1067
+ * or -1 on failure.
1069
1068
*/
1070
1069
1071
- static MdfdVec *
1070
+ static int
1072
1071
_mdfd_blind_getseg (char * dbname , char * relname , Oid dbid , Oid relid ,
1073
1072
int blkno )
1074
1073
{
1075
- MdfdVec * v ;
1076
1074
char * path ;
1077
1075
int fd ;
1078
- int vfd ;
1079
1076
#ifndef LET_OS_MANAGE_FILESIZE
1080
1077
int segno ;
1081
- int targsegno ;
1082
1078
#endif
1083
1079
1084
- /* construct the path to the file and open it */
1080
+ /* construct the path to the relation */
1085
1081
path = relpath_blind (dbname , relname , dbid , relid );
1086
1082
1087
- #ifndef __CYGWIN32__
1088
- fd = FileNameOpenFile (path , O_RDWR , 0600 );
1089
- #else
1090
- fd = FileNameOpenFile (path , O_RDWR | O_BINARY , 0600 );
1091
- #endif
1092
-
1093
- if (fd < 0 )
1094
- return NULL ;
1095
-
1096
- vfd = _fdvec_alloc ();
1097
- if (vfd < 0 )
1098
- return NULL ;
1099
-
1100
- Md_fdvec [vfd ].mdfd_vfd = fd ;
1101
- Md_fdvec [vfd ].mdfd_flags = MDFD_TEMP ;
1102
- Md_fdvec [vfd ].mdfd_lstbcnt = _mdnblocks (fd , BLCKSZ );
1103
1083
#ifndef LET_OS_MANAGE_FILESIZE
1104
- Md_fdvec [vfd ].mdfd_chain = (MdfdVec * ) NULL ;
1105
-
1106
- #ifdef DIAGNOSTIC
1107
- if (Md_fdvec [vfd ].mdfd_lstbcnt > RELSEG_SIZE )
1108
- elog (FATAL , "segment too big on relopen!" );
1109
- #endif
1110
-
1111
- targsegno = blkno / RELSEG_SIZE ;
1112
- for (v = & Md_fdvec [vfd ], segno = 1 ; segno <= targsegno ; segno ++ )
1084
+ /* append the '.segno', if needed */
1085
+ segno = blkno / RELSEG_SIZE ;
1086
+ if (segno > 0 )
1113
1087
{
1114
- char * segpath ;
1115
- MdfdVec * newv ;
1116
- MemoryContext oldcxt ;
1088
+ char * segpath = (char * ) palloc (strlen (path ) + 12 );
1117
1089
1118
- segpath = (char * ) palloc (strlen (path ) + 12 );
1119
1090
sprintf (segpath , "%s.%d" , path , segno );
1120
-
1121
- #ifndef __CYGWIN32__
1122
- fd = FileNameOpenFile (segpath , O_RDWR | O_CREAT , 0600 );
1123
- #else
1124
- fd = FileNameOpenFile (segpath , O_RDWR | O_BINARY | O_CREAT , 0600 );
1091
+ pfree (path );
1092
+ path = segpath ;
1093
+ }
1125
1094
#endif
1126
1095
1127
- pfree (segpath );
1128
-
1129
- if (fd < 0 )
1130
- return (MdfdVec * ) NULL ;
1131
-
1132
- /* allocate an mdfdvec entry for it */
1133
- oldcxt = MemoryContextSwitchTo (MdCxt );
1134
- newv = (MdfdVec * ) palloc (sizeof (MdfdVec ));
1135
- MemoryContextSwitchTo (oldcxt );
1136
-
1137
- /* fill the entry */
1138
- newv -> mdfd_vfd = fd ;
1139
- newv -> mdfd_flags = MDFD_TEMP ;
1140
- newv -> mdfd_lstbcnt = _mdnblocks (fd , BLCKSZ );
1141
- newv -> mdfd_chain = (MdfdVec * ) NULL ;
1142
- #ifdef DIAGNOSTIC
1143
- if (newv -> mdfd_lstbcnt > RELSEG_SIZE )
1144
- elog (FATAL , "segment too big on open!" );
1145
- #endif
1146
- v -> mdfd_chain = newv ;
1147
- v = newv ;
1148
- }
1096
+ #ifndef __CYGWIN32__
1097
+ fd = open (path , O_RDWR , 0600 );
1149
1098
#else
1150
- v = & Md_fdvec [ vfd ] ;
1099
+ fd = open ( path , O_RDWR | O_BINARY , 0600 ) ;
1151
1100
#endif
1152
1101
1153
1102
pfree (path );
1154
1103
1155
- return v ;
1104
+ return fd ;
1156
1105
}
1157
1106
1158
1107
static BlockNumber
0 commit comments