Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 2692d32

Browse files
committed
Tweak smgrblindwrt per advice from Vadim: add a parameter indicating
whether to do an fsync or not, and if so (which should be seldom), just do the fsync immediately. This way we need not build data structures in md.c/fd.c for blind writes.
1 parent a447ae2 commit 2692d32

File tree

5 files changed

+95
-135
lines changed

5 files changed

+95
-135
lines changed

src/backend/storage/buffer/bufmgr.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.78 2000/04/09 04:43:18 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.79 2000/04/10 23:41:49 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1127,7 +1127,8 @@ BufferSync()
11271127
bufHdr->blind.relname,
11281128
bufdb, bufrel,
11291129
bufHdr->tag.blockNum,
1130-
(char *) MAKE_PTR(bufHdr->data));
1130+
(char *) MAKE_PTR(bufHdr->data),
1131+
true); /* must fsync */
11311132
}
11321133
else
11331134
{
@@ -1529,7 +1530,8 @@ BufferReplace(BufferDesc *bufHdr)
15291530
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname,
15301531
bufHdr->blind.relname, bufdb, bufrel,
15311532
bufHdr->tag.blockNum,
1532-
(char *) MAKE_PTR(bufHdr->data));
1533+
(char *) MAKE_PTR(bufHdr->data),
1534+
false); /* no fsync */
15331535
}
15341536

15351537
#ifndef OPTIMIZE_SINGLE
@@ -1544,9 +1546,11 @@ BufferReplace(BufferDesc *bufHdr)
15441546
return FALSE;
15451547

15461548
/* If we had marked this buffer as needing to be fsync'd, we can forget
1547-
* about that, because it's now the storage manager's responsibility.
1549+
* about that, because it's now the storage manager's responsibility
1550+
* (but only if we called smgrwrite, not smgrblindwrt).
15481551
*/
1549-
ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
1552+
if (reln != (Relation) NULL)
1553+
ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
15501554

15511555
BufferFlushCount++;
15521556

src/backend/storage/smgr/md.c

Lines changed: 68 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.65 2000/04/09 04:43:20 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.66 2000/04/10 23:41:51 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -48,11 +48,10 @@
4848
typedef struct _MdfdVec
4949
{
5050
int mdfd_vfd; /* fd number in vfd pool */
51-
int mdfd_flags; /* free, temporary */
51+
int mdfd_flags; /* fd status flags */
5252

5353
/* these are the assigned bits in mdfd_flags: */
5454
#define MDFD_FREE (1 << 0)/* unused entry */
55-
#define MDFD_TEMP (1 << 1)/* close this entry at transaction end */
5655

5756
int mdfd_lstbcnt; /* most recent block count */
5857
int mdfd_nextFree; /* next free vector */
@@ -72,8 +71,8 @@ static void mdclose_fd(int fd);
7271
static int _mdfd_getrelnfd(Relation reln);
7372
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
7473
static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
75-
static MdfdVec *_mdfd_blind_getseg(char *dbname, char *relname,
76-
Oid dbid, Oid relid, int blkno);
74+
static int _mdfd_blind_getseg(char *dbname, char *relname,
75+
Oid dbid, Oid relid, int blkno);
7776
static int _fdvec_alloc(void);
7877
static void _fdvec_free(int);
7978
static BlockNumber _mdnblocks(File file, Size blcksz);
@@ -572,23 +571,25 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
572571
*
573572
* We have to be able to do this using only the name and OID of
574573
* the database and relation in which the block belongs. Otherwise
575-
* this is just like mdwrite().
574+
* this is much like mdwrite(). If dofsync is TRUE, then we fsync
575+
* the file, making it more like mdflush().
576576
*/
577577
int
578578
mdblindwrt(char *dbname,
579579
char *relname,
580580
Oid dbid,
581581
Oid relid,
582582
BlockNumber blkno,
583-
char *buffer)
583+
char *buffer,
584+
bool dofsync)
584585
{
585586
int status;
586587
long seekpos;
587-
MdfdVec *v;
588+
int fd;
588589

589-
v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
590+
fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
590591

591-
if (v == NULL)
592+
if (fd < 0)
592593
return SM_FAIL;
593594

594595
#ifndef LET_OS_MANAGE_FILESIZE
@@ -601,11 +602,22 @@ mdblindwrt(char *dbname,
601602
seekpos = (long) (BLCKSZ * (blkno));
602603
#endif
603604

604-
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
605+
if (lseek(fd, seekpos, SEEK_SET) != seekpos)
606+
{
607+
close(fd);
605608
return SM_FAIL;
609+
}
606610

607611
status = SM_SUCCESS;
608-
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
612+
613+
/* write and optionally sync the block */
614+
if (write(fd, buffer, BLCKSZ) != BLCKSZ)
615+
status = SM_FAIL;
616+
else if (dofsync &&
617+
pg_fsync(fd) < 0)
618+
status = SM_FAIL;
619+
620+
if (close(fd) < 0)
609621
status = SM_FAIL;
610622

611623
return status;
@@ -633,7 +645,8 @@ mdmarkdirty(Relation reln, BlockNumber blkno)
633645
*
634646
* We have to be able to do this using only the name and OID of
635647
* the database and relation in which the block belongs. Otherwise
636-
* this is just like mdmarkdirty().
648+
* this is much like mdmarkdirty(). However, we do the fsync immediately
649+
* rather than building md/fd datastructures to postpone it till later.
637650
*/
638651
int
639652
mdblindmarkdirty(char *dbname,
@@ -642,16 +655,23 @@ mdblindmarkdirty(char *dbname,
642655
Oid relid,
643656
BlockNumber blkno)
644657
{
645-
MdfdVec *v;
658+
int status;
659+
int fd;
646660

647-
v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
661+
fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
648662

649-
if (v == NULL)
663+
if (fd < 0)
650664
return SM_FAIL;
651665

652-
FileMarkDirty(v->mdfd_vfd);
666+
status = SM_SUCCESS;
653667

654-
return SM_SUCCESS;
668+
if (pg_fsync(fd) < 0)
669+
status = SM_FAIL;
670+
671+
if (close(fd) < 0)
672+
status = SM_FAIL;
673+
674+
return status;
655675
}
656676

657677
/*
@@ -820,24 +840,15 @@ mdcommit()
820840
v = &Md_fdvec[i];
821841
if (v->mdfd_flags & MDFD_FREE)
822842
continue;
823-
if (v->mdfd_flags & MDFD_TEMP)
824-
{
825-
/* Sync and close the file */
826-
mdclose_fd(i);
827-
}
828-
else
829-
{
830-
/* Sync, but keep the file entry */
831-
843+
/* Sync the file entry */
832844
#ifndef LET_OS_MANAGE_FILESIZE
833-
for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
845+
for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
834846
#else
835-
if (v != (MdfdVec *) NULL)
847+
if (v != (MdfdVec *) NULL)
836848
#endif
837-
{
838-
if (FileSync(v->mdfd_vfd) < 0)
839-
return SM_FAIL;
840-
}
849+
{
850+
if (FileSync(v->mdfd_vfd) < 0)
851+
return SM_FAIL;
841852
}
842853
}
843854

@@ -854,21 +865,9 @@ mdcommit()
854865
int
855866
mdabort()
856867
{
857-
int i;
858-
MdfdVec *v;
859-
860-
for (i = 0; i < CurFd; i++)
861-
{
862-
v = &Md_fdvec[i];
863-
if (v->mdfd_flags & MDFD_FREE)
864-
continue;
865-
if (v->mdfd_flags & MDFD_TEMP)
866-
{
867-
/* Close the file */
868-
mdclose_fd(i);
869-
}
870-
}
871-
868+
/* We don't actually have to do anything here. fd.c will discard
869+
* fsync-needed bits in its AtEOXact_Files() routine.
870+
*/
872871
return SM_SUCCESS;
873872
}
874873

@@ -1057,102 +1056,52 @@ _mdfd_getseg(Relation reln, int blkno)
10571056
return v;
10581057
}
10591058

1060-
/* Find the segment of the relation holding the specified block.
1061-
* This is the same as _mdfd_getseg() except that we must work
1062-
* "blind" with no Relation struct.
1059+
/*
1060+
* Find the segment of the relation holding the specified block.
10631061
*
1064-
* NOTE: we have no easy way to tell whether a FD already exists for the
1065-
* target relation, so we always make a new one. This should probably
1066-
* be improved somehow, but I doubt it's a significant performance issue
1067-
* under normal circumstances. The FD is marked to be closed at end of xact
1068-
* so that we don't accumulate a lot of dead FDs.
1062+
* This performs the same work as _mdfd_getseg() except that we must work
1063+
* "blind" with no Relation struct. We assume that we are not likely to
1064+
* touch the same relation again soon, so we do not create an FD entry for
1065+
* the relation --- we just open a kernel file descriptor which will be
1066+
* used and promptly closed. The return value is the kernel descriptor,
1067+
* or -1 on failure.
10691068
*/
10701069

1071-
static MdfdVec *
1070+
static int
10721071
_mdfd_blind_getseg(char *dbname, char *relname, Oid dbid, Oid relid,
10731072
int blkno)
10741073
{
1075-
MdfdVec *v;
10761074
char *path;
10771075
int fd;
1078-
int vfd;
10791076
#ifndef LET_OS_MANAGE_FILESIZE
10801077
int segno;
1081-
int targsegno;
10821078
#endif
10831079

1084-
/* construct the path to the file and open it */
1080+
/* construct the path to the relation */
10851081
path = relpath_blind(dbname, relname, dbid, relid);
10861082

1087-
#ifndef __CYGWIN32__
1088-
fd = FileNameOpenFile(path, O_RDWR, 0600);
1089-
#else
1090-
fd = FileNameOpenFile(path, O_RDWR | O_BINARY, 0600);
1091-
#endif
1092-
1093-
if (fd < 0)
1094-
return NULL;
1095-
1096-
vfd = _fdvec_alloc();
1097-
if (vfd < 0)
1098-
return NULL;
1099-
1100-
Md_fdvec[vfd].mdfd_vfd = fd;
1101-
Md_fdvec[vfd].mdfd_flags = MDFD_TEMP;
1102-
Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
11031083
#ifndef LET_OS_MANAGE_FILESIZE
1104-
Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
1105-
1106-
#ifdef DIAGNOSTIC
1107-
if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
1108-
elog(FATAL, "segment too big on relopen!");
1109-
#endif
1110-
1111-
targsegno = blkno / RELSEG_SIZE;
1112-
for (v = &Md_fdvec[vfd], segno = 1; segno <= targsegno; segno++)
1084+
/* append the '.segno', if needed */
1085+
segno = blkno / RELSEG_SIZE;
1086+
if (segno > 0)
11131087
{
1114-
char *segpath;
1115-
MdfdVec *newv;
1116-
MemoryContext oldcxt;
1088+
char *segpath = (char *) palloc(strlen(path) + 12);
11171089

1118-
segpath = (char *) palloc(strlen(path) + 12);
11191090
sprintf(segpath, "%s.%d", path, segno);
1120-
1121-
#ifndef __CYGWIN32__
1122-
fd = FileNameOpenFile(segpath, O_RDWR | O_CREAT, 0600);
1123-
#else
1124-
fd = FileNameOpenFile(segpath, O_RDWR | O_BINARY | O_CREAT, 0600);
1091+
pfree(path);
1092+
path = segpath;
1093+
}
11251094
#endif
11261095

1127-
pfree(segpath);
1128-
1129-
if (fd < 0)
1130-
return (MdfdVec *) NULL;
1131-
1132-
/* allocate an mdfdvec entry for it */
1133-
oldcxt = MemoryContextSwitchTo(MdCxt);
1134-
newv = (MdfdVec *) palloc(sizeof(MdfdVec));
1135-
MemoryContextSwitchTo(oldcxt);
1136-
1137-
/* fill the entry */
1138-
newv->mdfd_vfd = fd;
1139-
newv->mdfd_flags = MDFD_TEMP;
1140-
newv->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
1141-
newv->mdfd_chain = (MdfdVec *) NULL;
1142-
#ifdef DIAGNOSTIC
1143-
if (newv->mdfd_lstbcnt > RELSEG_SIZE)
1144-
elog(FATAL, "segment too big on open!");
1145-
#endif
1146-
v->mdfd_chain = newv;
1147-
v = newv;
1148-
}
1096+
#ifndef __CYGWIN32__
1097+
fd = open(path, O_RDWR, 0600);
11491098
#else
1150-
v = &Md_fdvec[vfd];
1099+
fd = open(path, O_RDWR | O_BINARY, 0600);
11511100
#endif
11521101

11531102
pfree(path);
11541103

1155-
return v;
1104+
return fd;
11561105
}
11571106

11581107
static BlockNumber

src/backend/storage/smgr/mm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*
1212
*
1313
* IDENTIFICATION
14-
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.18 2000/01/26 05:57:05 momjian Exp $
14+
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.19 2000/04/10 23:41:51 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -478,7 +478,8 @@ mmblindwrt(char *dbstr,
478478
Oid dbid,
479479
Oid relid,
480480
BlockNumber blkno,
481-
char *buffer)
481+
char *buffer,
482+
bool dofsync)
482483
{
483484
return SM_FAIL;
484485
}

0 commit comments

Comments
 (0)