Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3bdbdf5

Browse files
committed
Introduce pg_pwrite_zeros() in fileutils.c
This routine is designed to write zeros to a file using vectored I/O, for a size given by its caller, being useful when it comes to initializing a file with a final size already known. XLogFileInitInternal() in xlog.c is changed to use this new routine when initializing WAL segments with zeros (wal_init_zero enabled). Note that the aligned buffers used for the vectored I/O writes now have a size of BLCKSZ, and not XLOG_BLCKSZ anymore, as pg_pwrite_zeros() relies on PGAlignedBlock while xlog.c originally used PGAlignedXLogBlock. This routine will be used in a follow-up patch to do the pre-padding of WAL segments for pg_receivewal and pg_basebackup when these are not compressed. Author: Bharath Rupireddy Reviewed-by: Nathan Bossart, Andres Freund, Thomas Munro, Michael Paquier Discussion: https://www.postgresql.org/message-id/CALj2ACUq7nAb7%3DbJNbK3yYmp-SZhJcXFR_pLk8un6XgDzDF3OA%40mail.gmail.com
1 parent d7744d5 commit 3bdbdf5

File tree

3 files changed

+80
-28
lines changed

3 files changed

+80
-28
lines changed

src/backend/access/transam/xlog.c

+5-28
Original file line numberDiff line numberDiff line change
@@ -2921,7 +2921,6 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
29212921
bool *added, char *path)
29222922
{
29232923
char tmppath[MAXPGPATH];
2924-
PGAlignedXLogBlock zbuffer;
29252924
XLogSegNo installed_segno;
29262925
XLogSegNo max_segno;
29272926
int fd;
@@ -2965,14 +2964,11 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
29652964
(errcode_for_file_access(),
29662965
errmsg("could not create file \"%s\": %m", tmppath)));
29672966

2968-
memset(zbuffer.data, 0, XLOG_BLCKSZ);
2969-
29702967
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
29712968
save_errno = 0;
29722969
if (wal_init_zero)
29732970
{
2974-
struct iovec iov[PG_IOV_MAX];
2975-
int blocks;
2971+
ssize_t rc;
29762972

29772973
/*
29782974
* Zero-fill the file. With this setting, we do this the hard way to
@@ -2983,29 +2979,10 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
29832979
* indirect blocks are down on disk. Therefore, fdatasync(2) or
29842980
* O_DSYNC will be sufficient to sync future writes to the log file.
29852981
*/
2982+
rc = pg_pwrite_zeros(fd, wal_segment_size);
29862983

2987-
/* Prepare to write out a lot of copies of our zero buffer at once. */
2988-
for (int i = 0; i < lengthof(iov); ++i)
2989-
{
2990-
iov[i].iov_base = zbuffer.data;
2991-
iov[i].iov_len = XLOG_BLCKSZ;
2992-
}
2993-
2994-
/* Loop, writing as many blocks as we can for each system call. */
2995-
blocks = wal_segment_size / XLOG_BLCKSZ;
2996-
for (int i = 0; i < blocks;)
2997-
{
2998-
int iovcnt = Min(blocks - i, lengthof(iov));
2999-
off_t offset = i * XLOG_BLCKSZ;
3000-
3001-
if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0)
3002-
{
3003-
save_errno = errno;
3004-
break;
3005-
}
3006-
3007-
i += iovcnt;
3008-
}
2984+
if (rc < 0)
2985+
save_errno = errno;
30092986
}
30102987
else
30112988
{
@@ -3014,7 +2991,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
30142991
* enough.
30152992
*/
30162993
errno = 0;
3017-
if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
2994+
if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
30182995
{
30192996
/* if write didn't set errno, assume no disk space */
30202997
save_errno = errno ? errno : ENOSPC;

src/common/file_utils.c

+73
Original file line numberDiff line numberDiff line change
@@ -527,3 +527,76 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
527527

528528
return sum;
529529
}
530+
531+
/*
532+
* pg_pwrite_zeros
533+
*
534+
* Writes zeros to file worth "size" bytes, using vectored I/O.
535+
*
536+
* Returns the total amount of data written. On failure, a negative value
537+
* is returned with errno set.
538+
*/
539+
ssize_t
540+
pg_pwrite_zeros(int fd, size_t size)
541+
{
542+
PGAlignedBlock zbuffer; /* worth BLCKSZ */
543+
size_t zbuffer_sz;
544+
struct iovec iov[PG_IOV_MAX];
545+
int blocks;
546+
size_t remaining_size = 0;
547+
int i;
548+
ssize_t written;
549+
ssize_t total_written = 0;
550+
551+
zbuffer_sz = sizeof(zbuffer.data);
552+
553+
/* Zero-fill the buffer. */
554+
memset(zbuffer.data, 0, zbuffer_sz);
555+
556+
/* Prepare to write out a lot of copies of our zero buffer at once. */
557+
for (i = 0; i < lengthof(iov); ++i)
558+
{
559+
iov[i].iov_base = zbuffer.data;
560+
iov[i].iov_len = zbuffer_sz;
561+
}
562+
563+
/* Loop, writing as many blocks as we can for each system call. */
564+
blocks = size / zbuffer_sz;
565+
remaining_size = size % zbuffer_sz;
566+
for (i = 0; i < blocks;)
567+
{
568+
int iovcnt = Min(blocks - i, lengthof(iov));
569+
off_t offset = i * zbuffer_sz;
570+
571+
written = pg_pwritev_with_retry(fd, iov, iovcnt, offset);
572+
573+
if (written < 0)
574+
return written;
575+
576+
i += iovcnt;
577+
total_written += written;
578+
}
579+
580+
/* Now, write the remaining size, if any, of the file with zeros. */
581+
if (remaining_size > 0)
582+
{
583+
/* We'll never write more than one block here */
584+
int iovcnt = 1;
585+
586+
/* Jump on to the end of previously written blocks */
587+
off_t offset = i * zbuffer_sz;
588+
589+
iov[0].iov_len = remaining_size;
590+
591+
written = pg_pwritev_with_retry(fd, iov, iovcnt, offset);
592+
593+
if (written < 0)
594+
return written;
595+
596+
total_written += written;
597+
}
598+
599+
Assert(total_written == size);
600+
601+
return total_written;
602+
}

src/include/common/file_utils.h

+2
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,6 @@ extern ssize_t pg_pwritev_with_retry(int fd,
4444
int iovcnt,
4545
off_t offset);
4646

47+
/*
 * Write "size" bytes of zeros to "fd" starting at offset 0, using vectored
 * I/O.  Returns the total number of bytes written, or a negative value with
 * errno set on failure.
 */
extern ssize_t pg_pwrite_zeros(int fd, size_t size);
48+
4749
#endif /* FILE_UTILS_H */

0 commit comments

Comments
 (0)