Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 7fdf56b

Browse files
committed
Address set of issues with errno handling
System calls mixed up in error code paths are causing two issues which several code paths have not correctly handled: 1) For write() calls, sometimes the system may return less bytes than what has been written without errno being set. Some paths were careful enough to consider that case, and assumed that errno should be set to ENOSPC, other calls missed that. 2) errno generated by a system call is overwritten by other system calls which may succeed once an error code path is taken, causing what is reported to the user to be incorrect. This patch uses the brute-force approach of correcting all those code paths. Some refactoring could happen in the future, but this is let as future work, which is not targeted for back-branches anyway. Author: Michael Paquier Reviewed-by: Ashutosh Sharma Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz
1 parent 037768c commit 7fdf56b

File tree

10 files changed

+98
-3
lines changed

10 files changed

+98
-3
lines changed

src/backend/access/heap/rewriteheap.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,9 +1163,14 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
11631163

11641164
/* write out tail end of mapping file (again) */
11651165
if (write(fd, data, len) != len)
1166+
{
1167+
/* if write didn't set errno, assume problem is no disk space */
1168+
if (errno == 0)
1169+
errno = ENOSPC;
11661170
ereport(ERROR,
11671171
(errcode_for_file_access(),
11681172
errmsg("could not write to file \"%s\": %m", path)));
1173+
}
11691174

11701175
/*
11711176
* Now fsync all previously written data. We could improve things and only

src/backend/access/transam/twophase.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,12 +1171,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
11711171
*/
11721172
if (fstat(fd, &stat))
11731173
{
1174+
int save_errno = errno;
1175+
11741176
CloseTransientFile(fd);
11751177
if (give_warnings)
1178+
{
1179+
errno = save_errno;
11761180
ereport(WARNING,
11771181
(errcode_for_file_access(),
11781182
errmsg("could not stat two-phase state file \"%s\": %m",
11791183
path)));
1184+
}
11801185
return NULL;
11811186
}
11821187

@@ -1203,12 +1208,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12031208

12041209
if (read(fd, buf, stat.st_size) != stat.st_size)
12051210
{
1211+
int save_errno = errno;
1212+
12061213
CloseTransientFile(fd);
12071214
if (give_warnings)
1215+
{
1216+
errno = save_errno;
12081217
ereport(WARNING,
12091218
(errcode_for_file_access(),
12101219
errmsg("could not read two-phase state file \"%s\": %m",
12111220
path)));
1221+
}
12121222
pfree(buf);
12131223
return NULL;
12141224
}
@@ -1550,14 +1560,24 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
15501560
/* Write content and CRC */
15511561
if (write(fd, content, len) != len)
15521562
{
1563+
int save_errno = errno;
1564+
15531565
CloseTransientFile(fd);
1566+
1567+
/* if write didn't set errno, assume problem is no disk space */
1568+
errno = save_errno ? save_errno : ENOSPC;
15541569
ereport(ERROR,
15551570
(errcode_for_file_access(),
15561571
errmsg("could not write two-phase state file: %m")));
15571572
}
15581573
if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
15591574
{
1575+
int save_errno = errno;
1576+
15601577
CloseTransientFile(fd);
1578+
1579+
/* if write didn't set errno, assume problem is no disk space */
1580+
errno = save_errno ? save_errno : ENOSPC;
15611581
ereport(ERROR,
15621582
(errcode_for_file_access(),
15631583
errmsg("could not write two-phase state file: %m")));
@@ -1569,7 +1589,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
15691589
*/
15701590
if (pg_fsync(fd) != 0)
15711591
{
1592+
int save_errno = errno;
1593+
15721594
CloseTransientFile(fd);
1595+
errno = save_errno;
15731596
ereport(ERROR,
15741597
(errcode_for_file_access(),
15751598
errmsg("could not fsync two-phase state file: %m")));

src/backend/access/transam/xlog.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3087,7 +3087,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
30873087

30883088
if (pg_fsync(fd) != 0)
30893089
{
3090+
int save_errno = errno;
3091+
30903092
close(fd);
3093+
errno = save_errno;
30913094
ereport(ERROR,
30923095
(errcode_for_file_access(),
30933096
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -11255,8 +11258,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1125511258
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
1125611259
{
1125711260
char fname[MAXFNAMELEN];
11261+
int save_errno = errno;
1125811262

1125911263
XLogFileName(fname, curFileTLI, readSegNo);
11264+
errno = save_errno;
1126011265
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1126111266
(errcode_for_file_access(),
1126211267
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11267,8 +11272,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1126711272
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1126811273
{
1126911274
char fname[MAXFNAMELEN];
11275+
int save_errno = errno;
1127011276

1127111277
XLogFileName(fname, curFileTLI, readSegNo);
11278+
errno = save_errno;
1127211279
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1127311280
(errcode_for_file_access(),
1127411281
errmsg("could not read from log segment %s, offset %u: %m",

src/backend/access/transam/xlogutils.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,9 +710,11 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
710710
if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
711711
{
712712
char path[MAXPGPATH];
713+
int save_errno = errno;
713714

714715
XLogFilePath(path, tli, sendSegNo);
715716

717+
errno = save_errno;
716718
ereport(ERROR,
717719
(errcode_for_file_access(),
718720
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -731,9 +733,11 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
731733
if (readbytes <= 0)
732734
{
733735
char path[MAXPGPATH];
736+
int save_errno = errno;
734737

735738
XLogFilePath(path, tli, sendSegNo);
736739

740+
errno = save_errno;
737741
ereport(ERROR,
738742
(errcode_for_file_access(),
739743
errmsg("could not read from log segment %s, offset %u, length %lu: %m",

src/backend/replication/basebackup.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,13 +392,16 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
392392
fp = AllocateFile(pathbuf, "rb");
393393
if (fp == NULL)
394394
{
395+
int save_errno = errno;
396+
395397
/*
396398
* Most likely reason for this is that the file was already
397399
* removed by a checkpoint, so check for that to get a better
398400
* error message.
399401
*/
400402
CheckXLogRemoved(segno, tli);
401403

404+
errno = save_errno;
402405
ereport(ERROR,
403406
(errcode_for_file_access(),
404407
errmsg("could not open file \"%s\": %m", pathbuf)));

src/backend/replication/logical/origin.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,12 @@ CheckPointReplicationOrigin(void)
549549
/* write magic */
550550
if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
551551
{
552+
int save_errno = errno;
553+
552554
CloseTransientFile(tmpfd);
555+
556+
/* if write didn't set errno, assume problem is no disk space */
557+
errno = save_errno ? save_errno : ENOSPC;
553558
ereport(PANIC,
554559
(errcode_for_file_access(),
555560
errmsg("could not write to file \"%s\": %m",
@@ -588,7 +593,12 @@ CheckPointReplicationOrigin(void)
588593
if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
589594
sizeof(disk_state))
590595
{
596+
int save_errno = errno;
597+
591598
CloseTransientFile(tmpfd);
599+
600+
/* if write didn't set errno, assume problem is no disk space */
601+
errno = save_errno ? save_errno : ENOSPC;
592602
ereport(PANIC,
593603
(errcode_for_file_access(),
594604
errmsg("could not write to file \"%s\": %m",
@@ -604,7 +614,12 @@ CheckPointReplicationOrigin(void)
604614
FIN_CRC32C(crc);
605615
if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
606616
{
617+
int save_errno = errno;
618+
607619
CloseTransientFile(tmpfd);
620+
621+
/* if write didn't set errno, assume problem is no disk space */
622+
errno = save_errno ? save_errno : ENOSPC;
608623
ereport(PANIC,
609624
(errcode_for_file_access(),
610625
errmsg("could not write to file \"%s\": %m",

src/backend/replication/logical/reorderbuffer.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2352,7 +2352,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
23522352
int save_errno = errno;
23532353

23542354
CloseTransientFile(fd);
2355-
errno = save_errno;
2355+
2356+
/* if write didn't set errno, assume problem is no disk space */
2357+
errno = save_errno ? save_errno : ENOSPC;
23562358
ereport(ERROR,
23572359
(errcode_for_file_access(),
23582360
errmsg("could not write to data file for XID %u: %m",

src/backend/replication/logical/snapbuild.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,7 +1583,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
15831583

15841584
if ((write(fd, ondisk, needed_length)) != needed_length)
15851585
{
1586+
int save_errno = errno;
1587+
15861588
CloseTransientFile(fd);
1589+
1590+
/* if write didn't set errno, assume problem is no disk space */
1591+
errno = save_errno ? save_errno : ENOSPC;
15871592
ereport(ERROR,
15881593
(errcode_for_file_access(),
15891594
errmsg("could not write to file \"%s\": %m", tmppath)));
@@ -1599,7 +1604,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
15991604
*/
16001605
if (pg_fsync(fd) != 0)
16011606
{
1607+
int save_errno = errno;
1608+
16021609
CloseTransientFile(fd);
1610+
errno = save_errno;
16031611
ereport(ERROR,
16041612
(errcode_for_file_access(),
16051613
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -1681,7 +1689,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
16811689
readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
16821690
if (readBytes != SnapBuildOnDiskConstantSize)
16831691
{
1692+
int save_errno = errno;
1693+
16841694
CloseTransientFile(fd);
1695+
errno = save_errno;
16851696
ereport(ERROR,
16861697
(errcode_for_file_access(),
16871698
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1707,7 +1718,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17071718
readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
17081719
if (readBytes != sizeof(SnapBuild))
17091720
{
1721+
int save_errno = errno;
1722+
17101723
CloseTransientFile(fd);
1724+
errno = save_errno;
17111725
ereport(ERROR,
17121726
(errcode_for_file_access(),
17131727
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1722,7 +1736,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17221736
readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
17231737
if (readBytes != sz)
17241738
{
1739+
int save_errno = errno;
1740+
17251741
CloseTransientFile(fd);
1742+
errno = save_errno;
17261743
ereport(ERROR,
17271744
(errcode_for_file_access(),
17281745
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1736,7 +1753,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17361753
readBytes = read(fd, ondisk.builder.committed.xip, sz);
17371754
if (readBytes != sz)
17381755
{
1756+
int save_errno = errno;
1757+
17391758
CloseTransientFile(fd);
1759+
errno = save_errno;
17401760
ereport(ERROR,
17411761
(errcode_for_file_access(),
17421762
errmsg("could not read file \"%s\", read %d of %d: %m",

src/backend/replication/slot.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
10851085
int save_errno = errno;
10861086

10871087
CloseTransientFile(fd);
1088-
errno = save_errno;
1088+
1089+
/* if write didn't set errno, assume problem is no disk space */
1090+
errno = save_errno ? save_errno : ENOSPC;
10891091
ereport(elevel,
10901092
(errcode_for_file_access(),
10911093
errmsg("could not write to file \"%s\": %m",
@@ -1184,7 +1186,10 @@ RestoreSlotFromDisk(const char *name)
11841186
*/
11851187
if (pg_fsync(fd) != 0)
11861188
{
1189+
int save_errno = errno;
1190+
11871191
CloseTransientFile(fd);
1192+
errno = save_errno;
11881193
ereport(PANIC,
11891194
(errcode_for_file_access(),
11901195
errmsg("could not fsync file \"%s\": %m",

src/bin/pg_basebackup/receivelog.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
149149
{
150150
if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
151151
{
152+
/* if write didn't set errno, assume problem is no disk space */
153+
if (errno == 0)
154+
errno = ENOSPC;
152155
fprintf(stderr,
153156
_("%s: could not pad transaction log file \"%s\": %s\n"),
154157
progname, fn, strerror(errno));
@@ -334,7 +337,9 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
334337
*/
335338
close(fd);
336339
unlink(tmppath);
337-
errno = save_errno;
340+
341+
/* if write didn't set errno, assume problem is no disk space */
342+
errno = save_errno ? save_errno : ENOSPC;
338343

339344
fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
340345
progname, tmppath, strerror(errno));
@@ -343,7 +348,10 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
343348

344349
if (fsync(fd) != 0)
345350
{
351+
int save_errno = errno;
352+
346353
close(fd);
354+
errno = save_errno;
347355
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
348356
progname, tmppath, strerror(errno));
349357
return false;
@@ -1185,6 +1193,9 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
11851193
copybuf + hdr_len + bytes_written,
11861194
bytes_to_write) != bytes_to_write)
11871195
{
1196+
/* if write didn't set errno, assume problem is no disk space */
1197+
if (errno == 0)
1198+
errno = ENOSPC;
11881199
fprintf(stderr,
11891200
_("%s: could not write %u bytes to WAL file \"%s\": %s\n"),
11901201
progname, bytes_to_write, current_walfile_name,

0 commit comments

Comments
 (0)