Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 6cb3372

Browse files
committed
Address set of issues with errno handling
System calls mixed up in error code paths are causing two issues which several code paths have not correctly handled: 1) For write() calls, sometimes the system may return less bytes than what has been written without errno being set. Some paths were careful enough to consider that case, and assumed that errno should be set to ENOSPC, other calls missed that. 2) errno generated by a system call is overwritten by other system calls which may succeed once an error code path is taken, causing what is reported to the user to be incorrect. This patch uses the brute-force approach of correcting all those code paths. Some refactoring could happen in the future, but this is let as future work, which is not targeted for back-branches anyway. Author: Michael Paquier Reviewed-by: Ashutosh Sharma Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz
1 parent 370e68a commit 6cb3372

File tree

10 files changed

+118
-6
lines changed

10 files changed

+118
-6
lines changed

src/backend/access/heap/rewriteheap.c

+5
Original file line numberDiff line numberDiff line change
@@ -1168,9 +1168,14 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
11681168
/* write out tail end of mapping file (again) */
11691169
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);
11701170
if (write(fd, data, len) != len)
1171+
{
1172+
/* if write didn't set errno, assume problem is no disk space */
1173+
if (errno == 0)
1174+
errno = ENOSPC;
11711175
ereport(ERROR,
11721176
(errcode_for_file_access(),
11731177
errmsg("could not write to file \"%s\": %m", path)));
1178+
}
11741179
pgstat_report_wait_end();
11751180

11761181
/*

src/backend/access/transam/twophase.c

+23
Original file line numberDiff line numberDiff line change
@@ -1241,12 +1241,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12411241
*/
12421242
if (fstat(fd, &stat))
12431243
{
1244+
int save_errno = errno;
1245+
12441246
CloseTransientFile(fd);
12451247
if (give_warnings)
1248+
{
1249+
errno = save_errno;
12461250
ereport(WARNING,
12471251
(errcode_for_file_access(),
12481252
errmsg("could not stat two-phase state file \"%s\": %m",
12491253
path)));
1254+
}
12501255
return NULL;
12511256
}
12521257

@@ -1274,13 +1279,18 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12741279
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
12751280
if (read(fd, buf, stat.st_size) != stat.st_size)
12761281
{
1282+
int save_errno = errno;
1283+
12771284
pgstat_report_wait_end();
12781285
CloseTransientFile(fd);
12791286
if (give_warnings)
1287+
{
1288+
errno = save_errno;
12801289
ereport(WARNING,
12811290
(errcode_for_file_access(),
12821291
errmsg("could not read two-phase state file \"%s\": %m",
12831292
path)));
1293+
}
12841294
pfree(buf);
12851295
return NULL;
12861296
}
@@ -1663,16 +1673,26 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
16631673
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
16641674
if (write(fd, content, len) != len)
16651675
{
1676+
int save_errno = errno;
1677+
16661678
pgstat_report_wait_end();
16671679
CloseTransientFile(fd);
1680+
1681+
/* if write didn't set errno, assume problem is no disk space */
1682+
errno = save_errno ? save_errno : ENOSPC;
16681683
ereport(ERROR,
16691684
(errcode_for_file_access(),
16701685
errmsg("could not write two-phase state file: %m")));
16711686
}
16721687
if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
16731688
{
1689+
int save_errno = errno;
1690+
16741691
pgstat_report_wait_end();
16751692
CloseTransientFile(fd);
1693+
1694+
/* if write didn't set errno, assume problem is no disk space */
1695+
errno = save_errno ? save_errno : ENOSPC;
16761696
ereport(ERROR,
16771697
(errcode_for_file_access(),
16781698
errmsg("could not write two-phase state file: %m")));
@@ -1686,7 +1706,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
16861706
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_SYNC);
16871707
if (pg_fsync(fd) != 0)
16881708
{
1709+
int save_errno = errno;
1710+
16891711
CloseTransientFile(fd);
1712+
errno = save_errno;
16901713
ereport(ERROR,
16911714
(errcode_for_file_access(),
16921715
errmsg("could not fsync two-phase state file: %m")));

src/backend/access/transam/xlog.c

+7
Original file line numberDiff line numberDiff line change
@@ -3268,7 +3268,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
32683268
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
32693269
if (pg_fsync(fd) != 0)
32703270
{
3271+
int save_errno = errno;
3272+
32713273
close(fd);
3274+
errno = save_errno;
32723275
ereport(ERROR,
32733276
(errcode_for_file_access(),
32743277
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -11675,8 +11678,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1167511678
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
1167611679
{
1167711680
char fname[MAXFNAMELEN];
11681+
int save_errno = errno;
1167811682

1167911683
XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
11684+
errno = save_errno;
1168011685
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1168111686
(errcode_for_file_access(),
1168211687
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11688,9 +11693,11 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1168811693
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1168911694
{
1169011695
char fname[MAXFNAMELEN];
11696+
int save_errno = errno;
1169111697

1169211698
pgstat_report_wait_end();
1169311699
XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
11700+
errno = save_errno;
1169411701
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1169511702
(errcode_for_file_access(),
1169611703
errmsg("could not read from log segment %s, offset %u: %m",

src/backend/access/transam/xlogutils.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -718,9 +718,10 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
718718
if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
719719
{
720720
char path[MAXPGPATH];
721+
int save_errno = errno;
721722

722723
XLogFilePath(path, tli, sendSegNo, segsize);
723-
724+
errno = save_errno;
724725
ereport(ERROR,
725726
(errcode_for_file_access(),
726727
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -741,9 +742,10 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
741742
if (readbytes <= 0)
742743
{
743744
char path[MAXPGPATH];
745+
int save_errno = errno;
744746

745747
XLogFilePath(path, tli, sendSegNo, segsize);
746-
748+
errno = save_errno;
747749
ereport(ERROR,
748750
(errcode_for_file_access(),
749751
errmsg("could not read from log segment %s, offset %u, length %lu: %m",

src/backend/replication/basebackup.c

+3
Original file line numberDiff line numberDiff line change
@@ -495,13 +495,16 @@ perform_base_backup(basebackup_options *opt)
495495
fp = AllocateFile(pathbuf, "rb");
496496
if (fp == NULL)
497497
{
498+
int save_errno = errno;
499+
498500
/*
499501
* Most likely reason for this is that the file was already
500502
* removed by a checkpoint, so check for that to get a better
501503
* error message.
502504
*/
503505
CheckXLogRemoved(segno, tli);
504506

507+
errno = save_errno;
505508
ereport(ERROR,
506509
(errcode_for_file_access(),
507510
errmsg("could not open file \"%s\": %m", pathbuf)));

src/backend/replication/logical/origin.c

+15
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,12 @@ CheckPointReplicationOrigin(void)
578578
/* write magic */
579579
if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
580580
{
581+
int save_errno = errno;
582+
581583
CloseTransientFile(tmpfd);
584+
585+
/* if write didn't set errno, assume problem is no disk space */
586+
errno = save_errno ? save_errno : ENOSPC;
582587
ereport(PANIC,
583588
(errcode_for_file_access(),
584589
errmsg("could not write to file \"%s\": %m",
@@ -617,7 +622,12 @@ CheckPointReplicationOrigin(void)
617622
if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
618623
sizeof(disk_state))
619624
{
625+
int save_errno = errno;
626+
620627
CloseTransientFile(tmpfd);
628+
629+
/* if write didn't set errno, assume problem is no disk space */
630+
errno = save_errno ? save_errno : ENOSPC;
621631
ereport(PANIC,
622632
(errcode_for_file_access(),
623633
errmsg("could not write to file \"%s\": %m",
@@ -633,7 +643,12 @@ CheckPointReplicationOrigin(void)
633643
FIN_CRC32C(crc);
634644
if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
635645
{
646+
int save_errno = errno;
647+
636648
CloseTransientFile(tmpfd);
649+
650+
/* if write didn't set errno, assume problem is no disk space */
651+
errno = save_errno ? save_errno : ENOSPC;
637652
ereport(PANIC,
638653
(errcode_for_file_access(),
639654
errmsg("could not write to file \"%s\": %m",

src/backend/replication/logical/reorderbuffer.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -2304,7 +2304,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
23042304
int save_errno = errno;
23052305

23062306
CloseTransientFile(fd);
2307-
errno = save_errno;
2307+
2308+
/* if write didn't set errno, assume problem is no disk space */
2309+
errno = save_errno ? save_errno : ENOSPC;
23082310
ereport(ERROR,
23092311
(errcode_for_file_access(),
23102312
errmsg("could not write to data file for XID %u: %m",

src/backend/replication/logical/snapbuild.c

+20
Original file line numberDiff line numberDiff line change
@@ -1605,7 +1605,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
16051605
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
16061606
if ((write(fd, ondisk, needed_length)) != needed_length)
16071607
{
1608+
int save_errno = errno;
1609+
16081610
CloseTransientFile(fd);
1611+
1612+
/* if write didn't set errno, assume problem is no disk space */
1613+
errno = save_errno ? save_errno : ENOSPC;
16091614
ereport(ERROR,
16101615
(errcode_for_file_access(),
16111616
errmsg("could not write to file \"%s\": %m", tmppath)));
@@ -1623,7 +1628,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
16231628
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_SYNC);
16241629
if (pg_fsync(fd) != 0)
16251630
{
1631+
int save_errno = errno;
1632+
16261633
CloseTransientFile(fd);
1634+
errno = save_errno;
16271635
ereport(ERROR,
16281636
(errcode_for_file_access(),
16291637
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -1708,7 +1716,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17081716
pgstat_report_wait_end();
17091717
if (readBytes != SnapBuildOnDiskConstantSize)
17101718
{
1719+
int save_errno = errno;
1720+
17111721
CloseTransientFile(fd);
1722+
errno = save_errno;
17121723
ereport(ERROR,
17131724
(errcode_for_file_access(),
17141725
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1736,7 +1747,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17361747
pgstat_report_wait_end();
17371748
if (readBytes != sizeof(SnapBuild))
17381749
{
1750+
int save_errno = errno;
1751+
17391752
CloseTransientFile(fd);
1753+
errno = save_errno;
17401754
ereport(ERROR,
17411755
(errcode_for_file_access(),
17421756
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1753,7 +1767,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17531767
pgstat_report_wait_end();
17541768
if (readBytes != sz)
17551769
{
1770+
int save_errno = errno;
1771+
17561772
CloseTransientFile(fd);
1773+
errno = save_errno;
17571774
ereport(ERROR,
17581775
(errcode_for_file_access(),
17591776
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1769,7 +1786,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17691786
pgstat_report_wait_end();
17701787
if (readBytes != sz)
17711788
{
1789+
int save_errno = errno;
1790+
17721791
CloseTransientFile(fd);
1792+
errno = save_errno;
17731793
ereport(ERROR,
17741794
(errcode_for_file_access(),
17751795
errmsg("could not read file \"%s\", read %d of %d: %m",

src/backend/replication/slot.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -1274,7 +1274,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
12741274

12751275
pgstat_report_wait_end();
12761276
CloseTransientFile(fd);
1277-
errno = save_errno;
1277+
1278+
/* if write didn't set errno, assume problem is no disk space */
1279+
errno = save_errno ? save_errno : ENOSPC;
12781280
ereport(elevel,
12791281
(errcode_for_file_access(),
12801282
errmsg("could not write to file \"%s\": %m",
@@ -1378,7 +1380,10 @@ RestoreSlotFromDisk(const char *name)
13781380
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
13791381
if (pg_fsync(fd) != 0)
13801382
{
1383+
int save_errno = errno;
1384+
13811385
CloseTransientFile(fd);
1386+
errno = save_errno;
13821387
ereport(PANIC,
13831388
(errcode_for_file_access(),
13841389
errmsg("could not fsync file \"%s\": %m",

src/bin/pg_basebackup/walmethods.c

+32-2
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,11 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
128128

129129
pg_free(zerobuf);
130130
close(fd);
131-
errno = save_errno;
131+
132+
/*
133+
* If write didn't set errno, assume problem is no disk space.
134+
*/
135+
errno = save_errno ? save_errno : ENOSPC;
132136
return NULL;
133137
}
134138
}
@@ -442,7 +446,14 @@ tar_write_compressed_data(void *buf, size_t count, bool flush)
442446
size_t len = ZLIB_OUT_SIZE - tar_data->zp->avail_out;
443447

444448
if (write(tar_data->fd, tar_data->zlibOut, len) != len)
449+
{
450+
/*
451+
* If write didn't set errno, assume problem is no disk space.
452+
*/
453+
if (errno == 0)
454+
errno = ENOSPC;
445455
return false;
456+
}
446457

447458
tar_data->zp->next_out = tar_data->zlibOut;
448459
tar_data->zp->avail_out = ZLIB_OUT_SIZE;
@@ -623,7 +634,8 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
623634
save_errno = errno;
624635
pg_free(tar_data->currentfile);
625636
tar_data->currentfile = NULL;
626-
errno = save_errno;
637+
/* if write didn't set errno, assume problem is no disk space */
638+
errno = save_errno ? save_errno : ENOSPC;
627639
return NULL;
628640
}
629641
}
@@ -818,7 +830,12 @@ tar_close(Walfile f, WalCloseMethod method)
818830
if (!tar_data->compression)
819831
{
820832
if (write(tar_data->fd, tf->header, 512) != 512)
833+
{
834+
/* if write didn't set errno, assume problem is no disk space */
835+
if (errno == 0)
836+
errno = ENOSPC;
821837
return -1;
838+
}
822839
}
823840
#ifdef HAVE_LIBZ
824841
else
@@ -884,7 +901,12 @@ tar_finish(void)
884901
if (!tar_data->compression)
885902
{
886903
if (write(tar_data->fd, zerobuf, sizeof(zerobuf)) != sizeof(zerobuf))
904+
{
905+
/* if write didn't set errno, assume problem is no disk space */
906+
if (errno == 0)
907+
errno = ENOSPC;
887908
return false;
909+
}
888910
}
889911
#ifdef HAVE_LIBZ
890912
else
@@ -911,7 +933,15 @@ tar_finish(void)
911933
size_t len = ZLIB_OUT_SIZE - tar_data->zp->avail_out;
912934

913935
if (write(tar_data->fd, tar_data->zlibOut, len) != len)
936+
{
937+
/*
938+
* If write didn't set errno, assume problem is no disk
939+
* space.
940+
*/
941+
if (errno == 0)
942+
errno = ENOSPC;
914943
return false;
944+
}
915945
}
916946
if (r == Z_STREAM_END)
917947
break;

0 commit comments

Comments
 (0)