Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 54e72b6

Browse files
committed
Refactor rmtree() to use get_dirent_type().
Switch to get_dirent_type() instead of lstat() while traversing a directory tree, to see if that fixes the intermittent ENOTEMPTY failures seen in recent pg_upgrade tests, on Windows CI. While refactoring, also use AllocateDir() instead of opendir() in the backend, which knows how to handle descriptor pressure. Our CI system currently uses Windows Server 2019, a version known not to have POSIX unlink semantics enabled by default yet, unlike typical Windows 10 and 11 systems. That might explain why we see this flapping on CI but (apparently) not in the build farm, though the frequency is quite low. The theory is that some directory entry must be in state STATUS_DELETE_PENDING, which lstat() would report as ENOENT, though unfortunately we don't know exactly why yet. With this change, rmtree() will not skip them, and try to unlink (again). Our unlink() wrapper should either wait a short time for them to go away when some other process closes the handle, or log a message to tell us the path of the problem file if not, so we can dig further. Discussion: https://postgr.es/m/20220919213217.ptqfdlcc5idk5xup%40awork3.anarazel.de
1 parent 3bef56e commit 54e72b6

File tree

1 file changed

+64
-56
lines changed

1 file changed

+64
-56
lines changed

src/common/rmtree.c

Lines changed: 64 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,21 @@
2020
#include <unistd.h>
2121
#include <sys/stat.h>
2222

23+
#include "common/file_utils.h"
24+
2325
#ifndef FRONTEND
26+
#include "storage/fd.h"
2427
#define pg_log_warning(...) elog(WARNING, __VA_ARGS__)
28+
#define LOG_LEVEL WARNING
29+
#define OPENDIR(x) AllocateDir(x)
30+
#define CLOSEDIR(x) FreeDir(x)
2531
#else
2632
#include "common/logging.h"
33+
#define LOG_LEVEL PG_LOG_WARNING
34+
#define OPENDIR(x) opendir(x)
35+
#define CLOSEDIR(x) closedir(x)
2736
#endif
2837

29-
3038
/*
3139
* rmtree
3240
*
@@ -41,82 +49,82 @@
4149
bool
4250
rmtree(const char *path, bool rmtopdir)
4351
{
44-
bool result = true;
4552
char pathbuf[MAXPGPATH];
46-
char **filenames;
47-
char **filename;
48-
struct stat statbuf;
49-
50-
/*
51-
* we copy all the names out of the directory before we start modifying
52-
* it.
53-
*/
54-
filenames = pgfnames(path);
53+
DIR *dir;
54+
struct dirent *de;
55+
bool result = true;
56+
size_t dirnames_size = 0;
57+
size_t dirnames_capacity = 8;
58+
char **dirnames = palloc(sizeof(char *) * dirnames_capacity);
5559

56-
if (filenames == NULL)
60+
dir = OPENDIR(path);
61+
if (dir == NULL)
62+
{
63+
pg_log_warning("could not open directory \"%s\": %m", path);
5764
return false;
65+
}
5866

59-
/* now we have the names we can start removing things */
60-
for (filename = filenames; *filename; filename++)
67+
while (errno = 0, (de = readdir(dir)))
6168
{
62-
snprintf(pathbuf, MAXPGPATH, "%s/%s", path, *filename);
63-
64-
/*
65-
* It's ok if the file is not there anymore; we were just about to
66-
* delete it anyway.
67-
*
68-
* This is not an academic possibility. One scenario where this
69-
* happens is when bgwriter has a pending unlink request for a file in
70-
* a database that's being dropped. In dropdb(), we call
71-
* ForgetDatabaseSyncRequests() to flush out any such pending unlink
72-
* requests, but because that's asynchronous, it's not guaranteed that
73-
* the bgwriter receives the message in time.
74-
*/
75-
if (lstat(pathbuf, &statbuf) != 0)
76-
{
77-
if (errno != ENOENT)
78-
{
79-
pg_log_warning("could not stat file or directory \"%s\": %m",
80-
pathbuf);
81-
result = false;
82-
}
69+
if (strcmp(de->d_name, ".") == 0 ||
70+
strcmp(de->d_name, "..") == 0)
8371
continue;
84-
}
85-
86-
if (S_ISDIR(statbuf.st_mode))
87-
{
88-
/* call ourselves recursively for a directory */
89-
if (!rmtree(pathbuf, true))
90-
{
91-
/* we already reported the error */
92-
result = false;
93-
}
94-
}
95-
else
72+
snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
73+
switch (get_dirent_type(pathbuf, de, false, LOG_LEVEL))
9674
{
97-
if (unlink(pathbuf) != 0)
98-
{
99-
if (errno != ENOENT)
75+
case PGFILETYPE_ERROR:
76+
/* already logged, press on */
77+
break;
78+
case PGFILETYPE_DIR:
79+
80+
/*
81+
* Defer recursion until after we've closed this directory, to
82+
* avoid using more than one file descriptor at a time.
83+
*/
84+
if (dirnames_size == dirnames_capacity)
85+
{
86+
dirnames = repalloc(dirnames,
87+
sizeof(char *) * dirnames_capacity * 2);
88+
dirnames_capacity *= 2;
89+
}
90+
dirnames[dirnames_size++] = pstrdup(pathbuf);
91+
break;
92+
default:
93+
if (unlink(pathbuf) != 0 && errno != ENOENT)
10094
{
101-
pg_log_warning("could not remove file or directory \"%s\": %m",
102-
pathbuf);
95+
pg_log_warning("could not unlink file \"%s\": %m", pathbuf);
10396
result = false;
10497
}
105-
}
98+
break;
10699
}
107100
}
108101

102+
if (errno != 0)
103+
{
104+
pg_log_warning("could not read directory \"%s\": %m", path);
105+
result = false;
106+
}
107+
108+
CLOSEDIR(dir);
109+
110+
/* Now recurse into the subdirectories we found. */
111+
for (size_t i = 0; i < dirnames_size; ++i)
112+
{
113+
if (!rmtree(dirnames[i], true))
114+
result = false;
115+
pfree(dirnames[i]);
116+
}
117+
109118
if (rmtopdir)
110119
{
111120
if (rmdir(path) != 0)
112121
{
113-
pg_log_warning("could not remove file or directory \"%s\": %m",
114-
path);
122+
pg_log_warning("could not remove directory \"%s\": %m", path);
115123
result = false;
116124
}
117125
}
118126

119-
pgfnames_cleanup(filenames);
127+
pfree(dirnames);
120128

121129
return result;
122130
}

0 commit comments

Comments
 (0)