2
2
*
3
3
* catalog.c: backup catalog operation
4
4
*
5
- * Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
5
+ * Portions Copyright (c) 2009-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
6
+ * Portions Copyright (c) 2015-2017, Postgres Professional
6
7
*
7
8
*-------------------------------------------------------------------------
8
9
*/
12
13
#include <dirent.h>
13
14
#include <fcntl.h>
14
15
#include <libgen.h>
16
+ #include <signal.h>
15
17
#include <sys/file.h>
16
18
#include <sys/stat.h>
17
19
#include <sys/types.h>
@@ -25,42 +27,185 @@ static pgBackup *read_backup_from_file(const char *path);
25
27
26
28
#define BOOL_TO_STR (val ) ((val) ? "true" : "false")
27
29
28
- static int lock_fd = -1 ;
30
+ static bool exit_hook_registered = false;
31
+ static char lock_file [MAXPGPATH ];
32
+
33
+ static void
34
+ unlink_lock_atexit (void )
35
+ {
36
+ int res ;
37
+ res = unlink (lock_file );
38
+ if (res != 0 && res != ENOENT )
39
+ elog (WARNING , "%s: %s" , lock_file , strerror (errno ));
40
+ }
29
41
30
42
/*
31
- * Lock of the catalog with pg_probackup.conf file and return 0.
32
- * If the lock is held by another one, return 1 immediately.
43
+ * Create a lockfile.
33
44
*/
34
45
int
35
46
catalog_lock (bool check_catalog )
36
47
{
37
- int ret ;
38
- char id_path [MAXPGPATH ];
39
-
40
- join_path_components (id_path , backup_path , BACKUP_CATALOG_CONF_FILE );
41
- lock_fd = open (id_path , O_RDWR );
42
- if (lock_fd == -1 )
43
- elog (errno == ENOENT ? ERROR : ERROR ,
44
- "cannot open file \"%s\": %s" , id_path , strerror (errno ));
45
- #ifdef __IBMC__
46
- ret = lockf (lock_fd , LOCK_EX | LOCK_NB , 0 ); /* non-blocking */
48
+ int fd ;
49
+ char buffer [MAXPGPATH * 2 + 256 ];
50
+ int ntries ;
51
+ int len ;
52
+ int encoded_pid ;
53
+ pid_t my_pid ,
54
+ my_p_pid ;
55
+
56
+ join_path_components (lock_file , backup_path , BACKUP_CATALOG_PID );
57
+
58
+ /*
59
+ * If the PID in the lockfile is our own PID or our parent's or
60
+ * grandparent's PID, then the file must be stale (probably left over from
61
+ * a previous system boot cycle). We need to check this because of the
62
+ * likelihood that a reboot will assign exactly the same PID as we had in
63
+ * the previous reboot, or one that's only one or two counts larger and
64
+ * hence the lockfile's PID now refers to an ancestor shell process. We
65
+ * allow pg_ctl to pass down its parent shell PID (our grandparent PID)
66
+ * via the environment variable PG_GRANDPARENT_PID; this is so that
67
+ * launching the postmaster via pg_ctl can be just as reliable as
68
+ * launching it directly. There is no provision for detecting
69
+ * further-removed ancestor processes, but if the init script is written
70
+ * carefully then all but the immediate parent shell will be root-owned
71
+ * processes and so the kill test will fail with EPERM. Note that we
72
+ * cannot get a false negative this way, because an existing postmaster
73
+ * would surely never launch a competing postmaster or pg_ctl process
74
+ * directly.
75
+ */
76
+ my_pid = getpid ();
77
+ #ifndef WIN32
78
+ my_p_pid = getppid ();
47
79
#else
48
- ret = flock (lock_fd , LOCK_EX | LOCK_NB ); /* non-blocking */
80
+
81
+ /*
82
+ * Windows hasn't got getppid(), but doesn't need it since it's not using
83
+ * real kill() either...
84
+ */
85
+ my_p_pid = 0 ;
49
86
#endif
50
- if (ret == -1 )
87
+
88
+ /*
89
+ * We need a loop here because of race conditions. But don't loop forever
90
+ * (for example, a non-writable $backup_path directory might cause a failure
91
+ * that won't go away). 100 tries seems like plenty.
92
+ */
93
+ for (ntries = 0 ;; ntries ++ )
51
94
{
52
- if (errno == EWOULDBLOCK )
95
+ /*
96
+ * Try to create the lock file --- O_EXCL makes this atomic.
97
+ *
98
+ * Think not to make the file protection weaker than 0600. See
99
+ * comments below.
100
+ */
101
+ fd = open (lock_file , O_RDWR | O_CREAT | O_EXCL , 0600 );
102
+ if (fd >= 0 )
103
+ break ; /* Success; exit the retry loop */
104
+
105
+ /*
106
+ * Couldn't create the pid file. Probably it already exists.
107
+ */
108
+ if ((errno != EEXIST && errno != EACCES ) || ntries > 100 )
109
+ elog (ERROR , "could not create lock file \"%s\": %s" ,
110
+ lock_file , strerror (errno ));
111
+
112
+ /*
113
+ * Read the file to get the old owner's PID. Note race condition
114
+ * here: file might have been deleted since we tried to create it.
115
+ */
116
+ fd = open (lock_file , O_RDONLY , 0600 );
117
+ if (fd < 0 )
53
118
{
54
- close (lock_fd );
55
- return 1 ;
119
+ if (errno == ENOENT )
120
+ continue ; /* race condition; try again */
121
+ elog (ERROR , "could not open lock file \"%s\": %s" ,
122
+ lock_file , strerror (errno ));
56
123
}
57
- else
124
+ if ((len = read (fd , buffer , sizeof (buffer ) - 1 )) < 0 )
125
+ elog (ERROR , "could not read lock file \"%s\": %s" ,
126
+ lock_file , strerror (errno ));
127
+ close (fd );
128
+
129
+ if (len == 0 )
130
+ elog (ERROR , "lock file \"%s\" is empty" , lock_file );
131
+
132
+ buffer [len ] = '\0' ;
133
+ encoded_pid = atoi (buffer );
134
+
135
+ if (encoded_pid <= 0 )
136
+ elog (ERROR , "bogus data in lock file \"%s\": \"%s\"" ,
137
+ lock_file , buffer );
138
+
139
+ /*
140
+ * Check to see if the other process still exists
141
+ *
142
+ * Per discussion above, my_pid, my_p_pid can be
143
+ * ignored as false matches.
144
+ *
145
+ * Normally kill() will fail with ESRCH if the given PID doesn't
146
+ * exist.
147
+ */
148
+ if (encoded_pid != my_pid && encoded_pid != my_p_pid )
58
149
{
59
- int errno_tmp = errno ;
60
- close (lock_fd );
61
- elog (ERROR , "cannot lock file \"%s\": %s" , id_path ,
62
- strerror (errno_tmp ));
150
+ if (kill (encoded_pid , 0 ) == 0 ||
151
+ (errno != ESRCH && errno != EPERM ))
152
+ elog (ERROR , "lock file \"%s\" already exists" , lock_file );
63
153
}
154
+
155
+ /*
156
+ * Looks like nobody's home. Unlink the file and try again to create
157
+ * it. Need a loop because of possible race condition against other
158
+ * would-be creators.
159
+ */
160
+ if (unlink (lock_file ) < 0 )
161
+ elog (ERROR , "could not remove old lock file \"%s\": %s" ,
162
+ lock_file , strerror (errno ));
163
+ }
164
+
165
+ /*
166
+ * Successfully created the file, now fill it.
167
+ */
168
+ snprintf (buffer , sizeof (buffer ), "%d\n" , my_pid );
169
+
170
+ errno = 0 ;
171
+ if (write (fd , buffer , strlen (buffer )) != strlen (buffer ))
172
+ {
173
+ int save_errno = errno ;
174
+
175
+ close (fd );
176
+ unlink (lock_file );
177
+ /* if write didn't set errno, assume problem is no disk space */
178
+ errno = save_errno ? save_errno : ENOSPC ;
179
+ elog (ERROR , "could not write lock file \"%s\": %s" ,
180
+ lock_file , strerror (errno ));
181
+ }
182
+ if (fsync (fd ) != 0 )
183
+ {
184
+ int save_errno = errno ;
185
+
186
+ close (fd );
187
+ unlink (lock_file );
188
+ errno = save_errno ;
189
+ elog (ERROR , "could not write lock file \"%s\": %s" ,
190
+ lock_file , strerror (errno ));
191
+ }
192
+ if (close (fd ) != 0 )
193
+ {
194
+ int save_errno = errno ;
195
+
196
+ unlink (lock_file );
197
+ errno = save_errno ;
198
+ elog (ERROR , "could not write lock file \"%s\": %s" ,
199
+ lock_file , strerror (errno ));
200
+ }
201
+
202
+ /*
203
+ * Arrange to unlink the lock file(s) at proc_exit.
204
+ */
205
+ if (!exit_hook_registered )
206
+ {
207
+ atexit (unlink_lock_atexit );
208
+ exit_hook_registered = true;
64
209
}
65
210
66
211
if (check_catalog )
@@ -79,16 +224,6 @@ catalog_lock(bool check_catalog)
79
224
return 0 ;
80
225
}
81
226
82
- /*
83
- * Release catalog lock.
84
- */
85
- void
86
- catalog_unlock (void )
87
- {
88
- close (lock_fd );
89
- lock_fd = -1 ;
90
- }
91
-
92
227
/*
93
228
* Create a pgBackup for the backup taken at the given timestamp.
94
229
* If no backup matches, return NULL.
0 commit comments