Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 610747d

Browse files
committed
Cope with EINVAL and EIDRM shmat() failures in PGSharedMemoryAttach.
There's a very old race condition in our code to see whether a pre-existing shared memory segment is still in use by a conflicting postmaster: it's possible for the other postmaster to remove the segment in between our shmctl() and shmat() calls. It's a narrow window, and there's no risk unless both postmasters are using the same port number, but that's possible during parallelized "make check" tests. (Note that while the TAP tests take some pains to choose a randomized port number, pg_regress doesn't.) If it does happen, we treated that as an unexpected case and errored out. To fix, allow EINVAL to be treated as segment-not-present, and the same for EIDRM on Linux. AFAICS, the considerations here are basically identical to the checks for acceptable shmctl() failures, so I documented and coded it that way. While at it, adjust PGSharedMemoryAttach's API to remove its undocumented dependency on UsedShmemSegAddr in favor of passing the attach address explicitly. This makes it easier to be sure we're using a null shmaddr when probing for segment conflicts (thus avoiding questions about what EINVAL means). I don't think there was a bug there, but it required fragile assumptions about the state of UsedShmemSegAddr during PGSharedMemoryIsInUse. Commit c098509 may have made this failure more probable by applying the conflicting-segment tests more often. Hence, back-patch to all supported branches, as that was. Discussion: https://postgr.es/m/22224.1557340366@sss.pgh.pa.us
1 parent c65bcfe commit 610747d

File tree

1 file changed

+47
-23
lines changed

1 file changed

+47
-23
lines changed

src/backend/port/sysv_shmem.c

+47-23
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
103103
static void IpcMemoryDetach(int status, Datum shmaddr);
104104
static void IpcMemoryDelete(int status, Datum shmId);
105105
static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId,
106+
void *attachAt,
106107
PGShmemHeader **addr);
107108

108109

@@ -310,7 +311,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
310311
PGShmemHeader *memAddress;
311312
IpcMemoryState state;
312313

313-
state = PGSharedMemoryAttach((IpcMemoryId) id2, &memAddress);
314+
state = PGSharedMemoryAttach((IpcMemoryId) id2, NULL, &memAddress);
314315
if (memAddress && shmdt(memAddress) < 0)
315316
elog(LOG, "shmdt(%p) failed: %m", memAddress);
316317
switch (state)
@@ -326,9 +327,17 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
326327
return true;
327328
}
328329

329-
/* See comment at IpcMemoryState. */
330+
/*
331+
* Test for a segment with id shmId; see comment at IpcMemoryState.
332+
*
333+
* If the segment exists, we'll attempt to attach to it, using attachAt
334+
* if that's not NULL (but it's best to pass NULL if possible).
335+
*
336+
* *addr is set to the segment memory address if we attached to it, else NULL.
337+
*/
330338
static IpcMemoryState
331339
PGSharedMemoryAttach(IpcMemoryId shmId,
340+
void *attachAt,
332341
PGShmemHeader **addr)
333342
{
334343
struct shmid_ds shmStat;
@@ -338,8 +347,7 @@ PGSharedMemoryAttach(IpcMemoryId shmId,
338347
*addr = NULL;
339348

340349
/*
341-
* We detect whether a shared memory segment is in use by seeing whether
342-
* it (a) exists and (b) has any processes attached to it.
350+
* First, try to stat the shm segment ID, to see if it exists at all.
343351
*/
344352
if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
345353
{
@@ -372,34 +380,48 @@ PGSharedMemoryAttach(IpcMemoryId shmId,
372380
#endif
373381

374382
/*
375-
* Otherwise, we had better assume that the segment is in use. The
376-
* only likely case is EIDRM, which implies that the segment has been
377-
* IPC_RMID'd but there are still processes attached to it.
383+
* Otherwise, we had better assume that the segment is in use. The
384+
* only likely case is (non-Linux, assumed spec-compliant) EIDRM,
385+
* which implies that the segment has been IPC_RMID'd but there are
386+
* still processes attached to it.
378387
*/
379388
return SHMSTATE_ANALYSIS_FAILURE;
380389
}
381390

382391
/*
383392
* Try to attach to the segment and see if it matches our data directory.
384393
* This avoids shmid-conflict problems on machines that are running
385-
* several postmasters under the same userid.
394+
* several postmasters under the same userid and port number. (That would
395+
* not ordinarily happen in production, but it can happen during parallel
396+
* testing. Since our test setups don't open any TCP ports on Unix, such
397+
* cases don't conflict otherwise.)
386398
*/
387399
if (stat(DataDir, &statbuf) < 0)
388400
return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */
389401

390-
/*
391-
* Attachment fails if we have no write permission. Since that will never
392-
* happen with Postgres IPCProtection, such a failure shows the segment is
393-
* not a Postgres segment. If attachment fails for some other reason, be
394-
* conservative.
395-
*/
396-
hdr = (PGShmemHeader *) shmat(shmId, UsedShmemSegAddr, PG_SHMAT_FLAGS);
402+
hdr = (PGShmemHeader *) shmat(shmId, attachAt, PG_SHMAT_FLAGS);
397403
if (hdr == (PGShmemHeader *) -1)
398404
{
405+
/*
406+
* Attachment failed. The cases we're interested in are the same as
407+
* for the shmctl() call above. In particular, note that the owning
408+
* postmaster could have terminated and removed the segment between
409+
* shmctl() and shmat().
410+
*
411+
* If attachAt isn't NULL, it's possible that EINVAL reflects a
412+
* problem with that address not a vanished segment, so it's best to
413+
* pass NULL when probing for conflicting segments.
414+
*/
415+
if (errno == EINVAL)
416+
return SHMSTATE_ENOENT; /* segment disappeared */
399417
if (errno == EACCES)
400-
return SHMSTATE_FOREIGN;
401-
else
402-
return SHMSTATE_ANALYSIS_FAILURE;
418+
return SHMSTATE_FOREIGN; /* must be non-Postgres */
419+
#ifdef HAVE_LINUX_EIDRM_BUG
420+
if (errno == EIDRM)
421+
return SHMSTATE_ENOENT; /* segment disappeared */
422+
#endif
423+
/* Otherwise, be conservative. */
424+
return SHMSTATE_ANALYSIS_FAILURE;
403425
}
404426
*addr = hdr;
405427

@@ -414,6 +436,11 @@ PGSharedMemoryAttach(IpcMemoryId shmId,
414436
return SHMSTATE_FOREIGN;
415437
}
416438

439+
/*
440+
* It does match our data directory, so now test whether any processes are
441+
* still attached to it. (We are, now, but the shm_nattch result is from
442+
* before we attached to it.)
443+
*/
417444
return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
418445
}
419446

@@ -629,9 +656,6 @@ PGSharedMemoryCreate(Size size, int port,
629656
else
630657
sysvsize = size;
631658

632-
/* Make sure PGSharedMemoryAttach doesn't fail without need */
633-
UsedShmemSegAddr = NULL;
634-
635659
/*
636660
* Loop till we find a free IPC key. Trust CreateDataDirLockFile() to
637661
* ensure no more than one postmaster per data directory can enter this
@@ -665,7 +689,7 @@ PGSharedMemoryCreate(Size size, int port,
665689
state = SHMSTATE_FOREIGN;
666690
}
667691
else
668-
state = PGSharedMemoryAttach(shmid, &oldhdr);
692+
state = PGSharedMemoryAttach(shmid, NULL, &oldhdr);
669693

670694
switch (state)
671695
{
@@ -791,7 +815,7 @@ PGSharedMemoryReAttach(void)
791815
if (shmid < 0)
792816
state = SHMSTATE_FOREIGN;
793817
else
794-
state = PGSharedMemoryAttach(shmid, &hdr);
818+
state = PGSharedMemoryAttach(shmid, UsedShmemSegAddr, &hdr);
795819
if (state != SHMSTATE_ATTACHED)
796820
elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
797821
(int) UsedShmemSegID, UsedShmemSegAddr);

0 commit comments

Comments
 (0)