Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 617dc6d

Browse files
committed
Avoid "could not reattach" by providing space for concurrent allocation.
We've long had reports of intermittent "could not reattach to shared memory" errors on Windows. Buildfarm member dory fails that way when PGSharedMemoryReAttach() execution overlaps with creation of a thread for the process's "default thread pool". Fix that by providing a second region to receive asynchronous allocations that would otherwise intrude into UsedShmemSegAddr.

In pgwin32_ReserveSharedMemoryRegion(), stop trying to free reservations landing at incorrect addresses; the caller's next step has been to terminate the affected process.

Back-patch to 9.4 (all supported versions).

Reviewed by Tom Lane. He also did much of the prerequisite research; see commit bcbf234.

Discussion: https://postgr.es/m/20190402135442.GA1173872@rfd.leadboat.com
1 parent 6421011 commit 617dc6d

File tree

3 files changed

+86
-14
lines changed

3 files changed

+86
-14
lines changed

src/backend/port/win32_shmem.c

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,28 @@
1717
#include "storage/ipc.h"
1818
#include "storage/pg_shmem.h"
1919

20+
/*
21+
* Early in a process's life, Windows asynchronously creates threads for the
22+
* process's "default thread pool"
23+
* (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24+
* Occasionally, thread creation allocates a stack after
25+
* PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26+
* mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
27+
* if the allocator preferred the just-released region for allocating the new
28+
* thread stack. We observed such failures in some Windows Server 2016
29+
* configurations. To give the system another region to prefer, reserve and
30+
* release an additional, protective region immediately before reserving or
31+
* releasing shared memory. The idea is that, if the allocator handed out
32+
* REGION1 pages before REGION2 pages on one occasion, it will do so whenever
33+
* both regions are free. Windows Server 2016 exhibits that behavior, and a
34+
* system behaving differently would have less need to protect
35+
* UsedShmemSegAddr. The protective region must be at least large enough for
36+
* one thread stack. However, ten times as much is less than 2% of the 32-bit
37+
* address space and is negligible relative to the 64-bit address space.
38+
*/
39+
#define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
40+
void *ShmemProtectiveRegion = NULL;
41+
2042
HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
2143
void *UsedShmemSegAddr = NULL;
2244
static Size UsedShmemSegSize = 0;
@@ -192,6 +214,12 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
192214
Size orig_size = size;
193215
DWORD flProtect = PAGE_READWRITE;
194216

217+
ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
218+
MEM_RESERVE, PAGE_NOACCESS);
219+
if (ShmemProtectiveRegion == NULL)
220+
elog(FATAL, "could not reserve memory region: error code %lu",
221+
GetLastError());
222+
195223
/* Room for a header? */
196224
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
197225

@@ -370,22 +398,26 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
370398
* an already existing shared memory segment, using the handle inherited from
371399
* the postmaster.
372400
*
373-
* UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
374-
* routine. The caller must have already restored them to the postmaster's
375-
* values.
401+
* ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
402+
* parameters to this routine. The caller must have already restored them to
403+
* the postmaster's values.
376404
*/
377405
void
378406
PGSharedMemoryReAttach(void)
379407
{
380408
PGShmemHeader *hdr;
381409
void *origUsedShmemSegAddr = UsedShmemSegAddr;
382410

411+
Assert(ShmemProtectiveRegion != NULL);
383412
Assert(UsedShmemSegAddr != NULL);
384413
Assert(IsUnderPostmaster);
385414

386415
/*
387-
* Release memory region reservation that was made by the postmaster
416+
* Release memory region reservations made by the postmaster
388417
*/
418+
if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
419+
elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
420+
ShmemProtectiveRegion, GetLastError());
389421
if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
390422
elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
391423
UsedShmemSegAddr, GetLastError());
@@ -414,13 +446,14 @@ PGSharedMemoryReAttach(void)
414446
* The child process startup logic might or might not call PGSharedMemoryDetach
415447
* after this; make sure that it will be a no-op if called.
416448
*
417-
* UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
418-
* routine. The caller must have already restored them to the postmaster's
419-
* values.
449+
* ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
450+
* parameters to this routine. The caller must have already restored them to
451+
* the postmaster's values.
420452
*/
421453
void
422454
PGSharedMemoryNoReAttach(void)
423455
{
456+
Assert(ShmemProtectiveRegion != NULL);
424457
Assert(UsedShmemSegAddr != NULL);
425458
Assert(IsUnderPostmaster);
426459

@@ -447,12 +480,25 @@ PGSharedMemoryNoReAttach(void)
447480
* Rather, this is for subprocesses that have inherited an attachment and want
448481
* to get rid of it.
449482
*
450-
* UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
451-
* routine.
483+
* ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
484+
* parameters to this routine.
452485
*/
453486
void
454487
PGSharedMemoryDetach(void)
455488
{
489+
/*
490+
* Releasing the protective region liberates an unimportant quantity of
491+
* address space, but be tidy.
492+
*/
493+
if (ShmemProtectiveRegion != NULL)
494+
{
495+
if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
496+
elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
497+
ShmemProtectiveRegion, GetLastError());
498+
499+
ShmemProtectiveRegion = NULL;
500+
}
501+
456502
/* Unmap the view, if it's mapped */
457503
if (UsedShmemSegAddr != NULL)
458504
{
@@ -510,29 +556,47 @@ pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
510556
{
511557
void *address;
512558

559+
Assert(ShmemProtectiveRegion != NULL);
513560
Assert(UsedShmemSegAddr != NULL);
514561
Assert(UsedShmemSegSize != 0);
515562

516-
address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
517-
MEM_RESERVE, PAGE_READWRITE);
563+
/* ShmemProtectiveRegion */
564+
address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
565+
PROTECTIVE_REGION_SIZE,
566+
MEM_RESERVE, PAGE_NOACCESS);
518567
if (address == NULL)
519568
{
520569
/* Don't use FATAL since we're running in the postmaster */
521570
elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
522-
UsedShmemSegAddr, hChild, GetLastError());
571+
ShmemProtectiveRegion, hChild, GetLastError());
523572
return false;
524573
}
525-
if (address != UsedShmemSegAddr)
574+
if (address != ShmemProtectiveRegion)
526575
{
527576
/*
528577
* Should never happen - in theory if allocation granularity causes
529578
* strange effects it could, so check just in case.
530579
*
531580
* Don't use FATAL since we're running in the postmaster.
532581
*/
582+
elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
583+
address, ShmemProtectiveRegion);
584+
return false;
585+
}
586+
587+
/* UsedShmemSegAddr */
588+
address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
589+
MEM_RESERVE, PAGE_READWRITE);
590+
if (address == NULL)
591+
{
592+
elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
593+
UsedShmemSegAddr, hChild, GetLastError());
594+
return false;
595+
}
596+
if (address != UsedShmemSegAddr)
597+
{
533598
elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
534599
address, UsedShmemSegAddr);
535-
VirtualFreeEx(hChild, address, 0, MEM_RELEASE);
536600
return false;
537601
}
538602

src/backend/postmaster/postmaster.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ typedef struct
484484
#ifndef WIN32
485485
unsigned long UsedShmemSegID;
486486
#else
487+
void *ShmemProtectiveRegion;
487488
HANDLE UsedShmemSegID;
488489
#endif
489490
void *UsedShmemSegAddr;
@@ -6041,6 +6042,9 @@ save_backend_variables(BackendParameters *param, Port *port,
60416042
param->MyCancelKey = MyCancelKey;
60426043
param->MyPMChildSlot = MyPMChildSlot;
60436044

6045+
#ifdef WIN32
6046+
param->ShmemProtectiveRegion = ShmemProtectiveRegion;
6047+
#endif
60446048
param->UsedShmemSegID = UsedShmemSegID;
60456049
param->UsedShmemSegAddr = UsedShmemSegAddr;
60466050

@@ -6274,6 +6278,9 @@ restore_backend_variables(BackendParameters *param, Port *port)
62746278
MyCancelKey = param->MyCancelKey;
62756279
MyPMChildSlot = param->MyPMChildSlot;
62766280

6281+
#ifdef WIN32
6282+
ShmemProtectiveRegion = param->ShmemProtectiveRegion;
6283+
#endif
62776284
UsedShmemSegID = param->UsedShmemSegID;
62786285
UsedShmemSegAddr = param->UsedShmemSegAddr;
62796286

src/include/storage/pg_shmem.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ typedef enum
6565
extern unsigned long UsedShmemSegID;
6666
#else
6767
extern HANDLE UsedShmemSegID;
68+
extern void *ShmemProtectiveRegion;
6869
#endif
6970
extern void *UsedShmemSegAddr;
7071

0 commit comments

Comments
 (0)