Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit bb98b2e

Browse files
committed
Change win32 child-death tracking code to use a thread pool to wait for
child process deaths instead of using one thread per child. This drastically reduces the address space usage and should allow for more backends running. Also change the win32_waitpid functionality to use an I/O completion port for queueing child death notices instead of using a fixed-size array.
1 parent acac68b commit bb98b2e

File tree

2 files changed

+81
-172
lines changed

2 files changed

+81
-172
lines changed

src/backend/postmaster/postmaster.c

+79-171
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*
3838
*
3939
* IDENTIFICATION
40-
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.542 2007/09/26 22:36:30 tgl Exp $
40+
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.543 2007/10/26 21:50:10 mha Exp $
4141
*
4242
* NOTES
4343
*
@@ -331,14 +331,17 @@ static void StartAutovacuumWorker(void);
331331
#ifdef EXEC_BACKEND
332332

333333
#ifdef WIN32
334-
static void win32_AddChild(pid_t pid, HANDLE handle);
335-
static void win32_RemoveChild(pid_t pid);
336334
static pid_t win32_waitpid(int *exitstatus);
337-
static DWORD WINAPI win32_sigchld_waiter(LPVOID param);
335+
static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
338336

339-
static pid_t *win32_childPIDArray;
340-
static HANDLE *win32_childHNDArray;
341-
static unsigned long win32_numChildren = 0;
337+
static HANDLE win32ChildQueue;
338+
339+
typedef struct
340+
{
341+
HANDLE waitHandle;
342+
HANDLE procHandle;
343+
DWORD procId;
344+
} win32_deadchild_waitinfo;
342345

343346
HANDLE PostmasterHandle;
344347
#endif
@@ -899,16 +902,12 @@ PostmasterMain(int argc, char *argv[])
899902
#ifdef WIN32
900903

901904
/*
902-
* Initialize the child pid/HANDLE arrays for signal handling.
905+
* Initialize I/O completion port used to deliver list of dead children.
903906
*/
904-
win32_childPIDArray = (pid_t *)
905-
malloc(mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(pid_t)));
906-
win32_childHNDArray = (HANDLE *)
907-
malloc(mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(HANDLE)));
908-
if (!win32_childPIDArray || !win32_childHNDArray)
907+
win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
908+
if (win32ChildQueue == NULL)
909909
ereport(FATAL,
910-
(errcode(ERRCODE_OUT_OF_MEMORY),
911-
errmsg("out of memory")));
910+
(errmsg("could not create I/O completion port for child queue")));
912911

913912
/*
914913
* Set up a handle that child processes can use to check whether the
@@ -2072,12 +2071,7 @@ reaper(SIGNAL_ARGS)
20722071
#define LOOPHEADER() (exitstatus = status.w_status)
20732072
#else /* WIN32 */
20742073
#define LOOPTEST() ((pid = win32_waitpid(&exitstatus)) > 0)
2075-
/*
2076-
* We need to do this here, and not in CleanupBackend, since this is
2077-
* to be called on all children when we are done with them. Could move
2078-
* to LogChildExit, but that seems like asking for future trouble...
2079-
*/
2080-
#define LOOPHEADER() (win32_RemoveChild(pid))
2074+
#define LOOPHEADER()
20812075
#endif /* WIN32 */
20822076
#endif /* HAVE_WAITPID */
20832077

@@ -3332,28 +3326,18 @@ internal_forkexec(int argc, char *argv[], Port *port)
33323326
int i;
33333327
int j;
33343328
char cmdLine[MAXPGPATH * 2];
3335-
HANDLE childHandleCopy;
3336-
HANDLE waiterThread;
33373329
HANDLE paramHandle;
33383330
BackendParameters *param;
33393331
SECURITY_ATTRIBUTES sa;
33403332
char paramHandleStr[32];
3333+
win32_deadchild_waitinfo *childinfo;
33413334

33423335
/* Make sure caller set up argv properly */
33433336
Assert(argc >= 3);
33443337
Assert(argv[argc] == NULL);
33453338
Assert(strncmp(argv[1], "--fork", 6) == 0);
33463339
Assert(argv[2] == NULL);
33473340

3348-
/* Verify that there is room in the child list */
3349-
if (win32_numChildren >= NUM_BACKENDARRAY_ELEMS)
3350-
{
3351-
elog(LOG, "no room for child entry in backend list");
3352-
/* Report same error as for a fork failure on Unix */
3353-
errno = EAGAIN;
3354-
return -1;
3355-
}
3356-
33573341
/* Set up shared memory for parameter passing */
33583342
ZeroMemory(&sa, sizeof(sa));
33593343
sa.nLength = sizeof(sa);
@@ -3463,34 +3447,34 @@ internal_forkexec(int argc, char *argv[], Port *port)
34633447
return -1;
34643448
}
34653449

3466-
if (!IsUnderPostmaster)
3467-
{
3468-
/* We are the Postmaster creating a child... */
3469-
win32_AddChild(pi.dwProcessId, pi.hProcess);
3470-
}
3471-
3472-
/* Set up the thread to handle the SIGCHLD for this process */
3473-
if (DuplicateHandle(GetCurrentProcess(),
3474-
pi.hProcess,
3475-
GetCurrentProcess(),
3476-
&childHandleCopy,
3477-
0,
3478-
FALSE,
3479-
DUPLICATE_SAME_ACCESS) == 0)
3450+
/*
3451+
* Queue a waiter to signal when this child dies. The wait will be handled automatically
3452+
* by an operating system thread pool.
3453+
*
3454+
* Note: use malloc instead of palloc, since it needs to be thread-safe. Struct will be
3455+
* free()'d from the callback function that runs on a different thread.
3456+
*/
3457+
childinfo = malloc(sizeof(win32_deadchild_waitinfo));
3458+
if (!childinfo)
34803459
ereport(FATAL,
3481-
(errmsg_internal("could not duplicate child handle: error code %d",
3460+
(errcode(ERRCODE_OUT_OF_MEMORY),
3461+
errmsg("out of memory")));
3462+
3463+
childinfo->procHandle = pi.hProcess;
3464+
childinfo->procId = pi.dwProcessId;
3465+
3466+
if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
3467+
pi.hProcess,
3468+
pgwin32_deadchild_callback,
3469+
childinfo,
3470+
INFINITE,
3471+
WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
3472+
ereport(FATAL,
3473+
(errmsg_internal("could not register process for wait: error code %d",
34823474
(int) GetLastError())));
34833475

3484-
waiterThread = CreateThread(NULL, 64 * 1024, win32_sigchld_waiter,
3485-
(LPVOID) childHandleCopy, 0, NULL);
3486-
if (!waiterThread)
3487-
ereport(FATAL,
3488-
(errmsg_internal("could not create sigchld waiter thread: error code %d",
3489-
(int) GetLastError())));
3490-
CloseHandle(waiterThread);
3476+
/* Don't close pi.hProcess here - the wait thread needs access to it */
34913477

3492-
if (IsUnderPostmaster)
3493-
CloseHandle(pi.hProcess);
34943478
CloseHandle(pi.hThread);
34953479

34963480
return pi.dwProcessId;
@@ -4500,137 +4484,61 @@ ShmemBackendArrayRemove(pid_t pid)
45004484

45014485
#ifdef WIN32
45024486

4503-
/*
4504-
* Note: The following three functions must not be interrupted (eg. by
4505-
* signals). As the Postgres Win32 signalling architecture (currently)
4506-
* requires polling, or APC checking functions which aren't used here, this
4507-
* is not an issue.
4508-
*
4509-
* We keep two separate arrays, instead of a single array of pid/HANDLE
4510-
* structs, to avoid having to re-create a handle array for
4511-
* WaitForMultipleObjects on each call to win32_waitpid.
4512-
*/
4513-
4514-
static void
4515-
win32_AddChild(pid_t pid, HANDLE handle)
4516-
{
4517-
Assert(win32_childPIDArray && win32_childHNDArray);
4518-
if (win32_numChildren < NUM_BACKENDARRAY_ELEMS)
4519-
{
4520-
win32_childPIDArray[win32_numChildren] = pid;
4521-
win32_childHNDArray[win32_numChildren] = handle;
4522-
++win32_numChildren;
4523-
}
4524-
else
4525-
ereport(FATAL,
4526-
(errmsg_internal("no room for child entry with pid %lu",
4527-
(unsigned long) pid)));
4528-
}
4529-
4530-
static void
4531-
win32_RemoveChild(pid_t pid)
4532-
{
4533-
int i;
4534-
4535-
Assert(win32_childPIDArray && win32_childHNDArray);
4536-
4537-
for (i = 0; i < win32_numChildren; i++)
4538-
{
4539-
if (win32_childPIDArray[i] == pid)
4540-
{
4541-
CloseHandle(win32_childHNDArray[i]);
4542-
4543-
/* Swap last entry into the "removed" one */
4544-
--win32_numChildren;
4545-
win32_childPIDArray[i] = win32_childPIDArray[win32_numChildren];
4546-
win32_childHNDArray[i] = win32_childHNDArray[win32_numChildren];
4547-
return;
4548-
}
4549-
}
4550-
4551-
ereport(WARNING,
4552-
(errmsg_internal("could not find child entry with pid %lu",
4553-
(unsigned long) pid)));
4554-
}
4555-
45564487
static pid_t
45574488
win32_waitpid(int *exitstatus)
45584489
{
4490+
DWORD dwd;
4491+
ULONG_PTR key;
4492+
OVERLAPPED* ovl;
4493+
45594494
/*
4560-
* Note: Do NOT use WaitForMultipleObjectsEx, as we don't want to run
4561-
* queued APCs here.
4495+
* Check if there are any dead children. If there are, return the pid of the first one that died.
45624496
*/
4563-
int index;
4564-
DWORD exitCode;
4565-
DWORD ret;
4566-
unsigned long offset;
4567-
4568-
Assert(win32_childPIDArray && win32_childHNDArray);
4569-
elog(DEBUG3, "waiting on %lu children", win32_numChildren);
4570-
4571-
for (offset = 0; offset < win32_numChildren; offset += MAXIMUM_WAIT_OBJECTS)
4497+
if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
45724498
{
4573-
unsigned long num = Min(MAXIMUM_WAIT_OBJECTS, win32_numChildren - offset);
4574-
4575-
ret = WaitForMultipleObjects(num, &win32_childHNDArray[offset], FALSE, 0);
4576-
switch (ret)
4577-
{
4578-
case WAIT_FAILED:
4579-
ereport(LOG,
4580-
(errmsg_internal("failed to wait on %lu of %lu children: error code %d",
4581-
num, win32_numChildren, (int) GetLastError())));
4582-
return -1;
4583-
4584-
case WAIT_TIMEOUT:
4585-
/* No children (in this chunk) have finished */
4586-
break;
4587-
4588-
default:
4589-
4590-
/*
4591-
* Get the exit code, and return the PID of, the respective
4592-
* process
4593-
*/
4594-
index = offset + ret - WAIT_OBJECT_0;
4595-
Assert(index >= 0 && index < win32_numChildren);
4596-
if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
4597-
{
4598-
/*
4599-
* If we get this far, this should never happen, but, then
4600-
* again... No choice other than to assume a catastrophic
4601-
* failure.
4602-
*/
4603-
ereport(FATAL,
4604-
(errmsg_internal("failed to get exit code for child %lu",
4605-
(unsigned long) win32_childPIDArray[index])));
4606-
}
4607-
*exitstatus = (int) exitCode;
4608-
return win32_childPIDArray[index];
4609-
}
4499+
*exitstatus = (int)key;
4500+
return dwd;
46104501
}
46114502

4612-
/* No children have finished */
46134503
return -1;
46144504
}
46154505

46164506
/*
4617-
* Note! Code below executes on separate threads, one for
4618-
* each child process created
4507+
* Note! Code below executes on a thread pool! All operations must
4508+
* be thread safe! Note that elog() and friends must *not* be used.
46194509
*/
4620-
static DWORD WINAPI
4621-
win32_sigchld_waiter(LPVOID param)
4510+
static void WINAPI
4511+
pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
46224512
{
4623-
HANDLE procHandle = (HANDLE) param;
4513+
win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *)lpParameter;
4514+
DWORD exitcode;
46244515

4625-
DWORD r = WaitForSingleObject(procHandle, INFINITE);
4516+
if (TimerOrWaitFired)
4517+
return; /* timeout. Should never happen, since we use INFINITE as timeout value. */
46264518

4627-
if (r == WAIT_OBJECT_0)
4628-
pg_queue_signal(SIGCHLD);
4629-
else
4630-
write_stderr("could not wait on child process handle: error code %d\n",
4631-
(int) GetLastError());
4632-
CloseHandle(procHandle);
4633-
return 0;
4519+
/* Remove handle from wait - required even though it's set to wait only once */
4520+
UnregisterWaitEx(childinfo->waitHandle, NULL);
4521+
4522+
if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
4523+
{
4524+
/*
4525+
* Should never happen. Inform user and set a fixed exitcode.
4526+
*/
4527+
write_stderr("could not read exitcode for process\n");
4528+
exitcode = 255;
4529+
}
4530+
4531+
if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR)exitcode, NULL))
4532+
write_stderr("could not post child completion status\n");
4533+
4534+
/* Handle is per-process, so we close it here instead of in the originating thread */
4535+
CloseHandle(childinfo->procHandle);
4536+
4537+
/* Free struct that was allocated before the call to RegisterWaitForSingleObject() */
4538+
free(childinfo);
4539+
4540+
/* Queue SIGCHLD signal */
4541+
pg_queue_signal(SIGCHLD);
46344542
}
46354543

46364544
#endif /* WIN32 */

src/include/port/win32.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
/* $PostgreSQL: pgsql/src/include/port/win32.h,v 1.76 2007/07/25 12:22:53 mha Exp $ */
1+
/* $PostgreSQL: pgsql/src/include/port/win32.h,v 1.77 2007/10/26 21:50:10 mha Exp $ */
22

33
#if defined(_MSC_VER) || defined(__BORLANDC__)
44
#define WIN32_ONLY_COMPILER
55
#endif
66

7+
#define _WIN32_WINNT 0x0500
78
/*
89
* Always build with SSPI support. Keep it as a #define in case
910
* we want a switch to disable it sometime in the future.

0 commit comments

Comments
 (0)