
Commit 63f64d2

Fix postmaster's handling of fork failure for a bgworker process.
This corner case didn't behave nicely at all: the postmaster would (partially) update its state as though the process had started successfully, and be quite confused thereafter. Fix it to act like the worker had crashed, instead.

In passing, refactor so that do_start_bgworker contains all the state-change logic for bgworker launch, rather than just some of it.

Back-patch as far as 9.4. 9.3 contains similar logic, but it's just enough different that I don't feel comfortable applying the patch without more study; and the use of bgworkers in 9.3 was so small that it doesn't seem worth the extra work.

transam/parallel.c is still entirely unprepared for the possibility of bgworker startup failure, but that seems like material for a separate patch.

Discussion: https://postgr.es/m/4905.1492813727@sss.pgh.pa.us
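For illustration only: the following is a minimal, self-contained sketch of the control-flow pattern this commit adopts, namely treating a failed fork() like a worker crash so the supervisor waits out the restart delay before retrying, instead of retrying immediately and most likely hitting the same out-of-memory or fork failure again. The names used here (worker_t, try_start_worker, run_worker, RESTART_DELAY_SEC) are hypothetical and not part of PostgreSQL; the real logic is in do_start_bgworker() and maybe_start_bgworker() in the diff below.

#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#define RESTART_DELAY_SEC 60		/* stands in for bgw_restart_time */

typedef struct
{
	pid_t	pid;			/* 0 if not running */
	time_t	crashed_at;		/* 0 if no pending crash */
} worker_t;

/* Hypothetical worker body; in PostgreSQL this is StartBackgroundWorker(). */
static void
run_worker(void)
{
	_exit(0);
}

/*
 * Try to launch one worker.  On fork() failure, record a crash time so the
 * supervisor waits out the restart delay instead of retrying right away,
 * where it would most likely repeat the same failure.
 */
static bool
try_start_worker(worker_t *w)
{
	pid_t	pid = fork();

	if (pid < 0)
	{
		perror("fork");
		w->crashed_at = time(NULL);	/* treat fork failure like a crash */
		return false;
	}
	if (pid == 0)
		run_worker();			/* child; never returns */
	w->pid = pid;				/* parent: launch succeeded */
	return true;
}

int
main(void)
{
	worker_t	w = {0, 0};

	/* Only (re)try once any restart delay has elapsed. */
	if (w.pid == 0 &&
		(w.crashed_at == 0 ||
		 time(NULL) - w.crashed_at >= RESTART_DELAY_SEC))
	{
		w.crashed_at = 0;
		if (!try_start_worker(&w))
			fprintf(stderr, "will retry after %d seconds\n", RESTART_DELAY_SEC);
	}
	return 0;
}

Unlike this sketch, the real fix also has to undo the partially assigned Backend entry on fork failure (ReleasePostmasterChildSlot(), freeing rw->rw_backend), which is exactly the cleanup the diff below adds to the case -1 branch.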
1 parent 39369b4 commit 63f64d2


src/backend/postmaster/postmaster.c

Lines changed: 77 additions & 31 deletions
@@ -412,6 +412,7 @@ static void TerminateChildren(int signal);
 #define SignalChildren(sig)			SignalSomeChildren(sig, BACKEND_TYPE_ALL)
 
 static int	CountChildren(int target);
+static bool assign_backendlist_entry(RegisteredBgWorker *rw);
 static void maybe_start_bgworker(void);
 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
 static pid_t StartChildProcess(AuxProcType type);
@@ -5491,13 +5492,33 @@ bgworker_forkexec(int shmem_slot)
  * Start a new bgworker.
  * Starting time conditions must have been checked already.
  *
+ * Returns true on success, false on failure.
+ * In either case, update the RegisteredBgWorker's state appropriately.
+ *
  * This code is heavily based on autovacuum.c, q.v.
  */
-static void
+static bool
 do_start_bgworker(RegisteredBgWorker *rw)
 {
 	pid_t		worker_pid;
 
+	Assert(rw->rw_pid == 0);
+
+	/*
+	 * Allocate and assign the Backend element.  Note we must do this before
+	 * forking, so that we can handle out of memory properly.
+	 *
+	 * Treat failure as though the worker had crashed.  That way, the
+	 * postmaster will wait a bit before attempting to start it again; if it
+	 * tried again right away, most likely it'd find itself repeating the
+	 * out-of-memory or fork failure condition.
+	 */
+	if (!assign_backendlist_entry(rw))
+	{
+		rw->rw_crashed_at = GetCurrentTimestamp();
+		return false;
+	}
+
 	ereport(DEBUG1,
 			(errmsg("starting background worker process \"%s\"",
 					rw->rw_worker.bgw_name)));
@@ -5509,9 +5530,17 @@ do_start_bgworker(RegisteredBgWorker *rw)
 #endif
 	{
 		case -1:
+			/* in postmaster, fork failed ... */
 			ereport(LOG,
 					(errmsg("could not fork worker process: %m")));
-			return;
+			/* undo what assign_backendlist_entry did */
+			ReleasePostmasterChildSlot(rw->rw_child_slot);
+			rw->rw_child_slot = 0;
+			free(rw->rw_backend);
+			rw->rw_backend = NULL;
+			/* mark entry as crashed, so we'll try again later */
+			rw->rw_crashed_at = GetCurrentTimestamp();
+			break;
 
 #ifndef EXEC_BACKEND
 		case 0:
@@ -5535,14 +5564,24 @@ do_start_bgworker(RegisteredBgWorker *rw)
 			PostmasterContext = NULL;
 
 			StartBackgroundWorker();
+
+			exit(1);			/* should not get here */
 			break;
 #endif
 		default:
+			/* in postmaster, fork successful ... */
 			rw->rw_pid = worker_pid;
 			rw->rw_backend->pid = rw->rw_pid;
 			ReportBackgroundWorkerPID(rw);
-			break;
+			/* add new worker to lists of backends */
+			dlist_push_head(&BackendList, &rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+			ShmemBackendArrayAdd(rw->rw_backend);
+#endif
+			return true;
 	}
+
+	return false;
 }
 
 /*
@@ -5589,6 +5628,8 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
  * Allocate the Backend struct for a connected background worker, but don't
  * add it to the list of backends just yet.
  *
+ * On failure, return false without changing any worker state.
+ *
  * Some info from the Backend is copied into the passed rw.
  */
 static bool
@@ -5601,14 +5642,6 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
 		ereport(LOG,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of memory")));
-
-		/*
-		 * The worker didn't really crash, but setting this nonzero makes
-		 * postmaster wait a bit before attempting to start it again; if it
-		 * tried again right away, most likely it'd find itself under the same
-		 * memory pressure.
-		 */
-		rw->rw_crashed_at = GetCurrentTimestamp();
 		return false;
 	}
 
@@ -5638,20 +5671,31 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
  * As a side effect, the bgworker control variables are set or reset whenever
  * there are more workers to start after this one, and whenever the overall
 * system state requires it.
+ *
+ * The reason we start at most one worker per call is to avoid consuming the
+ * postmaster's attention for too long when many such requests are pending.
+ * As long as StartWorkerNeeded is true, ServerLoop will not block and will
+ * call this function again after dealing with any other issues.
  */
 static void
 maybe_start_bgworker(void)
 {
 	slist_mutable_iter iter;
 	TimestampTz now = 0;
 
+	/*
+	 * During crash recovery, we have no need to be called until the state
+	 * transition out of recovery.
+	 */
 	if (FatalError)
 	{
 		StartWorkerNeeded = false;
 		HaveCrashedWorker = false;
-		return;					/* not yet */
+		return;
 	}
 
+	/* Don't need to be called again unless we find a reason for it below */
+	StartWorkerNeeded = false;
 	HaveCrashedWorker = false;
 
 	slist_foreach_modify(iter, &BackgroundWorkerList)
@@ -5660,11 +5704,11 @@ maybe_start_bgworker(void)
 
 		rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
 
-		/* already running? */
+		/* ignore if already running */
 		if (rw->rw_pid != 0)
 			continue;
 
-		/* marked for death? */
+		/* if marked for death, clean up and remove from list */
 		if (rw->rw_terminate)
 		{
 			ForgetBackgroundWorker(&iter);
@@ -5686,48 +5730,50 @@ maybe_start_bgworker(void)
 				continue;
 			}
 
+			/* read system time only when needed */
 			if (now == 0)
 				now = GetCurrentTimestamp();
 
 			if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
 											rw->rw_worker.bgw_restart_time * 1000))
 			{
+				/* Set flag to remember that we have workers to start later */
 				HaveCrashedWorker = true;
 				continue;
 			}
 		}
 
 		if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
 		{
-			/* reset crash time before calling assign_backendlist_entry */
+			/* reset crash time before trying to start worker */
 			rw->rw_crashed_at = 0;
 
 			/*
-			 * Allocate and assign the Backend element.  Note we must do this
-			 * before forking, so that we can handle out of memory properly.
+			 * Try to start the worker.
+			 *
+			 * On failure, give up processing workers for now, but set
+			 * StartWorkerNeeded so we'll come back here on the next iteration
+			 * of ServerLoop to try again.  (We don't want to wait, because
+			 * there might be additional ready-to-run workers.)  We could set
+			 * HaveCrashedWorker as well, since this worker is now marked
+			 * crashed, but there's no need because the next run of this
+			 * function will do that.
 			 */
-			if (!assign_backendlist_entry(rw))
+			if (!do_start_bgworker(rw))
+			{
+				StartWorkerNeeded = true;
 				return;
-
-			do_start_bgworker(rw);		/* sets rw->rw_pid */
-
-			dlist_push_head(&BackendList, &rw->rw_backend->elem);
-#ifdef EXEC_BACKEND
-			ShmemBackendArrayAdd(rw->rw_backend);
-#endif
+			}
 
 			/*
-			 * Have ServerLoop call us again.  Note that there might not
-			 * actually *be* another runnable worker, but we don't care all
-			 * that much; we will find out the next time we run.
+			 * Quit, but have ServerLoop call us again to look for additional
+			 * ready-to-run workers.  There might not be any, but we'll find
+			 * out the next time we run.
 			 */
 			StartWorkerNeeded = true;
 			return;
 		}
 	}
-
-	/* no runnable worker found */
-	StartWorkerNeeded = false;
 }
 
 /*
