11
11
*
12
12
*
13
13
* IDENTIFICATION
14
- * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.200 2000/12/18 18:45:04 momjian Exp $
14
+ * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.201 2000/12/20 21:51:52 tgl Exp $
15
15
*
16
16
* NOTES
17
17
*
@@ -180,7 +180,7 @@ static time_t checkpointed = 0;
180
180
181
181
static int Shutdown = NoShutdown ;
182
182
183
- static bool FatalError = false;
183
+ static bool FatalError = false; /* T if recovering from backend crash */
184
184
185
185
/*
186
186
* State for assigning random salts and cancel keys.
@@ -649,7 +649,7 @@ PostmasterMain(int argc, char *argv[])
649
649
pqsignal (SIGTERM , pmdie ); /* wait for children and ShutdownDataBase */
650
650
pqsignal (SIGALRM , SIG_IGN ); /* ignored */
651
651
pqsignal (SIGPIPE , SIG_IGN ); /* ignored */
652
- pqsignal (SIGUSR1 , SIG_IGN ); /* ignored */
652
+ pqsignal (SIGUSR1 , pmdie ); /* currently ignored, but see note in pmdie */
653
653
pqsignal (SIGUSR2 , pmdie ); /* send SIGUSR2, don't die */
654
654
pqsignal (SIGCHLD , reaper ); /* handle child termination */
655
655
pqsignal (SIGTTIN , SIG_IGN ); /* ignored */
@@ -1329,6 +1329,18 @@ pmdie(SIGNAL_ARGS)
1329
1329
1330
1330
switch (postgres_signal_arg )
1331
1331
{
1332
+ case SIGUSR1 :
1333
+ /*
1334
+ * Currently the postmaster ignores SIGUSR1 (maybe it should
1335
+ * do something useful instead?) But we must have some handler
1336
+ * installed for SIGUSR1, not just set it to SIG_IGN. Else, a
1337
+ * freshly spawned backend would likewise have it set to SIG_IGN,
1338
+ * which would mean the backend would ignore any attempt to kill
1339
+ * it before it had gotten as far as setting up its own handler.
1340
+ */
1341
+ errno = save_errno ;
1342
+ return ;
1343
+
1332
1344
case SIGUSR2 :
1333
1345
1334
1346
/*
@@ -1511,7 +1523,7 @@ reaper(SIGNAL_ARGS)
1511
1523
ExitPostmaster (1 );
1512
1524
}
1513
1525
StartupPID = 0 ;
1514
- FatalError = false;
1526
+ FatalError = false; /* done with recovery */
1515
1527
if (Shutdown > NoShutdown )
1516
1528
{
1517
1529
if (ShutdownPID > 0 )
@@ -1539,12 +1551,7 @@ reaper(SIGNAL_ARGS)
1539
1551
/*
1540
1552
* Wait for all children to exit, then reset shmem and StartupDataBase.
1541
1553
*/
1542
- if (DLGetHead (BackendList ))
1543
- {
1544
- errno = save_errno ;
1545
- return ;
1546
- }
1547
- if (StartupPID > 0 || ShutdownPID > 0 )
1554
+ if (DLGetHead (BackendList ) || StartupPID > 0 || ShutdownPID > 0 )
1548
1555
{
1549
1556
errno = save_errno ;
1550
1557
return ;
@@ -1595,21 +1602,18 @@ CleanupProc(int pid,
1595
1602
Dlelem * curr ,
1596
1603
* next ;
1597
1604
Backend * bp ;
1598
- int sig ;
1599
1605
1600
1606
if (DebugLvl )
1601
- {
1602
1607
fprintf (stderr , "%s: CleanupProc: pid %d exited with status %d\n" ,
1603
1608
progname , pid , exitstatus );
1604
- }
1605
1609
1606
1610
/*
1607
1611
* If a backend dies in an ugly way (i.e. exit status not 0) then we
1608
1612
* must signal all other backends to quickdie. If exit status is zero
1609
1613
* we assume everything is hunky dory and simply remove the backend
1610
1614
* from the active backend list.
1611
1615
*/
1612
- if (! exitstatus )
1616
+ if (exitstatus == 0 )
1613
1617
{
1614
1618
curr = DLGetHead (BackendList );
1615
1619
while (curr )
@@ -1628,73 +1632,78 @@ CleanupProc(int pid,
1628
1632
if (pid == CheckPointPID )
1629
1633
{
1630
1634
CheckPointPID = 0 ;
1631
- checkpointed = time (NULL );
1635
+ if (!FatalError )
1636
+ checkpointed = time (NULL );
1632
1637
}
1633
1638
else
1634
- ProcRemove (pid );
1639
+ {
1640
+ /* Why is this done here, and not by the backend itself? */
1641
+ if (!FatalError )
1642
+ ProcRemove (pid );
1643
+ }
1635
1644
1636
1645
return ;
1637
1646
}
1638
1647
1639
1648
if (!FatalError )
1640
1649
{
1650
+ /* Make log entry unless we did so already */
1641
1651
tnow = time (NULL );
1642
1652
fprintf (stderr , "Server process (pid %d) exited with status %d at %s"
1643
1653
"Terminating any active server processes...\n" ,
1644
1654
pid , exitstatus , ctime (& tnow ));
1645
1655
fflush (stderr );
1646
1656
}
1647
- FatalError = true;
1657
+
1648
1658
curr = DLGetHead (BackendList );
1649
1659
while (curr )
1650
1660
{
1651
1661
next = DLGetSucc (curr );
1652
1662
bp = (Backend * ) DLE_VAL (curr );
1653
-
1654
- /*
1655
- * SIGUSR1 is the special signal that says exit without proc_exit
1656
- * and let the user know what's going on. ProcSemaphoreKill()
1657
- * cleans up the backends semaphore. If SendStop is set (-s on
1658
- * command line), then we send a SIGSTOP so that we can core dumps
1659
- * from all backends by hand.
1660
- */
1661
- sig = (SendStop ) ? SIGSTOP : SIGUSR1 ;
1662
1663
if (bp -> pid != pid )
1663
1664
{
1664
- if (DebugLvl )
1665
- fprintf (stderr , "%s: CleanupProc: sending %s to process %d\n" ,
1666
- progname ,
1667
- (sig == SIGUSR1 )
1668
- ? "SIGUSR1" : "SIGSTOP" ,
1669
- bp -> pid );
1670
- kill (bp -> pid , sig );
1665
+ /*
1666
+ * This backend is still alive. Unless we did so already,
1667
+ * tell it to commit hara-kiri.
1668
+ *
1669
+ * SIGUSR1 is the special signal that says exit without proc_exit
1670
+ * and let the user know what's going on. But if SendStop is set
1671
+ * (-s on command line), then we send SIGSTOP instead, so that we
1672
+ * can get core dumps from all backends by hand.
1673
+ */
1674
+ if (!FatalError )
1675
+ {
1676
+ if (DebugLvl )
1677
+ fprintf (stderr , "%s: CleanupProc: sending %s to process %d\n" ,
1678
+ progname ,
1679
+ (SendStop ? "SIGSTOP" : "SIGUSR1" ),
1680
+ bp -> pid );
1681
+ kill (bp -> pid , (SendStop ? SIGSTOP : SIGUSR1 ));
1682
+ }
1671
1683
}
1672
1684
else
1673
1685
{
1674
-
1675
1686
/*
1676
- * I don't like that we call ProcRemove() here, assuming that
1677
- * shmem may be corrupted! But is there another way to free
1678
- * backend semaphores? Actually, I believe that we need not in
1679
- * per backend semaphore at all (we use them to wait on lock
1680
- * only, couldn't we just sigpause?), so probably we'll remove
1681
- * this call from here someday. -- vadim 04-10-1999
1687
+ * Found entry for freshly-dead backend, so remove it.
1688
+ *
1689
+ * Don't call ProcRemove() here, since shmem may be corrupted!
1690
+ * We are going to reinitialize shmem and semaphores anyway
1691
+ * once all the children are dead, so no need for it.
1682
1692
*/
1683
- if (pid == CheckPointPID )
1684
- {
1685
- CheckPointPID = 0 ;
1686
- checkpointed = 0 ;
1687
- }
1688
- else
1689
- ProcRemove (pid );
1690
-
1691
1693
DLRemove (curr );
1692
1694
free (bp );
1693
1695
DLFreeElem (curr );
1694
1696
}
1695
1697
curr = next ;
1696
1698
}
1697
1699
1700
+ if (pid == CheckPointPID )
1701
+ {
1702
+ CheckPointPID = 0 ;
1703
+ checkpointed = 0 ;
1704
+ }
1705
+
1706
+ FatalError = true;
1698
1707
}
1699
1708
1700
1709
/*
0 commit comments