28
28
#include <time.h>
29
29
#include <sys/types.h>
30
30
#include <sys/stat.h>
31
+ #include <sys/wait.h>
31
32
#include <unistd.h>
32
33
33
34
#ifdef HAVE_SYS_RESOURCE_H
@@ -153,10 +154,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
153
154
static pgpid_t get_pgpid (bool is_status_request );
154
155
static char * * readfile (const char * path );
155
156
static void free_readfile (char * * optlines );
156
- static int start_postmaster (void );
157
+ static pgpid_t start_postmaster (void );
157
158
static void read_post_opts (void );
158
159
159
- static PGPing test_postmaster_connection (bool );
160
+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
160
161
static bool postmaster_is_alive (pid_t pid );
161
162
162
163
#if defined(HAVE_GETRLIMIT ) && defined(RLIMIT_CORE )
@@ -419,36 +420,73 @@ free_readfile(char **optlines)
419
420
* start/test/stop routines
420
421
*/
421
422
422
- static int
423
+ /*
424
+ * Start the postmaster and return its PID.
425
+ *
426
+ * Currently, on Windows what we return is the PID of the shell process
427
+ * that launched the postmaster (and, we trust, is waiting for it to exit).
428
+ * So the PID is usable for "is the postmaster still running" checks,
429
+ * but cannot be compared directly to postmaster.pid.
430
+ *
431
+ * On Windows, we also save aside a handle to the shell process in
432
+ * "postmasterProcess", which the caller should close when done with it.
433
+ */
434
+ static pgpid_t
423
435
start_postmaster (void )
424
436
{
425
437
char cmd [MAXPGPATH ];
426
438
427
439
#ifndef WIN32
440
+ pgpid_t pm_pid ;
441
+
442
+ /* Flush stdio channels just before fork, to avoid double-output problems */
443
+ fflush (stdout );
444
+ fflush (stderr );
445
+
446
+ pm_pid = fork ();
447
+ if (pm_pid < 0 )
448
+ {
449
+ /* fork failed */
450
+ write_stderr (_ ("%s: could not start server: %s\n" ),
451
+ progname , strerror (errno ));
452
+ exit (1 );
453
+ }
454
+ if (pm_pid > 0 )
455
+ {
456
+ /* fork succeeded, in parent */
457
+ return pm_pid ;
458
+ }
459
+
460
+ /* fork succeeded, in child */
428
461
429
462
/*
430
463
* Since there might be quotes to handle here, it is easier simply to pass
431
- * everything to a shell to process them.
432
- *
433
- * XXX it would be better to fork and exec so that we would know the child
434
- * postmaster's PID directly; then test_postmaster_connection could use
435
- * the PID without having to rely on reading it back from the pidfile.
464
+ * everything to a shell to process them. Use exec so that the postmaster
465
+ * has the same PID as the current child process.
436
466
*/
437
467
if (log_file != NULL )
438
- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 & " ,
468
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
439
469
exec_path , pgdata_opt , post_opts ,
440
470
DEVNULL , log_file );
441
471
else
442
- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" 2>&1 & " ,
472
+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" 2>&1" ,
443
473
exec_path , pgdata_opt , post_opts , DEVNULL );
444
474
445
- return system (cmd );
475
+ (void ) execl ("/bin/sh" , "/bin/sh" , "-c" , cmd , (char * ) NULL );
476
+
477
+ /* exec failed */
478
+ write_stderr (_ ("%s: could not start server: %s\n" ),
479
+ progname , strerror (errno ));
480
+ exit (1 );
481
+
482
+ return 0 ; /* keep dumb compilers quiet */
483
+
446
484
#else /* WIN32 */
447
485
448
486
/*
449
- * On win32 we don't use system(). So we don't need to use & (which would
450
- * be START /B on win32). However, we still call the shell ( CMD.EXE) with
451
- * it to handle redirection etc .
487
+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
488
+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
489
+ * "exec", so we don't get to find out the postmaster's PID immediately .
452
490
*/
453
491
PROCESS_INFORMATION pi ;
454
492
@@ -460,10 +498,15 @@ start_postmaster(void)
460
498
exec_path , pgdata_opt , post_opts , DEVNULL );
461
499
462
500
if (!CreateRestrictedProcess (cmd , & pi , false))
463
- return GetLastError ();
464
- CloseHandle (pi .hProcess );
501
+ {
502
+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
503
+ progname , (unsigned long ) GetLastError ());
504
+ exit (1 );
505
+ }
506
+ /* Don't close command process handle here; caller must do so */
507
+ postmasterProcess = pi .hProcess ;
465
508
CloseHandle (pi .hThread );
466
- return 0 ;
509
+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
467
510
#endif /* WIN32 */
468
511
}
469
512
@@ -472,15 +515,21 @@ start_postmaster(void)
472
515
/*
473
516
* Find the pgport and try a connection
474
517
*
518
+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
519
+ * it may be the PID of an ancestor shell process, so we can't check the
520
+ * contents of postmaster.pid quite as carefully.
521
+ *
522
+ * On Windows, the static variable postmasterProcess is an implicit argument
523
+ * to this routine; it contains a handle to the postmaster process or an
524
+ * ancestor shell process thereof.
525
+ *
475
526
* Note that the do_checkpoint parameter enables a Windows service control
476
527
* manager checkpoint; it has nothing to do with database checkpoints.
477
528
*/
478
529
static PGPing
479
- test_postmaster_connection (bool do_checkpoint )
530
+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
480
531
{
481
532
PGPing ret = PQPING_NO_RESPONSE ;
482
- bool found_stale_pidfile = false;
483
- pgpid_t pm_pid = 0 ;
484
533
char connstr [MAXPGPATH * 2 + 256 ];
485
534
int i ;
486
535
@@ -535,29 +584,27 @@ test_postmaster_connection(bool do_checkpoint)
535
584
optlines [5 ] != NULL )
536
585
{
537
586
/* File is complete enough for us, parse it */
538
- long pmpid ;
587
+ pgpid_t pmpid ;
539
588
time_t pmstart ;
540
589
541
590
/*
542
- * Make sanity checks. If it's for a standalone backend
543
- * (negative PID), or the recorded start time is before
544
- * pg_ctl started, then either we are looking at the wrong
545
- * data directory, or this is a pre-existing pidfile that
546
- * hasn't (yet?) been overwritten by our child postmaster.
547
- * Allow 2 seconds slop for possible cross-process clock
548
- * skew.
591
+ * Make sanity checks. If it's for the wrong PID, or the
592
+ * recorded start time is before pg_ctl started, then
593
+ * either we are looking at the wrong data directory, or
594
+ * this is a pre-existing pidfile that hasn't (yet?) been
595
+ * overwritten by our child postmaster. Allow 2 seconds
596
+ * slop for possible cross-process clock skew.
549
597
*/
550
598
pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
551
599
pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
552
- if (pmpid <= 0 || pmstart < start_time - 2 )
553
- {
554
- /*
555
- * Set flag to report stale pidfile if it doesn't get
556
- * overwritten before we give up waiting.
557
- */
558
- found_stale_pidfile = true;
559
- }
560
- else
600
+ if (pmstart >= start_time - 2 &&
601
+ #ifndef WIN32
602
+ pmpid == pm_pid
603
+ #else
604
+ /* Windows can only reject standalone-backend PIDs */
605
+ pmpid > 0
606
+ #endif
607
+ )
561
608
{
562
609
/*
563
610
* OK, seems to be a valid pidfile from our child.
@@ -567,9 +614,6 @@ test_postmaster_connection(bool do_checkpoint)
567
614
char * hostaddr ;
568
615
char host_str [MAXPGPATH ];
569
616
570
- found_stale_pidfile = false;
571
- pm_pid = (pgpid_t ) pmpid ;
572
-
573
617
/*
574
618
* Extract port number and host string to use. Prefer
575
619
* using Unix socket if available.
@@ -635,42 +679,23 @@ test_postmaster_connection(bool do_checkpoint)
635
679
}
636
680
637
681
/*
638
- * The postmaster should create postmaster.pid very soon after being
639
- * started. If it's not there after we've waited 5 or more seconds,
640
- * assume startup failed and give up waiting. (Note this covers both
641
- * cases where the pidfile was never created, and where it was created
642
- * and then removed during postmaster exit.) Also, if there *is* a
643
- * file there but it appears stale, issue a suitable warning and give
644
- * up waiting.
682
+ * Check whether the child postmaster process is still alive. This
683
+ * lets us exit early if the postmaster fails during startup.
684
+ *
685
+ * On Windows, we may be checking the postmaster's parent shell, but
686
+ * that's fine for this purpose.
645
687
*/
646
- if ( i >= 5 )
688
+ #ifndef WIN32
647
689
{
648
- struct stat statbuf ;
690
+ int exitstatus ;
649
691
650
- if (stat (pid_file , & statbuf ) != 0 )
651
- {
652
- if (errno != ENOENT )
653
- write_stderr (_ ("\n%s: could not stat file \"%s\": %s\n" ),
654
- progname , pid_file , strerror (errno ));
655
- return PQPING_NO_RESPONSE ;
656
- }
657
-
658
- if (found_stale_pidfile )
659
- {
660
- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
661
- progname );
692
+ if (waitpid ((pid_t ) pm_pid , & exitstatus , WNOHANG ) == (pid_t ) pm_pid )
662
693
return PQPING_NO_RESPONSE ;
663
- }
664
694
}
665
-
666
- /*
667
- * If we've been able to identify the child postmaster's PID, check
668
- * the process is still alive. This covers cases where the postmaster
669
- * successfully created the pidfile but then crashed without removing
670
- * it.
671
- */
672
- if (pm_pid > 0 && !postmaster_is_alive ((pid_t ) pm_pid ))
695
+ #else
696
+ if (WaitForSingleObject (postmasterProcess , 0 ) == WAIT_OBJECT_0 )
673
697
return PQPING_NO_RESPONSE ;
698
+ #endif
674
699
675
700
/* No response, or startup still in process; wait */
676
701
#if defined(WIN32 )
@@ -836,7 +861,7 @@ static void
836
861
do_start (void )
837
862
{
838
863
pgpid_t old_pid = 0 ;
839
- int exitcode ;
864
+ pgpid_t pm_pid ;
840
865
841
866
if (ctl_command != RESTART_COMMAND )
842
867
{
@@ -876,19 +901,13 @@ do_start(void)
876
901
}
877
902
#endif
878
903
879
- exitcode = start_postmaster ();
880
- if (exitcode != 0 )
881
- {
882
- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
883
- progname , exitcode );
884
- exit (1 );
885
- }
904
+ pm_pid = start_postmaster ();
886
905
887
906
if (do_wait )
888
907
{
889
908
print_msg (_ ("waiting for server to start..." ));
890
909
891
- switch (test_postmaster_connection (false))
910
+ switch (test_postmaster_connection (pm_pid , false))
892
911
{
893
912
case PQPING_OK :
894
913
print_msg (_ (" done\n" ));
@@ -914,6 +933,12 @@ do_start(void)
914
933
}
915
934
else
916
935
print_msg (_ ("server starting\n" ));
936
+
937
+ #ifdef WIN32
938
+ /* Now we don't need the handle to the shell process anymore */
939
+ CloseHandle (postmasterProcess );
940
+ postmasterProcess = INVALID_HANDLE_VALUE ;
941
+ #endif
917
942
}
918
943
919
944
@@ -1585,7 +1610,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1585
1610
if (do_wait )
1586
1611
{
1587
1612
write_eventlog (EVENTLOG_INFORMATION_TYPE , _ ("Waiting for server startup...\n" ));
1588
- if (test_postmaster_connection (true) != PQPING_OK )
1613
+ if (test_postmaster_connection (postmasterPID , true) != PQPING_OK )
1589
1614
{
1590
1615
write_eventlog (EVENTLOG_ERROR_TYPE , _ ("Timed out waiting for server startup\n" ));
1591
1616
pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1606,10 +1631,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
1606
1631
{
1607
1632
/*
1608
1633
* status.dwCheckPoint can be incremented by
1609
- * test_postmaster_connection(true), so it might not start
1610
- * from 0.
1634
+ * test_postmaster_connection(), so it might not start from 0.
1611
1635
*/
1612
- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;;
1636
+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
1613
1637
1614
1638
kill (postmasterPID , SIGINT );
1615
1639
0 commit comments