Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
SlideShare a Scribd company logo
CIS 216
Dan Morrill
 Top
    Gets you a list of processes that are consuming the CPU
 htop
    Near real time list of running processes by CPU, includes
     scrolling, and mouse support
 vmstat
    Provides information about processes, memory, paging, I/O,
     traps and CPU
 w/who/finger
    Provides information about users that are consuming
     resources on the computer
 ps (ps -ef)
    Lists all the currently running processes on a Linux computer
 pgrep/pkill
   pgrep <process name> lists the PID of the process based on name
   pkill <process name> sends a specific kill signal (default
    sigterm or shutdown) to a matching process
 free
   Shows the current memory usage of the system. Shows
    physical and swap memory
 mpstat
   mpstat 2 5 - shows five sets of data of global statistics among
    all processors at two second intervals.
   mpstat -P ALL 2 5 - shows 5 sets of statistics for all processors
    at two second intervals.
 iostat
   reports CPU statistics and I/O statistics for devices and partitions
    (including NFS Samba partitions)
 pmap
   This command reports memory map of a process. This
    can be used to find memory usage of the process.
 Set the debug mode for this, you will want it,
 remember what each debug mode switch does
  1. # set -n : Uncomment to check script syntax, without execution.
  2. #     Note: Do not forget to put the comment back in or
  3. #      the shell script will not execute!
  4. # set -x : Uncomment to debug this shell script
 PROC_MON=`basename $0`                  # Defines the script_name variable as the
  file name of this script
  LOGFILE="/home/ganesh/procmon.log"         # Shows log file and where
  [[ ! -s $LOGFILE ]] && touch $LOGFILE   # This checks to see if the file exists
                 # if not it creates one.
  TTY=$(tty)                 # Current tty or pty
  PROCESS="ssh"                # This will define which process to monitor
  SLEEP_TIME="1"                # This is the sleep time in second between

  txtred=$(tput setaf 1) # Red: will indicate a failed process and the information
  txtgrn=$(tput setaf 2) # Green: this is successful process information
  txtylw=$(tput setaf 3) # Yellow: this is used to show cautionary information
  txtrst=$(tput sgr0) # resets text
 function exit_trap     # this is the behavior of the trap
  # Log an ending time for process monitoring
    DATE=$(date +%D)
    TIME=$(date +%T) # Get a new timestamp...
    echo "$DATE @ $TIME: Monitoring for $PROCESS
  terminated" >> $LOGFILE & # this will create an entry in
  the logfile
    echo "$DATE @ $TIME: ${txtred}Monitoring for
  $PROCESS terminated${txtrst}"
  #kill all functions
  kill -9 $(jobs -p) 2>/dev/null
 Set the trap to see if the process exits
 trap 'exit_trap; exit 0' 1 2 3 15

  # this will see if process is running if not will start it

  ps aux | grep "$PROCESS" | grep -v "grep $PROCESS" 
  | grep -v $PROC_MON >/dev/null
   if (( $? != 0 ))
       DATE=$(date +%D)
       TIME=$(date +%T)
       echo "$DATE @ $TIME: $PROCESS is NOT active...starting $PROCESS.." >> $LOGFILE & #
                                # an entry in the logfile
       echo "$DATE @ $TIME: ${txtylw}$PROCESS is NOT active...starting $PROCESS..${txtrst}"
    sleep 1
       service $PROCESS start &
       echo "$DATE @ $TIME: $PROCESS has been started..." >> $LOGFILE & #puts an enrty in logfile
         else # this will say what to do if process is already running
       echo -e "n" # a blank line
       DATE=$(date +%D)
       TIME=$(date +%T)
       echo "$DATE @ $TIME: $PROCESS is currnetly RUNNING..." >> $LOGFILE & # puts entry in logfile
       echo "$DATE @ $TIME: ${txtgrn}$PROCESS is currently RUNNING...${txtrst}"
 while (( RC == 0 )) # this will loop until the return code is not zero
     ps aux | grep $PROCESS | grep -v "grep $PROCESS" 
     | grep -v $PROC_MON >/dev/null 2>&1
     if (( $? != 0 )) # check the return code
     DATE=$(date +%D)
    TIME=$(date +%T)
    echo "$DATE @ $TIME: $PROCESS has STOPPED..." >> $LOGFILE & # entry
  in logfile
       echo "$DATE @ $TIME: ${txtred}$PROCESS has STOPPED...${txtrst}"
    service $PROCESS start &
    echo "$DATE @ $TIME: $PROCESS has RESTARTED..." >> $LOGFILE & #
    echo "$DATE @ $TIME: ${txtgrn}$PROCESS has RESTARTED...${txtrst}"
       sleep 1
      ps aux | grep $PROCESS | grep -v "grep $PROCESS" 
       | grep -v $PROC_MON >/dev/null 2>&1
       if (( $? != 0 ))  # This will check the return code
       DATE=$(date +%D)         # New time stamp
       TIME=$(date +%T)
       echo "$DATE @ $TIME: $PROCESS failed to restart..." >> $LOGFILE
    & #entry in logfile
       echo "$DATE @ $TIME: ${txtred}$PROCESS failed to
       exit 0
      sleep $SLEEP_TIME          # This is needed to reduce CPU Load!!!
 Process is hard coded in the script
   # Process to be monitored
 wait_time="10“
 This is in seconds
 log_file="procmon.log"
 script_failure="0"
   # Monitor process and restart if necessary
    for attempt in 1 2 3
       ps aux | grep "$target" | grep -v "grep $target" 
       | grep -v $script_name >/dev/null
       if [ $? != 0 ]
          echo "$(tput setaf 3)$target is not running. Attempt will be made to restart. This is attempt
    $attempt of 3.$(tput sgr0)"
          echo >>$log_file
          echo "$log_time: $target is not running. Restarting. Attempt $attempt of 3.">>$log_file
          service $target start &
          sleep 2 # Pause to prevent false positives from restart attempt.
    sleep 2 # Pause to prevent false positives from restart attempt.
   detect_failure()
    ps aux | grep "$target" | grep -v "grep $target" 
    | grep -v $script_name >/dev/null
    if [ $? != 0 ]
       echo "$(tput setaf 1)$target is not running after 3 attempts. Process has failed and
    cannot be restarted. $(tput sgr0)" # Report failure to user
       echo "This script will now close."
       echo "">>$log_file
       echo "$log_time: $target cannot be restarted.">>$log_file # Log failure
       script_failure="1" # Set failure flag
       echo "$log_time : $target is running."
       echo "$log_time : $target is running." >> $log_file
 program_closing()
  # Report and log script shutdown
  echo "Closing ProcMon script. No further monitoring of $target will be
  performed." #Reports closing of ProcMon to user
  echo "$(tput setaf 1)$log_time: Monitoring for $target terminated. $(tput sgr0)"
  echo "$log_time: Monitoring for $target terminated.">>$log_file # Logs closing
  of ProcMon to log_file
  echo >> $log_file
  echo "***************" >> $log_file
  echo >> $log_file
  # Ensure this script is properly killed
  kill -9 > /dev/null
   # Trap shutdown attempts to enable logging of shutdown
    trap 'program_closing; exit 0' 1 2 3 15
    # Inform user of purpose of script
    echo "This script will monitor $target to ensure that it is running,"
    echo "and attempt to restart it if it is not. If it is unable to"
    echo "restart after 3 attempts, it will report failure and close."
    sleep 2
    #Perform monitoring
    while [ $script_failure != "1" ]
      process_monitoring # Monitors process and attempts 3 restarts if it fails.
      detect_failure # Reports failure in the event that the process does not restart.
      if [ $script_failure != "1" ]
         sleep $wait_time
    sleep 2
    program_closing # Logs script closure
    exit 0
Process monitoring in UNIX shell scripting

More Related Content

Process monitoring in UNIX shell scripting

  • 2.  Top  Gets you a list of processes that are consuming the CPU  htop  Near real time list of running processes by CPU, includes scrolling, and mouse support  vmstat  Provides information about processes, memory, paging, I/O, traps and CPU  w/who/finger  Provides information about users that are consuming resources on the computer  ps (ps –ef )  Lists all the currently running processes on a Linux computer
  • 3.  pgrep/pkill  pgrep <process name> lists the PID of the process based on name  pkill <process name> sends a specific kill signal (default sigterm or shutdown) to a matching process  free  Shows the current memory usage of the system. Shows physical and swap memory  mpstat  mpstat 2 5 - shows five set of data of global statistics among all processors at two second intervals.  mpstat –P ALL 2 5 - shows 5 sets of statistics for all processors at two second intervals.
  • 4.  iostat  reports CPU statists for devices and partitions (including NFS Samba partitions)  pmap  This command reports memory map of a process. This can be used to find memory usage of the process.
  • 5.  Set the debug mode for this, you will want it, remember what each debug mode switch does 1. # set -n : Uncomment to check script syntax, without execution. 2. # Note: Do not forget to put the comment back in or 3. # the shell script will not execute! 4. # set -x : Uncomment to debug this shell script
  • 6.  PROC_MON=`basename $0` # Defines the script_name variable as the file name of this script LOGFILE="/home/ganesh/procmon.log" # Shows log file and where located [[ ! -s $LOGFILE ]] && touch $LOGFILE # This checks to see if the file exists # if not it creates one. TTY=$(tty) # Current tty or pty PROCESS="ssh" # This will define which process to monitor SLEEP_TIME="1" # This is the sleep time in second between monitoring txtred=$(tput setaf 1) # Red: will indicate a failed process and the information txtgrn=$(tput setaf 2) # Green: this is successful process information txtylw=$(tput setaf 3) # Yellow: this is used to show cautionary information txtrst=$(tput sgr0) # resets text
  • 7.  function exit_trap # this is the behavior of the trap signal { # Log an ending time for process monitoring DATE=$(date +%D) TIME=$(date +%T) # Get a new timestamp... echo "$DATE @ $TIME: Monitoring for $PROCESS terminated" >> $LOGFILE & # this will create an entry in the logfile echo "$DATE @ $TIME: ${txtred}Monitoring for $PROCESS terminated${txtrst}" #kill all functions kill -9 $(jobs -p) 2>/dev/null
  • 8.  Set the trap to see if the process exits  trap 'exit_trap; exit 0' 1 2 3 15 # this will see if process is running if not will start it ps aux | grep "$PROCESS" | grep -v "grep $PROCESS" | grep -v $PROC_MON >/dev/null
  • 9. if (( $? != 0 )) then DATE=$(date +%D) TIME=$(date +%T) echo echo "$DATE @ $TIME: $PROCESS is NOT active...starting $PROCESS.." >> $LOGFILE & # creates # an entry in the logfile echo "$DATE @ $TIME: ${txtylw}$PROCESS is NOT active...starting $PROCESS..${txtrst}" echo sleep 1 service $PROCESS start & echo "$DATE @ $TIME: $PROCESS has been started..." >> $LOGFILE & #puts an enrty in logfile else # this will say what to do if process is already running echo -e "n" # a blank line DATE=$(date +%D) TIME=$(date +%T) echo "$DATE @ $TIME: $PROCESS is currnetly RUNNING..." >> $LOGFILE & # puts entry in logfile echo "$DATE @ $TIME: ${txtgrn}$PROCESS is currently RUNNING...${txtrst}" fi
  • 10.  while (( RC == 0 )) # this will loop until the return code is not zero do ps aux | grep $PROCESS | grep -v "grep $PROCESS" | grep -v $PROC_MON >/dev/null 2>&1 if (( $? != 0 )) # check the return code then echo DATE=$(date +%D) TIME=$(date +%T) echo "$DATE @ $TIME: $PROCESS has STOPPED..." >> $LOGFILE & # entry in logfile echo "$DATE @ $TIME: ${txtred}$PROCESS has STOPPED...${txtrst}" echo service $PROCESS start & echo "$DATE @ $TIME: $PROCESS has RESTARTED..." >> $LOGFILE & # ENTRY IN LOGFILE echo "$DATE @ $TIME: ${txtgrn}$PROCESS has RESTARTED...${txtrst}" sleep 1
  • 11. ps aux | grep $PROCESS | grep -v "grep $PROCESS" | grep -v $PROC_MON >/dev/null 2>&1 if (( $? != 0 )) # This will check the return code then echo DATE=$(date +%D) # New time stamp TIME=$(date +%T) echo "$DATE @ $TIME: $PROCESS failed to restart..." >> $LOGFILE & #entry in logfile echo "$DATE @ $TIME: ${txtred}$PROCESS failed to restart...${txtrst}" exit 0 fi fi sleep $SLEEP_TIME # This is needed to reduce CPU Load!!! done
  • 12.  Process is hard coded in the script  # Process to be monitored target="ssh"
  • 16. # Monitor process and restart if necessary for attempt in 1 2 3 do ps aux | grep "$target" | grep -v "grep $target" | grep -v $script_name >/dev/null if [ $? != 0 ] then log_time=$(date) echo echo "$(tput setaf 3)$target is not running. Attempt will be made to restart. This is attempt $attempt of 3.$(tput sgr0)" echo >>$log_file echo "$log_time: $target is not running. Restarting. Attempt $attempt of 3.">>$log_file echo service $target start & sleep 2 # Pause to prevent false positives from restart attempt. else attempt="3" fi done sleep 2 # Pause to prevent false positives from restart attempt. }
  • 17. detect_failure() { ps aux | grep "$target" | grep -v "grep $target" | grep -v $script_name >/dev/null if [ $? != 0 ] then log_time=$(date) echo echo "$(tput setaf 1)$target is not running after 3 attempts. Process has failed and cannot be restarted. $(tput sgr0)" # Report failure to user echo "This script will now close." echo "">>$log_file echo "$log_time: $target cannot be restarted.">>$log_file # Log failure script_failure="1" # Set failure flag else log_time=$(date) echo echo "$log_time : $target is running." echo "$log_time : $target is running." >> $log_file fi }
  • 18.  program_closing() { # Report and log script shutdown log_time=$(date) echo echo "Closing ProcMon script. No further monitoring of $target will be performed." #Reports closing of ProcMon to user echo echo "$(tput setaf 1)$log_time: Monitoring for $target terminated. $(tput sgr0)" echo echo "$log_time: Monitoring for $target terminated.">>$log_file # Logs closing of ProcMon to log_file echo >> $log_file echo "***************" >> $log_file echo >> $log_file # Ensure this script is properly killed kill -9 > /dev/null }
  • 19. # Trap shutdown attempts to enable logging of shutdown trap 'program_closing; exit 0' 1 2 3 15 # Inform user of purpose of script clear echo echo "This script will monitor $target to ensure that it is running," echo "and attempt to restart it if it is not. If it is unable to" echo "restart after 3 attempts, it will report failure and close." sleep 2 #Perform monitoring while [ $script_failure != "1" ] do process_monitoring # Monitors process and attempts 3 restarts if it fails. detect_failure # Reports failure in the event that the process does not restart. if [ $script_failure != "1" ] then sleep $wait_time fi done sleep 2 program_closing # Logs script closure exit 0