From 48f71af09704e3f8dd34f7c73e576ad85ff3238b Mon Sep 17 00:00:00 2001 From: Matt Koscica Date: Mon, 12 Aug 2013 01:41:50 +0200 Subject: [PATCH 1/4] Abort the kill loop in do_stop if $self->pid changes midway. If we're running in a high-availability situation, this prevents do_stop from terminating a standby worker, in addition to the currently exiting active worker, from getting taken out by the kill loop, after it has promoted itself to the active worker as a result of running `$main_worker stop`. We do this by caching the PID we first encounter, and aborting the kill loop immediately if we see it change while we're inside the kill loop. --- lib/Daemon/Control.pm | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/Daemon/Control.pm b/lib/Daemon/Control.pm index 74a35a3..1ae35b5 100644 --- a/lib/Daemon/Control.pm +++ b/lib/Daemon/Control.pm @@ -311,9 +311,9 @@ sub read_pid { } sub pid_running { - my ( $self ) = @_; + my ( $self, $pid ) = @_; - $self->read_pid; + $pid ||= $self->read_pid; return 0 unless $self->pid >= 1; return 0 unless kill 0, $self->pid; @@ -422,22 +422,23 @@ sub do_stop { my ( $self ) = @_; $self->read_pid; + my $target_pid = $self->pid; - if ( $self->pid && $self->pid_running ) { + if ( $target_pid > 1 && $self->pid_running($target_pid) ) { foreach my $signal ( qw(TERM TERM INT KILL) ) { - $self->trace( "Sending $signal signal to pid ", $self->pid, "..." ); - kill $signal => $self->pid; + $self->trace( "Sending $signal signal to pid $target_pid..." ); + kill $signal => $target_pid; for (1..$self->kill_timeout) { # abort early if the process is now stopped - $self->trace('checking if pid ', $self->pid, ' is still running...'); - last if not $self->pid_running; + $self->trace("checking if pid $target_pid is still running..."); + last if not $self->pid_running($target_pid); sleep 1; } - last unless $self->pid_running; + last unless $self->pid_running($target_pid); } - if ( $self->pid_running ) { + if ( $self->pid_running($target_pid) ) { $self->pretty_print( "Failed to Stop", "red" ); exit 1; } From 926b547adab6b014b8d603a63d820d945e994d5f Mon Sep 17 00:00:00 2001 From: Matt Koscica Date: Wed, 21 Aug 2013 20:59:05 +0200 Subject: [PATCH 2/4] Change do_stop so it won't unlink pid_file if the pid changes midway. Originally the code would kill pidfiles for standbys being promoted to active workers in HA scenarios. --- lib/Daemon/Control.pm | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/lib/Daemon/Control.pm b/lib/Daemon/Control.pm index 1ae35b5..2c9cfcd 100644 --- a/lib/Daemon/Control.pm +++ b/lib/Daemon/Control.pm @@ -422,23 +422,32 @@ sub do_stop { my ( $self ) = @_; $self->read_pid; - my $target_pid = $self->pid; - if ( $target_pid > 1 && $self->pid_running($target_pid) ) { + my $start_pid = $self->pid; + + # Probably don't want to send anything to init(1). + return unless $start_pid > 1; + + my $current_pid = $self->read_pid; + + if ( $self->pid_running($start_pid) ) { + SIGNAL: foreach my $signal ( qw(TERM TERM INT KILL) ) { - $self->trace( "Sending $signal signal to pid $target_pid..." ); - kill $signal => $target_pid; + $self->trace( "Sending $signal signal to pid $start_pid..." ); + kill $signal => $start_pid; for (1..$self->kill_timeout) { # abort early if the process is now stopped - $self->trace("checking if pid $target_pid is still running..."); - last if not $self->pid_running($target_pid); + $self->trace("checking if pid $start_pid is still running..."); + last if not $self->pid_running($start_pid); + $current_pid = $self->read_pid; + last if $current_pid != $start_pid; sleep 1; } - last unless $self->pid_running($target_pid); + last unless $self->pid_running($start_pid); } - if ( $self->pid_running($target_pid) ) { + if ( $self->pid_running($start_pid) ) { $self->pretty_print( "Failed to Stop", "red" ); exit 1; } @@ -447,8 +456,12 @@ sub do_stop { $self->pretty_print( "Not Running", "red" ); } - # Clean up the PID file on stop. - unlink($self->pid_file) if $self->pid_file; + # Clean up the PID file on stop, unless + # it doesn't match what we started with. + + if ( $self->pid_file ) { + unlink($self->pid_file) if $current_pid == $start_pid; + } } sub do_restart { From 63ffb672dc71daa0aa5545a47b4e8f12a1f0d96a Mon Sep 17 00:00:00 2001 From: Matt Koscica Date: Wed, 21 Aug 2013 21:06:46 +0200 Subject: [PATCH 3/4] Remove un-necessary variable. --- lib/Daemon/Control.pm | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/Daemon/Control.pm b/lib/Daemon/Control.pm index 2c9cfcd..fd1440c 100644 --- a/lib/Daemon/Control.pm +++ b/lib/Daemon/Control.pm @@ -428,8 +428,6 @@ sub do_stop { # Probably don't want to send anything to init(1). return unless $start_pid > 1; - my $current_pid = $self->read_pid; - if ( $self->pid_running($start_pid) ) { SIGNAL: foreach my $signal ( qw(TERM TERM INT KILL) ) { @@ -441,8 +439,7 @@ sub do_stop { # abort early if the process is now stopped $self->trace("checking if pid $start_pid is still running..."); last if not $self->pid_running($start_pid); - $current_pid = $self->read_pid; - last if $current_pid != $start_pid; + last if $self->read_pid != $start_pid; sleep 1; } last unless $self->pid_running($start_pid); @@ -460,7 +457,7 @@ sub do_stop { # it doesn't match what we started with. if ( $self->pid_file ) { - unlink($self->pid_file) if $current_pid == $start_pid; + unlink($self->pid_file) if $self->read_pid == $start_pid; } } From 777672fa549e28a477c446dbf48b611fb62705e7 Mon Sep 17 00:00:00 2001 From: Matt Koscica Date: Thu, 22 Aug 2013 12:48:44 +0200 Subject: [PATCH 4/4] Remove check against read_pid, which could result in unkilled processes. Also expand comment about pid_file removal. --- lib/Daemon/Control.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Daemon/Control.pm b/lib/Daemon/Control.pm index fd1440c..f59b1ba 100644 --- a/lib/Daemon/Control.pm +++ b/lib/Daemon/Control.pm @@ -422,7 +422,6 @@ sub do_stop { my ( $self ) = @_; $self->read_pid; - my $start_pid = $self->pid; # Probably don't want to send anything to init(1). @@ -439,7 +438,6 @@ sub do_stop { # abort early if the process is now stopped $self->trace("checking if pid $start_pid is still running..."); last if not $self->pid_running($start_pid); - last if $self->read_pid != $start_pid; sleep 1; } last unless $self->pid_running($start_pid); @@ -453,8 +451,10 @@ sub do_stop { $self->pretty_print( "Not Running", "red" ); } - # Clean up the PID file on stop, unless - # it doesn't match what we started with. + # Clean up the PID file on stop, unless the pid + # doesn't match $start_pid (perhaps a standby + # worker stepped in to take over from the one + # that was just terminated). if ( $self->pid_file ) { unlink($self->pid_file) if $self->read_pid == $start_pid;