Skip to content

Commit 5f54f44

Browse files
yaauie authored and hone committed
Improve test case for resque#638 using redis' blocking list operations,
ensure job is hard-killed when it doesn't die gracefully in the specified time
1 parent 7935099 commit 5f54f44

File tree

1 file changed

+58
-51
lines changed

1 file changed

+58
-51
lines changed

test/worker_test.rb

Lines changed: 58 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -438,64 +438,71 @@ def self.perform
438 438
assert_not_equal original_connection, Resque.redis.client.connection.instance_variable_get("@sock")
439 439
end
440 440

441-
442-
test 'SIGTERM' do
443-
begin
444-
class LongRunningJob
445-
@queue = :long_running_job
446-
def self.perform(time)
447-
$child_pid_writer.write Process.pid
448-
$child_pid_writer.close
449-
sleep(time)
450-
rescue SignalException => e
451-
$child_worker_message_writer.write %Q(SignalException caught! #{e.inspect})
452-
$child_worker_message_writer.close
441+
{
442+
'cleanup occurs in allotted time' => nil,
443+
'cleanup takes too long' => 2
444+
}.each do |scenario,rescue_time|
445+
test "SIGTERM when #{scenario}" do
446+
begin
447+
class LongRunningJob
448+
@queue = :long_running_job
449+
def self.perform( run_time, rescue_time=nil )
450+
Resque.redis.client.reconnect # get its own connection
451+
Resque.redis.rpush( 'sigterm-test:start', Process.pid )
452+
sleep run_time
453+
Resque.redis.rpush( 'sigterm-test:result', 'Finished Normally' )
454+
rescue SignalException => e
455+
Resque.redis.rpush( 'sigterm-test:result', %Q(Caught SignalException: #{e.inspect}))
456+
sleep rescue_time unless rescue_time.nil?
457+
ensure
458+
Resque.redis.rpush( 'sigterm-test:final', 'exiting.' )
459+
end
453 460
end
454-
end
455 461

456-
$child_pid_reader, $child_pid_writer = IO.pipe
457-
$child_worker_message_reader, $child_worker_message_writer = IO.pipe
462+
Resque.enqueue( LongRunningJob, 5, rescue_time )
458 463

459-
Resque.enqueue(LongRunningJob, 2)
464+
worker_pid = Kernel.fork do
465+
# ensure we actually fork
466+
$TESTING = false
467+
# reconnect since we just forked
468+
Resque.redis.client.reconnect
460 469

461-
worker_pid = Kernel.fork do
462-
# don't hold up the read-end of the pipe
463-
$child_pid_reader.close
464-
# ensure we actually fork
465-
$TESTING = false
466-
# reconnect since we just forked
467-
Resque.redis.client.reconnect
470+
worker = Resque::Worker.new(:long_running_job)
471+
worker.term_timeout = 1
468 472

469-
worker = Resque::Worker.new(:long_running_job)
470-
worker.term_timeout = 1
473+
worker.work(0)
474+
exit!
475+
end
471 476

472-
worker.work(0)
473-
exit!
477+
# ensure the worker is started
478+
start_status = Resque.redis.blpop( 'sigterm-test:start', 5 )
479+
assert_not_nil start_status
480+
child_pid = start_status[1].to_i
481+
assert_operator child_pid, :>, 0
482+
483+
# send signal to abort the worker
484+
Process.kill('TERM', worker_pid)
485+
Process.waitpid(worker_pid)
486+
487+
# wait to see how it all came down
488+
result = Resque.redis.blpop( 'sigterm-test:result', 5 )
489+
assert_not_nil result
490+
assert !result[1].start_with?('Finished Normally'), 'Job Finished normally. Sleep not long enough?'
491+
assert result[1].start_with? 'Caught SignalException', 'Signal exception not raised in child.'
492+
493+
# ensure that the child pid is no longer running
494+
child_still_running = !(`ps -p #{child_pid.to_s} -o pid=`).empty?
495+
assert !child_still_running
496+
497+
# see if post-cleanup occurred. This should happen IFF the rescue_time is less than the term_timeout
498+
post_cleanup_occurred = Resque.redis.lpop( 'sigterm-test:final' )
499+
assert post_cleanup_occurred, 'post cleanup did not occur. SIGKILL sent too early?' if rescue_time.nil?
500+
assert !post_cleanup_occurred, 'post cleanup occurred. SIGKILL sent too late?' unless rescue_time.nil?
501+
502+
ensure
503+
remaining_keys = Resque.redis.keys('sigterm-test:*') || []
504+
Resque.redis.del(*remaining_keys) unless remaining_keys.empty?
474 505
end
475-
476-
# close writers in parent now that we've forked
477-
$child_pid_writer.close
478-
$child_worker_message_writer.close
479-
480-
# give it a moment to start up the worker
481-
sleep(0.2)
482-
483-
# send signal to abort the worker
484-
Process.kill('TERM', worker_pid)
485-
Process.waitpid(worker_pid)
486-
487-
# try to get the child pid
488-
child_pid = Timeout.timeout(1.0) { $child_pid_reader.read.to_i }
489-
assert child_pid > 0
490-
491-
child_still_running = !(`ps -p #{child_pid.to_s} -o pid=`).empty?
492-
assert !child_still_running
493-
494-
child_exception = $child_worker_message_reader.read
495-
assert_not_equal '', child_exception, 'child did not raise SignalException'
496-
ensure
497-
$child_pid_reader, $child_pid_writer, $child_worker_message_reader, $child_worker_message_writer = nil, nil, nil, nil
498 506
end
499 507
end
500-
501 508
end

0 commit comments

Comments
 (0)