raise Resque specific error to catch when a job is being termed.

hone · hone · commit af82eede2cab · 2012-08-01T08:03:48.000-05:00
This allows a Job class to rescue Resque::TermException to do
quick/clean shutdown work. It inherits from SignalException since it's
still a term that is causing this.
diff --git a/lib/resque/errors.rb b/lib/resque/errors.rb
@@ -7,4 +7,7 @@ class NoClassError < RuntimeError; end
 
   # Raised when a worker was killed while processing a job.
   class DirtyExit < RuntimeError; end
+
+  # Raised when child process is TERM'd so job can rescue this to do shutdown work.
+  class TermException < SignalException; end
 end
diff --git a/lib/resque/worker.rb b/lib/resque/worker.rb
@@ -296,7 +296,7 @@ def register_signal_handlers
     end
 
     def unregister_signal_handlers
-      trap('TERM', 'DEFAULT')
+      trap('TERM') { raise TermException.new("SIGTERM") }
       trap('INT', 'DEFAULT')
 
       begin
diff --git a/test/worker_test.rb b/test/worker_test.rb
@@ -438,71 +438,75 @@ def self.perform
     assert_not_equal original_connection, Resque.redis.client.connection.instance_variable_get("@sock")
   end
 
-  {
-    'cleanup occurs in allotted time' => nil,
-    'cleanup takes too long' => 2
-  }.each do |scenario,rescue_time|
-    test "SIGTERM when #{scenario}" do
-      begin
-        class LongRunningJob
-          @queue = :long_running_job
-          def self.perform( run_time, rescue_time=nil )
-            Resque.redis.client.reconnect # get its own connection
-            Resque.redis.rpush( 'sigterm-test:start', Process.pid )
-            sleep run_time
-            Resque.redis.rpush( 'sigterm-test:result', 'Finished Normally' )
-          rescue SignalException => e
-            Resque.redis.rpush( 'sigterm-test:result', %Q(Caught SignalException: #{e.inspect}))
-            sleep rescue_time unless rescue_time.nil?
-          ensure
-            Resque.redis.rpush( 'sigterm-test:final', 'exiting.' )
+  [SignalException, Resque::TermException].each do |exception|
+    {
+      'cleanup occurs in allotted time' => nil,
+      'cleanup takes too long' => 2
+    }.each do |scenario,rescue_time|
+      test "SIGTERM when #{scenario} while catching #{exception}" do
+        begin
+          eval("class LongRunningJob; @@exception = #{exception}; end")
+          class LongRunningJob
+            @queue = :long_running_job
+
+            def self.perform( run_time, rescue_time=nil )
+              Resque.redis.client.reconnect # get its own connection
+              Resque.redis.rpush( 'sigterm-test:start', Process.pid )
+              sleep run_time
+              Resque.redis.rpush( 'sigterm-test:result', 'Finished Normally' )
+            rescue @@exception => e
+              Resque.redis.rpush( 'sigterm-test:result', %Q(Caught SignalException: #{e.inspect}))
+              sleep rescue_time unless rescue_time.nil?
+            ensure
+              Resque.redis.rpush( 'sigterm-test:final', 'exiting.' )
+            end
           end
-        end
 
-        Resque.enqueue( LongRunningJob, 5, rescue_time )
+          Resque.enqueue( LongRunningJob, 5, rescue_time )
 
-        worker_pid = Kernel.fork do
-          # ensure we actually fork
-          $TESTING = false 
-          # reconnect since we just forked
-          Resque.redis.client.reconnect
+          worker_pid = Kernel.fork do
+            # ensure we actually fork
+            $TESTING = false
+            # reconnect since we just forked
+            Resque.redis.client.reconnect
 
-          worker = Resque::Worker.new(:long_running_job)
-          worker.term_timeout = 1
-          worker.term_child = 1
+            worker = Resque::Worker.new(:long_running_job)
+            worker.term_timeout = 1
+            worker.term_child = 1
 
-          worker.work(0)
-          exit!
-        end
+            worker.work(0)
+            exit!
+          end
 
-        # ensure the worker is started
-        start_status = Resque.redis.blpop( 'sigterm-test:start', 5 )
-        assert_not_nil start_status
-        child_pid = start_status[1].to_i
-        assert_operator child_pid, :>, 0
-
-        # send signal to abort the worker
-        Process.kill('TERM', worker_pid)
-        Process.waitpid(worker_pid)
-
-        # wait to see how it all came down
-        result = Resque.redis.blpop( 'sigterm-test:result', 5 )
-        assert_not_nil result
-        assert !result[1].start_with?('Finished Normally'), 'Job Finished normally. Sleep not long enough?'
-        assert result[1].start_with? 'Caught SignalException', 'Signal exception not raised in child.'
-
-        # ensure that the child pid is no longer running
-        child_still_running = !(`ps -p #{child_pid.to_s} -o pid=`).empty?
-        assert !child_still_running
-
-        # see if post-cleanup occurred. This should happen IFF the rescue_time is less than the term_timeout
-        post_cleanup_occurred = Resque.redis.lpop( 'sigterm-test:final' )
-        assert post_cleanup_occurred, 'post cleanup did not occur. SIGKILL sent too early?' if rescue_time.nil?
-        assert !post_cleanup_occurred, 'post cleanup occurred. SIGKILL sent too late?' unless rescue_time.nil?
-
-      ensure
-        remaining_keys = Resque.redis.keys('sigterm-test:*') || []
-        Resque.redis.del(*remaining_keys) unless remaining_keys.empty?
+          # ensure the worker is started
+          start_status = Resque.redis.blpop( 'sigterm-test:start', 5 )
+          assert_not_nil start_status
+          child_pid = start_status[1].to_i
+          assert_operator child_pid, :>, 0
+
+          # send signal to abort the worker
+          Process.kill('TERM', worker_pid)
+          Process.waitpid(worker_pid)
+
+          # wait to see how it all came down
+          result = Resque.redis.blpop( 'sigterm-test:result', 5 )
+          assert_not_nil result
+          assert !result[1].start_with?('Finished Normally'), 'Job Finished normally. Sleep not long enough?'
+          assert result[1].start_with? 'Caught SignalException', 'Signal exception not raised in child.'
+
+          # ensure that the child pid is no longer running
+          child_still_running = !(`ps -p #{child_pid.to_s} -o pid=`).empty?
+          assert !child_still_running
+
+          # see if post-cleanup occurred. This should happen IFF the rescue_time is less than the term_timeout
+          post_cleanup_occurred = Resque.redis.lpop( 'sigterm-test:final' )
+          assert post_cleanup_occurred, 'post cleanup did not occur. SIGKILL sent too early?' if rescue_time.nil?
+          assert !post_cleanup_occurred, 'post cleanup occurred. SIGKILL sent too late?' unless rescue_time.nil?
+
+        ensure
+          remaining_keys = Resque.redis.keys('sigterm-test:*') || []
+          Resque.redis.del(*remaining_keys) unless remaining_keys.empty?
+        end
       end
     end
   end