6
6
import commands
7
7
import random
8
8
9
- from pyres .exceptions import NoQueueError , TimeoutError
9
+ from pyres .exceptions import NoQueueError , JobError , TimeoutError , CrashError
10
10
from pyres .job import Job
11
11
from pyres import ResQ , Stat , __version__
12
12
@@ -128,8 +128,6 @@ def work(self, interval=5):
128
128
that job to make sure another worker won't run it, then *forks* itself to
129
129
work on that job.
130
130
131
- Finally, the ``process`` method actually processes the job by eventually calling the Job instance's ``perform`` method.
132
-
133
131
"""
134
132
self ._setproctitle ("Starting" )
135
133
self .startup ()
@@ -142,63 +140,7 @@ def work(self, interval=5):
142
140
job = self .reserve (interval )
143
141
144
142
if job :
145
- logger .debug ('picked up job' )
146
- logger .debug ('job details: %s' % job )
147
- self .before_fork (job )
148
- self .child = os .fork ()
149
- if self .child :
150
- self ._setproctitle ("Forked %s at %s" %
151
- (self .child ,
152
- datetime .datetime .now ()))
153
- logger .info ('Forked %s at %s' % (self .child ,
154
- datetime .datetime .now ()))
155
-
156
- try :
157
- start = datetime .datetime .now ()
158
-
159
- # waits for the result or times out
160
- while True :
161
- result = os .waitpid (self .child , os .WNOHANG )
162
- if result != (0 , 0 ):
163
- break
164
- time .sleep (0.5 )
165
-
166
- now = datetime .datetime .now ()
167
- if self .timeout and ((now - start ).seconds > self .timeout ):
168
- os .kill (self .child , signal .SIGKILL )
169
- os .waitpid (- 1 , os .WNOHANG )
170
- raise TimeoutError ("Timed out after %d seconds" % self .timeout )
171
-
172
- except OSError as ose :
173
- import errno
174
-
175
- if ose .errno != errno .EINTR :
176
- raise ose
177
-
178
- except TimeoutError as e :
179
- exceptionType , exceptionValue , exceptionTraceback = sys .exc_info ()
180
- logger .exception ("%s timed out: %s" % (job , e ))
181
- job .fail (exceptionTraceback )
182
- self .failed ()
183
- self .done_working ()
184
-
185
- logger .debug ('done waiting' )
186
- else :
187
- self ._setproctitle ("Processing %s since %s" %
188
- (job ._queue ,
189
- datetime .datetime .now ()))
190
- logger .info ('Processing %s since %s' %
191
- (job ._queue , datetime .datetime .now ()))
192
- self .after_fork (job )
193
-
194
- # re-seed the Python PRNG after forking, otherwise
195
- # all job process will share the same sequence of
196
- # random numbers
197
- random .seed ()
198
-
199
- self .process (job )
200
- os ._exit (0 )
201
- self .child = None
143
+ self .fork_worker (job )
202
144
else :
203
145
if interval == 0 :
204
146
break
@@ -207,6 +149,81 @@ def work(self, interval=5):
207
149
#time.sleep(interval)
208
150
self .unregister_worker ()
209
151
152
+ def fork_worker (self , job ):
153
+ """Invoked by ``work`` method. ``fork_worker`` does the actual forking to create the child
154
+ process that will process the job. It's also responsible for monitoring the child process
155
+ and handling hangs and crashes.
156
+
157
+ Finally, the ``process`` method actually processes the job by eventually calling the Job
158
+ instance's ``perform`` method.
159
+
160
+ """
161
+ logger .debug ('picked up job' )
162
+ logger .debug ('job details: %s' % job )
163
+ self .before_fork (job )
164
+ self .child = os .fork ()
165
+ if self .child :
166
+ self ._setproctitle ("Forked %s at %s" %
167
+ (self .child ,
168
+ datetime .datetime .now ()))
169
+ logger .info ('Forked %s at %s' % (self .child ,
170
+ datetime .datetime .now ()))
171
+
172
+ try :
173
+ start = datetime .datetime .now ()
174
+
175
+ # waits for the result or times out
176
+ while True :
177
+ pid , status = os .waitpid (self .child , os .WNOHANG )
178
+ if pid != 0 :
179
+ if os .WIFEXITED (status ) and os .WEXITSTATUS (status ) == 0 :
180
+ break
181
+ if os .WIFSTOPPED (status ):
182
+ logger .warning ("Process stopped by signal %d" % os .WSTOPSIG (status ))
183
+ else :
184
+ if os .WIFSIGNALED (status ):
185
+ raise CrashError ("Unexpected exit by signal %d" % os .WTERMSIG (status ))
186
+ raise CrashError ("Unexpected exit status %d" % os .WEXITSTATUS (status ))
187
+
188
+ time .sleep (0.5 )
189
+
190
+ now = datetime .datetime .now ()
191
+ if self .timeout and ((now - start ).seconds > self .timeout ):
192
+ os .kill (self .child , signal .SIGKILL )
193
+ os .waitpid (- 1 , os .WNOHANG )
194
+ raise TimeoutError ("Timed out after %d seconds" % self .timeout )
195
+
196
+ except OSError as ose :
197
+ import errno
198
+
199
+ if ose .errno != errno .EINTR :
200
+ raise ose
201
+ except JobError :
202
+ self ._handle_job_exception (job )
203
+ finally :
204
+ # If the child process' job called os._exit manually we need to
205
+ # finish the clean up here.
206
+ if self .job ():
207
+ self .done_working ()
208
+
209
+ logger .debug ('done waiting' )
210
+ else :
211
+ self ._setproctitle ("Processing %s since %s" %
212
+ (job ._queue ,
213
+ datetime .datetime .now ()))
214
+ logger .info ('Processing %s since %s' %
215
+ (job ._queue , datetime .datetime .now ()))
216
+ self .after_fork (job )
217
+
218
+ # re-seed the Python PRNG after forking, otherwise
219
+ # all job process will share the same sequence of
220
+ # random numbers
221
+ random .seed ()
222
+
223
+ self .process (job )
224
+ os ._exit (0 )
225
+ self .child = None
226
+
210
227
def before_fork (self , job ):
211
228
"""
212
229
hook for making changes immediately before forking to process
@@ -227,21 +244,33 @@ def before_process(self, job):
227
244
def process (self , job = None ):
228
245
if not job :
229
246
job = self .reserve ()
247
+
248
+ job_failed = False
230
249
try :
231
- self .working_on (job )
232
- job = self .before_process (job )
233
- return job .perform ()
234
- except Exception , e :
235
- exceptionType , exceptionValue , exceptionTraceback = sys .exc_info ()
236
- logger .exception ("%s failed: %s" % (job , e ))
237
- job .fail (exceptionTraceback )
238
- self .failed ()
239
- else :
240
- logger .info ('completed job' )
241
- logger .debug ('job details: %s' % job )
250
+ try :
251
+ self .working_on (job )
252
+ job = self .before_process (job )
253
+ return job .perform ()
254
+ except Exception :
255
+ job_failed = True
256
+ self ._handle_job_exception (job )
257
+ except SystemExit , e :
258
+ if e .code != 0 :
259
+ job_failed = True
260
+ self ._handle_job_exception (job )
261
+
262
+ if not job_failed :
263
+ logger .info ('completed job' )
264
+ logger .debug ('job details: %s' % job )
242
265
finally :
243
266
self .done_working ()
244
267
268
+ def _handle_job_exception (self , job ):
269
+ exceptionType , exceptionValue , exceptionTraceback = sys .exc_info ()
270
+ logger .exception ("%s failed: %s" % (job , exceptionValue ))
271
+ job .fail (exceptionTraceback )
272
+ self .failed ()
273
+
245
274
def reserve (self , timeout = 10 ):
246
275
logger .debug ('checking queues %s' % self .queues )
247
276
job = self .job_class .reserve (self .queues , self .resq , self .__str__ (), timeout = timeout )
0 commit comments