7
7
import os , sys
8
8
import time
9
9
import json_parser as json
10
+ import commands
10
11
11
12
class Worker (object ):
12
13
"""Defines a worker. The ``pyres_worker`` script instantiates this Worker class and
@@ -22,6 +23,7 @@ def __init__(self, queues=[], server="localhost:6379", password=None):
22
23
self ._shutdown = False
23
24
self .child = None
24
25
self .pid = os .getpid ()
26
+ self .hostname = os .uname ()[1 ]
25
27
if isinstance (server ,basestring ):
26
28
self .resq = ResQ (server = server , password = password )
27
29
elif isinstance (server , ResQ ):
@@ -62,8 +64,21 @@ def unregister_worker(self):
62
64
Stat ("processed:%s" % self , self .resq ).clear ()
63
65
Stat ("failed:%s" % self , self .resq ).clear ()
64
66
67
+ def prune_dead_workers (self ):
68
+ all_workers = Worker .all (self .resq )
69
+ known_workers = self .worker_pids ()
70
+ for worker in all_workers :
71
+ host , pid , queues = worker .id .split (':' )
72
+ if host != self .hostname :
73
+ continue
74
+ if pid in known_workers :
75
+ continue
76
+ logging .warning ("pruning dead worker: %s" % worker )
77
+ worker .unregister_worker ()
78
+
65
79
def startup (self ):
66
80
self .register_signal_handlers ()
81
+ self .prune_dead_workers ()
67
82
self .register_worker ()
68
83
69
84
def register_signal_handlers (self ):
@@ -83,12 +98,11 @@ def kill_child(self, signum, frame):
83
98
if self .child :
84
99
logging .info ("Killing child at %s" % self .child )
85
100
os .kill (self .child , signal .SIGKILL )
86
-
101
+
87
102
def __str__ (self ):
88
103
if getattr (self ,'id' , None ):
89
104
return self .id
90
- hostname = os .uname ()[1 ]
91
- return '%s:%s:%s' % (hostname , self .pid , ',' .join (self .queues ))
105
+ return '%s:%s:%s' % (self .hostname , self .pid , ',' .join (self .queues ))
92
106
93
107
def work (self , interval = 5 ):
94
108
"""Invoked by ``run`` method. ``work`` listens on a list of queues and sleeps
@@ -204,6 +218,13 @@ def processing(self):
204
218
205
219
def state (self ):
206
220
return 'working' if self .resq .redis .exists ('resque:worker:%s' % self ) else 'idle'
221
+
222
+ def worker_pids (self ):
223
+ """Returns an array of all pids (as strings) of the workers on
224
+ this machine. Used when pruning dead workers."""
225
+ return map (lambda l : l .split (' ' )[0 ],
226
+ commands .getoutput ("ps -A -o pid,command | \
227
+ grep pyres_worker" ).split ("\n " ))
207
228
208
229
@classmethod
209
230
def run (cls , queues , server , interval ):
0 commit comments