Newer
Older
import errno
import itertools
import logging
import os
import resource
import signal
import subprocess
import sys
from threading import Thread, Event

from cloudcontrol.node.exc import JobError
# module-level logger used by the job manager and the job classes below
logger = logging.getLogger(__name__)
class JobManager(object):
    """Index of the jobs created through :meth:`create`.

    Jobs are stored in :attr:`jobs` under their ``id`` attribute; job
    constructors receive the manager as first argument and are expected to
    draw that id from :attr:`job_id`.
    """

    def __init__(self, main_loop):
        """
        :param main_loop: :class:`MainLoop` instance
        """
        #: iterator handing out consecutive job ids, starting at 0
        self.job_id = itertools.count()
        self.main = main_loop
        #: keep an index of all jobs
        self.jobs = {}

    def job_start(self):
        """Placeholder hook, does nothing."""
        pass

    def job_stop(self):
        """Placeholder hook, does nothing."""
        pass

    def notify(self, job):
        """Called when a job is done.

        :param job: the finished job, only its ``id`` attribute is read
        """
        # by now only remove the job; a job that is no longer indexed (for
        # instance because it went through cancel() before) is ignored
        self.jobs.pop(job.id, None)

    def cancel(self, job_id):
        """Cancel a job.

        :param job_id: id of the job to drop from the index
        :returns: the removed job, or ``None`` (after logging an error) when
            no job is registered under `job_id`
        """
        # NOTE(review): this only forgets the job, it does not call stop()
        # on it -- confirm callers stop the job themselves
        try:
            return self.jobs.pop(job_id)
        except KeyError:
            logger.error('Job %s does not exist', job_id)

    def create(self, job_constructor, *args, **kwargs):
        """Create a new job and populate job id.

        :param job_constructor: callable invoked as
            ``job_constructor(manager, *args, **kwargs)``; the object it
            returns must expose an ``id`` attribute
        :returns: the newly created job, already registered in the index
        """
        job = job_constructor(self, *args, **kwargs)
        self.jobs[job.id] = job
        return job

    def get(self, job_id):
        """Return the job registered under `job_id`.

        :raises KeyError: when no such job is indexed
        """
        return self.jobs[job_id]

    def start(self):
        """Placeholder hook, does nothing."""
        pass

    def stop(self):
        """Placeholder hook, does nothing."""
        pass
class BaseThreadedJob(Thread):
    """Job running in a background thread.

    Handles job notification to the job manager.
    """

    def __init__(self, job_manager):
        """
        :param job_manager: manager providing the ``job_id`` iterator and the
            ``notify(job)`` callback
        """
        Thread.__init__(self)
        #: report progress in %
        self.progress = 0.
        self.job_manager = job_manager
        #: job id
        # builtin next() works on both Python 2.6+ and Python 3 iterators
        self.id = next(job_manager.job_id)
        self.running = False

    def pre_job(self):
        """Job preparation that is called when doing start, it can raise
        exceptions to report an error to the caller. In the latter case, the
        job also removes itself from the job list.
        """
        pass

    def run_job(self):
        """Override this method to define what your job does."""
        raise NotImplementedError

    def run(self):
        """Thread entry point: execute :meth:`run_job`, then clear the
        ``running`` flag whatever the outcome.
        """
        try:
            self.run_job()
        except Exception:
            # nobody can catch an exception escaping a thread, so leave a
            # trace instead of dropping it silently
            logger.exception('Unhandled error in job %s', self.id)
        finally:
            self.running = False

    def notify(self):
        """Tell the job manager that this job is done."""
        self.job_manager.notify(self)

    def start(self):
        """Run :meth:`pre_job` in the calling thread, then start the
        background thread.

        Any exception raised by :meth:`pre_job` is propagated to the caller
        after the manager has been notified.
        """
        # first we run pre_job as it could raise an exception
        try:
            self.pre_job()
        except Exception:
            # in case of error we must remove the job from the manager
            self.notify()
            raise
        # only flag the job as running once the preparation succeeded
        self.running = True
        Thread.start(self)

    def wait(self):
        """For jobs running in a background, this method MUST be called in order
        to remove the job from the list in the end.
        """
        self.join()
        self.notify()

    def start_current(self):
        """Start job in current thread."""
        try:
            self.pre_job()
            self.running = True
            self.run_job()
        # we could log exceptions here but rather do it inside the run_job
        # method
        finally:
            # same bookkeeping as the threaded path: flag cleared, manager told
            self.running = False
            self.notify()

    def stop(self):
        """Clear the ``running`` flag; the thread itself is not interrupted
        here.
        """
        self.running = False
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
class BaseIOJob(object):
    """Job that can set ionice and executes in a fork.

    When inheriting, you must define an ``open_fds`` property that lists the
    file descriptors that must be kept in the child and closed in the parent.

    .. warning::
        logging should not be used in the child as this would cause a deadlock
        to occur, see http://bugs.python.org/issue6721
    """

    #: value passed to ``ionice -n`` for the child process
    IO_NICE = 7

    def __init__(self, job_manager):
        """
        :param job_manager: manager providing the ``job_id`` iterator, the
            ``notify(job)`` callback and ``main.config.debug``
        """
        self.job_manager = job_manager
        #: job id
        # builtin next() works on both Python 2.6+ and Python 3 iterators
        self.id = next(job_manager.job_id)
        self.running = False
        # event for other thread to wait for job termination
        self.job_done = Event()
        #: pid returned by fork, ``None`` while there is no child to wait for
        self.fork_pid = None

    def pre_job(self):
        """Hook called by :meth:`start` before forking; override in sub
        class.
        """
        pass

    def run_job(self):
        """Job payload executed in the forked child; override in sub class."""
        pass

    def _child_report(self, message):
        """Write `message` to stderr on a best-effort basis.

        Used in the child, where logging is off limits; a failing write must
        not change the outcome of the job.
        """
        try:
            sys.stderr.write(message)
        except Exception:
            pass

    def set_io_nice(self):
        """Apply :attr:`IO_NICE` to the current process through ``ionice``.

        Failures are reported on stderr and otherwise ignored.
        """
        try:
            subprocess.check_call(['ionice', '-n%d' % self.IO_NICE,
                                   '-p%d' % os.getpid()], close_fds=True)
        except subprocess.CalledProcessError as exc:
            self._child_report(
                'Cannot set ionice, return code %s\n' % exc.returncode)
        except OSError as exc:
            # ionice could not be executed at all (e.g. binary not installed)
            self._child_report('Cannot set ionice: %s\n' % exc)

    def close_fds(self):
        """Close all unneeded fds in children.

        Descriptors listed in ``open_fds`` are kept, as are 0, 1 and 2 when
        the debug flag is set. Without debug, the standard streams are
        re-bound to ``os.devnull``.
        """
        # get max fd
        limit = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
        max_fd = 2048 if limit == resource.RLIM_INFINITY else limit
        debug = self.job_manager.main.config.debug
        # work on a copy so that self.open_fds is never modified in place; a
        # set also makes the membership test below constant time
        keep_fds = set(self.open_fds)
        if debug:
            keep_fds.update((0, 1, 2))  # debug
        for fd in xrange(max_fd, -1, -1):
            if fd in keep_fds:
                continue
            try:
                os.close(fd)
            except OSError as exc:
                if exc.errno != errno.EBADF:
                    raise
                # wasn't open
        if not debug:
            # the lowest free descriptor is handed out first, so these three
            # are expected to land on fds 0, 1 and 2 (checked just below);
            # stdout/stderr must be opened for writing or any later write
            # to them raises
            sys.stdin = open(os.devnull)
            sys.stdout = open(os.devnull, 'w')
            sys.stderr = open(os.devnull, 'w')
            assert sys.stdin.fileno() == 0
            assert sys.stdout.fileno() == 1
            assert sys.stderr.fileno() == 2

    def reset_signal_mask(self):
        """Install the child's signal handlers: exit with status 1 on
        SIGTERM, ignore SIGUSR1 and SIGINT.
        """
        signal.signal(signal.SIGTERM, lambda *args: os._exit(1))
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_IGN)

    def _run_child(self):
        """Child side of the fork; never returns.

        Every step runs under the same guard so that whatever goes wrong the
        child ends in ``os._exit`` instead of unwinding into code meant for
        the parent. Exit status is 0 on success and 1 otherwise.
        """
        exit_code = 1
        try:
            self.reset_signal_mask()
            self.close_fds()
            self.set_io_nice()
            self.run_job()
        except BaseException:
            self._child_report('Error during job\n')
        else:
            exit_code = 0
            self._child_report('Job execution went well\n')
        finally:
            os._exit(exit_code)

    def run(self):
        """Fork, run the job in the child and close the child's descriptors
        in the parent.

        :raises OSError: when the fork fails, or when closing one of
            ``open_fds`` fails in the parent
        """
        try:
            self.fork_pid = os.fork()
        except OSError as exc:
            logger.error('Cannot fork: %s', os.strerror(exc.errno))
            raise
        self.running = True
        if self.fork_pid == 0:
            # child
            self._run_child()
        else:
            # close child fds
            for fd in self.open_fds:
                try:
                    os.close(fd)
                except OSError:
                    logger.error('Error while closing fds in parent')
                    raise

    def start(self):
        """Run :meth:`pre_job` then fork the job."""
        self.pre_job()
        self.run()

    def stop(self):
        """Kill the forked child with SIGKILL.

        Does nothing when there is no child to signal.

        :raises OSError: when the kill fails for a reason other than the
            process being already gone
        """
        if not self.fork_pid:
            # None: nothing forked or child already waited for; 0: we are the
            # child itself and kill(0, ...) would hit the whole process group
            return
        try:
            os.kill(self.fork_pid, signal.SIGKILL)
        except OSError as exc:
            if exc.errno == errno.ESRCH:
                logger.debug('Child already killed')
                return
            logger.error('Cannot kill child for IO job: %s',
                         os.strerror(exc.errno))
            raise

    def notify(self):
        """Tell the job manager that this job is done."""
        self.job_manager.notify(self)

    def wait(self):
        """Wait for the child to terminate, then release waiters blocked in
        :meth:`join` and notify the job manager.

        Returns immediately when there is no child to wait for.

        :raises JobError: when the child's exit code is not 0
        :raises OSError: when ``waitpid`` fails for a reason other than being
            interrupted by a signal
        """
        if self.fork_pid is None:
            return
        try:
            while True:
                try:
                    pid, return_status = os.waitpid(self.fork_pid, 0)
                except OSError as exc:
                    if exc.errno == errno.EINTR:
                        # interrupted by a signal, just wait again
                        continue
                    logger.error('Error while waiting for child to terminate: %s',
                                 os.strerror(exc.errno))
                    raise
                else:
                    break
            assert pid == self.fork_pid
            # NOTE(review): only the exit code (high byte of the status) is
            # checked, so a child terminated by a signal -- e.g. through
            # stop() -- is not reported as an error; confirm this is intended
            if return_status >> 8 != 0:
                # NOTE(review): the message is handed over unformatted along
                # with its argument; presumably JobError deals with that --
                # verify
                raise JobError('Exception during job, returned %s',
                               return_status)
        finally:
            self.fork_pid = None
            self.running = False
            self.job_done.set()
            self.notify()

    def join(self):
        """Block until :meth:`wait` has flagged the job as done."""
        self.job_done.wait()