Skip to content
jobs.py 8.43 KiB
Newer Older
Anael Beutot's avatar
Anael Beutot committed
import errno
import logging
Anael Beutot's avatar
Anael Beutot committed
import os
import resource
import signal
import subprocess
import sys
from threading import Thread, Event

from cloudcontrol.node.exc import JobError
from cloudcontrol.node.utils import num_to_sig


logger = logging.getLogger(__name__)


class JobManager(object):
    """Index of the jobs created on this node.

    Provides the job id sequence, keeps every created job in ``self.jobs``
    (keyed by job id) and removes a job when it reports completion through
    :meth:`notify`.
    """

    def __init__(self, main_loop):
        """
        :param main_loop: :class:`MainLoop` instance
        """
        def counter():
            i = 0
            while True:
                yield i
                i += 1

        #: generator yielding successive job ids (0, 1, 2, ...)
        self.job_id = counter()
        self.main = main_loop
        #: keep an index of all jobs
        self.jobs = {}

    def job_start(self):
        pass

    def job_stop(self):
        pass

    def notify(self, job):
        """Called when a job is done."""
        # by now only remove the job
        self.remove(job.id)

    def cancel(self, job_id):
        """Cancel a job.

        :param job_id: id of the job to stop
        :raises KeyError: if no job is registered under `job_id`
        """
        self.jobs[job_id].stop()

    def remove(self, job_id):
        """Remove a job from the index.

        :param job_id: id of the job to remove
        :return: the removed job, or None (after logging an error) when no
            job is registered under `job_id`
        """
        try:
            return self.jobs.pop(job_id)
        except KeyError:
            logger.error('Job %s does not exist', job_id)

    def create(self, job_constructor, *args, **kwargs):
        """Create a new job and populate job id.

        :param job_constructor: callable invoked as
            ``job_constructor(self, *args, **kwargs)``; the returned object
            must expose an ``id`` attribute
        :return: the newly created (and registered) job
        """
        job = job_constructor(self, *args, **kwargs)
        self.jobs[job.id] = job
        return job

    def get(self, job_id):
        """Return the job registered under `job_id`.

        :raises KeyError: if no such job exists
        """
        return self.jobs[job_id]

    def start(self):
        pass

    def stop(self):
        """Ask every registered job to stop (best effort).

        A job that fails to stop does not prevent the remaining ones from
        being stopped; the failure is logged instead of being silently
        dropped.
        """
        logger.debug('Stopping all currently running jobs')
        # iterate over a snapshot so that the index can safely change while
        # jobs are being stopped
        for job in list(self.jobs.values()):
            try:
                job.stop()
            except Exception:
                logger.exception('Error while stopping job %s', job.id)
class BaseThreadedJob(Thread):
    """Job running in a background thread.

    Handles job notification to the job manager.
    """
    def __init__(self, job_manager):
        """
        :param job_manager: manager providing the ``job_id`` sequence and the
            ``notify`` callback used when the job is over
        """
        Thread.__init__(self)
        #: report progress in %
        self.progress = 0.

        self.job_manager = job_manager

        #: job id
        # next() builtin works with generators on both Python 2 and 3
        self.id = next(job_manager.job_id)

        self.running = False

    def pre_job(self):
        """Job preparation that is called when doing start, it can raise
        exceptions to report error to the caller. In the latter case, it will
        also removes itself from the job list.

        """
        pass

    def run_job(self):
        """Overide this method to define what your job do."""
        raise NotImplementedError

    def run(self):
        """Thread entry point: execute :meth:`run_job`.

        Exceptions are deliberately not propagated nor logged here,
        :meth:`run_job` is expected to report its own errors.
        """
        try:
            self.run_job()
        except Exception:
            pass
        finally:
            self.running = False

    def notify(self):
        """Tell the job manager that this job is done."""
        self.job_manager.notify(self)

    def start(self):
        """Run :meth:`pre_job` then start the job in a background thread.

        Any exception raised by :meth:`pre_job` is re-raised to the caller
        after the job has been removed from the manager.
        """
        # first we run pre_job as it could raise an exception
        try:
            self.pre_job()
        except Exception:
            # in case of error we must remove the job from the manager
            self.notify()
            raise
        # then we start the watcher when it's safe
        self.running = True
        Thread.start(self)  # thread will signal when it's done using async

    def wait(self):
        """For jobs running in a background, this method MUST be called in order
        to remove the job from the list in the end.

        """
        self.join()
        self.notify()

    def start_current(self):
        """Start job in current thread.

        Exceptions from :meth:`pre_job` or :meth:`run_job` propagate to the
        caller; in every case the job is marked as not running and the job
        manager is notified.
        """
        try:
            self.pre_job()
            self.running = True
            self.run_job()
            # we could log exceptions here but rather do it inside the run_job
            # method
        finally:
            # mirror run(): the job is over, whatever the outcome
            self.running = False
            self.notify()

    def stop(self):
        """Request the job to stop by clearing the ``running`` flag."""
        self.running = False
class ForkedJob(object):
    """Job that executes in a fork.

    When inherit, you must define open_fds property that list file descriptors
    that must be kept in the child and closed in the parent.

    .. warning::
        logging should not be used in the child as this would cause a deadlock
        to occur, see http://bugs.python.org/issue6721
    """

    def __init__(self, job_manager):
        """
        :param job_manager: manager providing the ``job_id`` sequence and the
            ``notify`` callback used when the job is over
        """
        self.job_manager = job_manager

        #: job id
        # next() builtin works with generators on both Python 2 and 3
        self.id = next(job_manager.job_id)

        self.running = False
        # event for other thread to wait for job termination
        self.job_done = Event()

        #: pid of the child, None while no child is attached to this job
        self.fork_pid = None

    def pre_job(self):
        # overide in sub class
        pass

    def run_job(self):
        # overide in sub class
        pass

    def after_fork(self):
        # overide in sub class
        pass

    def close_fds(self):
        """Close all fds that are not needed in the child.

        Every descriptor from the hard ``RLIMIT_NOFILE`` limit (2048 when the
        limit is infinite) down to 0 is closed, except the ones listed in
        ``open_fds`` and, in debug mode, the standard streams. When not in
        debug mode the standard streams are then reopened on ``os.devnull``.

        :raises OSError: if closing a descriptor fails for another reason
            than the descriptor not being open
        """
        # get max fd
        limit = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
        if limit == resource.RLIM_INFINITY:
            max_fd = 2048
        else:
            max_fd = limit

        debug = self.job_manager.main.config.debug
        # build a fresh set: self.open_fds is left untouched and membership
        # tests are cheap
        keep_open = set(self.open_fds)
        if debug:
            keep_open.update((0, 1, 2))  # debug

        # plain countdown, no xrange/range so that it behaves the same on
        # Python 2 and 3 whatever the size of the limit
        fd = max_fd + 1
        while fd > 0:
            fd -= 1
            if fd in keep_open:
                continue
            try:
                os.close(fd)
            except OSError as exc:
                if exc.errno != errno.EBADF:
                    raise
                # wasn't open

        if not debug:
            # stdout and stderr must be opened for writing, else every later
            # write to them raises
            sys.stdin = open(os.devnull, 'r')
            sys.stdout = open(os.devnull, 'w')
            sys.stderr = open(os.devnull, 'w')
            assert sys.stdin.fileno() == 0
            assert sys.stdout.fileno() == 1
            assert sys.stderr.fileno() == 2

    def reset_signal_mask(self):
        """Install the child signal handlers.

        SIGTERM makes the process exit immediately with status 1, SIGUSR1
        and SIGINT are ignored.
        """
        signal.signal(signal.SIGTERM, lambda *args: os._exit(1))
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_IGN)

    def _run_child(self):
        """Child side of the fork: run the job, never return.

        The whole child life is guarded so that the process always ends with
        ``os._exit`` (0 on success, 1 on any error) and can never go on
        executing the code of the parent.
        """
        status = 1
        try:
            self.reset_signal_mask()
            self.close_fds()
            # hook for sub classes, runs in the child before the job itself
            self.after_fork()
            self.run_job()
            status = 0
        finally:
            # no logging here (see class docstring), only stderr
            try:
                if status == 0:
                    sys.stderr.write('Job execution went well\n')
                else:
                    sys.stderr.write('Error during job\n')
            finally:
                # reporting must never prevent the child from exiting
                os._exit(status)

    def run(self):
        """Fork; the child runs the job, the parent closes ``open_fds``.

        This method only returns in the parent.

        :raises OSError: if the fork fails, or if closing one of the
            ``open_fds`` descriptors fails in the parent
        """
        try:
            self.fork_pid = os.fork()
        except OSError as exc:
            logger.error('Cannot fork: %s', os.strerror(exc.errno))
            raise
        self.running = True

        if self.fork_pid == 0:
            # child
            self._run_child()
        else:
            # close child fds
            for fd in self.open_fds:
                try:
                    os.close(fd)
                except OSError as exc:
                    logger.error('Error while closing fds in parent: %s',
                                 os.strerror(exc.errno))
                    raise

    def start(self):
        """Run :meth:`pre_job` then fork.

        Any exception raised by :meth:`pre_job` is re-raised to the caller
        after the job has been removed from the manager (no child exists at
        that point, so :meth:`wait` would not do it).
        """
        try:
            self.pre_job()
        except Exception:
            self.notify()
            raise
        self.run()

    def stop(self):
        """Kill the child with SIGKILL.

        Does nothing when no child is attached to the job.

        :raises OSError: if the signal cannot be sent for another reason
            than the child being already gone
        """
        if self.fork_pid is None:
            # not started or already reaped, nothing to kill
            return
        try:
            os.kill(self.fork_pid, signal.SIGKILL)
        except OSError as exc:
            if exc.errno == errno.ESRCH:
                logger.debug('Child already killed')
                return

            logger.error('Cannot kill child for IO job: %s',
                         os.strerror(exc.errno))
            raise

    def notify(self):
        """Tell the job manager that this job is done."""
        self.job_manager.notify(self)

    def wait(self):
        """Wait for the child to terminate and report how it ended.

        Returns immediately when no child is attached to the job. Otherwise,
        whatever the outcome, the job is detached from its child, marked as
        not running, ``job_done`` is set and the job manager is notified.

        :raises OSError: if waiting for the child fails
        :raises JobError: if the child exited with a non zero status or was
            terminated by a signal other than SIGKILL
        """
        if self.fork_pid is None:
            return
        try:
            while True:
                try:
                    pid, return_status = os.waitpid(self.fork_pid, 0)
                except OSError as exc:
                    if exc.errno == errno.EINTR:
                        continue

                    logger.error('Error while waiting for child to terminate: %s',
                                 os.strerror(exc.errno))
                    raise
                else:
                    break
            assert pid == self.fork_pid
            # decode the status with the os.W* helpers: the terminating
            # signal lives in the low bits, the exit code in the high byte
            if os.WIFSIGNALED(return_status):
                term_signal = os.WTERMSIG(return_status)
                if term_signal == signal.SIGKILL:
                    logger.error('Job was killed')
                else:
                    raise JobError('Exception during job, returned %s, signal %s',
                                   return_status,
                                   num_to_sig(term_signal))
            elif os.WEXITSTATUS(return_status) != 0:
                raise JobError('Exception during job, returned %s, signal %s',
                               return_status,
                               num_to_sig(return_status & 0xff))
        finally:
            self.fork_pid = None
            self.running = False
            self.job_done.set()
            self.notify()

    def join(self):
        """Block until :meth:`wait` has processed the child termination."""
        self.job_done.wait()


class BaseIOJob(ForkedJob):
    """Fork job that set ionice on the child."""

    #: value given to the ``-n`` option of the ``ionice`` command
    IO_NICE = 7

    def after_fork(self):
        """Run ``ionice`` on the current process (best effort).

        A failure of the command, or the impossibility to launch it, is only
        reported on stderr.
        """
        command = ['ionice', '-n%d' % self.IO_NICE, '-p%d' % os.getpid()]
        try:
            subprocess.check_call(command, close_fds=True)
        except subprocess.CalledProcessError as exc:
            sys.stderr.write('Cannot set ionice, return code %s\n' % exc.returncode)
        except OSError as exc:
            # ionice could not be executed at all (e.g. not installed)
            sys.stderr.write('Cannot run ionice: %s\n' % exc)