# jobs.py
# This file is part of CloudControl.
#
# CloudControl is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# CloudControl is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with CloudControl.  If not, see <http://www.gnu.org/licenses/>.


import errno
import logging
import os
import resource
import signal
import subprocess
import sys
import traceback
from threading import Thread, Event
from StringIO import StringIO

from cloudcontrol.node.exc import JobError
from cloudcontrol.node.utils import num_to_sig, close_fds


logger = logging.getLogger(__name__)


class JobManager(object):
    """Registry of the jobs known to the node.

    Hands out job ids, keeps an index of the jobs created through
    :meth:`create` and removes them when they report completion through
    :meth:`notify`.
    """

    def __init__(self, main_loop):
        """
        :param main_loop: :class:`MainLoop` instance
        """
        def counter():
            i = 0
            while True:
                yield i
                i += 1

        #: generator yielding job ids (0, 1, 2, ...); jobs pull their id
        #: from it in their constructor
        self.job_id = counter()
        self.main = main_loop
        #: keep an index of all jobs
        self.jobs = {}

    def job_start(self):
        """Placeholder hook, does nothing."""
        pass

    def job_stop(self):
        """Placeholder hook, does nothing."""
        pass

    def notify(self, job):
        """Called when a job is done."""
        # by now only remove the job
        self.remove(job.id)

    def cancel(self, job_id):
        """Cancel a job.

        :param job_id: id of the job to stop
        :raises KeyError: if no job is registered under `job_id`
        """
        self.jobs[job_id].stop()

    def remove(self, job_id):
        """Remove a job from the index.

        :param job_id: id of the job to forget
        :return: the removed job, or ``None`` (after logging an error) when
            the id is unknown
        """
        try:
            return self.jobs.pop(job_id)
        except KeyError:
            logger.error('Job %s does not exist', job_id)

    def create(self, job_constructor, *args, **kwargs):
        """Create a new job and populate job id.

        :param job_constructor: callable invoked as
            ``job_constructor(self, *args, **kwargs)``; the object it returns
            must expose an ``id`` attribute
        :return: the newly created job, already registered in the index
        """
        job = job_constructor(self, *args, **kwargs)
        self.jobs[job.id] = job
        return job

    def get(self, job_id):
        """Return the job registered under `job_id`.

        :raises KeyError: if the id is unknown
        """
        return self.jobs[job_id]

    def start(self):
        """Placeholder hook, does nothing."""
        pass

    def stop(self):
        """Ask every registered job to stop (best effort).

        A job that fails to stop does not prevent the others from being
        stopped; the failure is logged instead of being silently dropped.
        """
        logger.debug('Stopping all currently running jobs')
        # Iterate over a snapshot: stopping a job may lead to its removal
        # from `self.jobs`, which would break iteration on the live dict.
        for job_id, job in list(self.jobs.items()):
            try:
                job.stop()
            except Exception:
                logger.exception('Error while stopping job %s', job_id)
class BaseThreadedJob(Thread):
    """Job running in a background thread.

    Handles job notification to the job manager.

    Subclasses implement :meth:`run_job` and may override :meth:`pre_job`.
    """

    def __init__(self, job_manager):
        """
        :param job_manager: object providing a ``job_id`` iterator and a
            ``notify(job)`` method (see :class:`JobManager`)
        """
        Thread.__init__(self)
        #: report progress in %
        self.progress = 0.

        self.job_manager = job_manager

        #: job id
        # next() builtin rather than the Python 2 only ``.next()`` method
        self.id = next(job_manager.job_id)

        #: True while the job is executing; cleared by :meth:`stop` or when
        #: the job body returns
        self.running = False

    def pre_job(self):
        """Job preparation that is called when doing start, it can raise
        exceptions to report error to the caller. In the latter case, it will
        also removes itself from the job list.

        """
        pass

    def run_job(self):
        """Override this method to define what your job does."""
        raise NotImplementedError

    def run(self):
        """Thread entry point: execute :meth:`run_job`.

        Exceptions never propagate out of the thread; they are logged so a
        failing job does not vanish without a trace.
        """
        try:
            self.run_job()
        except Exception:
            logger.exception('Unhandled error in job %s', self.id)
        finally:
            self.running = False

    def notify(self):
        """Tell the job manager that this job is done."""
        self.job_manager.notify(self)

    def start(self):
        """Start job in a background thread.

        :raises Exception: whatever :meth:`pre_job` raised; in that case the
            manager is notified first and the thread is never started
        """
        # first we run pre_job as it could raise an exception
        try:
            self.pre_job()
        except Exception:
            # in case of error we must remove the job from the manager
            self.notify()
            raise
        # then we start the watcher when it's safe
        self.running = True
        Thread.start(self)  # thread will signal when it's done using async

    def wait(self):
        """For jobs running in a background, this method MUST be called in order
        to remove the job from the list in the end.

        """
        self.join()
        self.notify()

    def start_current(self):
        """Start job in current thread.

        The manager is always notified at the end, whether the job succeeded
        or raised; exceptions from :meth:`pre_job` or :meth:`run_job`
        propagate to the caller.
        """
        try:
            self.pre_job()
            self.running = True
            self.run_job()
            # we could log exceptions here but rather do it inside the run_job
            # method
        finally:
            # same end state as the threaded path (see run())
            self.running = False
            self.notify()

    def stop(self):
        """Request the job to stop by clearing the ``running`` flag.

        This only flips the flag; it is up to :meth:`run_job` to check it.
        """
        self.running = False
class ForkedJob(object):
    """Job that executes in a fork.

    When inheriting, you must define an ``open_fds`` property that lists the
    file descriptors that must be kept in the child and closed in the parent.

    .. warning::
        logging should not be used in the child as this would cause a deadlock
        to occur, see http://bugs.python.org/issue6721
    """

    def __init__(self, job_manager):
        """
        :param job_manager: :class:`JobManager` instance
        """
        self.job_manager = job_manager

        #: job id
        # next() builtin rather than the Python 2 only ``.next()`` method
        self.id = next(job_manager.job_id)

        self.running = False
        # event for other thread to wait for job termination
        self.job_done = Event()

        self.fork_pid = None
        # internal event used to wait for forked process termination
        self.fork_die = Event()
        # libev child watcher
        self.fork_watcher = None
        # return status of forked process
        self.return_status = None

    def create_fork_watcher(self):
        """Register a child watcher on the main event loop for the fork."""
        self.fork_watcher = self.job_manager.main.evloop.child(
            self.fork_pid, False, self.child_cb)
        self.fork_watcher.start()

    def child_cb(self, watcher, revents):
        """Child watcher callback: record the exit status and wake waiters.

        :param watcher: the child watcher that fired
        :param revents: event mask (unused)
        """
        # presumably the raw waitpid(2) status -- wait() decodes it as such
        self.return_status = watcher.rstatus
        self.fork_die.set()
        watcher.stop()
        self.fork_watcher = None

    def pre_job(self):
        """This method represents any preparation job that must be done in the
        parent before the fork.
        """
        pass

    def run_job(self):
        """This represent the work that will be done in the forked child.

        This method MUST be redefined in subclasses.
        """
        raise NotImplementedError

    def after_fork(self):
        """This method will be called just after fork in the child.

        It does nothing by default and it can be redefined in subclasses.
        """
        pass

    def fatal(self, fmt, *args, **kwargs):
        """Write error message in stderr and exit.

        Never returns: the process is terminated with ``os._exit``. Exit
        status 42 is used when the arguments themselves are invalid or when
        stderr cannot be written.

        :param str fmt: format string
        :param args: arguments for format string
        :param kwargs: can contain ('status', :int:) -> exit status of process
        """
        raw_status = kwargs.get('status', 1)
        try:
            status = int(raw_status)
        except (ValueError, TypeError):
            # report the offending value; `status` itself is unbound here
            sys.stderr.write('Bad status argument %s' % (raw_status,))
            os._exit(42)

        try:
            fmt = fmt % args
        except (ValueError, TypeError):
            sys.stderr.write('Bad formatting for string: %s' % (fmt,))
            os._exit(42)

        try:
            sys.stderr.write(fmt)
        except IOError:
            os._exit(42)

        os._exit(status)

    def fatal_exc(self, fmt, *args, **kwargs):
        """Write error message and traceback and exit.

        Must be called while an exception is being handled. Never returns.

        :param str fmt: format string
        :param args: arguments for format string
        :param kwargs: can contain ('status', :int:) -> exit status of process
        """
        # the traceback is passed as one more format argument so that any
        # '%' it contains is not interpreted as a format directive
        tb = '\n' + traceback.format_exc() + '\n'
        fmt += '%s'
        args = args + (tb,)
        self.fatal(fmt, *args, status=kwargs.get('status', 1))

    def reset_signal_mask(self):
        """Install the child's signal handlers.

        SIGTERM exits immediately with status 1, SIGUSR1 and SIGINT are
        ignored.
        """
        signal.signal(signal.SIGTERM, lambda *args: os._exit(1))
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_IGN)

    def run(self):
        """This method performs all the hard work by doing the actual fork.

        It catches all possible exceptions in the child, as this would prevents
        the latter from going back in the stack and doing nasty things with
        libev loop or sjRPC.
        Thus you do not need to capture all exceptions in your code,
        furthermore, if you need to exit from the child, you'd better use
        `os._exit <http://docs.python.org/library/os.html#os._exit>`_ function.

        :raises OSError: if the fork fails, or if a child file descriptor
            cannot be closed in the parent
        """
        try:
            self.fork_pid = os.fork()
        except OSError as exc:
            logger.error('Cannot fork (job %s): %s', self.id, exc.strerror)
            raise
        self.running = True

        if self.fork_pid == 0:
            self._run_child()
        else:
            self.create_fork_watcher()
            self._close_child_fds()

    def _run_child(self):
        """Child side of the fork: set up, run the job, never return."""
        # just hope to not receive any signals in between since there is no
        # way to block signals in python :(
        try:
            self.reset_signal_mask()
            close_fds(exclude_fds=self.open_fds,
                      debug=self.job_manager.main.config.debug)
            self.after_fork()
        except:  # deliberately bare: nothing may escape the child
            traceback.print_exc()
            os._exit(1)

        try:
            self.run_job()
        except:  # deliberately bare: nothing may escape the child
            sys.stderr.write('Error during job %s\n' % self.id)
            traceback.print_exc()
            os._exit(1)
        else:
            sys.stderr.write('Job execution went well %s\n' % self.id)
            os._exit(0)

    def _close_child_fds(self):
        """Parent side of the fork: close the descriptors owned by the child."""
        for fd in self.open_fds:
            try:
                os.close(fd)
            except OSError as exc:
                if exc.errno == errno.EBADF:
                    # FIXME this is weird but it seems to happen sometimes
                    # NOTE(review): the descriptor is already closed, which
                    # is the state we want, so keep going with the others
                    # instead of aborting -- confirm against upstream.
                    logger.debug('Error while closing fd %s in parent,'
                                 ' EBADF (job %s)', fd, self.id)
                    continue
                logger.error('Error while closing fds in parent: %s',
                             exc.strerror)
                raise

    def start(self):
        """This will start the job by executing :py:meth:`pre_job` method and
        handing :py:meth:`run` to the main loop through
        ``call_in_main_thread``.

        """
        self.pre_job()
        self.job_manager.main.call_in_main_thread(self.run)

    def stop(self):
        """This would be called to stop the job.

        Sends SIGKILL to the child. Does nothing when there is no child to
        kill (never forked, already waited for, or already dead).

        :raises OSError: if the child cannot be signalled
        """
        if self.fork_pid is None or self.fork_die.is_set():
            return

        try:
            os.kill(self.fork_pid, signal.SIGKILL)
        except OSError as exc:
            if exc.errno == errno.ESRCH:
                logger.debug('Child already killed')
                return

            logger.error('Cannot kill child for IO job: %s', exc.strerror)
            raise

    def notify(self):
        """Tell the job manager that this job is done."""
        self.job_manager.notify(self)

    def wait(self):
        """This will wait for the fork to end and raise exception depending on
        child return status.

        A child terminated by SIGKILL (what :meth:`stop` sends) is only
        logged; any other abnormal termination raises.

        .. warning::
            This method MUST be called.

        :raises JobError: if the child exited with a non-zero code or was
            terminated by a signal other than SIGKILL
        """
        if self.fork_pid is None:
            return

        try:
            self.fork_die.wait()
            status = self.return_status
            # The status is decoded as a wait(2) status: exit code in the
            # high byte, terminating signal in the low 7 bits. A signalled
            # child has exit code 0, so the signal must be checked first.
            if os.WIFSIGNALED(status) and os.WTERMSIG(status) == signal.SIGKILL:
                logger.error('Job was killed')
            elif status != 0:
                raise JobError('Exception during job, returned %s, signal'
                               ' %s' % (
                                   status >> 8,
                                   num_to_sig(status & 0x7f)))
        finally:
            self.fork_pid = None
            self.job_done.set()
            self.notify()

    def join(self):
        """This provides an API similar to `threading.Thread.join
        <http://docs.python.org/library/threading.html#threading.Thread.join>`_
        , you can wait
        for the job termination from multiple points in your program but one of
        these and only one MUST be `wait` method.
        """
        self.job_done.wait()


class BaseIOJob(ForkedJob):
    """Fork job that sets ionice on the child."""

    #: level of io nice that will be set (see :manpage:`ionice(1)`)
    IO_NICE = 7

    def after_fork(self):
        """Lower the I/O priority of the current (child) process.

        Runs ``ionice -n<IO_NICE> -p<pid>``. Failure is not fatal: a message
        is written to stderr and the job goes on with the default priority.
        """
        try:
            subprocess.check_call(['ionice', '-n%d' % self.IO_NICE,
                                   '-p%d' % os.getpid()], close_fds=True)
        except subprocess.CalledProcessError as exc:
            sys.stderr.write('Cannot set ionice, return code %s\n' % exc.returncode)
        except OSError as exc:
            # ionice could not be executed at all (e.g. not installed);
            # treat it like a failing ionice rather than killing the job
            sys.stderr.write('Cannot set ionice: %s\n' % exc.strerror)