Commit 46f109ea authored by Sebastien Luttringer's avatar Sebastien Luttringer
Browse files

Introduce PipeFile in place of uopen

We now use PipeFile to handle complex file types and to compute md5 and file size.

This commit adds the following features to installsystems:
- A download progress bar
- check size before download
- check size after download
- check md5 after download
parent b317d082
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ Description: InstallSytems Installer

Package: python-installsystems
Architecture: all
Depends: ${misc:Depends}, ${python:Depends}
Depends: ${misc:Depends}, ${python:Depends}, python-progressbar
XB-Python-Version: ${python:Versions}
Description: InstallSystems Python modules
 This package provides InstallSystems framework
+77 −28
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import cStringIO
import installsystems.template as istemplate
import installsystems.tools as istools
from installsystems.printer import *
from installsystems.tools import PipeFile
from installsystems.tarball import Tarball


@@ -249,14 +250,14 @@ class SourceImage(Image):
        Create a payload file
        Only gzipping it
        '''
        fsource = istools.uopen(source)
        fsource = PipeFile(source, "r")
        # open file not done in GzipFile, to escape writing of filename
        # in gzip file. This change md5.
        fdest = open(dest, "wb")
        fdest = gzip.GzipFile(filename=os.path.basename(source),
                              fileobj=fdest,
                              mtime=os.stat(source).st_mtime)
        istools.copyfileobj(fsource, fdest)
        shutil.copyfileobj(fsource, fdest)
        fsource.close()
        fdest.close()

@@ -405,11 +406,16 @@ class PackageImage(Image):
        self.md5name = md5name
        try:
            if fileobj is None:
                fileobj = istools.uopen(self.path)
                fileobj = PipeFile(self.path, "r")
            else:
                fileobj = PipeFile(mode="r", fileobj=fileobj)
            memfile = cStringIO.StringIO()
            fileobj.seek(0)
            (self.size, self.md5) = istools.copyfileobj(fileobj, memfile)
            shutil.copyfileobj(fileobj, memfile)
            # close source
            fileobj.close()
            # get downloaded size and md5
            self.size = fileobj.read_size
            self.md5 = fileobj.md5
            memfile.seek(0)
            self._tarball = Tarball.open(fileobj=memfile, mode='r:gz')
        except Exception as e:
@@ -419,7 +425,7 @@ class PackageImage(Image):
        arrow("Image %s v%s loaded" % (self.name, self.version))
        arrow("Author: %s" % self.author, 1)
        arrow("Date: %s" % time.ctime(self.date), 1)
        # build payloads
        # build payloads info
        self.payload = {}
        for pname, pval in self._metadata["payload"].items():
            if self.md5name:
@@ -495,11 +501,17 @@ class PackageImage(Image):
    def check(self, message="Check MD5"):
        '''
        Check md5 and size of tarballs are correct
        Download tarball from path and compare the loaded md5 and remote
        '''
        arrow(message)
        arrowlevel(1)
        # check image
        if self.md5 != istools.md5sum(self.path):
        fo = PipeFile(self.path, "r")
        fo.consume()
        fo.close()
        if self.size != fo.read_size:
            raise Exception("Invalid size of image %s" % self.name)
        if self.md5 != fo.md5:
            raise Exception("Invalid MD5 of image %s" % self.name)
        # check payloads
        for pay_name, pay_obj in self.payload.items():
@@ -518,20 +530,30 @@ class PackageImage(Image):
    def download(self, directory, force=False, payload=False):
        '''
        Download image in directory
        Doesn't use in memory image because we cannot access it
        This is done to don't parasitize self._tarfile access to memfile
        '''
        # check if destination exists
        directory = os.path.abspath(directory)
        dest = os.path.join(directory, self.filename)
        if not force and os.path.exists(dest):
            raise Exception("Image destination already exists: %s" % dest)
        # download
        # some display
        arrow("Downloading image in %s" % directory)
        fs = istools.uopen(self.path)
        debug("Downloading %s from %s" % (self.id, self.path))
        # open source
        fs = PipeFile(self.path, progressbar=True)
        # check if announced file size is good
        if fs.size is not None and self.size != fs.size:
            raise Exception("Downloading image %s failed: Invalid announced size" % self.name)
        # open destination
        fd = open(self.filename, "wb")
        slen, smd5 = istools.copyfileobj(fs, fd)
        shutil.copyfileobj(fs, fd)
        fs.close()
        fd.close()
        if self.md5 != smd5:
        if self.size != fs.consumed_size:
            raise Exception("Download image %s failed: Invalid size" % self.name)
        if self.md5 != fs.md5:
            raise Exception("Download image %s failed: Invalid MD5" % self.name)
        if payload:
            for payname in self.payload:
@@ -610,6 +632,7 @@ class PackageImage(Image):
            arrowlevel(level=old_level)
        arrowlevel(-1)


class Payload(object):
    '''
    Payload class represents a payload object
@@ -646,12 +669,13 @@ class Payload(object):
        '''
        Fill missing md5/size about payload
        '''
        fileobj = istools.uopen(self.path)
        size, md5 = istools.copyfileobj(fileobj, None)
        fileobj = PipeFile(self.path, "r")
        fileobj.consume()
        fileobj.close()
        if self._size is None:
            self._size = size
            self._size = fileobj.read_size
        if self._md5 is None:
            self._md5 = md5
            self._md5 = fileobj.md5

    @property
    def md5(self):
@@ -733,11 +757,12 @@ class Payload(object):
        if self._size is None or self._md5 is None:
            debug("Check is called on payload with nothing to check")
            return True
        fileobj = istools.uopen(self.path)
        size, md5 = istools.copyfileobj(fileobj, None)
        if self._size != size:
        fileobj = PipeFile(self.path, "r")
        fileobj.consume()
        fileobj.close()
        if self._size != fileobj.read_size:
            raise Exception("Invalid size of payload %s" % self.name)
        if self._md5 != md5:
        if self._md5 != fileobj.md5:
            raise Exception("Invalid MD5 of payload %s" % self._md5)

    def download(self, dest, force=False):
@@ -756,13 +781,21 @@ class Payload(object):
                raise Exception("Destination %s is a directory" % dest)
            if not force:
                raise Exception("File %s already exists" % dest)
        # download
        fs = istools.uopen(self.path)
        # Open remote file
        debug("Downloading %s from %s" % (self.name, self.path))
        fs = PipeFile(self.path, progressbar=True)
        # check if announced file size is good
        if fs.size is not None and self.size != fs.size:
            raise Exception("Downloading payload %s failed: Invalid announced size" % self.name)
        fd = open(dest, "wb")
        slen, smd5 = istools.copyfileobj(fs, fd)
        shutil.copyfileobj(fs, fd)
        # closing fo
        fs.close()
        fd.close()
        if self.md5 != smd5:
        # checking download size
        if self.size != fs.read_size:
            raise Exception("Downloading payload %s failed: Invalid size" % self.name)
        if self.md5 != fs.md5:
            raise Exception("Downloading payload %s failed: Invalid MD5" % self.name)

    def extract(self, dest, force=False, filelist=None):
@@ -791,9 +824,12 @@ class Payload(object):
            istools.mkdir(dest)
        # try to open payload file
        try:
            fo = istools.uopen(self.path)
            fo = PipeFile(self.path, progressbar=True)
        except Exception as e:
            raise Exception("Unable to open payload file %s" % self.path)
        # check if announced file size is good
        if fo.size is not None and self.size != fo.size:
            raise Exception("Invalid announced size on payload %s" % self.path)
        # try to open tarball on payload
        try:
            t = Tarball.open(fileobj=fo, mode="r|gz", ignore_zeros=True)
@@ -809,6 +845,11 @@ class Payload(object):
        # closing fo
        t.close()
        fo.close()
        # checking download size
        if self.size != fo.read_size:
            raise Exception("Downloading payload %s failed: Invalid size" % self.name)
        if self.md5 != fo.md5:
            raise Exception("Downloading payload %s failed: Invalid MD5" % self.name)

    def extract_file(self, dest, force=False):
        '''
@@ -827,22 +868,30 @@ class Payload(object):
                raise Exception("Destination %s is a directory" % dest)
            if not force:
                raise Exception("File %s already exists" % dest)
        # opening destination
        # opening destination (must be local)
        try:
            f_dst = istools.uopen(dest, "wb")
            f_dst = open(dest, "wb")
        except Exception as e:
            raise Exception("Unable to open destination file %s" % dest)
        # try to open payload file
        try:
            f_gsrc = istools.uopen(self.path)
            f_gsrc = PipeFile(self.path, "r", progressbar=True)
            f_src = gzipstream.GzipStream(stream=f_gsrc)
        except Exception as e:
            raise Exception("Unable to open payload file %s" % self.path)
        # check if announced file size is good
        if f_gsrc.size is not None and self.size != f_gsrc.size:
            raise Exception("Invalid announced size on payload %s" % self.path)
        # launch copy
        size, md5 = istools.copyfileobj(f_src, f_dst)
        shutil.copyfileobj(f_src, f_dst)
        # closing fo
        f_dst.close()
        f_gsrc.close()
        f_src.close()
        # checking download size
        if self.size != f_gsrc.read_size:
            raise Exception("Downloading payload %s failed: Invalid size" % self.name)
        if self.md5 != f_gsrc.md5:
            raise Exception("Downloading payload %s failed: Invalid MD5" % self.name)
        # restore the file's original rights
        istools.chrights(dest, self.uid, self.gid, self.mode, self.mtime)
+6 −5
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ import installsystems
import installsystems.tools as istools
from installsystems.printer import *
from installsystems.tarball import Tarball
from installsystems.tools import PipeFile
from installsystems.image import Image, PackageImage
from installsystems.database import Database

@@ -384,8 +385,8 @@ class Repository(object):
                                                           self.config.name))
        memfile = cStringIO.StringIO()
        try:
            fo = istools.uopen(path)
            istools.copyfileobj(fo, memfile)
            fo = PipeFile(path, "r")
            shutil.copyfileobj(fo, memfile)
            fo.close()
        except Exception as e:
            raise Exception("Loading image %s v%s failed: %s" % (name, version, e))
@@ -522,7 +523,7 @@ class RepositoryManager(object):
                open(filedest, "wb")
        # get remote last value
        try:
            rlast = int(istools.uopen(config.lastpath,
            rlast = int(PipeFile(config.lastpath, mode='r',
                              timeout=self.timeout).read().strip())
            # get local last value
            llast = int(os.stat(filedest).st_mtime)
+128 −43
Original line number Diff line number Diff line
@@ -10,23 +10,138 @@ import os
import hashlib
import shutil
import urllib2

from progressbar import ProgressBar, Percentage, FileTransferSpeed
from progressbar import Bar, BouncingBar, ETA, UnknownLength
from installsystems.tarball import Tarball
from installsystems.printer import *


################################################################################
# Classes
################################################################################

def md5sum(path=None, fileobj=None):
    '''
    Compute md5 of a file

    Either path or fileobj must be given; fileobj is used when both are.
    Everything readable from the file object is hashed and the hexadecimal
    digest is returned.
    Raise ValueError when neither path nor fileobj is given.
    '''
    if path is None and fileobj is None:
        raise ValueError("No path or fileobj specified")
    if fileobj is None:
        fileobj = uopen(path)
    m = hashlib.md5()
    while True:
        buf = fileobj.read(1024 * m.block_size)
        if len(buf) == 0:
            break
        m.update(buf)
    return m.hexdigest()


class PipeFile(object):
    '''
    Pipe file is a file object with extended capabilities
    like printing a progress bar or computing file size and md5 on the fly

    Attributes set by open():
    - mode: "r" or "w"
    - fo: the wrapped file object
    - size: announced size of the file, or None when it is unknown
    - consumed_size: number of bytes read or written so far
    '''

    def __init__(self, path=None, mode="r", fileobj=None, timeout=3,
                 progressbar=False):
        '''
        Open path (local, http or ftp) or wrap fileobj
        See open() for the meaning of the arguments
        '''
        self.progressbar = progressbar
        self.open(path, mode, fileobj, timeout)

    def open(self, path=None, mode="r", fileobj=None, timeout=3):
        '''
        Open path or wrap fileobj and reset md5/size accounting

        fileobj takes precedence over path when both are given.
        timeout is only used for http/ftp paths.
        Raise AttributeError on missing path/fileobj or invalid mode,
        IOError when a remote path cannot be opened and NotImplementedError
        on an unsupported path type.
        '''
        if path is None and fileobj is None:
            raise AttributeError("You must have a path or a fileobj to open")
        if mode not in ("r", "w"):
            raise AttributeError("Invalid open mode. Must be r or w")
        self.mode = mode
        self._md5 = hashlib.md5()
        self.size = None
        self.consumed_size = 0
        self._progressbar = None
        if fileobj is not None:
            self.fo = fileobj
            # seek to 0 and compute filesize if we have a real fd
            fd = self._fileno()
            if fd is not None:
                self.seek(0)
                self.size = os.fstat(fd).st_size
        else:
            ftype = pathtype(path)
            if ftype == "file":
                # data is hashed byte for byte, so always open in binary
                self.fo = open(path, self.mode + "b")
                self.size = os.fstat(self.fo.fileno()).st_size
            elif ftype == "http" or ftype == "ftp":
                try:
                    self.fo = urllib2.urlopen(path, timeout=timeout)
                except Exception as e:
                    # FIXME: unable to open file
                    raise IOError(e)
                if "Content-Length" in self.fo.headers:
                    self.size = int(self.fo.headers["Content-Length"])
            else:
                raise NotImplementedError
        # build and start progressbar display only if asked
        if self.progressbar:
            self._progressbar = self._build_progressbar()
            self._progressbar.start()

    def _fileno(self):
        '''
        Return the file descriptor of the wrapped object or None
        In-memory objects may expose fileno() but raise when it is called
        '''
        try:
            return self.fo.fileno()
        except (AttributeError, IOError, OSError, ValueError):
            return None

    def _build_progressbar(self):
        '''
        Create the progress bar matching what we know about the file size
        '''
        if self.size is None:
            widget = [ BouncingBar(), " ", FileTransferSpeed() ]
            maxval = UnknownLength
        else:
            widget = [ Percentage(), " ", Bar(), " ", FileTransferSpeed(), " ", ETA() ]
            maxval = self.size
        return ProgressBar(widgets=widget, maxval=maxval)

    def close(self):
        '''
        Finish the progress bar display and close the wrapped file object
        md5 and sizes remain readable after close
        '''
        if self.progressbar:
            self._progressbar.finish()
        debug("MD5: %s" % self.md5)
        debug("Size: %s" % self.size)
        self.fo.close()

    def seek(self, offset, whence=0):
        '''
        Move the wrapped file object and restart md5/size accounting

        Rewinding an untouched, non seekable stream (e.g. an http response)
        to its start is accepted as a no-op.
        '''
        if not hasattr(self.fo, "seek"):
            if offset == 0 and whence == 0 and self.consumed_size == 0:
                return
            raise IOError("Underlying file object is not seekable")
        self.fo.seek(offset, whence)
        # previous digest no longer describes what will be read/written
        self._md5 = hashlib.md5()
        self.consumed_size = 0

    def read(self, size=None):
        '''
        Read at most size bytes (everything when size is None)
        Update md5, consumed size and the progress bar on the fly
        '''
        if self.mode == "w":
            raise IOError("Unable to read in w mode")
        # not every file object accepts None as a size
        if size is None:
            buf = self.fo.read()
        else:
            buf = self.fo.read(size)
        length = len(buf)
        self._md5.update(buf)
        self.consumed_size += length
        if self.progressbar and length > 0:
            self._progressbar.update(self.consumed_size)
        return buf

    def flush(self):
        '''
        Flush the wrapped file object when it supports it
        '''
        if hasattr(self.fo, "flush"):
            return self.fo.flush()

    def write(self, buf):
        '''
        Write buf to the wrapped file object
        Update md5, consumed size and the progress bar on the fly
        '''
        if self.mode == "r":
            raise IOError("Unable to write in r mode")
        self.fo.write(buf)
        length = len(buf)
        self._md5.update(buf)
        self.consumed_size += length
        if self.progressbar and length > 0:
            self._progressbar.update(self.consumed_size)
        return None

    def consume(self):
        '''
        Read all data and doesn't save it
        Useful to obtain md5 and size
        '''
        if self.mode == "w":
            raise IOError("Unable to read in w mode")
        while True:
            buf = self.read(65536)
            if len(buf) == 0:
                break

    @property
    def md5(self):
        '''
        Return the md5 of read/write of the file
        '''
        return self._md5.hexdigest()

    @property
    def read_size(self):
        '''
        Return the current read size
        '''
        return self.consumed_size

    @property
    def write_size(self):
        '''
        Return the current wrote size
        '''
        return self.consumed_size

################################################################################
# Functions
################################################################################

def smd5sum(buf):
    '''
@@ -36,23 +151,6 @@ def smd5sum(buf):
    m.update(buf)
    return m.hexdigest()

def copyfileobj(sfile, dfile):
    '''
    Stream sfile into dfile chunk by chunk
    Return a (length, md5 hexdigest) tuple of the copied data
    When dfile is None data is only read and hashed, not written
    '''
    digest = hashlib.md5()
    chunk_size = 1024 * digest.block_size
    total = 0
    chunk = sfile.read(chunk_size)
    # an empty chunk means end of file
    while len(chunk) != 0:
        total += len(chunk)
        digest.update(chunk)
        if dfile is not None:
            dfile.write(chunk)
        chunk = sfile.read(chunk_size)
    return (total, digest.hexdigest())

def copy(source, destination, uid=None, gid=None, mode=None, timeout=None):
    '''
    Copy a source to destination. Take care of path type
@@ -130,19 +228,6 @@ def abspath(path):
    else:
        return None

def uopen(path, mode="rb", timeout=3):
    '''
    Universal Open
    Return a file-like object for path, which may be local or remote
    mode is only used for local files, timeout only for http/ftp
    Raise NotImplementedError on any other path type
    '''
    kind = pathtype(path)
    if kind in ("http", "ftp"):
        return urllib2.urlopen(path, timeout=timeout)
    if kind != "file":
        raise NotImplementedError
    return open(path, mode)

def getsize(path):
    '''
    Get size of a path. Recurse if directory