From bf67978638b906f598e417b2a1bce250542c1e7d Mon Sep 17 00:00:00 2001
From: Seblu <sebastien.luttringer@smartjog.com>
Date: Wed, 22 Jun 2011 14:36:19 +0200
Subject: [PATCH] Introduce Payload

Concept of data was changed to payload.
A payload is not necessarily a tarball of a file or a directory; it can be the file itself, without a tarball around it.
This is useful to keep the same md5 on data/payload and to save some space when adding to a repo.
Data that comes with an image is now represented by the Payload class, and all operations on it are located inside that class. This is cleaner!
Payload also handles the original information (mtime, uid, gid, md5, size, mode).
---
 installsystems/config.py     |   1 -
 installsystems/database.py   |  38 +--
 installsystems/image.py      | 507 +++++++++++++++++++++++++----------
 installsystems/repository.py |  31 ++-
 installsystems/tarball.py    |   7 +
 installsystems/template.py   |  13 +-
 installsystems/tools.py      |  23 +-
 7 files changed, 435 insertions(+), 185 deletions(-)

diff --git a/installsystems/config.py b/installsystems/config.py
index 9b75f8f..b979911 100644
--- a/installsystems/config.py
+++ b/installsystems/config.py
@@ -51,7 +51,6 @@ class ConfigFile(object):
                     # get all options in repo
                     self._repos.append(RepositoryConfig(rep, **dict(cp.items(rep))))
             except Exception as e:
-                raise
                 raise Exception("Unable load file %s: %s" % (self.path, e))
         else:
             debug("No config file found")
diff --git a/installsystems/database.py b/installsystems/database.py
index 874a27a..a774b14 100644
--- a/installsystems/database.py
+++ b/installsystems/database.py
@@ -52,7 +52,7 @@ class Database(object):
         self.conn.execute("PRAGMA foreign_keys = ON")
 
     def get(self, name, version):
-        '''Return a description dict from a package name'''
+        '''Return a description dict from a image name'''
         # parse tarball
         try:
             self.file.seek(0)
@@ -60,20 +60,20 @@ class Database(object):
             rdata = tarball.get_str("%s-%s" % (name, version))
             tarball.close()
         except KeyError:
-            raise Exception("No package %s version %s in metadata" % (name, version))
+            raise Exception("No image %s version %s in metadata" % (name, version))
         except Exception as e:
             raise Exception("Unable to read db %s version %s: %s" % (name, version, e))
         # convert loaded data into dict (json parser)
         try:
             return json.loads(rdata)
         except Exception as e:
-            raise Exception("Invalid metadata in package %s version %s: e" % (name, version, e))
+            raise Exception("Invalid metadata in image %s version %s: e" % (name, version, e))
 
     def ask(self, sql, args=()):
         '''Ask question to db'''
         return self.conn.execute(sql, args)
 
-    def add(self, package):
+    def add(self, image):
         '''Add a packaged image to a db'''
         try:
             # let's go
@@ -82,28 +82,28 @@ class Database(object):
             # insert image information
             arrow("Add image metadata", 2, self.verbose)
             self.conn.execute("INSERT OR REPLACE INTO image values (?,?,?,?,?,?,?)",
-                              (package.md5,
-                               package.name,
-                               package.version,
-                               package.date,
-                               package.author,
-                               package.description,
-                               package.size,
+                              (image.md5,
+                               image.name,
+                               image.version,
+                               image.date,
+                               image.author,
+                               image.description,
+                               image.size,
                                ))
             # insert data informations
-            arrow("Add data metadata", 2, self.verbose)
-            for key,value in package.data.items():
-                self.conn.execute("INSERT OR REPLACE INTO data values (?,?,?,?)",
-                                  (value["md5"],
-                                   package.md5,
-                                   key,
-                                   value["size"]
+            arrow("Add payload metadata", 2, self.verbose)
+            for name, obj in image.payload.items():
+                self.conn.execute("INSERT OR REPLACE INTO payload values (?,?,?,?,?)",
+                                  (obj.md5,
+                                   image.md5,
+                                   name,
+                                   obj.isdir,
+                                   obj.size,
                                    ))
             # on commit
             arrow("Commit transaction to db", 1, self.verbose)
             self.conn.execute("COMMIT TRANSACTION")
         except Exception as e:
-            raise
             raise Exception("Adding metadata fail: %s" % e)
 
     def delete(self, name, version):
diff --git a/installsystems/image.py b/installsystems/image.py
index 48c2499..f4900bf 100644
--- a/installsystems/image.py
+++ b/installsystems/image.py
@@ -16,45 +16,57 @@ import tarfile
 import re
 import cStringIO
 import shutil
+import gzip
 import installsystems.template as istemplate
 import installsystems.tools as istools
 from installsystems.printer import *
 from installsystems.tarball import Tarball
 
+
 class Image(object):
-    '''Abstract class of images'''
+    '''
+    Abstract class of images
+    '''
 
-    extension = ".isimage"
-    extension_data = ".isdata"
     format = "1"
+    extension = ".isimage"
 
     @staticmethod
     def check_image_name(buf):
-        '''Check if @name is a valid image name'''
+        '''
+        Check if @name is a valid image name
+        '''
         return re.match("\w+", buf) is not None
 
     @staticmethod
     def check_image_version(buf):
-        '''Check if @name is a valid image version'''
+        '''
+        Check if @name is a valid image version
+        '''
         return re.match("\d+", buf) is not None
 
+
 class SourceImage(Image):
-    '''Image source manipulation class'''
+    '''
+    Image source manipulation class
+    '''
 
     @classmethod
     def create(cls, path, verbose=True):
-        '''Create an empty source image'''
+        '''
+        Create an empty source image
+        '''
         # check local repository
         if istools.pathtype(path) != "file":
             raise NotImplementedError("SourceImage must be local")
         # main path
         parser_path = os.path.join(path, "parser")
         setup_path = os.path.join(path, "setup")
-        data_path = os.path.join(path, "data")
+        payload_path = os.path.join(path, "payload")
         # create base directories
         arrow("Creating base directories", 1, verbose)
         try:
-            for d in (path, parser_path, setup_path, data_path):
+            for d in (path, parser_path, setup_path, payload_path):
                 if not os.path.exists(d) or not os.path.isdir(d):
                     os.mkdir(d)
         except Exception as e:
@@ -80,8 +92,7 @@ class SourceImage(Image):
             os.umask(umask)
             for dpath in (parser_path, setup_path):
                 for f in os.listdir(dpath):
-                    pf = os.path.join(dpath, f)
-                    os.chmod(pf, 0777 & ~umask)
+                    istools.chrights(os.path.join(dpath, f), mode=0777 & ~umask)
         except Exception as e:
             raise Exception("Unable to set rights on %s: %s" % (pf, e))
         return cls(path, verbose)
@@ -94,109 +105,145 @@ class SourceImage(Image):
         self.base_path = path
         self.parser_path = os.path.join(path, "parser")
         self.setup_path = os.path.join(path, "setup")
-        self.data_path = os.path.join(path, "data")
+        self.payload_path = os.path.join(path, "payload")
         self.verbose = verbose
-        self.validate_source_image()
+        self.validate_source_files()
         self.description = self.parse_description()
+        # script tarball path
+        self.image_name = "%s-%s%s" % (self.description["name"],
+                                       self.description["version"],
+                                       self.extension)
 
-    def validate_source_image(self):
-        '''Check if we are a valid SourceImage'''
-        for d in (self.base_path, self.parser_path, self.setup_path, self.data_path):
+    def validate_source_files(self):
+        '''
+        Check if we are a valid SourceImage directories
+        '''
+        for d in (self.base_path, self.parser_path, self.setup_path, self.payload_path):
             if not os.path.exists(d):
                 raise Exception("Missing directory: %s" % d)
             if not os.path.isdir(d):
                 raise Exception("Not a directory: %s" % d)
             if not os.access(d, os.R_OK|os.X_OK):
                 raise Exception("Unable to access to %s" % d)
+        if not os.path.exists(os.path.join(self.base_path, "description")):
+            raise Exception("No description file")
 
     def build(self, overwrite=False):
-        '''Create packaged image'''
-        # compute script tarball paths
-        tarpath = os.path.join(self.base_path,
-                               "%s-%s%s" % (self.description["name"],
-                                            self.description["version"],
-                                            self.extension))
+        '''
+        Create packaged image
+        '''
         # check if free to create script tarball
-        if os.path.exists(tarpath) and overwrite == False:
+        if os.path.exists(self.image_name) and overwrite == False:
             raise Exception("Tarball already exists. Remove it before")
-        #  Create data tarballs
-        self.create_data_tarballs()
-        # generate description.json
-        jdesc = self.generate_json_description()
+        #  Create payload files
+        payloads = self._create_payloads()
+        # generate a JSON description
+        jdesc = self.generate_json_description(payloads)
         # creating scripts tarball
-        arrow("Creating scripts tarball", 1, self.verbose)
-        arrow("Name %s" % os.path.relpath(tarpath), 2, self.verbose)
+        self._create_image(jdesc)
+
+    def _create_image(self, description):
+        '''
+        Create a script tarball in current directory
+        '''
+        # create tarball
+        arrow("Creating image tarball", 1, self.verbose)
+        arrow("Name %s" % self.image_name, 2, self.verbose)
         try:
-            tarball = Tarball.open(tarpath, mode="w:gz", dereference=True)
+            tarball = Tarball.open(self.image_name, mode="w:gz", dereference=True)
         except Exception as e:
-            raise Exception("Unable to create tarball %s: %s" % (tarpath, e))
+            raise Exception("Unable to create tarball %s: %s" % (self.image_name, e))
         # add .description.json
-        arrow("Add .description.json", 2, self.verbose)
-        tarball.add_str("description.json", jdesc, tarfile.REGTYPE, 0444)
+        arrow("Add description.json", 2, self.verbose)
+        tarball.add_str("description.json", description, tarfile.REGTYPE, 0444)
         # add .format
-        arrow("Add .format", 2, self.verbose)
+        arrow("Add format", 2, self.verbose)
         tarball.add_str("format", self.format, tarfile.REGTYPE, 0444)
         # add parser scripts
         arrow("Add parser scripts", 2, self.verbose)
         tarball.add(self.parser_path, arcname="parser",
-                    recursive=True, filter=self.tar_scripts_filter)
+                    recursive=True, filter=self._tar_scripts_filter)
         # add setup scripts
         arrow("Add setup scripts", 2, self.verbose)
         tarball.add(self.setup_path, arcname="setup",
-                    recursive=True, filter=self.tar_scripts_filter)
+                    recursive=True, filter=self._tar_scripts_filter)
         # closing tarball file
         tarball.close()
 
-    @property
-    def data_tarballs(self):
-        '''List all data tarballs in data directory'''
-        databalls = dict()
-        for dname in os.listdir(self.data_path):
-            filename = "%s-%s-%s%s" % (self.description["name"],
-                                       self.description["version"],
-                                       dname,
-                                       self.extension_data)
-            databalls[dname] = filename
-        return databalls
-
-    def create_data_tarballs(self):
+    def _create_payloads(self):
         '''
-        Create all data tarballs in data directory
+        Create all data payloads in current directory
         Doesn't compute md5 during creation because tarball can
         be created manually
         '''
-        arrow("Creating data tarballs", 1, self.verbose)
-        # build list of data tarball candidate
-        candidates = self.data_tarballs
+        arrow("Creating payloads", 1, self.verbose)
+        # build list of payload files
+        candidates = os.listdir(self.payload_path)
         if len(candidates) == 0:
-            arrow("No data tarball", 2, self.verbose)
-            return
-        # create tarballs
-        for (dn, df) in candidates.items():
-            source_path = os.path.join(self.data_path, dn)
-            dest_path = os.path.join(self.base_path, df)
+            arrow("No payload", 2, self.verbose)
+            return []
+        # create payload files
+        l_l = []
+        for pay in candidates:
+            source_path = os.path.join(self.payload_path, pay)
+            dest_path = "%s-%s-%s%s" % (self.description["name"],
+                                        self.description["version"],
+                                        pay,
+                                        Payload.extension)
+            source_stat = os.stat(source_path)
+            isdir = stat.S_ISDIR(source_stat.st_mode)
             if os.path.exists(dest_path):
-                arrow("Tarball %s already exists." % df, 2, self.verbose)
+                arrow("Payload %s already exists" % dest_path, 2, self.verbose)
             else:
-                arrow("Creating tarball %s" % df, 2, self.verbose)
-                self.create_data_tarball(dest_path, source_path)
+                arrow("Creating payload %s" % dest_path, 2, self.verbose)
+                if isdir:
+                    self._create_payload_tarball(dest_path, source_path)
+                else:
+                    self._create_payload_file(dest_path, source_path)
+            # create payload object
+            payobj = Payload(pay, dest_path, isdir=isdir)
+            payobj.uid = source_stat.st_uid
+            payobj.gid = source_stat.st_gid
+            payobj.mode = stat.S_IMODE(source_stat.st_mode)
+            payobj.mtime = source_stat.st_mtime
+            l_l.append(payobj)
+        return l_l
 
-    def create_data_tarball(self, tar_path, data_path):
-        '''Create a data tarball'''
+    def _create_payload_tarball(self, tar_path, data_path):
+        '''
+        Create a payload tarball
+        This is needed by payload directory
+        '''
         # compute dname to set as a base directory
         dname = os.path.basename(data_path)
-        # not derefence for directory. Verbatim copy.
-        ddref = False if os.path.isdir(data_path) else True
         try:
             # Tarballing
-            tarball = Tarball.open(tar_path, "w:gz", dereference=ddref)
+            tarball = Tarball.open(tar_path, "w:gz", dereference=False)
             tarball.add(data_path, arcname="/", recursive=True)
             tarball.close()
         except Exception as e:
-            raise Exception("Unable to create data tarball %s: %s" % (tar_path, e))
+            raise Exception("Unable to create payload tarball %s: %s" % (tar_path, e))
+
+    def _create_payload_file(self, dest, source):
+        '''
+        Create a payload file
+        Only gzipping it
+        '''
+        fsource = istools.uopen(source)
+        # open file not done in GzipFile, to escape writing of filename
+        # in gzip file. This change md5.
+        fdest = open(dest, "wb")
+        fdest = gzip.GzipFile(filename=os.path.basename(source),
+                              fileobj=fdest,
+                              mtime=os.stat(source).st_mtime)
+        istools.copyfileobj(fsource, fdest)
+        fsource.close()
+        fdest.close()
 
-    def tar_scripts_filter(self, tinfo):
-        '''Filter files which can be included in scripts tarball'''
+    def _tar_scripts_filter(self, tinfo):
+        '''
+        Filter files which can be included in scripts tarball
+        '''
         if not tinfo.name in ("parser", "setup") and os.path.splitext(tinfo.name)[1] != ".py":
             return None
         tinfo.mode = 0755
@@ -204,26 +251,28 @@ class SourceImage(Image):
         tinfo.uname = tinfo.gname = "root"
         return tinfo
 
-    def generate_json_description(self):
-        '''Generate a JSON description file'''
+    def generate_json_description(self, payloads):
+        '''
+        Generate a JSON description file
+        '''
         arrow("Generating JSON description", 1, self.verbose)
         # copy description
         desc = self.description.copy()
         # timestamp image
         arrow("Timestamping", 2, self.verbose)
         desc["date"] = int(time.time())
-        # append data tarballs info
-        desc["data"] = dict()
-        for (dn, df) in self.data_tarballs.items():
-            arrow("Compute MD5 of %s" % df, 2, self.verbose)
-            tb_path = os.path.join(self.base_path, df)
-            desc["data"][dn] = { "size": os.path.getsize(tb_path),
-                                 "md5": istools.md5sum(tb_path) }
+        # append payload infos
+        arrow("Checksumming", 2, self.verbose)
+        desc["payload"] = {}
+        for payload in payloads:
+            desc["payload"][payload.name] = payload.info
         # serialize
         return json.dumps(desc)
 
     def parse_description(self):
-        '''Raise an exception is description file is invalid and return vars to include'''
+        '''
+        Raise an exception is description file is invalid and return vars to include
+        '''
         arrow("Parsing description", 1, self.verbose)
         d = dict()
         try:
@@ -233,12 +282,14 @@ class SourceImage(Image):
             for n in ("name","version", "description", "author"):
                 d[n] = cp.get("image", n)
         except Exception as e:
-            raise Exception("Invalid description: %s" % e)
+            raise Exception("Bad description: %s" % e)
         return d
 
 
 class PackageImage(Image):
-    '''Packaged image manipulation class'''
+    '''
+    Packaged image manipulation class
+    '''
 
     def __init__(self, path, md5name=False, verbose=True):
         Image.__init__(self)
@@ -257,25 +308,43 @@ class PackageImage(Image):
         memfile.seek(0)
         self._tarball = Tarball.open(fileobj=memfile, mode='r:gz')
         self._metadata = self.read_metadata()
+        # build payloads
+        self.payload = {}
+        for pname, pval in self._metadata["payload"].items():
+            if self.md5name:
+                ppath = os.path.join(self.base_path,
+                                     self._metadata["payload"][pname]["md5"])
+            else:
+                ppath = os.path.join(self.base_path,
+                                     "%s-%s%s" % (self.id, pname, Payload.extension))
+            self.payload[pname] = Payload(pname, ppath, **pval)
 
     def __getattr__(self, name):
-        """Give direct access to description field"""
+        '''
+        Give direct access to description field
+        '''
         if name in self._metadata:
             return self._metadata[name]
         raise AttributeError
 
     @property
     def id(self):
-        '''Return image versionned name / id'''
-        return "%s-%s" % (self._metadata["name"], self._metadata["version"])
+        '''
+        Return image versionned name / id
+        '''
+        return "%s-%s" % (self.name, self.version)
 
     @property
     def filename(self):
-        '''Return image filename'''
+        '''
+        Return image filename
+        '''
         return "%s%s" % (self.id, self.extension)
 
     def read_metadata(self):
-        '''Parse tarball and return metadata dict'''
+        '''
+        Parse tarball and return metadata dict
+        '''
         # extract metadata
         arrow("Read tarball metadata", 1, self.verbose)
         img_format = self._tarball.get_str("format")
@@ -285,7 +354,7 @@ class PackageImage(Image):
         if img_format != self.format:
             raise Exception("Invalid tarball image format")
         # check description
-        arrow("Read description file", 2, self.verbose)
+        arrow("Read image description", 2, self.verbose)
         try:
             desc = json.loads(img_desc)
         except Exception as e:
@@ -293,54 +362,36 @@ class PackageImage(Image):
         # FIXME: we should check valid information here
         return desc
 
-    def data_path(self, name):
-        '''Return a data filename from its internal name'''
-        if self.md5name:
-            return os.path.join(self.base_path, self._metadata["data"][name]["md5"])
-        return os.path.join(self.base_path, "%s-%s%s" % (self.id, name, self.extension_data))
-
-    @property
-    def tarballs(self):
-        '''List path of all related tarballs'''
-        d_d = {}
-        name = os.path.join(self.base_path, self.md5) if self.md5name else self.path
-        d_d[name] = {"md5": self.md5, "size": self.size}
-        for key, value in self._metadata["data"].items():
-            d_d[self.data_path(key)] = {"md5": value["md5"], "size": value["size"]}
-        return d_d
-
     def check(self, message="Check MD5"):
-        '''Check md5 and size of tarballs are correct'''
+        '''
+        Check md5 and size of tarballs are correct
+        '''
         arrow(message, 1, self.verbose)
-        # open  /dev/null
-        dn = open("/dev/null", "w")
-        for key,value in self.tarballs.items():
-            arrow(os.path.basename(key), 2, self.verbose)
-            # open tarball
-            tfo = istools.uopen(key)
-            # compute sum and md5 using copy function
-            size, md5 = istools.copyfileobj(tfo ,dn)
-            # close tarball fo
-            tfo.close()
-            # check md5
-            if md5 != value["md5"]:
-                raise Exception("Invalid md5: %s" % key)
-            # check size
-            if size != value["size"]:
-                raise Exception("Invalid size: %s" % key)
-        dn.close()
+        # check image
+        if self.md5 != istools.md5sum(self.path):
+            raise Exception("Invalid MD5 of image %s" % self.name)
+        # check payloads
+        for pay_name, pay_obj in self.payload.items():
+            arrow(pay_name, 2, self.verbose)
+            pay_obj.check()
 
     def run_parser(self, gl):
-        '''Run parser scripts'''
+        '''
+        Run parser scripts
+        '''
         self._run_scripts(gl, "parser")
 
     def run_setup(self, gl):
-        '''Run setup scripts'''
+        '''
+        Run setup scripts
+        '''
         gl["image"] = self
         self._run_scripts(gl, "setup")
 
     def _run_scripts(self, gl, directory):
-        '''Run scripts in a tarball directory'''
+        '''
+        Run scripts in a tarball directory
+        '''
         arrow("Run %s" % directory, 1, self.verbose)
         # get list of parser scripts
         l_scripts = self._tarball.getnames("%s/.*\.py" % directory)
@@ -357,31 +408,205 @@ class PackageImage(Image):
             try:
                 exec(s_scripts, gl, dict())
             except Exception as e:
-                raise
                 raise Exception("Execution script %s fail: %s" %
                                 (os.path.basename(n_scripts), e))
 
-    def extractdata(self, dataname, target, filelist=None):
-        '''Extract a data tarball into target'''
-        # check if dataname exists
-        if dataname not in self._metadata["data"].keys():
-            raise Exception("No such data: %s" % dataname)
-        # tarball info
-        tinfo = self._metadata["data"][dataname]
-        # build data tar paths
-        path = self.data_path(dataname)
+
+class Payload(object):
+    '''
+    Payload class represents a payload object
+    '''
+    extension = ".isdata"
+    legit_attr = ('isdir', 'md5', 'size', 'uid', 'gid', 'mode', 'mtime')
+
+    def __init__(self, name, path, **kwargs):
+        object.__setattr__(self, "name", name)
+        object.__setattr__(self, "path", path)
+        # register legit param
+        for attr in self.legit_attr:
+            setattr(self, attr, None)
+        # set all named param
+        for kwarg in kwargs:
+            if hasattr(self, kwarg):
+                setattr(self, kwarg, kwargs[kwarg])
+
+    def __getattr__(self, name):
+        # get all value with an understance as if there is no underscore
+        if hasattr(self, "_%s" % name):
+            return getattr(self, "_%s" % name)
+        raise AttributeError
+
+    def __setattr__(self, name, value):
+        # set all value which exists have no underscore, but undesrcore exists
+        if name in self.legit_attr:
+            object.__setattr__(self, "_%s" % name, value)
+        else:
+            object.__setattr__(self, name, value)
+
+    def checksummize(self):
+        '''
+        Fill missing md5/size about payload
+        '''
+        fileobj = istools.uopen(self.path)
+        size, md5 = istools.copyfileobj(fileobj, None)
+        if self._size is None:
+            self._size = size
+        if self._md5 is None:
+            self._md5 = md5
+
+    @property
+    def md5(self):
+        '''
+        Return md5 of payload
+        '''
+        if self._md5 is None:
+            self.checksummize()
+        return self._md5
+
+    @property
+    def size(self):
+        '''
+        Return size of payload
+        '''
+        if self._size is None:
+            self.checksummize()
+        return self._size
+
+    @property
+    def uid(self):
+        '''
+        Return uid of owner of orginal payload
+        '''
+        return self._uid if self._uid is not None else 0
+
+    @property
+    def gid(self):
+        '''
+        Return gid of owner of orginal payload
+        '''
+        return self._gid if self._gid is not None else 0
+
+    @property
+    def mode(self):
+        '''
+        Return mode of orginal payload
+        '''
+        if self._mode is not None:
+            return self._mode
+        else:
+            umask = os.umask(0)
+            os.umask(umask)
+            return 0666 & ~umask
+
+    @property
+    def mtime(self):
+        '''
+        Return last modification time of orginal payload
+        '''
+        return self._mtime if self._mtime is not None else time.time()
+
+    @property
+    def info(self):
+        '''
+        return a dict of info about current payload
+        '''
+        return {"md5": self.md5,
+                "size": self.size,
+                "isdir": self.isdir,
+                "uid": self.uid,
+                "gid": self.gid,
+                "mode": self.mode,
+                "mtime": self.mtime}
+
+    def check(self):
+        '''
+        Check that path correspond to current md5 and size
+        '''
+        if self._size is None or self._md5 is None:
+            debug("Check is called on payload with nothing to check")
+            return True
+        fileobj = istools.uopen(self.path)
+        size, md5 = istools.copyfileobj(fileobj, None)
+        if self._size != size:
+            raise Exception("Invalid size of payload %s" % self.name)
+        if self._md5 != md5:
+            raise Exception("Invalid MD5 of payload %s" % self._md5)
+
+    def extract(self, dest, force=False, filelist=None):
+        '''
+        Extract payload into dest
+        filelist is a filter of file in tarball
+        force will overwrite existing file if exists
+        '''
+        if self.isdir:
+            self.extract_tar(dest, force=force, filelist=filelist)
+        else:
+            self.extract_file(dest, force=force)
+
+    def extract_tar(self, dest, force=False, filelist=None):
+        '''
+        Extract a payload which is a tarball.
+        This is used mainly to extract payload from a directory
+        '''
+        # check validity of dest
+        if os.path.exists(dest):
+            if not os.path.isdir(dest):
+                raise Exception("Destination %s is not a directory" % dest)
+            if not force and len(os.listdir(dest)) > 0:
+                raise Exception("Directory %s is not empty (need force)" % dest)
+        else:
+            os.mkdir(dest)
+        # try to open payload file
         try:
-            fo = istools.uopen(path)
+            fo = istools.uopen(self.path)
         except Exception as e:
-            raise Exception("Unable to open data tarball %s" % path)
+            raise Exception("Unable to open payload file %s" % self.path)
+        # try to open tarball on payload
         try:
-            # create tar object
             t = Tarball.open(fileobj=fo, mode="r|gz")
         except Exception as e:
-            raise Exception("Invalid data tarball: %s" % e)
+            raise Exception("Invalid payload tarball: %s" % e)
         # filter on file to extact
-        members = None if filelist is None else [ t.gettarinfo(name) for name in filelist ]
+        members = (None if filelist is None
+                   else [ t.gettarinfo(name) for name in filelist ])
         try:
-            t.extractall(target, members)
+            t.extractall(dest, members)
         except Exception as e:
             raise Exception("Extracting failed: %s" % e)
+        # closing fo
+        t.close()
+        fo.close()
+
+    def extract_file(self, dest, force=False):
+        '''
+        Copy a payload directly to a file
+        Check md5 on the fly
+        '''
+        # if dest is a directory try to create file inside
+        if os.path.isdir(dest):
+            dest = os.path.join(dest, self.name)
+        # check validity of dest
+        if os.path.exists(dest):
+            if not os.path.isfile(dest):
+                raise Exception("Destination %s is not a file" % dest)
+            if not force:
+                raise Exception("File %s already exists" % dest)
+        # opening destination
+        try:
+            f_dst = istools.uopen(dest, "wb")
+        except Exception as e:
+            raise Exception("Unable to open destination file %s" % dest)
+        # try to open payload file
+        try:
+            f_gsrc = istools.uopen(self.path)
+            f_src = gzip.GzipFile(fileobj=f_gsrc)
+        except Exception as e:
+            raise Exception("Unable to open payload file %s" % self.path)
+        # launch copy
+        size, md5 = istools.copyfileobj(f_src, f_dst)
+        # closing fo
+        f_dst.close()
+        f_gsrc.close()
+        f_src.close()
+        # settings file orginal rights
+        istools.chrights(dest, self.uid, self.gid, self.mode, self.mtime)
diff --git a/installsystems/repository.py b/installsystems/repository.py
index b22fb41..1b05574 100644
--- a/installsystems/repository.py
+++ b/installsystems/repository.py
@@ -77,30 +77,33 @@ class Repository(object):
             raise Exception("Read last file failed: %s" % e)
         return 0
 
-    def add(self, package):
+    def add(self, image):
         '''Add a packaged image to repository'''
         # check local repository
         if istools.pathtype(self.config.path) != "file":
             raise NotImplementedError("Repository addition must be local")
         # checking data tarballs md5 before copy
-        package.check("Check tarballs before copy")
+        image.check("Check image and payload before copy")
         # adding file to repository
-        arrow("Copying files", 1, self.verbose)
-        for src,value in package.tarballs.items():
-            dest = os.path.join(self.config.path, value["md5"])
-            basesrc = os.path.basename(src)
+        arrow("Copying images and payload", 1, self.verbose)
+        for obj in [ image ] + image.payload.values():
+            dest = os.path.join(self.config.path, obj.md5)
+            basesrc = os.path.basename(obj.path)
             if os.path.exists(dest):
                 arrow("Skipping %s: already exists" % basesrc, 2, self.verbose)
             else:
-                arrow("Adding %s (%s)" % (basesrc, value["md5"]), 2, self.verbose)
-                istools.copy(src, dest, self.config.uid, self.config.gid, self.config.fmod)
-        # copy is done. create a package inside repo
-        r_package = PackageImage(os.path.join(self.config.path, package.md5),
+                arrow("Adding %s (%s)" % (basesrc, obj.md5), 2, self.verbose)
+                istools.copy(obj.path, dest,
+                             self.config.uid, self.config.gid, self.config.fmod)
+        # copy is done. create an image inside repo
+        r_image = PackageImage(os.path.join(self.config.path, image.md5),
                                  md5name=True, verbose=self.verbose)
-        # checking data tarballs md5 after copy
-        r_package.check("Check tarballs after copy")
+        # checking must be done with original md5
+        r_image.md5 = image.md5
+        # checking image and payload after copy
+        r_image.check("Check image and payload after copy")
         # add description to db
-        self.db.add(r_package)
+        self.db.add(r_image)
         # update last file
         self.update_last()
 
@@ -137,7 +140,7 @@ class Repository(object):
         return self.db.ask("select name,version from image where name = ? and version = ? limit 1", (name,version)).fetchone() is not None
 
     def get(self, name, version):
-        '''return a package from a name and version of pakage'''
+        '''return an image from its name and version'''
         # get file md5 from db
         r = self.db.ask("select md5 from image where name = ? and version = ? limit 1",
                         (name,version)).fetchone()
diff --git a/installsystems/tarball.py b/installsystems/tarball.py
index 1c0fe0f..cf8d52c 100644
--- a/installsystems/tarball.py
+++ b/installsystems/tarball.py
@@ -36,3 +36,10 @@ class Tarball(tarfile.TarFile):
         else:
             return [ tpname for tpname in lorig
                      if re.match(reg_pattern, tpname) ]
+
+    def size(self):
+        '''
+        Return real (uncompressed) size of the tarball,
+        computed as the sum of the size of every member
+        '''
+        return sum(member.size for member in self.getmembers())
diff --git a/installsystems/template.py b/installsystems/template.py
index 748ffd7..af960e5 100644
--- a/installsystems/template.py
+++ b/installsystems/template.py
@@ -24,7 +24,7 @@ setup = """# -*- python -*-
 
 print "hostname: %s" % args.hostname
 
-image.extractdata("rootfs", args.target)
+image.payload["rootfs"].extract(args.target)
 
 # vim:set ts=2 sw=2 noet:
 """
@@ -39,9 +39,10 @@ CREATE TABLE image (md5 TEXT NOT NULL PRIMARY KEY,
                     size INTEGER NOT NULL,
                     UNIQUE(name, version));
 
-CREATE TABLE data (md5 TEXT NOT NULL,
-                   image_md5 TEXT NOT NULL REFERENCES image(md5),
-                   name TEXT NOT NULL,
-                   size INTEGER NOT NULL,
-                   PRIMARY KEY(md5, image_md5));
+CREATE TABLE payload (md5 TEXT NOT NULL,
+                     image_md5 TEXT NOT NULL REFERENCES image(md5),
+                     name TEXT NOT NULL,
+                     isdir INTEGER NOT NULL,
+                     size INTEGER NOT NULL,
+                     PRIMARY KEY(md5, image_md5));
 """
diff --git a/installsystems/tools.py b/installsystems/tools.py
index f42f85c..d45d042 100644
--- a/installsystems/tools.py
+++ b/installsystems/tools.py
@@ -37,7 +37,8 @@ def copyfileobj(sfile, dfile):
             break
         f_len += buf_len
         f_sum.update(buf)
-        dfile.write(buf)
+        if dfile is not None:
+            dfile.write(buf)
     return (f_len , f_sum.hexdigest())
 
 def copy(source, destination, uid=None, gid=None, mode=None, timeout=None):
@@ -67,7 +68,7 @@ def mkdir(path, uid=None, gid=None, mode=None):
     os.mkdir(path)
     chrights(path, uid, gid, mode)
 
-def chrights(path, uid=None, gid=None, mode=None):
+def chrights(path, uid=None, gid=None, mode=None, mtime=None):
     '''Set rights on a file'''
     if uid is not None:
         os.chown(path, uid, -1)
@@ -75,6 +76,8 @@ def chrights(path, uid=None, gid=None, mode=None):
         os.chown(path, -1, gid)
     if mode is not None:
         os.chmod(path, mode)
+    if mtime is not None:
+        os.utime(path, (mtime, mtime))
 
 def pathtype(path):
     '''Return path type. This is usefull to know what kind of path is given'''
@@ -103,14 +106,26 @@ def abspath(path):
     else:
         return None
 
-def uopen(path):
+def uopen(path, mode="rb"):
     '''Universal Open
     Create a file-like object to a file which can be remote
     '''
     ftype = pathtype(path)
     if ftype == "file":
-        return open(path, "r")
+        return open(path, mode)
     elif ftype == "http" or ftype == "ftp":
         return urllib2.urlopen(path)
     else:
         raise NotImplementedError
+
+def getsize(path):
+    '''Return size of path, adding nested directories and regular files'''
+    size = os.path.getsize(path)
+    if not os.path.isdir(path):
+        return size
+    for dirpath, dirnames, filenames in os.walk(path):
+        for entry in dirnames + filenames:
+            info = os.lstat(os.path.join(dirpath, entry))
+            if stat.S_ISREG(info.st_mode) or stat.S_ISDIR(info.st_mode):
+                size += info.st_size
+    return size
-- 
GitLab