[diffoscope] 01/01: Ensure files extracted from squashfs are timely deleted

Jérémy Bobbio lunar at moszumanska.debian.org
Mon Jan 4 14:26:19 CET 2016


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit d6bb96fac3ec36782c81b0aa2251a42d66a2061e
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Mon Jan 4 12:51:02 2016 +0100

    Ensure files extracted from squashfs are timely deleted
    
    We used to parse the squashfs listing to create a dictionary holding
    all File objects. It works, but it means that we keep a reference
    alive for each file as long as we are processing the squashfs.
    This is bad because we now remove the temporary directory when the File
    is garbage collected. We could thus be running out of disk space
    pretty quickly for large squashfs before we finish processing them entirely.
    
    So instead, we store in the dictionary a reference to the relevant squashfs
    member class and the arguments required to instantiate it. The instantiation
    will be done when a member is requested from the SquashfsContainer.
---
 diffoscope/comparators/squashfs.py | 64 +++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/diffoscope/comparators/squashfs.py b/diffoscope/comparators/squashfs.py
index 9e5216b..7f7b1be 100644
--- a/diffoscope/comparators/squashfs.py
+++ b/diffoscope/comparators/squashfs.py
@@ -67,9 +67,15 @@ class SquashfsRegularFile(SquashfsMember):
     # -rw-r--r-- user/group   446 2015-06-24 14:49 squashfs-root/text
     LINE_RE = re.compile(r'^\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(?P<member_name>.*)$')
 
-    def __init__(self, archive, line):
+    @staticmethod
+    def parse(line):
         m = SquashfsRegularFile.LINE_RE.match(line)
-        SquashfsMember.__init__(self, archive, m.group('member_name'))
+        if not m:
+            raise SquashfsInvalidLineFormat('invalid line format')
+        return m.groupdict()
+
+    def __init__(self, archive, member_name):
+        SquashfsMember.__init__(self, archive, member_name)
 
 
 class SquashfsDirectory(Directory, SquashfsMember):
@@ -77,11 +83,15 @@ class SquashfsDirectory(Directory, SquashfsMember):
     # drwxr-xr-x user/group    51 2015-06-24 14:47 squashfs-root
     LINE_RE = re.compile(r'^\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(?P<member_name>.*)$')
 
-    def __init__(self, archive, line):
+    @staticmethod
+    def parse(line):
         m = SquashfsDirectory.LINE_RE.match(line)
         if not m:
             raise SquashfsInvalidLineFormat('invalid line format')
-        SquashfsMember.__init__(self, archive, m.group('member_name') or '/')
+        return m.groupdict()
+
+    def __init__(self, archive, member_name):
+        SquashfsMember.__init__(self, archive, member_name or '/')
 
     def compare(self, other, source=None):
         return None
@@ -108,12 +118,16 @@ class SquashfsSymlink(Symlink, SquashfsMember):
     # lrwxrwxrwx user/group   6 2015-06-24 14:47 squashfs-root/link -> broken
     LINE_RE = re.compile(r'^\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(?P<member_name>.*)\s+->\s+(?P<destination>.*)$')
 
-    def __init__(self, archive, line):
+    @staticmethod
+    def parse(line):
         m = SquashfsSymlink.LINE_RE.match(line)
         if not m:
             raise SquashfsInvalidLineFormat('invalid line format')
-        SquashfsMember.__init__(self, archive, m.group('member_name'))
-        self._destination = m.group('destination')
+        return m.groupdict()
+
+    def __init__(self, archive, member_name, destination):
+        SquashfsMember.__init__(self, archive, member_name)
+        self._destination = destination
 
     def is_symlink(self):
         return True
@@ -132,14 +146,32 @@ class SquashfsDevice(Device, SquashfsMember):
                  'b': stat.S_IFBLK,
                }
 
-    def __init__(self, archive, line):
+    @staticmethod
+    def parse(line):
         m = SquashfsDevice.LINE_RE.match(line)
         if not m:
             raise SquashfsInvalidLineFormat('invalid line format')
-        SquashfsMember.__init__(self, archive, m.group('member_name'))
-        self._mode = SquashfsDevice.KIND_MAP[m.group('kind')]
-        self._major = int(m.group('major'))
-        self._minor = int(m.group('minor'))
+        d = m.groupdict()
+        try:
+            d['mode'] = SquashfsDevice.KIND_MAP[d['kind']]
+            del d['kind']
+        except KeyError:
+            raise SquashfsInvalidLineFormat('unknown device kind %s' % d['kind'])
+        try:
+            d['major'] = int(d['major'])
+        except ValueError:
+            raise SquashfsInvalidLineFormat('unable to parse major number %s' % d['major'])
+        try:
+            d['minor'] = int(d['minor'])
+        except ValueError:
+            raise SquashfsInvalidLineFormat('unable to parse minor number %s' % d['minor'])
+        return d
+
+    def __init__(self, archive, member_name, mode, major, minor):
+        SquashfsMember.__init__(self, archive, member_name)
+        self._mode = mode
+        self._major = major
+        self._minor = minor
 
     def get_device(self):
         return (self._mode, self._major, self._minor)
@@ -172,14 +204,15 @@ class SquashfsContainer(Archive):
                 continue
             if len(line) > 0 and line[0] in SQUASHFS_LS_MAPPING:
                 try:
-                    yield SQUASHFS_LS_MAPPING[line[0]](self, line)
+                    cls = SQUASHFS_LS_MAPPING[line[0]]
+                    yield cls, cls.parse(line)
                 except SquashfsInvalidLineFormat:
                     logger.warning('Invalid squashfs entry: %s', line)
             else:
                 logger.warning('Unknown squashfs entry: %s', line)
 
     def open_archive(self):
-        return dict([(m.name, m) for m in self.entries(self.source.path)])
+        return {kwargs['member_name']: (cls, kwargs) for cls, kwargs in self.entries(self.source.path)}
 
     def close_archive(self):
         pass
@@ -197,7 +230,8 @@ class SquashfsContainer(Archive):
         return '%s%s' % (dest_dir, member_name)
 
     def get_member(self, member_name):
-        return self.archive[member_name]
+        cls, kwargs = self.archive[member_name]
+        return cls(self, **kwargs)
 
 
 class SquashfsFile(File):

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list