[diffoscope] 02/02: Extract archive members using an auto-incrementing integer, avoiding the need to sanitise filenames. (Closes: #854723)

Chris Lamb chris at chris-lamb.co.uk
Thu Feb 9 23:04:23 CET 2017


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch master
in repository diffoscope.

commit 632a40828a54b399787c25e7fa243f732aef7e05
Author: Chris Lamb <lamby at debian.org>
Date:   Fri Feb 10 10:47:05 2017 +1300

    Extract archive members using an auto-incrementing integer, avoiding the need to sanitise filenames. (Closes: #854723)
    
    Signed-off-by: Chris Lamb <lamby at debian.org>
---
 diffoscope/comparators/utils/libarchive.py | 41 ++++++++++--------------------
 1 file changed, 14 insertions(+), 27 deletions(-)

diff --git a/diffoscope/comparators/utils/libarchive.py b/diffoscope/comparators/utils/libarchive.py
index 2acbb74..70160a0 100644
--- a/diffoscope/comparators/utils/libarchive.py
+++ b/diffoscope/comparators/utils/libarchive.py
@@ -23,6 +23,7 @@ import os.path
 import ctypes
 import logging
 import libarchive
+import collections
 
 from diffoscope.tempfiles import get_temporary_directory
 
@@ -168,11 +169,11 @@ class LibarchiveContainer(Archive):
 
     def get_member_names(self):
         self.ensure_unpacked()
-        return self._member_names
+        return self._members.keys()
 
     def extract(self, member_name, dest_dir):
         self.ensure_unpacked()
-        return os.path.join(self._unpacked, member_name)
+        return self._members[member_name]
 
     def get_member(self, member_name):
         with libarchive.file_reader(self.source.path) as archive:
@@ -197,39 +198,25 @@ class LibarchiveContainer(Archive):
         return LibarchiveMember(self, entry)
 
     def ensure_unpacked(self):
-        if hasattr(self, '_unpacked'):
+        if hasattr(self, '_members'):
             return
 
-        self._unpacked = get_temporary_directory().name
-        self._member_names = []
+        tmpdir = get_temporary_directory().name
+        self._members = collections.OrderedDict()
 
-        logger.debug("Extracting %s to %s", self.source.path, self._unpacked)
+        logger.debug("Extracting %s to %s", self.source.path, tmpdir)
 
         with libarchive.file_reader(self.source.path) as archive:
-            for entry in archive:
-                self._member_names.append(entry.pathname)
+            for idx, entry in enumerate(archive):
+                # Maintain a mapping of archive path to the extracted path,
+                # avoiding the need to sanitise filenames.
+                dst = os.path.join(tmpdir, '{}'.format(idx))
+                self._members[entry.pathname] = dst
 
                 if entry.isdir:
                     continue
 
-                # All extracted locations must be underneath self._unpacked
-                force_prefix = os.path.join(self._unpacked, "")
-
-                # Try to pick a safe and reasonable candidate name
-                candidate_name = os.path.normpath(entry.pathname.rstrip('/' + os.sep))
-                if os.path.isabs(candidate_name):
-                    candidate_name = os.path.relpath(candidate_name, os.path.join(os.path.sep))
-
-                dst = os.path.normpath(os.path.join(self._unpacked, candidate_name))
-                if not dst.startswith(force_prefix):
-                    logger.warn("Skipping member because we could not make a safe name to extract it to: '%s'",
-                                entry.pathname)
-                    continue
-
-                # TODO: need to fix reading these cleaned members. currently
-                # reading will still try to use the uncleaned name.
-                #logging.debug("Extracting %s to %s", entry.pathname, dst)
-                os.makedirs(os.path.dirname(dst), exist_ok=True)
+                logger.debug("Extracting %s to %s", entry.pathname, dst)
 
                 with open(dst, 'wb') as f:
                     for block in entry.get_blocks():
@@ -237,5 +224,5 @@ class LibarchiveContainer(Archive):
 
         logger.debug(
             "Extracted %d entries from %s to %s",
-            len(self._member_names), self.source.path, self._unpacked,
+            len(self._members), self.source.path, tmpdir,
         )

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list