[Git][reproducible-builds/diffoscope][master] 3 commits: Correct string representation output in the traceback when we cannot locate a...

Chris Lamb gitlab at salsa.debian.org
Wed Oct 23 20:36:22 UTC 2019



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
2478e9c0 by Chris Lamb at 2019-10-23T11:53:17Z
Correct string representation output in the traceback when we cannot locate a specific item in a container.

- - - - -
c98e40ff by Chris Lamb at 2019-10-23T13:20:03Z
To match the libarchive container, raise a KeyError exception if we request an invalid member from a directory container.

- - - - -
ea4c94a7 by Chris Lamb at 2019-10-23T20:07:51Z
Rework and refactor the handling of R .rdb files with respect to locating the parallel .rdx prior to inspecting the file to ensure that we do not add files to the user's filesystem in the case of directly comparing two .rdb files or, worse, overwriting a file in its place.

- - - - -


3 changed files:

- diffoscope/comparators/directory.py
- diffoscope/comparators/rdata.py
- diffoscope/comparators/utils/libarchive.py


Changes:

=====================================
diffoscope/comparators/directory.py
=====================================
@@ -266,9 +266,11 @@ class DirectoryContainer(Container):
         if not os.path.islink(member_path) and os.path.isdir(member_path):
             return FilesystemDirectory(member_path)
 
-        return FilesystemFile(
-            os.path.join(self.source.path, member_name), container=self
-        )
+        path = os.path.join(self.source.path, member_name)
+        if not os.path.exists(path):
+            raise KeyError("%s not found in directory" % member_name)
+
+        return FilesystemFile(path, container=self)
 
     def comparisons(self, other):
         my_members = collections.OrderedDict(self.get_adjusted_members_sizes())


=====================================
diffoscope/comparators/rdata.py
=====================================
@@ -18,6 +18,7 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 from diffoscope.tools import tool_required
+from diffoscope.tempfiles import get_temporary_directory
 from diffoscope.difference import Difference
 
 from .utils.file import File
@@ -25,6 +26,7 @@ from .utils.command import Command
 
 import shutil
 import os.path
+import logging
 import binascii
 
 
@@ -35,27 +37,49 @@ DUMP_RDB = """lazyLoad(commandArgs(TRUE)); for (obj in ls()) { print(obj); for (
 # unfortunately this above snippet can't detect the build-path differences so
 # diffoscope still falls back to a hexdump
 
+logger = logging.getLogger(__name__)
+
 
 def check_rds_extension(f):
     return f.name.endswith(".rds") or f.name.endswith(".rdx")
 
 
-def ensure_archive_rdx(f):
-    if not f.container or f.path.endswith(".rdb"):
-        return f.path
+def get_module_path_for_rdb(rdb):
+    """
+    R's lazyLoad method does not take a filename directly to an .rdb file (eg.
+    `/path/to/foo.rdb`) but rather the path without any extension (eg.
+    `/path/to/foo`). It also requires that the .rdx file exists at
+    `/path/to/foo.rdx`.
+
+    We thus locate the corresponding .rdx file in the surrounding container and
+    copy that to `foo.rdx`. We use a temporary directory to ensure we do not
+    add files to the user's filesystem in the case of directly comparing two
+    .rdb files or, worse, overwriting a file in its place.
+    """
+
+    # If we are not in a container, we will never be able to locate the
+    # corresponding .rdx
+    if rdb.container is None:
+        return
+
+    # Calculate location of parallel .rdx file
+    rdx_name = "{}.rdx".format(os.path.basename(os.path.splitext(rdb.name)[0]))
 
-    # if we're in an archive, copy the .rdx file over so R can read it
-    bname = os.path.basename(f.name)
-    assert bname.endswith(".rdb")
-    rdx_name = f.name[:-4] + ".rdx"
     try:
-        rdx_path = f.container.get_member(rdx_name).path
+        rdx = rdb.container.get_member(rdx_name)
     except KeyError:
-        return f.path
-        # R will fail, diffoscope will report the error and continue
-    shutil.copy(f.path, f.path + ".rdb")
-    shutil.copy(rdx_path, f.path + ".rdx")
-    return f.path + ".rdb"
+        # Corresponding .rdx does not exist
+        return
+
+    temp_dir = get_temporary_directory().name
+    prefix = os.path.join(temp_dir, "temp")
+
+    logger.debug("Copying %s and %s to %s", rdx.path, rdb.path, temp_dir)
+    shutil.copy(rdb.path, '{}.rdb'.format(prefix))
+    shutil.copy(rdx.path, '{}.rdx'.format(prefix))
+
+    # Return the "module" path, ie. without an extension
+    return os.path.join(temp_dir, "temp")
 
 
 class RdsReader(Command):
@@ -89,7 +113,7 @@ class RdsFile(File):
 class RdbReader(Command):
     @tool_required('Rscript')
     def cmdline(self):
-        return ['Rscript', '-e', DUMP_RDB, self.path[:-4]]
+        return ['Rscript', '-e', DUMP_RDB, self.path]
 
 
 class RdbFile(File):
@@ -97,6 +121,10 @@ class RdbFile(File):
     FILE_EXTENSION_SUFFIX = '.rdb'
 
     def compare_details(self, other, source=None):
-        self_path = ensure_archive_rdx(self)
-        other_path = ensure_archive_rdx(other)
-        return [Difference.from_command(RdbReader, self_path, other_path)]
+        a = get_module_path_for_rdb(self)
+        b = get_module_path_for_rdb(other)
+
+        if a is None or b is None:
+            return []
+
+        return [Difference.from_command(RdbReader, a, b)]


=====================================
diffoscope/comparators/utils/libarchive.py
=====================================
@@ -259,7 +259,7 @@ class LibarchiveContainer(Archive):
             for entry in archive:
                 if entry.pathname == member_name:
                     return self.get_subclass(entry)
-        raise KeyError('%s not found in archive', member_name)
+        raise KeyError('%s not found in archive' % member_name)
 
     def get_filtered_members(self):
         try:



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/compare/2280df550bd7b2eb9fa8e2af380ed9c2ba47ea0f...ea4c94a75ddfbf5b967e3c16d4c78ad3fce41212

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/compare/2280df550bd7b2eb9fa8e2af380ed9c2ba47ea0f...ea4c94a75ddfbf5b967e3c16d4c78ad3fce41212
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20191023/0112d50e/attachment.htm>


More information about the rb-commits mailing list