[Git][reproducible-builds/diffoscope][master] 15 commits: Don't read the R site and user environment when comparing .rdx, .rdb or .rds files.

Chris Lamb gitlab at salsa.debian.org
Thu Oct 24 14:35:52 UTC 2019



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
b8236d4a by Chris Lamb at 2019-10-24T13:00:27Z
Don't read the R site and user environment when comparing .rdx, .rdb or .rds files.

- - - - -
691ce88b by Chris Lamb at 2019-10-24T14:33:42Z
When formatting command lines for display (e.g. as sources of data), ensure newlines and other metacharacters appear escaped as "\n", etc.

- - - - -
bbfdb57b by Chris Lamb at 2019-10-24T14:33:42Z
When displaying the standard error from commands, ensure we use the escaped version.

- - - - -
face6fbe by Chris Lamb at 2019-10-24T14:33:42Z
Use a ("""-formatted) docstring for our R script to dump variables in .rdb files.

- - - - -
1f89609e by Chris Lamb at 2019-10-24T14:33:42Z
Ensure all R object names are displayed, including ones beginning with a full stop (".").

- - - - -
9f607241 by Chris Lamb at 2019-10-24T14:33:42Z
Sort package fields when dumping data from R .rdb files.

- - - - -
9b5c5fd3 by Chris Lamb at 2019-10-24T14:33:42Z
Add support for easily masking the standard error of commands.

- - - - -
2e33ad67 by Chris Lamb at 2019-10-24T14:33:42Z
Use our new MASK_STDERR toggle in the ffprobe comparator.

- - - - -
0092be09 by Chris Lamb at 2019-10-24T14:33:42Z
Mask/hide standard error when processing R .rdb files.

- - - - -
cb830766 by Chris Lamb at 2019-10-24T14:33:42Z
Don't include useless/misleading "NULL" when dumping data from R .rdb files.

- - - - -
343d01d4 by Chris Lamb at 2019-10-24T14:33:42Z
Format R .rdb package contents as "foo = bar" rather than using ugly and misleading brackets, etc.

- - - - -
895f3987 by Chris Lamb at 2019-10-24T14:33:42Z
Include the object type when dumping R .rdb data.

- - - - -
c23651e9 by Chris Lamb at 2019-10-24T14:33:42Z
Alias the actual object instance.

Gbp-Dch: ignore

- - - - -
338dbdf5 by Chris Lamb at 2019-10-24T14:33:42Z
Use the newline (etc.) escaped version of the commandline being executed in logging/debug output.

- - - - -
f1e80ca2 by Chris Lamb at 2019-10-24T14:33:42Z
Expose R .rdb file's absolute paths in the semantic/human-readable output, not in a hexdump.

- - - - -


4 changed files:

- diffoscope/comparators/ffprobe.py
- diffoscope/comparators/rdata.py
- diffoscope/comparators/utils/command.py
- diffoscope/difference.py


Changes:

=====================================
diffoscope/comparators/ffprobe.py
=====================================
@@ -27,16 +27,13 @@ from .utils.command import Command
 
 
 class Ffprobe(Command):
+    MASK_STDERR = True
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         self.flag = False
 
-    def start(self):
-        super().start()
-
-        self.stderr = ''
-
     @property
     def stdout(self):
         return self._process.stderr.splitlines(True)


=====================================
diffoscope/comparators/rdata.py
=====================================
@@ -32,10 +32,26 @@ import binascii
 
 HEADER = binascii.a2b_hex("580a000000020003")
 
-# has to be one line
-DUMP_RDB = """lazyLoad(commandArgs(TRUE)); for (obj in ls()) { print(obj); for (line in deparse(get(obj))) cat(line,"\\n"); }"""
-# unfortunately this above snippet can't detect the build-path differences so
-# diffoscope still falls back to a hexdump
+DUMP_RDB = r"""
+hideOutput = lazyLoad(commandArgs(TRUE));
+
+for (x in ls(all.names = TRUE, sorted = TRUE)) {
+    obj = get(x)
+
+    cat(sprintf("%s (%s) = ", x, typeof(obj)), sep = "");
+
+    if (typeof(obj) == "environment") {
+        cat("\n{\n", sep = "");
+        for (y in ls(obj, all.names = TRUE, sorted = TRUE))
+            cat(sprintf("    \"%s\" = \"%s\"\n", y, get(y, envir = obj)), sep = "");
+        cat("}\n");
+    } else {
+        for (line in deparse(obj))
+            cat(line, "\n", sep = "");
+    }
+    cat("\n");
+}
+"""
 
 logger = logging.getLogger(__name__)
 
@@ -87,6 +103,7 @@ class RdsReader(Command):
     def cmdline(self):
         return [
             'Rscript',
+            '--no-environ',
             '-e',
             'args <- commandArgs(TRUE); readRDS(args[1])',
             self.path,
@@ -111,9 +128,11 @@ class RdsFile(File):
 
 
 class RdbReader(Command):
+    MASK_STDERR = True
+
     @tool_required('Rscript')
     def cmdline(self):
-        return ['Rscript', '-e', DUMP_RDB, self.path]
+        return ['Rscript', '--no-environ', '-e', DUMP_RDB, self.path]
 
 
 class RdbFile(File):


=====================================
diffoscope/comparators/utils/command.py
=====================================
@@ -26,15 +26,14 @@ logger = logging.getLogger(__name__)
 
 
 class Command(metaclass=abc.ABCMeta):
+    MASK_STDERR = False
     MAX_STDERR_LINES = 50
 
     def __init__(self, path):
         self._path = path
 
     def start(self):
-        logger.debug(
-            "Executing %s", ' '.join([shlex.quote(x) for x in self.cmdline()])
-        )
+        logger.debug("Executing %s", self.shell_cmdline())
 
         self._stdin = self.stdin()
         # "stdin" used to be a feeder but we didn't need the functionality so
@@ -67,12 +66,15 @@ class Command(metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     def shell_cmdline(self):
-        return ' '.join(
-            map(
-                lambda x: '{}' if x == self.path else shlex.quote(x),
-                self.cmdline(),
-            )
-        )
+        def fn(x):
+            if x == self.path:
+                return '{}'
+            x = repr(x)
+            if ' ' not in x:
+                x = x[1:-1]
+            return x
+
+        return ' '.join(fn(x) for x in self.cmdline())
 
     def env(self):
         return None  # inherit parent environment by default
@@ -88,6 +90,9 @@ class Command(metaclass=abc.ABCMeta):
         pass
 
     def _read_stderr(self):
+        if self.MASK_STDERR:
+            return ""
+
         buf = ""
         lines = self._process.stderr.splitlines(True)
 


=====================================
diffoscope/difference.py
=====================================
@@ -296,12 +296,12 @@ class Difference:
 
         if command1 and command1.stderr:
             difference.add_comment(
-                "stderr from `{}`:".format(' '.join(command1.cmdline()))
+                "stderr from `{}`:".format(command1.shell_cmdline())
             )
             difference.add_comment(command1.stderr)
         if command2 and command2.stderr:
             difference.add_comment(
-                "stderr from `{}`:".format(' '.join(command2.cmdline()))
+                "stderr from `{}`:".format(command2.shell_cmdline())
             )
             difference.add_comment(command2.stderr)
 



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/compare/ea4c94a75ddfbf5b967e3c16d4c78ad3fce41212...f1e80ca2331dd966fd2489264f6cd4e92a39a874

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/compare/ea4c94a75ddfbf5b967e3c16d4c78ad3fce41212...f1e80ca2331dd966fd2489264f6cd4e92a39a874
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20191024/b10bee97/attachment.htm>


More information about the rb-commits mailing list