[diffoscope] 01/01: Add support for R rds and rdb object files

Ximin Luo infinity0 at debian.org
Thu Apr 20 21:30:55 CEST 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch experimental
in repository diffoscope.

commit 4d313125c252d46af4f059f75160dda0386fd539
Author: Ximin Luo <infinity0 at debian.org>
Date:   Thu Apr 20 21:30:31 2017 +0200

    Add support for R rds and rdb object files
---
 diffoscope/comparators/__init__.py      |   2 +
 diffoscope/comparators/rdata.py         |  94 ++++++++++++++++++++++++++++++++
 diffoscope/comparators/utils/command.py |   5 +-
 tests/comparators/test_rdata.py         |  57 +++++++++++++++++++
 tests/data/rds_expected_diff            |  52 ++++++++++++++++++
 tests/data/test1.rdx                    | Bin 0 -> 276 bytes
 tests/data/test2.rdx                    | Bin 0 -> 276 bytes
 7 files changed, 208 insertions(+), 2 deletions(-)

diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 45f6ca4..81f6d16 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -64,6 +64,8 @@ class ComparatorManager(object):
         ('pdf.PdfFile',),
         ('png.PngFile',),
         ('ppu.PpuFile',),
+        ('rdata.RdbFile',),
+        ('rdata.RdsFile',),
         ('rpm.RpmFile', 'rpm_fallback.RpmFile'),
         ('squashfs.SquashfsFile',),
         ('ar.ArFile',),
diff --git a/diffoscope/comparators/rdata.py b/diffoscope/comparators/rdata.py
new file mode 100644
index 0000000..2b66189
--- /dev/null
+++ b/diffoscope/comparators/rdata.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Ximin Luo <infinity0 at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+from diffoscope.tools import tool_required
+from diffoscope.difference import Difference
+
+from .utils.file import File
+from .utils.command import Command
+
+import binascii
+import shutil
+import os.path
+
+
+HEADER = binascii.a2b_hex("580a000000020003")
+
+# has to be one line
+DUMP_RDB = """lazyLoad(commandArgs(TRUE));\
+for (obj in ls()) {\
+    print(obj);\
+    for (line in deparse(get(obj))) {\
+        cat(line,"\\n")\
+    }\
+}\
+"""
+# unfortunately this above snippet can't detect the build-path differences so
+# diffoscope still falls back to a hexdump
+
+
+def check_rds_extension(f):
+    return f.name.endswith(".rds") or f.name.endswith(".rdx")
+
+def ensure_archive_rdx(f):
+    if f.container and not f.path.endswith(".rdb"):
+        # if we're in an archive, copy the .rdx file over so R can read it
+        bname = os.path.basename(f.name)
+        assert bname.endswith(".rdb")
+        rdx_name = f.name[:-4] + ".rdx"
+        rdx_bname = os.path.basename(rdx_name)
+        rdx_path = f.container.get_member(rdx_name).path
+        shutil.copy(f.path, f.path + ".rdb")
+        shutil.copy(rdx_path, f.path + ".rdx")
+        return f.path + ".rdb"
+    else:
+        return f.path
+
+class RdsReader(Command):
+    @tool_required('Rscript')
+    def cmdline(self):
+        return ['Rscript', '-e', 'args <- commandArgs(TRUE); readRDS(args[1])', self.path]
+
+class RdsFile(File):
+    @staticmethod
+    def recognizes(file):
+        if (check_rds_extension(file) or
+            file.container and check_rds_extension(file.container.source)):
+            with open(file.path, 'rb') as f:
+                return f.read(8) == HEADER
+        return False
+
+    def compare_details(self, other, source=None):
+        return [Difference.from_command(RdsReader, self.path, other.path)]
+
+class RdbReader(Command):
+    @tool_required('Rscript')
+    def cmdline(self):
+        return ['Rscript', '-e', DUMP_RDB, self.path[:-4]]
+
+class RdbFile(File):
+    @staticmethod
+    def recognizes(file):
+        if file.name.endswith(".rdb"):
+            return True
+
+    def compare_details(self, other, source=None):
+        self_path = ensure_archive_rdx(self)
+        other_path = ensure_archive_rdx(other)
+        return [Difference.from_command(RdbReader, self_path, other_path)]
diff --git a/diffoscope/comparators/utils/command.py b/diffoscope/comparators/utils/command.py
index 14b9b37..2d6fd4f 100644
--- a/diffoscope/comparators/utils/command.py
+++ b/diffoscope/comparators/utils/command.py
@@ -20,6 +20,7 @@
 import io
 import abc
 import logging
+import shlex
 import subprocess
 import threading
 
@@ -29,7 +30,7 @@ logger = logging.getLogger(__name__)
 class Command(object, metaclass=abc.ABCMeta):
     def __init__(self, path):
         self._path = path
-        logger.debug("Executing %s", ' '.join(self.cmdline()))
+        logger.debug("Executing %s", ' '.join([shlex.quote(x) for x in self.cmdline()]))
         self._process = subprocess.Popen(self.cmdline(),
                                          shell=False, close_fds=True,
                                          env=self.env(),
@@ -86,7 +87,7 @@ class Command(object, metaclass=abc.ABCMeta):
         returncode = self._process.wait()
         logger.debug(
             "%s returned (exit code: %d)",
-            ' '.join(self.cmdline()),
+            ' '.join([shlex.quote(x) for x in self.cmdline()]),
             returncode,
         )
         return returncode
diff --git a/tests/comparators/test_rdata.py b/tests/comparators/test_rdata.py
new file mode 100644
index 0000000..fa6fac0
--- /dev/null
+++ b/tests/comparators/test_rdata.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Ximin Luo <infinity0 at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+import subprocess
+
+from diffoscope.comparators.rdata import RdsFile
+from diffoscope.comparators.gzip import GzipFile
+
+from utils import diff_ignore_line_numbers
+from utils.data import load_fixture, get_data
+from utils.tools import skip_unless_tools_exist, skip_unless_tool_is_at_least, \
+    skip_if_binutils_does_not_support_x86
+from utils.nonexisting import assert_non_existing
+
+
+
+file1 = load_fixture('test1.rdx')
+file2 = load_fixture('test2.rdx')
+
+def test_identification(file1):
+    assert isinstance(file1, GzipFile)
+
+def test_no_differences(file1):
+    difference = file1.compare(file1)
+    assert difference is None
+
+@pytest.fixture
+def differences(file1, file2):
+    return file1.compare(file2).details
+
+@skip_unless_tools_exist('Rscript')
+def test_num_items(differences):
+    assert len(differences) == 1
+
+@skip_unless_tools_exist('Rscript')
+def test_item_rds(differences):
+    assert differences[0].source1 == 'test1.rdx-content'
+    assert differences[0].source2 == 'test2.rdx-content'
+    expected_diff = get_data('rds_expected_diff')
+    assert differences[0].details[0].unified_diff == expected_diff
diff --git a/tests/data/rds_expected_diff b/tests/data/rds_expected_diff
new file mode 100644
index 0000000..c8c4596
--- /dev/null
+++ b/tests/data/rds_expected_diff
@@ -0,0 +1,52 @@
+@@ -1,40 +1,40 @@
+ $variables
+ $variables$`%*t%`
+-[1]  0 98
++[1]  0 96
+ 
+ $variables$`%t*%`
+-[1]  98 101
++[1] 96 99
+ 
+ $variables$`%t*t%`
+-[1] 199 104
++[1] 195 103
+ 
+ $variables$.__NAMESPACE__.
+-[1] 877  45
++[1] 854  43
+ 
+ $variables$.__S3MethodsTable__.
+-[1] 1043   45
++[1] 1016   43
+ 
+ $variables$.packageName
+-[1] 1088   44
++[1] 1059   42
+ 
+ $variables$tensor
+-[1] 1132  775
++[1] 1101  774
+ 
+ 
+ $references
+ $references$`env::1`
+-[1] 572 305
++[1] 564 290
+ 
+ $references$`env::2`
+-[1] 303 121
++[1] 298 119
+ 
+ $references$`env::3`
+-[1] 424 148
++[1] 417 147
+ 
+ $references$`env::4`
+-[1] 922 121
++[1] 897 119
+ 
+ 
+ $compressed
+ [1] TRUE
+ 
diff --git a/tests/data/test1.rdx b/tests/data/test1.rdx
new file mode 100644
index 0000000..8ed618f
Binary files /dev/null and b/tests/data/test1.rdx differ
diff --git a/tests/data/test2.rdx b/tests/data/test2.rdx
new file mode 100644
index 0000000..83ea1e0
Binary files /dev/null and b/tests/data/test2.rdx differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list