[diffoscope] 01/01: Add support for R rds and rdb object files
Ximin Luo
infinity0 at debian.org
Thu Apr 20 21:30:55 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch experimental
in repository diffoscope.
commit 4d313125c252d46af4f059f75160dda0386fd539
Author: Ximin Luo <infinity0 at debian.org>
Date: Thu Apr 20 21:30:31 2017 +0200
Add support for R rds and rdb object files
---
diffoscope/comparators/__init__.py | 2 +
diffoscope/comparators/rdata.py | 94 ++++++++++++++++++++++++++++++++
diffoscope/comparators/utils/command.py | 5 +-
tests/comparators/test_rdata.py | 57 +++++++++++++++++++
tests/data/rds_expected_diff | 52 ++++++++++++++++++
tests/data/test1.rdx | Bin 0 -> 276 bytes
tests/data/test2.rdx | Bin 0 -> 276 bytes
7 files changed, 208 insertions(+), 2 deletions(-)
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 45f6ca4..81f6d16 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -64,6 +64,8 @@ class ComparatorManager(object):
('pdf.PdfFile',),
('png.PngFile',),
('ppu.PpuFile',),
+ ('rdata.RdbFile',),
+ ('rdata.RdsFile',),
('rpm.RpmFile', 'rpm_fallback.RpmFile'),
('squashfs.SquashfsFile',),
('ar.ArFile',),
diff --git a/diffoscope/comparators/rdata.py b/diffoscope/comparators/rdata.py
new file mode 100644
index 0000000..2b66189
--- /dev/null
+++ b/diffoscope/comparators/rdata.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Ximin Luo <infinity0 at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+from diffoscope.tools import tool_required
+from diffoscope.difference import Difference
+
+from .utils.file import File
+from .utils.command import Command
+
+import binascii
+import shutil
+import os.path
+
+
+HEADER = binascii.a2b_hex("580a000000020003")
+
+# has to be one line
+DUMP_RDB = """lazyLoad(commandArgs(TRUE));\
+for (obj in ls()) {\
+ print(obj);\
+ for (line in deparse(get(obj))) {\
+ cat(line,"\\n")\
+ }\
+}\
+"""
+# unfortunately this above snippet can't detect the build-path differences so
+# diffoscope still falls back to a hexdump
+
+
+def check_rds_extension(f):
+ return f.name.endswith(".rds") or f.name.endswith(".rdx")
+
+def ensure_archive_rdx(f):
+ if f.container and not f.path.endswith(".rdb"):
+ # if we're in an archive, copy the .rdx file over so R can read it
+ bname = os.path.basename(f.name)
+ assert bname.endswith(".rdb")
+ rdx_name = f.name[:-4] + ".rdx"
+ rdx_bname = os.path.basename(rdx_name)
+ rdx_path = f.container.get_member(rdx_name).path
+ shutil.copy(f.path, f.path + ".rdb")
+ shutil.copy(rdx_path, f.path + ".rdx")
+ return f.path + ".rdb"
+ else:
+ return f.path
+
+class RdsReader(Command):
+ @tool_required('Rscript')
+ def cmdline(self):
+ return ['Rscript', '-e', 'args <- commandArgs(TRUE); readRDS(args[1])', self.path]
+
+class RdsFile(File):
+ @staticmethod
+ def recognizes(file):
+ if (check_rds_extension(file) or
+ file.container and check_rds_extension(file.container.source)):
+ with open(file.path, 'rb') as f:
+ return f.read(8) == HEADER
+ return False
+
+ def compare_details(self, other, source=None):
+ return [Difference.from_command(RdsReader, self.path, other.path)]
+
+class RdbReader(Command):
+ @tool_required('Rscript')
+ def cmdline(self):
+ return ['Rscript', '-e', DUMP_RDB, self.path[:-4]]
+
+class RdbFile(File):
+ @staticmethod
+ def recognizes(file):
+ if file.name.endswith(".rdb"):
+ return True
+
+ def compare_details(self, other, source=None):
+ self_path = ensure_archive_rdx(self)
+ other_path = ensure_archive_rdx(other)
+ return [Difference.from_command(RdbReader, self_path, other_path)]
diff --git a/diffoscope/comparators/utils/command.py b/diffoscope/comparators/utils/command.py
index 14b9b37..2d6fd4f 100644
--- a/diffoscope/comparators/utils/command.py
+++ b/diffoscope/comparators/utils/command.py
@@ -20,6 +20,7 @@
import io
import abc
import logging
+import shlex
import subprocess
import threading
@@ -29,7 +30,7 @@ logger = logging.getLogger(__name__)
class Command(object, metaclass=abc.ABCMeta):
def __init__(self, path):
self._path = path
- logger.debug("Executing %s", ' '.join(self.cmdline()))
+ logger.debug("Executing %s", ' '.join([shlex.quote(x) for x in self.cmdline()]))
self._process = subprocess.Popen(self.cmdline(),
shell=False, close_fds=True,
env=self.env(),
@@ -86,7 +87,7 @@ class Command(object, metaclass=abc.ABCMeta):
returncode = self._process.wait()
logger.debug(
"%s returned (exit code: %d)",
- ' '.join(self.cmdline()),
+ ' '.join([shlex.quote(x) for x in self.cmdline()]),
returncode,
)
return returncode
diff --git a/tests/comparators/test_rdata.py b/tests/comparators/test_rdata.py
new file mode 100644
index 0000000..fa6fac0
--- /dev/null
+++ b/tests/comparators/test_rdata.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Ximin Luo <infinity0 at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+import subprocess
+
+from diffoscope.comparators.rdata import RdsFile
+from diffoscope.comparators.gzip import GzipFile
+
+from utils import diff_ignore_line_numbers
+from utils.data import load_fixture, get_data
+from utils.tools import skip_unless_tools_exist, skip_unless_tool_is_at_least, \
+ skip_if_binutils_does_not_support_x86
+from utils.nonexisting import assert_non_existing
+
+
+
+file1 = load_fixture('test1.rdx')
+file2 = load_fixture('test2.rdx')
+
+def test_identification(file1):
+ assert isinstance(file1, GzipFile)
+
+def test_no_differences(file1):
+ difference = file1.compare(file1)
+ assert difference is None
+
+@pytest.fixture
+def differences(file1, file2):
+ return file1.compare(file2).details
+
+@skip_unless_tools_exist('Rscript')
+def test_num_items(differences):
+ assert len(differences) == 1
+
+@skip_unless_tools_exist('Rscript')
+def test_item_rds(differences):
+ assert differences[0].source1 == 'test1.rdx-content'
+ assert differences[0].source2 == 'test2.rdx-content'
+ expected_diff = get_data('rds_expected_diff')
+ assert differences[0].details[0].unified_diff == expected_diff
diff --git a/tests/data/rds_expected_diff b/tests/data/rds_expected_diff
new file mode 100644
index 0000000..c8c4596
--- /dev/null
+++ b/tests/data/rds_expected_diff
@@ -0,0 +1,52 @@
+@@ -1,40 +1,40 @@
+ $variables
+ $variables$`%*t%`
+-[1] 0 98
++[1] 0 96
+
+ $variables$`%t*%`
+-[1] 98 101
++[1] 96 99
+
+ $variables$`%t*t%`
+-[1] 199 104
++[1] 195 103
+
+ $variables$.__NAMESPACE__.
+-[1] 877 45
++[1] 854 43
+
+ $variables$.__S3MethodsTable__.
+-[1] 1043 45
++[1] 1016 43
+
+ $variables$.packageName
+-[1] 1088 44
++[1] 1059 42
+
+ $variables$tensor
+-[1] 1132 775
++[1] 1101 774
+
+
+ $references
+ $references$`env::1`
+-[1] 572 305
++[1] 564 290
+
+ $references$`env::2`
+-[1] 303 121
++[1] 298 119
+
+ $references$`env::3`
+-[1] 424 148
++[1] 417 147
+
+ $references$`env::4`
+-[1] 922 121
++[1] 897 119
+
+
+ $compressed
+ [1] TRUE
+
diff --git a/tests/data/test1.rdx b/tests/data/test1.rdx
new file mode 100644
index 0000000..8ed618f
Binary files /dev/null and b/tests/data/test1.rdx differ
diff --git a/tests/data/test2.rdx b/tests/data/test2.rdx
new file mode 100644
index 0000000..83ea1e0
Binary files /dev/null and b/tests/data/test2.rdx differ
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list