[Git][reproducible-builds/diffoscope][master] Add support for Hierarchical Data Format (HDF5) files. (Closes: reproducible-builds/diffoscope#95)

Chris Lamb gitlab at salsa.debian.org
Thu Apr 9 22:45:18 UTC 2020



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
5a2d08f5 by Chris Lamb at 2020-04-09T23:44:18+01:00
Add support for Hierarchical Data Format (HDF5) files. (Closes: reproducible-builds/diffoscope#95)

- - - - -


8 changed files:

- debian/control
- debian/tests/control
- diffoscope/comparators/__init__.py
- + diffoscope/comparators/hdf.py
- diffoscope/external_tools.py
- + test2.hdf5
- + tests/comparators/test_hdf.py
- + tests/data/hdf5_expected_diff


Changes:

=====================================
debian/control
=====================================
@@ -34,6 +34,7 @@ Build-Depends:
  giflib-tools <!nocheck>,
  gnumeric <!nocheck>,
  gnupg-utils <!nocheck>,
+ hdf5-tools <!nocheck>,
  help2man,
  imagemagick <!nocheck>,
  jsbeautifier <!nocheck>,
@@ -63,6 +64,7 @@ Build-Depends:
  python3-docutils,
  python3-pdfminer <!nocheck>,
  python3-guestfs <!nocheck>,
+ python3-h5py <!nocheck>,
  python3-jsondiff <!nocheck>,
  python3-libarchive-c,
  python3-magic,


=====================================
debian/tests/control
=====================================
@@ -7,7 +7,7 @@
 #   $ mv debian/tests/control.tmp debian/tests/control
 
 Tests: pytest-with-recommends
-Depends: python3-all, diffoscope, black, python3-pytest, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, imagemagick, jsbeautifier, libarchive-tools, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, unzip, wabt, xmlbeans, xxd | vim-common, xz-utils, zip, zstd, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf2, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
+Depends: python3-all, diffoscope, black, python3-pytest, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, imagemagick, jsbeautifier, libarchive-tools, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, unzip, wabt, xmlbeans, xxd | vim-common, xz-utils, zip, zstd, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf2, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
 
 Tests: pytest
 Depends: python3-all, diffoscope, python3-pytest, file, python3-tlsh


=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -52,6 +52,7 @@ class ComparatorManager:
         ('bzip2.Bzip2File',),
         ('cpio.CpioFile',),
         ('deb.DebFile',),
+        ('hdf.Hdf5File',),
         ('dex.DexFile',),
         ('elf.ElfFile',),
         ('macho.MachoFile',),


=====================================
diffoscope/comparators/hdf.py
=====================================
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2020 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import re
+
+from diffoscope.tools import tool_required
+from diffoscope.difference import Difference
+
+from .utils.file import File
+from .utils.command import Command
+
+
+class H5dump(Command):
+    @tool_required('h5dump')
+    def cmdline(self):
+        return ['h5dump', self.path]
+
+
+class Hdf5File(File):
+    DESCRIPTION = "Hierarchical Data Format database"
+    FILE_TYPE_RE = re.compile(r'^Hierarchical Data Format \(version 5\) data')
+
+    def compare_details(self, other, source=None):
+        return [Difference.from_command(H5dump, self.path, other.path)]


=====================================
diffoscope/external_tools.py
=====================================
@@ -69,6 +69,7 @@ EXTERNAL_TOOLS = {
         'guix': 'gnupg',
     },
     'gzip': {'debian': 'gzip', 'arch': 'gzip', 'guix': 'gzip'},
+    'h5dump': {'debian': 'hdf5-tools'},
     'identify': {
         'debian': 'imagemagick',
         'arch': 'imagemagick',


=====================================
test2.hdf5
=====================================
Binary files /dev/null and b/test2.hdf5 differ


=====================================
tests/comparators/test_hdf.py
=====================================
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2020 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import re
+import pytest
+
+from diffoscope.comparators.hdf import Hdf5File
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.specialize import specialize
+
+from ..utils.data import load_fixture, get_data
+from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
+from ..utils.nonexisting import assert_non_existing
+
+hdf5_1 = load_fixture('test1.hdf5')
+hdf5_2 = load_fixture('test2.hdf5')
+
+re_normalise = re.compile(r'(HDF5 ")[^\"]+/([^\"]+")')
+
+
+def hdf5_fixture(prefix):
+    @pytest.fixture
+    def hdf5d(tmpdir):
+        filename = str(tmpdir.join('{}.db'.format(prefix)))
+
+        import h5py
+
+        with h5py.File(filename, 'w'):
+            pass
+
+        return specialize(FilesystemFile(filename))
+
+    return hdf5d
+
+
+hdf5_1 = hdf5_fixture('test1')
+hdf5_2 = hdf5_fixture('test2')
+
+
+@skip_unless_module_exists('h5py')
+def test_identification(hdf5_1):
+    assert isinstance(hdf5_1, Hdf5File)
+
+
+@skip_unless_module_exists('h5py')
+def test_no_differences(hdf5_1):
+    difference = hdf5_1.compare(hdf5_1)
+    assert difference is None
+
+
+@pytest.fixture
+def differences(hdf5_1, hdf5_2):
+    return hdf5_1.compare(hdf5_2).details
+
+
+@skip_unless_tools_exist('h5dump')
+@skip_unless_module_exists('h5py')
+def test_diff(differences):
+    expected_diff = get_data('hdf5_expected_diff')
+    # Remove absolute build path
+    normalised = re_normalise.sub(
+        lambda m: m.group(1) + m.group(2), differences[0].unified_diff
+    )
+    assert normalised == expected_diff
+
+
+@skip_unless_tools_exist('h5dump')
+@skip_unless_module_exists('h5py')
+def test_compare_non_existing(monkeypatch, hdf5_1):
+    assert_non_existing(monkeypatch, hdf5_1, has_null_source=False)


=====================================
tests/data/hdf5_expected_diff
=====================================
@@ -0,0 +1,6 @@
+@@ -1,4 +1,4 @@
+-HDF5 "test1.db" {
++HDF5 "test2.db" {
+ GROUP "/" {
+ }
+ }



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/5a2d08f522c87c99db8ad52bec00f00b358b366e

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/5a2d08f522c87c99db8ad52bec00f00b358b366e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20200409/a212659c/attachment.htm>


More information about the rb-commits mailing list