[Git][reproducible-builds/diffoscope][master] Improve DOS/MBR extraction by adding support for 7z. (Closes: reproducible-builds/diffoscope#333)

Chris Lamb (@lamby) gitlab at salsa.debian.org
Thu Nov 30 12:57:03 UTC 2023


Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
59b86c1f by Chris Lamb at 2023-11-30T12:55:34+00:00
Improve DOS/MBR extraction by adding support for 7z. (Closes: reproducible-builds/diffoscope#333)

- - - - -


5 changed files:

- debian/tests/control
- + diffoscope/comparators/7z.py
- diffoscope/comparators/__init__.py
- diffoscope/external_tools.py
- + tests/comparators/test_7z.py


Changes:

=====================================
debian/tests/control
=====================================
@@ -7,7 +7,7 @@
 #   $ mv debian/tests/control.tmp debian/tests/control
 
 Tests: pytest-with-recommends
-Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], aapt [amd64 arm64 armel armhf i386 mips64el mipsel], abootimg, acl, apksigcopier, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, dexdump [amd64 arm64 armhf i386], docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, html2text, imagemagick, jsbeautifier, libarchive-tools, libxmlb-utils, llvm, lz4 | liblz4-tool, lzip, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, androguard, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
+Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], aapt [amd64 arm64 armel armhf i386 mips64el mipsel], abootimg, acl, apksigcopier, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, dexdump [amd64 arm64 armhf i386], docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, html2text, imagemagick, jsbeautifier, libarchive-tools, libxmlb-utils, llvm, lz4 | liblz4-tool, lzip, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, p7zip-full, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, androguard, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
 
 Tests: pytest
 Depends: python3-all, diffoscope, python3-pytest, python3-h5py, file, python3-tlsh


=====================================
diffoscope/comparators/7z.py
=====================================
@@ -0,0 +1,100 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2023 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import re
+import logging
+import subprocess
+
+from diffoscope.tools import tool_required
+from diffoscope.tempfiles import get_temporary_directory
+from diffoscope.difference import Difference
+
+from .utils.archive import Archive
+from .utils.file import File
+from .utils.command import Command, our_check_output
+
+logger = logging.getLogger(__name__)
+
+
+class SevenZList(Command):
+    @tool_required("7z")
+    def cmdline(self):
+        return (
+            "7z",
+            "l",
+            self.path,
+        )
+
+    def filter(self, line):
+        val = line.decode("utf-8")
+        if val.startswith("Listing archive: ") or val.startswith("Path = "):
+            return b""
+        return line
+
+
+class SevenZContainer(Archive):
+    @tool_required("7z")
+    def open_archive(self):
+        self._temp_dir = get_temporary_directory(suffix="7z")
+
+        try:
+            our_check_output(
+                ("7z", "e", os.path.abspath(self.source.path)),
+                cwd=self._temp_dir.name,
+                stderr=subprocess.DEVNULL,
+            )
+        except subprocess.CalledProcessError:
+            return False
+
+        return self
+
+    def close_archive(self):
+        self._temp_dir.cleanup()
+
+    def get_member_names(self):
+        return os.listdir(self._temp_dir.name)
+
+    def extract(self, member_name, dest_dir):
+        return os.path.join(self._temp_dir.name, member_name)
+
+
+class SevenZFile(File):
+    DESCRIPTION = "Filesystem image"
+    FILE_TYPE_RE = re.compile(r"^DOS/MBR boot sector;")
+    CONTAINER_CLASSES = [SevenZContainer]
+
+    def compare_details(self, other, source=None):
+        return [
+            Difference.from_operation(
+                SevenZList, self.path, other.path, source="7z l"
+            )
+        ]
+
+    @classmethod
+    def recognizes(cls, file):
+        if not super().recognizes(file):
+            return False
+
+        try:
+            cmd = SevenZList(file.path)
+            cmd.start()
+        except RequiredToolNotFound:
+            return False
+
+        return b"Type = gzip\n" not in cmd.output


=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -75,6 +75,7 @@ class ComparatorManager:
         ("ffprobe.FfprobeFile",),
         ("gnumeric.GnumericFile",),
         ("gzip.GzipFile",),
+        ("7z.SevenZFile",),
         ("haskell.HiFile",),
         ("icc.IccFile",),
         ("iso9660.Iso9660File",),


=====================================
diffoscope/external_tools.py
=====================================
@@ -23,6 +23,7 @@ that might resolve to, for example, `/usr/bin/abootimg`.
 """
 
 EXTERNAL_TOOLS = {
+    "7z": {"debian": "p7zip-full"},
     "aapt2": {"debian": "aapt"},
     "abootimg": {"debian": "abootimg", "guix": "abootimg"},
     "androguard": {"debian": "androguard"},


=====================================
tests/comparators/test_7z.py
=====================================
@@ -0,0 +1,74 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2023 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import shutil
+import pytest
+
+from diffoscope.comparators.lz4 import Lz4File
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.specialize import specialize
+
+from ..utils.data import load_fixture, assert_diff
+from ..utils.tools import skip_unless_tools_exist
+from ..utils.nonexisting import assert_non_existing
+
+lz4a = load_fixture("test1.lz4")
+lz4b = load_fixture("test2.lz4")
+
+
+def test_identification(lz4a):
+    assert isinstance(lz4a, Lz4File)
+
+
+def test_no_differences(lz4a):
+    difference = lz4a.compare(lz4a)
+    assert difference is None
+
+
+@pytest.fixture
+def differences(lz4a, lz4b):
+    return lz4a.compare(lz4b).details
+
+
+@skip_unless_tools_exist("lz4")
+def test_content_source(differences):
+    assert differences[0].source1 == "test1"
+    assert differences[0].source2 == "test2"
+
+
+@skip_unless_tools_exist("lz4")
+def test_content_source_without_extension(tmpdir, lz4a, lz4b):
+    path1 = str(tmpdir.join("test1"))
+    path2 = str(tmpdir.join("test2"))
+    shutil.copy(lz4a.path, path1)
+    shutil.copy(lz4b.path, path2)
+    lz4a = specialize(FilesystemFile(path1))
+    lz4b = specialize(FilesystemFile(path2))
+    difference = lz4a.compare(lz4b).details
+    assert difference[0].source1 == "test1-content"
+    assert difference[0].source2 == "test2-content"
+
+
+@skip_unless_tools_exist("lz4")
+def test_content_diff(differences):
+    assert_diff(differences[0], "text_ascii_expected_diff")
+
+
+@skip_unless_tools_exist("lz4")
+def test_compare_non_existing(monkeypatch, lz4a):
+    assert_non_existing(monkeypatch, lz4a)



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/59b86c1faea491aba3319d8358ffed94b52edf6b

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/59b86c1faea491aba3319d8358ffed94b52edf6b
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20231130/8fa56c7b/attachment.htm>


More information about the rb-commits mailing list