[Git][reproducible-builds/diffoscope][issue-333] WIP on DOS/MBR extraction. (Re: reproducible-builds/diffoscope#333)

Chris Lamb (@lamby) gitlab at salsa.debian.org
Thu Jul 20 12:39:50 UTC 2023



Chris Lamb pushed to branch issue-333 at Reproducible Builds / diffoscope


Commits:
144bbf89 by Chris Lamb at 2023-07-20T13:39:10+01:00
WIP on DOS/MBR extraction. (Re: reproducible-builds/diffoscope#333)

- - - - -


4 changed files:

- + diffoscope/comparators/7z.py
- diffoscope/comparators/__init__.py
- diffoscope/external_tools.py
- tests/comparators/test_gzip.py


Changes:

=====================================
diffoscope/comparators/7z.py
=====================================
@@ -0,0 +1,104 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2023 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import re
+import logging
+import subprocess
+
+from diffoscope.tools import tool_required
+from diffoscope.tempfiles import get_temporary_directory
+from diffoscope.difference import Difference
+
+from .utils.archive import Archive
+from .utils.file import File
+from .utils.command import Command, our_check_output
+
+logger = logging.getLogger(__name__)
+
+
+class SevenZList(Command):
+    @tool_required("7z")
+    def cmdline(self):
+        return (
+            "7z",
+            "l",
+            self.path,
+        )
+
+    def filter(self, line):
+        haystack = line.decode("utf-8")
+        if haystack.startswith("Listing archive: ") or haystack.startswith(
+            "Path = "
+        ):
+            return b""
+        return line
+
+
+class SevenZContainer(Archive):
+    @tool_required("7z")
+    def open_archive(self):
+        self._temp_dir = get_temporary_directory(suffix="7z")
+
+        try:
+            our_check_output(
+                (
+                    "7z",
+                    "e",
+                    os.path.abspath(self.source.path),
+                ),
+                cwd=self._temp_dir.name,
+                stderr=subprocess.DEVNULL,
+            )
+        except subprocess.CalledProcessError:
+            return False
+
+        return self
+
+    def close_archive(self):
+        self._temp_dir.cleanup()
+
+    def get_member_names(self):
+        return os.listdir(self._temp_dir.name)
+
+    def extract(self, member_name, dest_dir):
+        return os.path.join(self._temp_dir.name, member_name)
+
+
+class SevenZFile(File):
+    DESCRIPTION = "Filesystem image"
+    FILE_TYPE_RE = re.compile(r"^DOS/MBR boot sector;")
+    CONTAINER_CLASSES = [SevenZContainer]
+
+    def compare_details(self, other, source=None):
+        return [
+            Difference.from_operation(
+                SevenZList, self.path, other.path, source="7z l"
+            )
+        ]
+
+    @classmethod
+    @tool_required("7z")
+    def recognizes(cls, file):
+        if not super().recognizes(file):
+            return False
+
+        cmd = SevenZList(file.path)
+        cmd.start()
+
+        return b"Type = gzip\n" not in cmd.output


=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -75,6 +75,7 @@ class ComparatorManager:
         ("ffprobe.FfprobeFile",),
         ("gnumeric.GnumericFile",),
         ("gzip.GzipFile",),
+        ("7z.SevenZFile",),
         ("haskell.HiFile",),
         ("icc.IccFile",),
         ("iso9660.Iso9660File",),


=====================================
diffoscope/external_tools.py
=====================================
@@ -23,6 +23,7 @@ that might resolve to, for example, `/usr/bin/abootimg`.
 """
 
 EXTERNAL_TOOLS = {
+    "7z": {"debian": "p7zip-full"},
     "aapt2": {"debian": "aapt"},
     "abootimg": {"debian": "abootimg", "guix": "abootimg"},
     "androguard": {"debian": "androguard"},


=====================================
tests/comparators/test_gzip.py
=====================================
@@ -44,7 +44,8 @@ def test_identification(gzip1):
 
 def test_fallback_recognizes(gzip3):
     # the below always-True assertion is just to document the fact that we
-    # should identify it correctly regardless of any bugs in file(1)
+    # should identify it correctly (due to FALLBACK_FILE_EXTENSION_SUFFIX and
+    # FALLBACK_FILE_TYPE_HEADER_PREFIX) regardless of any bugs in file(1)
     assert (
         "gzip" not in gzip3.magic_file_type or "gzip" in gzip3.magic_file_type
     )



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/144bbf89acdef2726fa06953333bc1186018d74c

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/144bbf89acdef2726fa06953333bc1186018d74c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20230720/57321ad5/attachment.htm>


More information about the rb-commits mailing list