[Git][reproducible-builds/diffoscope][issue-333] WIP on DOS/MBR extraction. (Re: reproducible-builds/diffoscope#333)
Chris Lamb (@lamby)
gitlab at salsa.debian.org
Thu Jul 20 12:39:50 UTC 2023
Chris Lamb pushed to branch issue-333 at Reproducible Builds / diffoscope
Commits:
144bbf89 by Chris Lamb at 2023-07-20T13:39:10+01:00
WIP on DOS/MBR extraction. (Re: reproducible-builds/diffoscope#333)
- - - - -
4 changed files:
- + diffoscope/comparators/7z.py
- diffoscope/comparators/__init__.py
- diffoscope/external_tools.py
- tests/comparators/test_gzip.py
Changes:
=====================================
diffoscope/comparators/7z.py
=====================================
@@ -0,0 +1,104 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2023 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import re
+import logging
+import subprocess
+
+from diffoscope.tools import tool_required
+from diffoscope.tempfiles import get_temporary_directory
+from diffoscope.difference import Difference
+
+from .utils.archive import Archive
+from .utils.file import File
+from .utils.command import Command, our_check_output
+
+logger = logging.getLogger(__name__)
+
+
+class SevenZList(Command):
+ @tool_required("7z")
+ def cmdline(self):
+ return (
+ "7z",
+ "l",
+ self.path,
+ )
+
+ def filter(self, line):
+ haystack = line.decode("utf-8")
+ if haystack.startswith("Listing archive: ") or haystack.startswith(
+ "Path = "
+ ):
+ return b""
+ return line
+
+
+class SevenZContainer(Archive):
+ @tool_required("7z")
+ def open_archive(self):
+ self._temp_dir = get_temporary_directory(suffix="7z")
+
+ try:
+ our_check_output(
+ (
+ "7z",
+ "e",
+ os.path.abspath(self.source.path),
+ ),
+ cwd=self._temp_dir.name,
+ stderr=subprocess.DEVNULL,
+ )
+ except subprocess.CalledProcessError:
+ return False
+
+ return self
+
+ def close_archive(self):
+ self._temp_dir.cleanup()
+
+ def get_member_names(self):
+ return os.listdir(self._temp_dir.name)
+
+ def extract(self, member_name, dest_dir):
+ return os.path.join(self._temp_dir.name, member_name)
+
+
+class SevenZFile(File):
+ DESCRIPTION = "Filesystem image"
+ FILE_TYPE_RE = re.compile(r"^DOS/MBR boot sector;")
+ CONTAINER_CLASSES = [SevenZContainer]
+
+ def compare_details(self, other, source=None):
+ return [
+ Difference.from_operation(
+ SevenZList, self.path, other.path, source="7z l"
+ )
+ ]
+
+ @classmethod
+ @tool_required("7z")
+ def recognizes(cls, file):
+ if not super().recognizes(file):
+ return False
+
+ cmd = SevenZList(file.path)
+ cmd.start()
+
+ return b"Type = gzip\n" not in cmd.output
=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -75,6 +75,7 @@ class ComparatorManager:
("ffprobe.FfprobeFile",),
("gnumeric.GnumericFile",),
("gzip.GzipFile",),
+ ("7z.SevenZFile",),
("haskell.HiFile",),
("icc.IccFile",),
("iso9660.Iso9660File",),
=====================================
diffoscope/external_tools.py
=====================================
@@ -23,6 +23,7 @@ that might resolve to, for example, `/usr/bin/abootimg`.
"""
EXTERNAL_TOOLS = {
+ "7z": {"debian": "p7zip-full"},
"aapt2": {"debian": "aapt"},
"abootimg": {"debian": "abootimg", "guix": "abootimg"},
"androguard": {"debian": "androguard"},
=====================================
tests/comparators/test_gzip.py
=====================================
@@ -44,7 +44,8 @@ def test_identification(gzip1):
def test_fallback_recognizes(gzip3):
# the below always-True assertion is just to document the fact that we
- # should identify it correctly regardless of any bugs in file(1)
+ # should identify it correctly (due to FALLBACK_FILE_EXTENSION_SUFFIX and
+ # FALLBACK_FILE_TYPE_HEADER_PREFIX) regardless of any bugs in file(1)
assert (
"gzip" not in gzip3.magic_file_type or "gzip" in gzip3.magic_file_type
)
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/144bbf89acdef2726fa06953333bc1186018d74c
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/144bbf89acdef2726fa06953333bc1186018d74c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20230720/57321ad5/attachment.htm>
More information about the rb-commits mailing list