[Git][reproducible-builds/diffoscope][master] 2 commits: Some Python .pyc files report as "data", so support ".pyc" as a fallback extension.

Chris Lamb (@lamby) gitlab at salsa.debian.org
Mon May 9 16:54:00 UTC 2022



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
e601024f by Chris Lamb at 2022-04-29T10:40:08-07:00
Some Python .pyc files report as "data", so support ".pyc" as a fallback extension.

- - - - -
707282bd by Chris Lamb at 2022-05-09T09:53:06-07:00
Add support for extracting vmlinuz/vmlinux Linux kernel images. (Closes: reproducible-builds/diffoscope#304)

- - - - -


5 changed files:

- MANIFEST.in
- diffoscope/comparators/__init__.py
- diffoscope/comparators/python.py
- + diffoscope/comparators/vmlinuz.py
- + scripts/extract-vmlinux


Changes:

=====================================
MANIFEST.in
=====================================
@@ -2,4 +2,5 @@ include COPYING
 include README.rst
 include CONTRIBUTING.md
 include pyproject.toml
+graft scripts
 graft tests


=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -118,6 +118,7 @@ class ComparatorManager:
         ("xsb.XsbFile",),
         ("berkeley_db.BerkeleyDBFile",),
         ("zst.ZstFile",),
+        ("vmlinuz.VmlinuzFile",),
     )
 
     _singleton = {}


=====================================
diffoscope/comparators/python.py
=====================================
@@ -37,6 +37,7 @@ re_memory_address = re.compile(r" at 0x\w+(?=, )")
 class PycFile(File):
     DESCRIPTION = "Python .pyc files"
     FILE_TYPE_RE = re.compile(r"^python .*byte-compiled$")
+    FALLBACK_FILE_EXTENSION_SUFFIX = {".pyc"}
 
     def compare_details(self, other, source=None):
         if isinstance(other, MissingFile):


=====================================
diffoscope/comparators/vmlinuz.py
=====================================
@@ -0,0 +1,65 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2022 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import logging
+import os.path
+import pathlib
+import re
+import subprocess
+
+from diffoscope.tools import tool_required
+
+from .utils.file import File
+from .utils.archive import Archive
+
+logger = logging.getLogger(__name__)
+
+
+class VmlinuzContainer(Archive):
+    def open_archive(self):
+        return self
+
+    def close_archive(self):
+        pass
+
+    def get_member_names(self):
+        return [self.get_compressed_content_name(".vmlinuz")]
+
+    @tool_required("readelf")
+    def extract(self, member_name, dest_dir):
+        dest_path = os.path.join(dest_dir, member_name)
+        logger.debug("extracting vmlinuz to %s", dest_path)
+
+        # Locate extract-vmlinux script
+        script = pathlib.Path(__file__).parent.parent.parent.joinpath(
+            "scripts", "extract-vmlinux"
+        )
+        with open(dest_path, "wb") as f:
+            subprocess.check_call(
+                [script, self.source.path],
+                stdout=f,
+                stderr=None,
+            )
+
+        return dest_path
+
+
+class VmlinuzFile(File):
+    DESCRIPTION = "Linux kernel images"
+    CONTAINER_CLASSES = [VmlinuzContainer]
+    FILE_TYPE_RE = re.compile(r"^Linux kernel\b")


=====================================
scripts/extract-vmlinux
=====================================
@@ -0,0 +1,64 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+# ----------------------------------------------------------------------
+# extract-vmlinux - Extract uncompressed vmlinux from a kernel image
+#
+# Inspired from extract-ikconfig
+# (c) 2009,2010 Dick Streefland <dick at streefland.net>
+#
+# (c) 2011      Corentin Chary <corentin.chary at gmail.com>
+#
+# ----------------------------------------------------------------------
+
+check_vmlinux()
+{
+	# Use readelf to check if it's a valid ELF
+	# TODO: find a better way to check that it's really vmlinux
+	#       and not just an elf
+	readelf -h $1 > /dev/null 2>&1 || return 1
+
+	cat $1
+	exit 0
+}
+
+try_decompress()
+{
+	# The obscure use of the "tr" filter is to work around older versions of
+	# "grep" that report the byte offset of the line instead of the pattern..
+
+	# Try to find the header ($1) and decompress from here
+	for	pos in `tr "$1\n$2" "\n$2=" < "$img" | grep -abo "^$2"`
+	do
+		pos=${pos%%:*}
+		tail -c+$pos "$img" | $3 > $tmp 2> /dev/null
+		check_vmlinux $tmp
+	done
+}
+
+# Check invocation:
+me=${0##*/}
+img=$1
+if	[ $# -ne 1 -o ! -s "$img" ]
+then
+	echo "Usage: $me <kernel-image>" >&2
+	exit 2
+fi
+
+# Prepare temp files:
+tmp=$(mktemp /tmp/vmlinux-XXX)
+trap "rm -f $tmp" 0
+
+# Try each supported compression format in turn.
+try_decompress '\037\213\010' xy    gunzip
+try_decompress '\3757zXZ\000' abcde unxz
+try_decompress 'BZh'          xy    bunzip2
+try_decompress '\135\0\0\0'   xxx   unlzma
+try_decompress '\211\114\132' xy    'lzop -d'
+try_decompress '\002!L\030'   xxx   'lz4 -d'
+try_decompress '(\265/\375'   xxx   unzstd
+
+# Finally check for uncompressed images or objects:
+check_vmlinux $img
+
+# Bail out:
+echo "$me: Cannot find vmlinux." >&2



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d0ad734f54c29e2d1af2bda544cda719cfdad131...707282bdc104560452bac290ccd0e4a3dd346fc2

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d0ad734f54c29e2d1af2bda544cda719cfdad131...707282bdc104560452bac290ccd0e4a3dd346fc2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20220509/f4684eb5/attachment.htm>


More information about the rb-commits mailing list