[Git][reproducible-builds/diffoscope][master] Add support for comparing .pyc files. Thanks to Sergei Trofimovich. (Closes:...

Chris Lamb (@lamby) gitlab at salsa.debian.org
Tue Oct 5 14:00:23 UTC 2021



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
83e7f903 by Chris Lamb at 2021-10-05T14:59:30+01:00
Add support for comparing .pyc files. Thanks to Sergei Trofimovich. (Closes: reproducible-builds/diffoscope#278)

- - - - -


7 changed files:

- diffoscope/comparators/__init__.py
- + diffoscope/comparators/python.py
- + tests/comparators/test_python.py
- + tests/data/pyc_expected_diff
- + tests/data/test1.pyc-renamed
- + tests/data/test2.pyc-renamed
- tests/test_source.py


Changes:

=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -106,6 +106,7 @@ class ComparatorManager:
         ("pe32.Pe32PlusFile",),
         ("pgp.PgpFile",),
         ("pgp.PgpSignature",),
+        ("python.PycFile",),
         ("kbx.KbxFile",),
         ("fit.FlattenedImageTreeFile",),
         ("dtb.DeviceTreeFile",),


=====================================
diffoscope/comparators/python.py
=====================================
@@ -0,0 +1,122 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2021 Chris Lamb <lamby at debian.org>
+# Copyright © 2021 Sergei Trofimovich
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import binascii
+import dis
+import io
+import marshal
+import re
+import struct
+import time
+import types
+
+from diffoscope.difference import Difference
+
+from .utils.file import File
+
+re_memory_address = re.compile(r" at 0x\w+(?=, )")
+
+
+class PycFile(File):
+    DESCRIPTION = "Python .pyc files"
+    FILE_TYPE_RE = re.compile(r"^python .*byte-compiled$")
+
+    def compare_details(self, other, source=None):
+        return [
+            Difference.from_text(
+                describe_pyc(self.path),
+                describe_pyc(other.path),
+                self.path,
+                other.path,
+                source="Python bytecode",
+            )
+        ]
+
+
+def describe_pyc(filename):
+    with open(filename, "rb") as f:
+        return "\n".join(parse_pyc(f))
+
+
+def parse_pyc(f):
+    magic = f.read(4)
+    yield "magic:    {}".format(hexlify(magic))
+
+    f.seek(4, 1)
+    moddate = f.read(4)
+    modtime = time.asctime(time.gmtime(struct.unpack("=L", moddate)[0]))
+    yield "moddate:  {} ({} UTC)".format(hexlify(moddate), modtime)
+
+    filesz = f.read(4)
+    filesz = struct.unpack("=L", filesz)
+    yield f"files sz: {filesz[0]}"
+
+    code = marshal.load(f)
+    yield from show_code(code)
+
+
+def show_code(code, indent=""):
+    yield f"{indent}code"
+
+    indent += "   "
+
+    for x in ("argcount", "nlocals", "stacksize", "flags"):
+        yield "{}{: <10}: {!r}".format(indent, x, getattr(code, f"co_{x}"))
+
+    yield from show_hex("code", code.co_code, indent=indent)
+    s = io.StringIO()
+    dis.disassemble(code, file=s)
+    for x in s.getvalue().splitlines():
+        yield "{}{}".format(indent, re_memory_address.sub("", x))
+
+    yield f"{indent}consts"
+    for const in code.co_consts:
+        if type(const) == types.CodeType:
+            yield from show_code(const, f"{indent}   ")
+        else:
+            yield f"   {indent}{const!r}"
+
+    for x in (
+        "names",
+        "varnames",
+        "freevars",
+        "cellvars",
+        "filename",
+        "name",
+        "firstlineno",
+    ):
+        yield "{}{: <10} {!r}".format(indent, x, getattr(code, f"co_{x}"))
+
+    yield from show_hex("lnotab", code.co_lnotab, indent=indent)
+
+
+def show_hex(label, val, indent):
+    val = hexlify(val)
+
+    if len(val) < 60:
+        yield f"{indent}{label} {val}"
+        return
+
+    yield f"{indent}{label}"
+    for i in range(0, len(val), 60):
+        yield "{}   {}".format(indent, val[i : i + 60])
+
+
+def hexlify(val):
+    return "0x{}".format(binascii.hexlify(val).decode("utf-8"))


=====================================
tests/comparators/test_python.py
=====================================
@@ -0,0 +1,45 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2021 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.python import PycFile
+
+from ..utils.data import assert_diff, load_fixture
+
+
+pyc1 = load_fixture("test1.pyc-renamed")
+pyc2 = load_fixture("test2.pyc-renamed")
+
+
+def test_identification(pyc1, pyc2):
+    assert isinstance(pyc1, PycFile)
+    assert isinstance(pyc2, PycFile)
+
+
+def test_no_differences(pyc1):
+    assert pyc1.compare(pyc1) is None
+
+
+@pytest.fixture
+def differences(pyc1, pyc2):
+    return pyc1.compare(pyc2).details
+
+
+def test_diff(differences):
+    assert_diff(differences[0], "pyc_expected_diff")


=====================================
tests/data/pyc_expected_diff
=====================================
@@ -0,0 +1,11 @@
+@@ -1,9 +1,9 @@
+ magic:    0x610d0d0a
+-moddate:  0xbd103561 (Sun Sep  5 18:47:25 2021 UTC)
++moddate:  0xae814d61 (Fri Sep 24 07:43:42 2021 UTC)
+ files sz: 14217
+ code
+    argcount  : 0
+    nlocals   : 0
+    stacksize : 3
+    flags     : 64
+    code


=====================================
tests/data/test1.pyc-renamed
=====================================
Binary files /dev/null and b/tests/data/test1.pyc-renamed differ


=====================================
tests/data/test2.pyc-renamed
=====================================
Binary files /dev/null and b/tests/data/test2.pyc-renamed differ


=====================================
tests/test_source.py
=====================================
@@ -130,6 +130,7 @@ ALLOWED_TEST_FILES = {
     "test1.png",
     "test1.ppu",
     "test1.ps",
+    "test1.pyc-renamed",
     "test1.rdb",
     "test1.rdx",
     "test1.rlib",
@@ -192,6 +193,7 @@ ALLOWED_TEST_FILES = {
     "test2.png",
     "test2.ppu",
     "test2.ps",
+    "test2.pyc-renamed",
     "test2.rdb",
     "test2.rdx",
     "test2.rlib",



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/83e7f9031bcf5266ec59c217e28f401b12caa901

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/83e7f9031bcf5266ec59c217e28f401b12caa901
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20211005/c3229258/attachment.htm>


More information about the rb-commits mailing list