[Git][reproducible-builds/diffoscope][master] Do not call marshal.loads(...) of precompiled Python bytecode as it is...

Chris Lamb (@lamby) gitlab at salsa.debian.org
Tue Aug 13 14:46:51 UTC 2024



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
e75871b0 by Chris Lamb at 2024-08-13T15:46:15+01:00
Do not call marshal.loads(...) of precompiled Python bytecode as it is inherently unsafe. Replace for now with a brief summary of the code section of .pyc files. (Closes: reproducible-builds/diffoscope#371)

- - - - -


5 changed files:

- diffoscope/comparators/python.py
- tests/comparators/test_python.py
- tests/data/pyc_expected_diff
- tests/data/test1.pyc-renamed
- tests/data/test2.pyc-renamed


Changes:

=====================================
diffoscope/comparators/python.py
=====================================
@@ -18,14 +18,11 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import binascii
-import dis
 import io
-import marshal
 import os
 import re
 import struct
 import time
-import types
 
 from diffoscope.difference import Difference
 
@@ -85,55 +82,10 @@ def parse_pyc(f):
     filesz = struct.unpack("<L", filesz)
     yield f"files sz: {filesz[0]}"
 
-    code = marshal.load(f)
-    yield from show_code(code)
-
-
-def show_code(code, indent=""):
-    yield f"{indent}code"
-
-    indent += "   "
-
-    for x in ("argcount", "nlocals", "stacksize", "flags"):
-        yield "{}{: <10}: {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
-    yield from show_hex("code", code.co_code, indent=indent)
-    s = io.StringIO()
-    dis.disassemble(code, file=s)
-    for x in s.getvalue().splitlines():
-        yield "{}{}".format(indent, re_memory_address.sub("", x))
-
-    yield f"{indent}consts"
-    for const in code.co_consts:
-        if isinstance(const, types.CodeType):
-            yield from show_code(const, f"{indent}   ")
-        else:
-            yield f"   {indent}{const!r}"
-
-    for x in (
-        "names",
-        "varnames",
-        "freevars",
-        "cellvars",
-        "filename",
-        "name",
-        "firstlineno",
-    ):
-        yield "{}{: <10} {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
-    yield from show_hex("lnotab", code.co_lnotab, indent=indent)
-
-
-def show_hex(label, val, indent):
-    val = hexlify(val)
-
-    if len(val) < 60:
-        yield f"{indent}{label} {val}"
-        return
-
-    yield f"{indent}{label}"
-    for i in range(0, len(val), 60):
-        yield "{}   {}".format(indent, val[i : i + 60])
+    start = f.tell()
+    f.seek(0, os.SEEK_END)
+    size = f.tell() - start
+    yield f"code:     starts at offset {start} (size: {size} bytes)"
 
 
 def hexlify(val):


=====================================
tests/comparators/test_python.py
=====================================
@@ -32,13 +32,24 @@ pyc1 = load_fixture("test1.pyc-renamed")
 pyc2 = load_fixture("test2.pyc-renamed")
 
 
+def skip_unless_correct_python_version():
+    TEST_FIXTURES_GENERATED_BY = (3, 12)
+
+    display = ".".join(str(x) for x in TEST_FIXTURES_GENERATED_BY)
+
+    return skipif(
+        sys.version_info[:2] != TEST_FIXTURES_GENERATED_BY,
+        reason=f"Only Python {display} can de-marshal test1.pyc-renamed",
+    )
+
+
 @skip_unless_file_version_is_at_least("5.39")
 def test_identification(pyc1, pyc2):
     assert isinstance(pyc1, PycFile)
     assert isinstance(pyc2, PycFile)
 
 
-@skipif(sys.version_info >= (3, 10), reason="Unstable on 3.10+")
+@skip_unless_correct_python_version()
 def test_no_differences(pyc1):
     # Disassembling bytecode prior to Python 3.10 is stable when applied to
     # itself, otherwise various memory offsets (or memory addresses?) are
@@ -52,15 +63,9 @@ def differences(pyc1, pyc2):
 
 
 @skip_unless_file_version_is_at_least("5.39")
-@skipif(
-    sys.version_info[:2] not in {(3, 9), (3, 10)},
-    reason="Only Python 3.9 and 3.10 can de-marshal test1.pyc-renamed",
-)
+@skip_unless_correct_python_version()
 def test_diff(differences):
-    assert_diff_startswith(
-        differences[0],
-        "pyc_expected_diff",
-    )
+    assert_diff_startswith(differences[0], "pyc_expected_diff")
 
 
 def test_compare_non_existing(monkeypatch, pyc1):


=====================================
tests/data/pyc_expected_diff
=====================================
@@ -1,11 +1,6 @@
-@@ -1,9 +1,9 @@
- magic:    0x610d0d0a
--moddate:  0xbd103561 (Sun Sep  5 18:47:25 2021 UTC)
-+moddate:  0xae814d61 (Fri Sep 24 07:43:42 2021 UTC)
- files sz: 14217
- code
-    argcount  : 0
-    nlocals   : 0
-    stacksize : 3
-    flags     : 64
-    code
+@@ -1,4 +1,4 @@
+ magic:    0xcb0d0d0a
+-moddate:  0x436ebb66 (Tue Aug 13 14:31:31 2024 UTC)
++moddate:  0x3f6ebb66 (Tue Aug 13 14:31:27 2024 UTC)
+ files sz: 13
+ code:     starts at offset 16 (size: 121 bytes)


=====================================
tests/data/test1.pyc-renamed
=====================================
Binary files a/tests/data/test1.pyc-renamed and b/tests/data/test1.pyc-renamed differ


=====================================
tests/data/test2.pyc-renamed
=====================================
Binary files a/tests/data/test2.pyc-renamed and b/tests/data/test2.pyc-renamed differ



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/e75871b07e09cfd778181d905f540a15bd71e63a

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/e75871b07e09cfd778181d905f540a15bd71e63a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20240813/b8124c54/attachment.htm>


More information about the rb-commits mailing list