[Git][reproducible-builds/diffoscope][master] Do not call marshal.loads(...) of precompiled Python bytecode as it is...
Chris Lamb (@lamby)
gitlab at salsa.debian.org
Tue Aug 13 14:46:51 UTC 2024
Chris Lamb pushed to branch master at Reproducible Builds / diffoscope
Commits:
e75871b0 by Chris Lamb at 2024-08-13T15:46:15+01:00
Do not call marshal.loads(...) of precompiled Python bytecode as it is inherently unsafe. Replace for now with a brief summary of the code section of .pyc files. (Closes: reproducible-builds/diffoscope#371)
- - - - -
5 changed files:
- diffoscope/comparators/python.py
- tests/comparators/test_python.py
- tests/data/pyc_expected_diff
- tests/data/test1.pyc-renamed
- tests/data/test2.pyc-renamed
Changes:
=====================================
diffoscope/comparators/python.py
=====================================
@@ -18,14 +18,11 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import binascii
-import dis
import io
-import marshal
import os
import re
import struct
import time
-import types
from diffoscope.difference import Difference
@@ -85,55 +82,10 @@ def parse_pyc(f):
filesz = struct.unpack("<L", filesz)
yield f"files sz: {filesz[0]}"
- code = marshal.load(f)
- yield from show_code(code)
-
-
-def show_code(code, indent=""):
- yield f"{indent}code"
-
- indent += " "
-
- for x in ("argcount", "nlocals", "stacksize", "flags"):
- yield "{}{: <10}: {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
- yield from show_hex("code", code.co_code, indent=indent)
- s = io.StringIO()
- dis.disassemble(code, file=s)
- for x in s.getvalue().splitlines():
- yield "{}{}".format(indent, re_memory_address.sub("", x))
-
- yield f"{indent}consts"
- for const in code.co_consts:
- if isinstance(const, types.CodeType):
- yield from show_code(const, f"{indent} ")
- else:
- yield f" {indent}{const!r}"
-
- for x in (
- "names",
- "varnames",
- "freevars",
- "cellvars",
- "filename",
- "name",
- "firstlineno",
- ):
- yield "{}{: <10} {!r}".format(indent, x, getattr(code, f"co_{x}"))
-
- yield from show_hex("lnotab", code.co_lnotab, indent=indent)
-
-
-def show_hex(label, val, indent):
- val = hexlify(val)
-
- if len(val) < 60:
- yield f"{indent}{label} {val}"
- return
-
- yield f"{indent}{label}"
- for i in range(0, len(val), 60):
- yield "{} {}".format(indent, val[i : i + 60])
+ start = f.tell()
+ f.seek(0, os.SEEK_END)
+ size = f.tell() - start
+ yield f"code: starts at offset {start} (size: {size} bytes)"
def hexlify(val):
=====================================
tests/comparators/test_python.py
=====================================
@@ -32,13 +32,24 @@ pyc1 = load_fixture("test1.pyc-renamed")
pyc2 = load_fixture("test2.pyc-renamed")
+def skip_unless_correct_python_version():
+ TEST_FIXTURES_GENERATED_BY = (3, 12)
+
+ display = ".".join(str(x) for x in TEST_FIXTURES_GENERATED_BY)
+
+ return skipif(
+ sys.version_info[:2] != TEST_FIXTURES_GENERATED_BY,
+ reason=f"Only Python {display} can de-marshal test1.pyc-renamed",
+ )
+
+
@skip_unless_file_version_is_at_least("5.39")
def test_identification(pyc1, pyc2):
assert isinstance(pyc1, PycFile)
assert isinstance(pyc2, PycFile)
-@skipif(sys.version_info >= (3, 10), reason="Unstable on 3.10+")
+@skip_unless_correct_python_version()
def test_no_differences(pyc1):
# Disassembling bytecode prior to Python 3.10 is stable when applied to
# itself, otherwise various memory offsets (or memory addresses?) are
@@ -52,15 +63,9 @@ def differences(pyc1, pyc2):
@skip_unless_file_version_is_at_least("5.39")
-@skipif(
- sys.version_info[:2] not in {(3, 9), (3, 10)},
- reason="Only Python 3.9 and 3.10 can de-marshal test1.pyc-renamed",
-)
+@skip_unless_correct_python_version()
def test_diff(differences):
- assert_diff_startswith(
- differences[0],
- "pyc_expected_diff",
- )
+ assert_diff_startswith(differences[0], "pyc_expected_diff")
def test_compare_non_existing(monkeypatch, pyc1):
=====================================
tests/data/pyc_expected_diff
=====================================
@@ -1,11 +1,6 @@
-@@ -1,9 +1,9 @@
- magic: 0x610d0d0a
--moddate: 0xbd103561 (Sun Sep 5 18:47:25 2021 UTC)
-+moddate: 0xae814d61 (Fri Sep 24 07:43:42 2021 UTC)
- files sz: 14217
- code
- argcount : 0
- nlocals : 0
- stacksize : 3
- flags : 64
- code
+@@ -1,4 +1,4 @@
+ magic: 0xcb0d0d0a
+-moddate: 0x436ebb66 (Tue Aug 13 14:31:31 2024 UTC)
++moddate: 0x3f6ebb66 (Tue Aug 13 14:31:27 2024 UTC)
+ files sz: 13
+ code: starts at offset 16 (size: 121 bytes)
=====================================
tests/data/test1.pyc-renamed
=====================================
Binary files a/tests/data/test1.pyc-renamed and b/tests/data/test1.pyc-renamed differ
=====================================
tests/data/test2.pyc-renamed
=====================================
Binary files a/tests/data/test2.pyc-renamed and b/tests/data/test2.pyc-renamed differ
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/e75871b07e09cfd778181d905f540a15bd71e63a
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/e75871b07e09cfd778181d905f540a15bd71e63a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20240813/b8124c54/attachment.htm>
More information about the rb-commits
mailing list