[Git][reproducible-builds/diffoscope][master] Support Haskell 9.x series files. (Closes: reproducible-builds/diffoscope#309)

Chris Lamb (@lamby) gitlab at salsa.debian.org
Fri Jul 29 17:56:54 UTC 2022



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
bd6c5d42 by Chris Lamb at 2022-07-29T10:55:26-07:00
Support Haskell 9.x series files. (Closes: reproducible-builds/diffoscope#309)

- - - - -


5 changed files:

- diffoscope/comparators/haskell.py
- tests/comparators/test_haskell.py
- tests/data/haskell_expected_diff
- tests/data/test1.hi
- tests/data/test2.hi


Changes:

=====================================
diffoscope/comparators/haskell.py
=====================================
@@ -108,33 +108,41 @@ class HiFile(File):
                 )
                 return False
 
-            # Skip some old descriptor thingy that has varying size
-            if buf == HI_MAGIC_32:
-                fp.read(4)
-            elif buf == HI_MAGIC_64:
-                fp.read(8)
-
-            # Read version, which is [Char]
             buf = fp.read(1)
 
-            # Small list optimisation - anything less than 0xff has its length
-            # in a single byte; everything else is 0xff followed by the 32-bit
-            # length (big-endian).
-            if buf[0] == 0xFF:
-                buf = fp.read(4)
-                length = struct.unpack(">I", buf)[0]
+            if buf != b"\x00":
+                # Support Haskell 9.x
+                version_found = fp.read(4).decode("utf-8")
             else:
-                length = buf[0]
-
-            # Now read characters; each is 32-bit big-endian.
-            try:
-                version_found = "".join(
-                    chr(struct.unpack(">I", fp.read(4))[0])
-                    for _ in range(length)
-                )
-            except ValueError:
-                # Don't traceback if we encounter and invalid Unicode character.
-                version_found = "(unknown)"
+                # Skip some old descriptor thingy that has varying size. We
+                # only skip one less than we should as we just peeked at a byte
+                # (currently in `buf`)
+                if buf == HI_MAGIC_32:
+                    fp.read(4 - 1)
+                elif buf == HI_MAGIC_64:
+                    fp.read(8 - 1)
+
+                # Read version, which is [Char]
+                buf = fp.read(1)
+
+                # Small list optimisation - anything less than 0xff has its length
+                # in a single byte; everything else is 0xff followed by the 32-bit
+                # length (big-endian).
+                if buf[0] == 0xFF:
+                    buf = fp.read(4)
+                    length = struct.unpack(">I", buf)[0]
+                else:
+                    length = buf[0]
+
+                # Now read characters; each is 32-bit big-endian.
+                try:
+                    version_found = "".join(
+                        chr(struct.unpack(">I", fp.read(4))[0])
+                        for _ in range(length)
+                    )
+                except ValueError:
+                    # Don't traceback if we encounter an invalid Unicode character.
+                    version_found = "(unknown)"
 
             if version_found != HiFile.hi_version:
                 logger.debug(


=====================================
tests/comparators/test_haskell.py
=====================================
@@ -1,7 +1,7 @@
 #
 # diffoscope: in-depth comparison of files, archives, and directories
 #
-# Copyright © 2016-2017, 2020 Chris Lamb <lamby at debian.org>
+# Copyright © 2016-2022 Chris Lamb <lamby at debian.org>
 #
 # diffoscope is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,7 +21,7 @@ import pytest
 from diffoscope.comparators.binary import FilesystemFile
 from diffoscope.comparators.haskell import HiFile
 
-from ..utils.data import get_data, load_fixture
+from ..utils.data import get_data, load_fixture, assert_diff
 from ..utils.tools import skip_unless_tools_exist
 
 
@@ -31,7 +31,7 @@ haskell2 = load_fixture("test2.hi")
 
 @skip_unless_tools_exist("ghc")
 def test_identification(haskell1):
-    if isinstance(haskell1, FilesystemFile):
+    if not isinstance(haskell1, HiFile):
         pytest.skip("mismatch between system ghc and fixture")
 
     assert isinstance(haskell1, HiFile)
@@ -48,8 +48,10 @@ def differences(haskell1, haskell2):
 
 @skip_unless_tools_exist("ghc")
 def test_diff(haskell1, differences):
-    if isinstance(haskell1, FilesystemFile):
+    if not isinstance(haskell1, HiFile):
         pytest.skip("mismatch between system ghc and fixture")
 
-    expected_diff = get_data("haskell_expected_diff")
-    assert differences[0].unified_diff == expected_diff
+    with open("tests/data/haskell_expected_diff", "w") as f:
+        f.write(differences[0].unified_diff)
+
+    assert_diff(differences[0], "haskell_expected_diff")


=====================================
tests/data/haskell_expected_diff
=====================================
The diff for this file was not included because it is too large.

=====================================
tests/data/test1.hi
=====================================
Binary files a/tests/data/test1.hi and b/tests/data/test1.hi differ


=====================================
tests/data/test2.hi
=====================================
Binary files a/tests/data/test2.hi and b/tests/data/test2.hi differ



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/bd6c5d4219baf90865afb69162f3d44416a0f50a

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/bd6c5d4219baf90865afb69162f3d44416a0f50a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20220729/a759b630/attachment.htm>


More information about the rb-commits mailing list