[Git][reproducible-builds/diffoscope][master] 3 commits: Don't include specific ".debug"-like lines in the output, as it invariably a...

Chris Lamb (@lamby) gitlab at salsa.debian.org
Fri Aug 6 10:40:38 UTC 2021



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
79189fff by Chris Lamb at 2021-08-06T11:24:00+01:00
Don't include specific ".debug"-like lines in the output, as it is invariably a duplicate of the debug ID that exists better in the readelf(1) differences for this file.

- - - - -
7ae6dbbb by Chris Lamb at 2021-08-06T11:24:00+01:00
Don't include short "GCC" lines that differ on a single prefix byte either. These are distracting, not very useful and are simply the strings(1) command's idea of the build ID, which is displayed elsewhere in the diff.

- - - - -
1e3bffc5 by Chris Lamb at 2021-08-06T11:39:00+01:00
Update calls (and tests) for the new version of odt2txt.

- - - - -


5 changed files:

- diffoscope/comparators/elf.py
- diffoscope/comparators/odt.py
- tests/comparators/test_elf.py
- tests/comparators/test_odt.py
- tests/data/odt_expected_diff


Changes:

=====================================
diffoscope/comparators/elf.py
=====================================
@@ -640,10 +640,43 @@ class ElfContainer(DecompilableContainer):
 
 
 class Strings(Command):
+    re_debug_line = re.compile(r"^\s?\w{38,40}\.debug\n$")
+    re_gcc_line = re.compile(r"^.?GCC: \([^\)]+\)")
+
     @tool_required("strings")
     def cmdline(self):
         return ("strings", "--all", "--bytes=8", self.path)
 
+    def filter(self, line):
+        val = line.decode("utf-8")
+
+        # Don't include specific ".debug"-like lines in the output, as it is
+        # invariably a duplicate of the debug ID that exists better in the
+        # readelf(1) differences for this file.
+        #
+        # For example:
+        #
+        #   b'0684311d738d2555027e737d5846e7478561fa.debug\n'
+        #   b'·Xu94356212e19a62aadc4db9c3c93076ac997706.debug\n'
+        #
+        if self.re_debug_line.match(val):
+            return b""
+
+        # Don't include short "GCC" lines that differ on a single prefix byte
+        # either. These are distracting, not very useful and are simply the
+        # strings(1) command's idea of the build ID, which is displayed
+        # elsewhere in the diff.
+        #
+        # For example:
+        #
+        #   │ -GCC: (Debian 10.2.1-6) 10.2.1 20210110
+        #   │ +QGCC: (Debian 10.2.1-6) 10.2.1 20210110
+        #
+        if self.re_gcc_line.match(val):
+            return b""
+
+        return line
+
 
 class ElfFile(File):
     DESCRIPTION = "ELF binaries"


=====================================
diffoscope/comparators/odt.py
=====================================
@@ -28,7 +28,7 @@ from .utils.command import Command
 class Odt2txt(Command):
     @tool_required("odt2txt")
     def cmdline(self):
-        return ("odt2txt", "--encoding=UTF-8", self.path)
+        return ("odt2txt", "--stdout", self.path)
 
 
 class OdtFile(File):


=====================================
tests/comparators/test_elf.py
=====================================
@@ -245,12 +245,10 @@ def test_differences_with_dbgsym(dbgsym_differences):
     assert dbgsym_differences.details[2].source1 == "data.tar.xz"
     bin_details = dbgsym_differences.details[2].details[0].details[0]
     assert bin_details.source1 == "./usr/bin/test"
-    assert bin_details.details[1].source1.startswith("strings --all")
-    assert "shstrtab" in bin_details.details[1].unified_diff
-    assert bin_details.details[2].source1.startswith("objdump")
+    assert bin_details.details[1].source1.startswith("objdump")
     assert (
         "test-cases/dbgsym/package/test.c:2"
-        in bin_details.details[2].unified_diff
+        in bin_details.details[1].unified_diff
     )
 
 
@@ -259,8 +257,8 @@ def test_differences_with_dbgsym(dbgsym_differences):
 @skip_unless_module_exists("debian.deb822")
 def test_original_gnu_debuglink(dbgsym_differences):
     bin_details = dbgsym_differences.details[2].details[0].details[0]
-    assert ".gnu_debuglink" in bin_details.details[3].source1
-    assert_diff(bin_details.details[3], "gnu_debuglink_expected_diff")
+    assert ".gnu_debuglink" in bin_details.details[2].source1
+    assert_diff(bin_details.details[2], "gnu_debuglink_expected_diff")
 
 
 def test_ignore_readelf_errors1_identify(ignore_readelf_errors1):


=====================================
tests/comparators/test_odt.py
=====================================
@@ -17,17 +17,27 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import pytest
+import subprocess
 
 from diffoscope.comparators.odt import OdtFile
 
-from ..utils.data import load_fixture, get_data
-from ..utils.tools import skip_unless_tools_exist
+from ..utils.data import load_fixture, assert_diff
+from ..utils.tools import skip_unless_tool_is_at_least
+
 from ..utils.nonexisting import assert_non_existing
 
 odt1 = load_fixture("test1.odt")
 odt2 = load_fixture("test2.odt")
 
 
+def odt2txt_version():
+    try:
+        out = subprocess.check_output(["odt2txt", "--version"])
+    except subprocess.CalledProcessError as e:
+        out = e.output
+    return out.decode("UTF-8").splitlines()[0].split()[1].strip()
+
+
 def test_identification(odt1):
     assert isinstance(odt1, OdtFile)
 
@@ -42,12 +52,11 @@ def differences(odt1, odt2):
     return odt1.compare(odt2).details
 
 
-@skip_unless_tools_exist("odt2txt")
+@skip_unless_tool_is_at_least("odt2txt", odt2txt_version, "0.7")
 def test_diff(differences):
-    expected_diff = get_data("odt_expected_diff")
-    assert differences[0].unified_diff == expected_diff
+    assert_diff(differences[0], "odt_expected_diff")
 
 
-@skip_unless_tools_exist("odt2txt")
+@skip_unless_tool_is_at_least("odt2txt", odt2txt_version, "0.7")
 def test_compare_non_existing(monkeypatch, odt1):
     assert_non_existing(monkeypatch, odt1, has_null_source=False)


=====================================
tests/data/odt_expected_diff
=====================================
@@ -1,5 +1,3 @@
-@@ -1,3 +1,3 @@
- 
--a
-+b
- 
+@@ -1 +1 @@
+-a
++b



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/232aea244e2750958fa714d1c1d56870afb5b89c...1e3bffc5b6516c5cd5ae03963a321987b329c275

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/232aea244e2750958fa714d1c1d56870afb5b89c...1e3bffc5b6516c5cd5ae03963a321987b329c275
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20210806/d443bfdc/attachment.htm>


More information about the rb-commits mailing list