[diffoscope] 10/10: Install detached symbols from debug .deb before comparing ELF files

Jérémy Bobbio lunar at moszumanska.debian.org
Wed Jan 20 16:11:45 CET 2016


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit eb070310e23e44deb6b0caaa7d022d72b13406a6
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Wed Jan 20 14:02:32 2016 +0000

    Install detached symbols from debug .deb before comparing ELF files
    
    Before comparing an ELF file without debug symbols, we test if it's
    coming from a Debian package and has both a Build Id and a .gnu_debuglink
    section. In that case, we now search the packages in the same container
    (e.g. directory) for one containing matching debug symbols.
    
    If one is found, we put the relevant debug symbols in a `.debug` sub-directory
    near where the ELF file has been extracted. This will then be picked up
    automatically when running `objdump --disassemble --line-numbers`.
    
    Sadly, objdump currently does not know how to process compressed debug symbols
    (#812089). So we first have to uncompress the debug symbols, and fix the CRC in
    the original `.gnu_debuglink` section to match the new file. Far from ideal,
    but as we record the original difference before doing the change, this should
    not be much of a problem.
---
 diffoscope/comparators/deb.py                      |  36 ++++++++-
 diffoscope/comparators/elf.py                      |  83 ++++++++++++++++++++-
 tests/comparators/test_elf.py                      |  34 +++++++++
 .../data/dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb | Bin 0 -> 2628 bytes
 tests/data/dbgsym/add/test-dbgsym_1_amd64.deb      | Bin 0 -> 2356 bytes
 .../dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb     | Bin 0 -> 2632 bytes
 tests/data/dbgsym/mult/test-dbgsym_1_amd64.deb     | Bin 0 -> 2364 bytes
 tests/data/dbgsym/test-dbgsym_1.dsc                |  15 ++++
 tests/data/dbgsym/test-dbgsym_1.tar.gz             | Bin 0 -> 729 bytes
 tests/data/gnu_debuglink_expected_diff             |  12 +++
 10 files changed, 178 insertions(+), 2 deletions(-)

diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 9bcfd38..2845f0d 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -19,8 +19,13 @@
 
 import re
 import os.path
+try:
+    from debian import deb822
+except ImportError:
+    deb822 = None
 from diffoscope import logger
 from diffoscope.difference import Difference
+import diffoscope.comparators
 from diffoscope.comparators.binary import File
 from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
 from diffoscope.comparators.utils import \
@@ -28,8 +33,26 @@ from diffoscope.comparators.utils import \
 from diffoscope.comparators.tar import TarContainer
 
 
+# Return a dict with build ids as keys and file as values for all deb in the
+# given container
+def get_build_id_map(container):
+    d = {}
+    for member in container.get_members().values():
+        diffoscope.comparators.specialize(member)
+        if isinstance(member, DebFile) and member.control:
+            build_ids = member.control.get('Build-Ids', None)
+            if build_ids:
+                d.update({build_id: member for build_id in build_ids.split(',')})
+    return d
+
+
 class DebContainer(LibarchiveContainer):
-    pass
+    @property
+    def data_tar(self):
+        for name, member in self.get_members().items():
+            if name.startswith('data.tar.'):
+                diffoscope.comparators.specialize(member)
+                return diffoscope.comparators.specialize(member.as_container.get_member('content'))
 
 
 class DebFile(File):
@@ -51,6 +74,17 @@ class DebFile(File):
                 self._md5sums = {}
         return self._md5sums
 
+    @property
+    def control(self):
+        if not deb822:
+            return None
+        if not hasattr(self, '_control'):
+            control_file = self.as_container.lookup_file('control.tar.gz', 'control.tar', './control')
+            if control_file:
+                with open(control_file.path) as f:
+                    self._control = deb822.Deb822(f)
+        return self._control
+
     def compare_details(self, other, source=None):
         my_content = get_ar_content(self.path)
         other_content = get_ar_content(other.path)
diff --git a/diffoscope/comparators/elf.py b/diffoscope/comparators/elf.py
index 3746f9f..6a233f7 100644
--- a/diffoscope/comparators/elf.py
+++ b/diffoscope/comparators/elf.py
@@ -24,6 +24,7 @@ import subprocess
 from diffoscope import tool_required, OutputParsingError
 from diffoscope import logger
 from diffoscope.comparators.binary import File
+from diffoscope.comparators.deb import DebFile, get_build_id_map
 from diffoscope.comparators.utils import get_ar_content, Command, Container
 from diffoscope.difference import Difference
 
@@ -259,6 +260,32 @@ class ElfStringSection(ElfSection):
                 command_args=[self._name])
 
 
+@tool_required('readelf')
+def get_build_id(path):
+    try:
+        output = subprocess.check_output(['readelf', '--notes', path])
+    except subprocess.CalledProcessError as e:
+        logger.debug('Unable to get Build Id for %s: %s', path, e)
+        return None
+    m = re.search(r'^\s+Build ID: ([0-9a-f]+)$', output.decode('utf-8'), flags=re.MULTILINE)
+    if not m:
+        return None
+    return m.group(1)
+
+
+@tool_required('readelf')
+def get_debug_link(path):
+    try:
+        output = subprocess.check_output(['readelf', '--string-dump=.gnu_debuglink', path])
+    except subprocess.CalledProcessError as e:
+        logger.debug('Unable to get Build Id for %s: %s', path, e)
+        return None
+    m = re.search(r'^\s+\[\s+0\]\s+(\S+)$', output.decode('utf-8', errors='replace'), flags=re.MULTILINE)
+    if not m:
+        return None
+    return m.group(1)
+
+
 class ElfContainer(Container):
     SECTION_FLAG_MAPPING = {'X': ElfCodeSection, 'S': ElfStringSection, '_': ElfSection}
 
@@ -268,7 +295,7 @@ class ElfContainer(Container):
         logger.debug('creating ElfContainer for file %s', self.source.path)
         cmd = ['readelf', '--wide', '--section-headers', self.source.path]
         output = subprocess.check_output(cmd, shell=False)
-
+        has_debug_symbols = False
         try:
             output = output.decode('utf-8').split('\n')
             if output[1].startswith('File:'):
@@ -284,6 +311,8 @@ class ElfContainer(Container):
                 # Strip number column because there may be spaces in the brakets
                 line = line.split(']', 1)[1].split()
                 name, type, flags = line[0], line[1], line[6] + '_'
+                if name.startswith('.debug') or name.startswith('.zdebug'):
+                    has_debug_symbols = True
                 if _should_skip_section(name, type):
                     continue
                 # Use first match, with last option being '_' as fallback
@@ -296,6 +325,58 @@ class ElfContainer(Container):
             logger.debug('OutputParsingError in %s from `%s` output - %s:%s'
                     % (self.__class__.__name__, command, e.__class__.__name__, e))
             raise OutputParsingError(command, self)
+        if not has_debug_symbols:
+            self._install_debug_symbols()
+
+    @tool_required('objcopy')
+    def _install_debug_symbols(self):
+        # Figure out if we are in a Debian package first
+        try:
+            deb = self.source.container.source.container.source.container.source
+        except AttributeError:
+            return
+        # It needs to be a .deb and we need access to a -dbgsym package in
+        # the same .changes, directory or archive
+        if not isinstance(deb, DebFile) and not deb.container:
+            return
+        # Retrieve the Build Id for the ELF file we are examining
+        build_id = get_build_id(self.source.path)
+        debuglink = get_debug_link(self.source.path)
+        if not build_id or not debuglink:
+            return
+        logger.debug('Looking for a dbgsym package for Build Id %s (debuglink: %s)', build_id, debuglink)
+        # Build a map of Build-Ids if it doesn't exist yet
+        if not hasattr(deb.container, 'dbgsym_build_id_map'):
+            deb.container.dbgsym_build_id_map = get_build_id_map(deb.container)
+        if not build_id in deb.container.dbgsym_build_id_map:
+            logger.debug('Unable to find a matching debug package for Build Id %s', build_id)
+            return
+        dbgsym_package = deb.container.dbgsym_build_id_map[build_id]
+        debug_file = dbgsym_package.as_container.data_tar.as_container.lookup_file('./usr/lib/debug/.build-id/{0}/{1}.debug'.format(build_id[0:2], build_id[2:]))
+        # Create a .debug directory and link the debug symbols there with the right name
+        dest_path = os.path.join(os.path.dirname(self.source.path), '.debug', os.path.basename(debuglink))
+        os.mkdir(os.path.dirname(dest_path))
+        # If #812089 was fixed, we would just do:
+        #os.link(debug_file.path, dest_path)
+        # But for now, we need to do more complicated things…
+        # 1. Use objcopy to create a file with only the original .gnu_debuglink section
+	#    as we will have to update it to get the CRC right.
+        debuglink_path = '{}.debuglink'.format(self.source.path)
+        subprocess.check_call(['objcopy', '--only-section=.gnu_debuglink', self.source.path, debuglink_path], shell=False, stderr=subprocess.DEVNULL)
+	# 2. Monkey-patch the ElfSection object created for the .gnu_debuglink to
+        #    change the path to point to this new file
+        section = self._sections['.gnu_debuglink']
+        class MonkeyPatchedElfSection(section.__class__):
+            @property
+            def path(self):
+                return debuglink_path
+        section.__class__ = MonkeyPatchedElfSection
+        # 3. Create a file with the debug symbols in uncompressed form
+        subprocess.check_call(['objcopy', '--decompress-debug-sections', debug_file.path, dest_path], shell=False, stderr=subprocess.DEVNULL)
+        # 4. Update the .gnu_debuglink to this new file so we get the CRC right
+        subprocess.check_call(['objcopy', '--remove-section=.gnu_debuglink', self.source.path], shell=False, stderr=subprocess.DEVNULL)
+        subprocess.check_call(['objcopy', '--add-gnu-debuglink=%s' % dest_path, self.source.path], shell=False, stderr=subprocess.DEVNULL)
+        logger.debug('Installed debug symbols at %s', dest_path)
 
     def get_member_names(self):
         return self._sections.keys()
diff --git a/tests/comparators/test_elf.py b/tests/comparators/test_elf.py
index 1c23786..4732762 100644
--- a/tests/comparators/test_elf.py
+++ b/tests/comparators/test_elf.py
@@ -21,8 +21,10 @@ import os.path
 import pytest
 from diffoscope.comparators import specialize
 from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
+from diffoscope.comparators.directory import FilesystemDirectory
 from diffoscope.comparators.elf import ElfFile, StaticLibFile
 from diffoscope.config import Config
+from diffoscope.presenters.text import output_text
 from conftest import tool_missing
 
 TEST_OBJ1_PATH = os.path.join(os.path.dirname(__file__), '../data/test1.o')
@@ -98,3 +100,35 @@ def test_lib_compare_non_existing(monkeypatch, lib1):
     difference = lib1.compare(NonExistingFile('/nonexisting', lib1))
     assert difference.source2 == '/nonexisting'
     assert len(difference.details) > 0
+
+TEST_DBGSYM_DIR1_PATH = os.path.join(os.path.dirname(__file__), '../data/dbgsym/add')
+TEST_DBGSYM_DIR2_PATH = os.path.join(os.path.dirname(__file__), '../data/dbgsym/mult')
+
+@pytest.fixture
+def dbgsym_dir1():
+    return specialize(FilesystemDirectory(TEST_DBGSYM_DIR1_PATH))
+
+@pytest.fixture
+def dbgsym_dir2():
+    return specialize(FilesystemDirectory(TEST_DBGSYM_DIR2_PATH))
+
+@pytest.fixture
+def dbgsym_differences(dbgsym_dir1, dbgsym_dir2):
+    return dbgsym_dir1.compare(dbgsym_dir2)
+
+@pytest.mark.skipif(any([tool_missing(tool) for tool in ['readelf', 'objdump', 'objcopy']]), reason='missing readelf, objdump, or objcopy')
+def test_differences_with_dbgsym(dbgsym_differences):
+    output_text(dbgsym_differences, print)
+    assert dbgsym_differences.details[1].source1 == 'test-dbgsym_1_amd64.deb'
+    assert dbgsym_differences.details[1].details[2].source1 == 'data.tar.xz'
+    bin_details = dbgsym_differences.details[1].details[2].details[0].details[0]
+    assert bin_details.source1 == './usr/bin/test'
+    assert bin_details.details[1].source1.startswith('objdump')
+    assert 'test-cases/dbgsym/package/test.c:2' in bin_details.details[1].unified_diff
+
+@pytest.mark.skipif(any([tool_missing(tool) for tool in ['readelf', 'objdump', 'objcopy']]), reason='missing readelf, objdump, or objcopy')
+def test_original_gnu_debuglink(dbgsym_differences):
+    bin_details = dbgsym_differences.details[1].details[2].details[0].details[0]
+    assert '.gnu_debuglink' in bin_details.details[2].source1
+    expected_gnu_debuglink = open(os.path.join(os.path.dirname(__file__), '../data/gnu_debuglink_expected_diff')).read()
+    assert bin_details.details[2].unified_diff == expected_gnu_debuglink
diff --git a/tests/data/dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb b/tests/data/dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb
new file mode 100644
index 0000000..3cfbe1b
Binary files /dev/null and b/tests/data/dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb differ
diff --git a/tests/data/dbgsym/add/test-dbgsym_1_amd64.deb b/tests/data/dbgsym/add/test-dbgsym_1_amd64.deb
new file mode 100644
index 0000000..eb3f9fe
Binary files /dev/null and b/tests/data/dbgsym/add/test-dbgsym_1_amd64.deb differ
diff --git a/tests/data/dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb b/tests/data/dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb
new file mode 100644
index 0000000..6673522
Binary files /dev/null and b/tests/data/dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb differ
diff --git a/tests/data/dbgsym/mult/test-dbgsym_1_amd64.deb b/tests/data/dbgsym/mult/test-dbgsym_1_amd64.deb
new file mode 100644
index 0000000..77cfe0b
Binary files /dev/null and b/tests/data/dbgsym/mult/test-dbgsym_1_amd64.deb differ
diff --git a/tests/data/dbgsym/test-dbgsym_1.dsc b/tests/data/dbgsym/test-dbgsym_1.dsc
new file mode 100644
index 0000000..644dc5c
--- /dev/null
+++ b/tests/data/dbgsym/test-dbgsym_1.dsc
@@ -0,0 +1,15 @@
+Format: 1.0
+Source: test-dbgsym
+Binary: test-dbgsym
+Architecture: any
+Version: 1
+Maintainer: Public Domain
+Build-Depends: debhelper (>= 9)
+Package-List:
+ test-dbgsym deb unknown unknown arch=any
+Checksums-Sha1:
+ 47366ea942a8862bc921924dab97318307c7b81e 729 test-dbgsym_1.tar.gz
+Checksums-Sha256:
+ fdbf82cd56628391bb7ee2715be8f486aa86999257daeb91adcb62392c1dbfe7 729 test-dbgsym_1.tar.gz
+Files:
+ ce130dbb7911de551405a3472799deca 729 test-dbgsym_1.tar.gz
diff --git a/tests/data/dbgsym/test-dbgsym_1.tar.gz b/tests/data/dbgsym/test-dbgsym_1.tar.gz
new file mode 100644
index 0000000..7ac3f11
Binary files /dev/null and b/tests/data/dbgsym/test-dbgsym_1.tar.gz differ
diff --git a/tests/data/gnu_debuglink_expected_diff b/tests/data/gnu_debuglink_expected_diff
new file mode 100644
index 0000000..0bb798c
--- /dev/null
+++ b/tests/data/gnu_debuglink_expected_diff
@@ -0,0 +1,12 @@
+@@ -1,7 +1,7 @@
+ 
+ Hex dump of section '.gnu_debuglink':
+-  0x00000000 66626137 37633062 39376332 30356639 fba77c0b97c205f9
+-  0x00000010 31343537 38333630 34356265 64396134 1457836045bed9a4
+-  0x00000020 31616431 62642e64 65627567 00000000 1ad1bd.debug....
+-  0x00000030 76f9942d                            v..-
++  0x00000000 33336535 61393266 63366362 64663338 33e5a92fc6cbdf38
++  0x00000010 61336566 34343333 38333364 33613639 a3ef4433833d3a69
++  0x00000020 64326333 63642e64 65627567 00000000 d2c3cd.debug....
++  0x00000030 4732b79e                            G2..
+ 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list