[diffoscope] 01/01: Implement ElfFile as a container of sections

Jérémy Bobbio lunar at moszumanska.debian.org
Mon Jan 18 19:00:20 CET 2016


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit 39d5fc0c01a62188f0c0ea74d27d558158fa11a9
Author: Dhole <dhole at openmailbox.org>
Date:   Mon Jan 4 17:47:55 2016 +0100

    Implement ElfFile as a container of sections
    
    Each section is treated differently (disassembled, hex-dumped, searched
    for strings) according to its type.
    
    Add --line-numbers to objdump when disassembling, to show the source code
    file and line when debugging info is available.
    
    Closes: #808197
---
 diffoscope/__init__.py                   |   4 +
 diffoscope/comparators/__init__.py       |   3 +-
 diffoscope/comparators/binary.py         |   8 +-
 diffoscope/comparators/elf.py            | 171 +++++++++++++++++++++++++++----
 tests/data/elf_lib_objdump_expected_diff |  18 +---
 tests/data/elf_obj_expected_diff         |  17 +--
 6 files changed, 169 insertions(+), 52 deletions(-)

diff --git a/diffoscope/__init__.py b/diffoscope/__init__.py
index 0988579..e6e6265 100644
--- a/diffoscope/__init__.py
+++ b/diffoscope/__init__.py
@@ -116,6 +116,10 @@ class RequiredToolNotFound(Exception):
             return None
         return providers.get(get_current_os(), None)
 
+class OutputParsingError(Exception):
+    def __init__(self, command, object):
+        self.command = command
+        self.object_class = object.__class__
 
 def get_current_os():
     import platform
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index d308b6e..dabfc3b 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -46,7 +46,7 @@ except ImportError as ex:
 from diffoscope.comparators.device import Device
 from diffoscope.comparators.dex import DexFile
 from diffoscope.comparators.directory import Directory, compare_directories
-from diffoscope.comparators.elf import ElfFile, StaticLibFile
+from diffoscope.comparators.elf import ElfFile, ElfSection, StaticLibFile
 from diffoscope.comparators.fsimage import FsImageFile
 from diffoscope.comparators.fonts import TtfFile
 from diffoscope.comparators.gettext import MoFile
@@ -127,6 +127,7 @@ FILE_CLASSES = (
     DotDscFile,
     Md5sumsFile,
     DebDataTarFile,
+    ElfSection,
     TextFile,
     Bzip2File,
     CpioFile,
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 733f1ba..91dd671 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -35,7 +35,7 @@ except ImportError:
 import magic
 from diffoscope.config import Config
 from diffoscope.difference import Difference
-from diffoscope import tool_required, RequiredToolNotFound, logger
+from diffoscope import tool_required, RequiredToolNotFound, OutputParsingError, logger
 
 
 def hexdump_fallback(path):
@@ -216,6 +216,12 @@ class File(object, metaclass=ABCMeta):
                 package = e.get_package()
                 if package:
                     difference.add_comment("Install '%s' to get a better output." % package)
+            except OutputParsingError as e:
+                difference = self.compare_bytes(other, source=source)
+                if difference is None:
+                    return None
+                difference.add_comment("Error parsing output of `%s` for %s" %
+                        (e.command, e.object_class))
             return difference
         return self.compare_bytes(other, source)
 
diff --git a/diffoscope/comparators/elf.py b/diffoscope/comparators/elf.py
index e199270..2d4912c 100644
--- a/diffoscope/comparators/elf.py
+++ b/diffoscope/comparators/elf.py
@@ -19,9 +19,11 @@
 
 import os.path
 import re
-from diffoscope import tool_required
+import subprocess
+from diffoscope import tool_required, OutputParsingError
+from diffoscope import logger
 from diffoscope.comparators.binary import File
-from diffoscope.comparators.utils import get_ar_content, Command
+from diffoscope.comparators.utils import get_ar_content, Command, Container
 from diffoscope.difference import Difference
 
 
@@ -33,7 +35,7 @@ class Readelf(Command):
 
     @tool_required('readelf')
     def cmdline(self):
-        return ['readelf', '-W'] + self.readelf_options() + [self.path]
+        return ['readelf', '--wide'] + self.readelf_options() + [self.path]
 
     def readelf_options(self):
         return []
@@ -49,37 +51,167 @@ class Readelf(Command):
 
 class ReadelfAll(Readelf):
     def readelf_options(self):
-        return ['-all']
+        return ['--all']
 
 class ReadelfDebugDump(Readelf):
     def readelf_options(self):
         return ['--debug-dump']
 
-class ObjdumpDisassemble(Command):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # we don't care about the name of the archive
-        self._archive_re = re.compile(r'^In archive %s:' % re.escape(self.path))
+class ReadElfSection(Readelf):
+    def __init__(self, path, section_name, *args, **kwargs):
+        self._path = path
+        self._section_name = section_name
+        super().__init__(path, *args, **kwargs)
+
+    def readelf_options(self):
+        return ['--hex-dump']
+
+    @tool_required('readelf')
+    def cmdline(self):
+        return ['readelf', '--wide'] + self.readelf_options() + \
+            [self._section_name, self.path]
+
+class ReadelfStringSection(ReadElfSection):
+    def readelf_options(self):
+        return ['--string-dump']
+
+class ObjdumpSection(Command):
+    def __init__(self, path, section_name, *args, **kwargs):
+        self._path = path
+        self._path_bin = path.encode('utf-8')
+        self._section_name = section_name
+        super().__init__(path, *args, **kwargs)
+
+    def objdump_options(self):
+        return []
 
     @tool_required('objdump')
     def cmdline(self):
-        return ['objdump', '--disassemble', '--full-contents', self.path]
+        return ['objdump'] + self.objdump_options() + \
+            ['--section='+self._section_name, self.path]
 
     def filter(self, line):
-        try:
-            # we don't care about the name of the archive
-            line = self._archive_re.sub('In archive:', line.decode('utf-8'))
-            # the full path can appear in the output, we need to remove it
-            return line.replace(self.path, '<elf>').encode('utf-8')
-        except UnicodeDecodeError:
-            return line
+        # Remove the filename from the output
+        if line.startswith(self._path_bin + b':'):
+            return b''
+        if line.startswith(b'In archive'):
+            return b''
+        return line
+
+class ObjdumpDisassembleSection(ObjdumpSection):
+    def objdump_options(self):
+        # With '--line-numbers' we get the source filename and line within the
+        # disassembled instructions.
+        # objdump can get the debugging information from the elf or from the
+        # stripped symbols file specified in the .gnu_debuglink section
+        return ['--line-numbers', '--disassemble']
 
 def _compare_elf_data(path1, path2):
     return [Difference.from_command(ReadelfAll, path1, path2),
-            Difference.from_command(ReadelfDebugDump, path1, path2),
-            Difference.from_command(ObjdumpDisassemble, path1, path2)]
+            Difference.from_command(ReadelfDebugDump, path1, path2)]
+
+class ElfSection(File):
+    def __init__(self, elf_container, member_name):
+        self._elf_container = elf_container
+        self._name = member_name
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def path(self):
+        raise NotImplementedError('elf sections cannot be extracted')
+        #return self._elf_container.source.path
+
+    def cleanup(self):
+        pass
+
+    def is_directory(self):
+        return False
+
+    def is_symlink(self):
+        return False
+
+    def is_device(self):
+        return False
+
+    def has_same_content_as(self, other):
+        # Always force diff of the section
+        return False
+
+    @staticmethod
+    def recognizes(file):
+        # No file should be recognized as an elf section
+        return False
+
+    def compare(self, other, source=None):
+        return Difference.from_command(ReadElfSection,
+                self._elf_container.source.path,
+                other._elf_container.source.path,
+                command_args=[self._name])
+
+class ElfCodeSection(ElfSection):
+    def compare(self, other, source=None):
+        return Difference.from_command(ObjdumpDisassembleSection,
+                self._elf_container.source.path,
+                other._elf_container.source.path,
+                command_args=[self._name])
+
+class ElfStringSection(ElfSection):
+    def compare(self, other, source=None):
+        return Difference.from_command(ReadelfStringSection,
+                self._elf_container.source.path,
+                other._elf_container.source.path,
+                command_args=[self._name])
+
+
+class ElfContainer(Container):
+    SECTION_TYPES = {'X': ElfCodeSection, 'S': ElfStringSection, '_': ElfSection}
+
+    @tool_required('readelf')
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        logger.debug('creating ElfContainer for file %s', self.source.path)
+        cmd = ['readelf', '--wide', '--section-headers', self.source.path]
+        output = subprocess.check_output(cmd, shell=False)
+
+        try:
+            output = output.decode('utf-8').split('\n')
+            if output[1].startswith('File:'):
+                output = output[2:]
+            output = output[5:]
+
+            self._sections = {}
+            self._section_list = [] # using a list to store original order
+            # Entries of readelf --section-headers have the following columns:
+            # [Nr]  Name  Type  Address  Off  Size  ES  Flg  Lk  Inf  Al
+            for line in output:
+                if line.startswith('Key to Flags'):
+                    break
+                # Strip number column because there may be spaces in the brackets
+                line = line.split(']', 1)[1].split()
+                name, flag = line[0], line[6] + '_'
+                # Use first match, with last option being '_' as fallback
+                type = [ElfContainer.SECTION_TYPES[type] for type in flag if \
+                        type in ElfContainer.SECTION_TYPES][0]
+                self._sections[name] = type
+                self._section_list.append(name)
+                logger.debug('adding %s section as %s', name, type)
+        except Exception as e:
+            command = ' '.join(cmd)
+            logger.debug('OutputParsingError in %s from `%s` output - %s:%s'
+                    % (self.__class__.__name__, command, e.__class__.__name__, e))
+            raise OutputParsingError(command, self)
+
+    def get_member_names(self):
+        return self._section_list
+
+    def get_member(self, member_name):
+        return self._sections[member_name](self, member_name)
 
 class ElfFile(File):
+    CONTAINER_CLASS = ElfContainer
     RE_FILE_TYE = re.compile(r'^ELF ')
 
     @staticmethod
@@ -90,6 +222,7 @@ class ElfFile(File):
         return _compare_elf_data(self.path, other.path)
 
 class StaticLibFile(File):
+    CONTAINER_CLASS = ElfContainer
     RE_FILE_TYPE = re.compile(r'\bar archive\b')
     RE_FILE_EXTENSION = re.compile(r'\.a$')
 
diff --git a/tests/data/elf_lib_objdump_expected_diff b/tests/data/elf_lib_objdump_expected_diff
index feaac29..27600ff 100644
--- a/tests/data/elf_lib_objdump_expected_diff
+++ b/tests/data/elf_lib_objdump_expected_diff
@@ -1,23 +1,9 @@
-@@ -1,23 +1,23 @@
- In archive:
- 
- test.o:     file format elf64-x86-64
- 
- Contents of section .text:
-- 0000 554889e5 b82a0000 005dc3             UH...*...].     
-+ 0000 554889e5 b8ffffff ff5dc3             UH.......].     
- Contents of section .comment:
-  0000 00474343 3a202844 65626961 6e20342e  .GCC: (Debian 4.
-  0010 372e322d 35292034 2e372e32 00        7.2-5) 4.7.2.   
- Contents of section .eh_frame:
-  0000 14000000 00000000 017a5200 01781001  .........zR..x..
-  0010 1b0c0708 90010000 1c000000 1c000000  ................
-  0020 00000000 0b000000 00410e10 8602430d  .........A....C.
-  0030 06460c07 08000000                    .F......        
+@@ -4,10 +4,10 @@
  
  Disassembly of section .text:
  
  0000000000000000 <f>:
+ f():
     0:	55                   	push   %rbp
     1:	48 89 e5             	mov    %rsp,%rbp
 -   4:	b8 2a 00 00 00       	mov    $0x2a,%eax
diff --git a/tests/data/elf_obj_expected_diff b/tests/data/elf_obj_expected_diff
index 3aabe2b..0c48a8f 100644
--- a/tests/data/elf_obj_expected_diff
+++ b/tests/data/elf_obj_expected_diff
@@ -1,22 +1,9 @@
-@@ -1,22 +1,22 @@
- 
- <elf>:     file format elf64-x86-64
- 
- Contents of section .text:
-- 0000 554889e5 b82a0000 005dc3             UH...*...].     
-+ 0000 554889e5 b8ffffff ff5dc3             UH.......].     
- Contents of section .comment:
-  0000 00474343 3a202844 65626961 6e20342e  .GCC: (Debian 4.
-  0010 372e322d 35292034 2e372e32 00        7.2-5) 4.7.2.   
- Contents of section .eh_frame:
-  0000 14000000 00000000 017a5200 01781001  .........zR..x..
-  0010 1b0c0708 90010000 1c000000 1c000000  ................
-  0020 00000000 0b000000 00410e10 8602430d  .........A....C.
-  0030 06460c07 08000000                    .F......        
+@@ -3,10 +3,10 @@
  
  Disassembly of section .text:
  
  0000000000000000 <f>:
+ f():
     0:	55                   	push   %rbp
     1:	48 89 e5             	mov    %rsp,%rbp
 -   4:	b8 2a 00 00 00       	mov    $0x2a,%eax

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list