[diffoscope] 01/01: Implement ElfFile as a container of sections
Jérémy Bobbio
lunar at moszumanska.debian.org
Mon Jan 18 19:00:20 CET 2016
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch master
in repository diffoscope.
commit 39d5fc0c01a62188f0c0ea74d27d558158fa11a9
Author: Dhole <dhole at openmailbox.org>
Date: Mon Jan 4 17:47:55 2016 +0100
Implement ElfFile as a container of sections
Each section is treated differently (disassembled, hex-dumped, searched
for strings) according to its type.
Also pass --line-numbers to objdump when disassembling, so that the source
file and line number are shown whenever debugging info is available.
Closes: #808197
---
diffoscope/__init__.py | 4 +
diffoscope/comparators/__init__.py | 3 +-
diffoscope/comparators/binary.py | 8 +-
diffoscope/comparators/elf.py | 171 +++++++++++++++++++++++++++----
tests/data/elf_lib_objdump_expected_diff | 18 +---
tests/data/elf_obj_expected_diff | 17 +--
6 files changed, 169 insertions(+), 52 deletions(-)
diff --git a/diffoscope/__init__.py b/diffoscope/__init__.py
index 0988579..e6e6265 100644
--- a/diffoscope/__init__.py
+++ b/diffoscope/__init__.py
@@ -116,6 +116,10 @@ class RequiredToolNotFound(Exception):
return None
return providers.get(get_current_os(), None)
+class OutputParsingError(Exception):
+ def __init__(self, command, object):
+ self.command = command
+ self.object_class = object.__class__
def get_current_os():
import platform
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index d308b6e..dabfc3b 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -46,7 +46,7 @@ except ImportError as ex:
from diffoscope.comparators.device import Device
from diffoscope.comparators.dex import DexFile
from diffoscope.comparators.directory import Directory, compare_directories
-from diffoscope.comparators.elf import ElfFile, StaticLibFile
+from diffoscope.comparators.elf import ElfFile, ElfSection, StaticLibFile
from diffoscope.comparators.fsimage import FsImageFile
from diffoscope.comparators.fonts import TtfFile
from diffoscope.comparators.gettext import MoFile
@@ -127,6 +127,7 @@ FILE_CLASSES = (
DotDscFile,
Md5sumsFile,
DebDataTarFile,
+ ElfSection,
TextFile,
Bzip2File,
CpioFile,
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 733f1ba..91dd671 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -35,7 +35,7 @@ except ImportError:
import magic
from diffoscope.config import Config
from diffoscope.difference import Difference
-from diffoscope import tool_required, RequiredToolNotFound, logger
+from diffoscope import tool_required, RequiredToolNotFound, OutputParsingError, logger
def hexdump_fallback(path):
@@ -216,6 +216,12 @@ class File(object, metaclass=ABCMeta):
package = e.get_package()
if package:
difference.add_comment("Install '%s' to get a better output." % package)
+ except OutputParsingError as e:
+ difference = self.compare_bytes(other, source=source)
+ if difference is None:
+ return None
+ difference.add_comment("Error parsing output of `%s` for %s" %
+ (e.command, e.object_class))
return difference
return self.compare_bytes(other, source)
diff --git a/diffoscope/comparators/elf.py b/diffoscope/comparators/elf.py
index e199270..2d4912c 100644
--- a/diffoscope/comparators/elf.py
+++ b/diffoscope/comparators/elf.py
@@ -19,9 +19,11 @@
import os.path
import re
-from diffoscope import tool_required
+import subprocess
+from diffoscope import tool_required, OutputParsingError
+from diffoscope import logger
from diffoscope.comparators.binary import File
-from diffoscope.comparators.utils import get_ar_content, Command
+from diffoscope.comparators.utils import get_ar_content, Command, Container
from diffoscope.difference import Difference
@@ -33,7 +35,7 @@ class Readelf(Command):
@tool_required('readelf')
def cmdline(self):
- return ['readelf', '-W'] + self.readelf_options() + [self.path]
+ return ['readelf', '--wide'] + self.readelf_options() + [self.path]
def readelf_options(self):
return []
@@ -49,37 +51,167 @@ class Readelf(Command):
class ReadelfAll(Readelf):
def readelf_options(self):
- return ['-all']
+ return ['--all']
class ReadelfDebugDump(Readelf):
def readelf_options(self):
return ['--debug-dump']
-class ObjdumpDisassemble(Command):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- # we don't care about the name of the archive
- self._archive_re = re.compile(r'^In archive %s:' % re.escape(self.path))
+class ReadElfSection(Readelf):
+ def __init__(self, path, section_name, *args, **kwargs):
+ self._path = path
+ self._section_name = section_name
+ super().__init__(path, *args, **kwargs)
+
+ def readelf_options(self):
+ return ['--hex-dump']
+
+ @tool_required('readelf')
+ def cmdline(self):
+ return ['readelf', '--wide'] + self.readelf_options() + \
+ [self._section_name, self.path]
+
+class ReadelfStringSection(ReadElfSection):
+ def readelf_options(self):
+ return ['--string-dump']
+
+class ObjdumpSection(Command):
+ def __init__(self, path, section_name, *args, **kwargs):
+ self._path = path
+ self._path_bin = path.encode('utf-8')
+ self._section_name = section_name
+ super().__init__(path, *args, **kwargs)
+
+ def objdump_options(self):
+ return []
@tool_required('objdump')
def cmdline(self):
- return ['objdump', '--disassemble', '--full-contents', self.path]
+ return ['objdump'] + self.objdump_options() + \
+ ['--section='+self._section_name, self.path]
def filter(self, line):
- try:
- # we don't care about the name of the archive
- line = self._archive_re.sub('In archive:', line.decode('utf-8'))
- # the full path can appear in the output, we need to remove it
- return line.replace(self.path, '<elf>').encode('utf-8')
- except UnicodeDecodeError:
- return line
+ # Remove the filename from the output
+ if line.startswith(self._path_bin + b':'):
+ return b''
+ if line.startswith(b'In archive'):
+ return b''
+ return line
+
+class ObjdumpDisassembleSection(ObjdumpSection):
+ def objdump_options(self):
+ # With '--line-numbers' we get the source filename and line within the
+ # disassembled instructions.
+ # objdump can get the debugging information from the elf or from the
+ # stripped symbols file specified in the .gnu_debuglink section
+ return ['--line-numbers', '--disassemble']
def _compare_elf_data(path1, path2):
return [Difference.from_command(ReadelfAll, path1, path2),
- Difference.from_command(ReadelfDebugDump, path1, path2),
- Difference.from_command(ObjdumpDisassemble, path1, path2)]
+ Difference.from_command(ReadelfDebugDump, path1, path2)]
+
+class ElfSection(File):
+ def __init__(self, elf_container, member_name):
+ self._elf_container = elf_container
+ self._name = member_name
+
+ @property
+ def name(self):
+ return self._name
+
+ @property
+ def path(self):
+ raise NotImplementedError('elf sections cannot be extracted')
+ #return self._elf_container.source.path
+
+ def cleanup(self):
+ pass
+
+ def is_directory(self):
+ return False
+
+ def is_symlink(self):
+ return False
+
+ def is_device(self):
+ return False
+
+ def has_same_content_as(self, other):
+ # Always force diff of the section
+ return False
+
+ @staticmethod
+ def recognizes(file):
+ # No file should be recognized as an elf section
+ return False
+
+ def compare(self, other, source=None):
+ return Difference.from_command(ReadElfSection,
+ self._elf_container.source.path,
+ other._elf_container.source.path,
+ command_args=[self._name])
+
+class ElfCodeSection(ElfSection):
+ def compare(self, other, source=None):
+ return Difference.from_command(ObjdumpDisassembleSection,
+ self._elf_container.source.path,
+ other._elf_container.source.path,
+ command_args=[self._name])
+
+class ElfStringSection(ElfSection):
+ def compare(self, other, source=None):
+ return Difference.from_command(ReadelfStringSection,
+ self._elf_container.source.path,
+ other._elf_container.source.path,
+ command_args=[self._name])
+
+
+class ElfContainer(Container):
+ SECTION_TYPES = {'X': ElfCodeSection, 'S': ElfStringSection, '_': ElfSection}
+
+ @tool_required('readelf')
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ logger.debug('creating ElfContainer for file %s', self.source.path)
+ cmd = ['readelf', '--wide', '--section-headers', self.source.path]
+ output = subprocess.check_output(cmd, shell=False)
+
+ try:
+ output = output.decode('utf-8').split('\n')
+ if output[1].startswith('File:'):
+ output = output[2:]
+ output = output[5:]
+
+ self._sections = {}
+ self._section_list = [] # using a list to store original order
+ # Entries of readelf --section-headers have the following columns:
+ # [Nr] Name Type Address Off Size ES Flg Lk Inf Al
+ for line in output:
+ if line.startswith('Key to Flags'):
+ break
+ # Strip number column because there may be spaces in the brackets
+ line = line.split(']', 1)[1].split()
+ name, flag = line[0], line[6] + '_'
+ # Use first match, with last option being '_' as fallback
+ type = [ElfContainer.SECTION_TYPES[type] for type in flag if \
+ type in ElfContainer.SECTION_TYPES][0]
+ self._sections[name] = type
+ self._section_list.append(name)
+ logger.debug('adding %s section as %s', name, type)
+ except Exception as e:
+ command = ' '.join(cmd)
+ logger.debug('OutputParsingError in %s from `%s` output - %s:%s'
+ % (self.__class__.__name__, command, e.__class__.__name__, e))
+ raise OutputParsingError(command, self)
+
+ def get_member_names(self):
+ return self._section_list
+
+ def get_member(self, member_name):
+ return self._sections[member_name](self, member_name)
class ElfFile(File):
+ CONTAINER_CLASS = ElfContainer
RE_FILE_TYE = re.compile(r'^ELF ')
@staticmethod
@@ -90,6 +222,7 @@ class ElfFile(File):
return _compare_elf_data(self.path, other.path)
class StaticLibFile(File):
+ CONTAINER_CLASS = ElfContainer
RE_FILE_TYPE = re.compile(r'\bar archive\b')
RE_FILE_EXTENSION = re.compile(r'\.a$')
diff --git a/tests/data/elf_lib_objdump_expected_diff b/tests/data/elf_lib_objdump_expected_diff
index feaac29..27600ff 100644
--- a/tests/data/elf_lib_objdump_expected_diff
+++ b/tests/data/elf_lib_objdump_expected_diff
@@ -1,23 +1,9 @@
-@@ -1,23 +1,23 @@
- In archive:
-
- test.o: file format elf64-x86-64
-
- Contents of section .text:
-- 0000 554889e5 b82a0000 005dc3 UH...*...].
-+ 0000 554889e5 b8ffffff ff5dc3 UH.......].
- Contents of section .comment:
- 0000 00474343 3a202844 65626961 6e20342e .GCC: (Debian 4.
- 0010 372e322d 35292034 2e372e32 00 7.2-5) 4.7.2.
- Contents of section .eh_frame:
- 0000 14000000 00000000 017a5200 01781001 .........zR..x..
- 0010 1b0c0708 90010000 1c000000 1c000000 ................
- 0020 00000000 0b000000 00410e10 8602430d .........A....C.
- 0030 06460c07 08000000 .F......
+@@ -4,10 +4,10 @@
Disassembly of section .text:
0000000000000000 <f>:
+ f():
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
- 4: b8 2a 00 00 00 mov $0x2a,%eax
diff --git a/tests/data/elf_obj_expected_diff b/tests/data/elf_obj_expected_diff
index 3aabe2b..0c48a8f 100644
--- a/tests/data/elf_obj_expected_diff
+++ b/tests/data/elf_obj_expected_diff
@@ -1,22 +1,9 @@
-@@ -1,22 +1,22 @@
-
- <elf>: file format elf64-x86-64
-
- Contents of section .text:
-- 0000 554889e5 b82a0000 005dc3 UH...*...].
-+ 0000 554889e5 b8ffffff ff5dc3 UH.......].
- Contents of section .comment:
- 0000 00474343 3a202844 65626961 6e20342e .GCC: (Debian 4.
- 0010 372e322d 35292034 2e372e32 00 7.2-5) 4.7.2.
- Contents of section .eh_frame:
- 0000 14000000 00000000 017a5200 01781001 .........zR..x..
- 0010 1b0c0708 90010000 1c000000 1c000000 ................
- 0020 00000000 0b000000 00410e10 8602430d .........A....C.
- 0030 06460c07 08000000 .F......
+@@ -3,10 +3,10 @@
Disassembly of section .text:
0000000000000000 <f>:
+ f():
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
- 4: b8 2a 00 00 00 mov $0x2a,%eax
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list