[diffoscope] 01/01: Add optional support for binwalking to find (eg.) concatenated CPIO archives. (Closes: #820631)

Chris Lamb chris at chris-lamb.co.uk
Sat Sep 9 12:17:14 CEST 2017


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch master
in repository diffoscope.

commit 1c26813019c09bf4b2e82da9668564cec58b504e
Author: Chris Lamb <lamby at debian.org>
Date:   Sat Sep 9 09:52:54 2017 +0100

    Add optional support for binwalking to find (eg.) concatenated CPIO archives. (Closes: #820631)
---
 debian/control                     |   1 +
 debian/rules                       |   1 +
 diffoscope/comparators/__init__.py |   1 +
 diffoscope/comparators/binwalk.py  |  95 +++++++++++++++++++++++++++++++++++++
 tests/comparators/test_binwalk.py  |  71 +++++++++++++++++++++++++++
 tests/data/binwalk_expected_diff   |   9 ++++
 tests/data/test1.binwalk           | Bin 0 -> 3072 bytes
 tests/data/test2.binwalk           | Bin 0 -> 3072 bytes
 8 files changed, 178 insertions(+)

diff --git a/debian/control b/debian/control
index 49be28f..92a3a71 100644
--- a/debian/control
+++ b/debian/control
@@ -45,6 +45,7 @@ Build-Depends:
  poppler-utils <!nocheck>,
  python-argcomplete,
  python3-all,
+ python3-binwalk <!nocheck>,
  python3-debian <!nocheck>,
  python3-distro <!nocheck>,
  python3-docutils,
diff --git a/debian/rules b/debian/rules
index 577c3ee..1e4f479 100755
--- a/debian/rules
+++ b/debian/rules
@@ -57,6 +57,7 @@ override_dh_python3:
 	dh_python3 -p diffoscope \
 		--depends=distro \
 		--recommends=argcomplete \
+		--recommends=binwalk \
 		--recommends=defusedxml \
 		--recommends=guestfs \
 		--recommends=progressbar \
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 7653741..3a691f2 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -37,6 +37,7 @@ class ComparatorManager(object):
         ('deb.Md5sumsFile',),
         ('deb.DebDataTarFile',),
         ('elf.ElfSection',),
+        ('binwalk.BinwalkFile',),
         ('ps.PsFile',),
         ('javascript.JavaScriptFile',),
         ('json.JSONFile',),
diff --git a/diffoscope/comparators/binwalk.py b/diffoscope/comparators/binwalk.py
new file mode 100644
index 0000000..dc6f665
--- /dev/null
+++ b/diffoscope/comparators/binwalk.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import re
+import glob
+import logging
+
+from diffoscope.tempfiles import get_temporary_directory
+
+from .utils.file import File
+from .utils.archive import Archive
+
+
+try:
+    import binwalk
+except ImportError:
+    binwalk = None
+
+logger = logging.getLogger(__name__)
+
+
+class BinwalkFileContainer(Archive):
+    def open_archive(self):
+        return self
+
+    def close_archive(self):
+        self.source._unpacked.cleanup()
+
+    def get_member_names(self):
+        return sorted(self.source._members.keys())
+
+    def extract(self, member_name, dest_dir):
+        return self.source._members[member_name]
+
+
+class BinwalkFile(File):
+    FILE_TYPE_RE = re.compile(r'\bcpio archive\b')
+    CONTAINER_CLASS = BinwalkFileContainer
+
+    @classmethod
+    def recognizes(cls, file):
+        if binwalk is None:
+            return False
+
+        if not super().recognizes(file):
+            return False
+
+        # Don't recurse; binwalk has already found everything
+        if isinstance(file.container, cls.CONTAINER_CLASS):
+            return False
+
+        unpacked = get_temporary_directory(prefix='binwalk')
+        logger.debug("Extracting %s to %s", file.path, unpacked.name)
+
+        binwalk.scan(
+            file.path,
+            dd='cpio:cpio',
+            carve=True,
+            quiet=True,
+            signature=True,
+            directory=unpacked.name,
+        )
+
+        members = {
+            os.path.basename(x): x
+            for x in glob.glob(os.path.join(unpacked.name, '*/*'))
+        }
+
+        logger.debug("Found %d embedded member(s)", len(members))
+
+        if not members:
+            unpacked.cleanup()
+            return False
+
+        file._members = members
+        file._unpacked = unpacked
+
+        return True
diff --git a/tests/comparators/test_binwalk.py b/tests/comparators/test_binwalk.py
new file mode 100644
index 0000000..45766d9
--- /dev/null
+++ b/tests/comparators/test_binwalk.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.binwalk import BinwalkFile
+
+from ..utils.data import load_fixture, get_data
+from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
+from ..utils.nonexisting import assert_non_existing
+
+binwalk1 = load_fixture('test1.binwalk')
+binwalk2 = load_fixture('test2.binwalk')
+
+
+@skip_unless_module_exists('binwalk')
+def test_identification(binwalk1, binwalk2):
+    assert isinstance(binwalk1, BinwalkFile)
+    assert isinstance(binwalk2, BinwalkFile)
+
+
+@skip_unless_module_exists('binwalk')
+def test_no_differences(binwalk1):
+    difference = binwalk1.compare(binwalk1)
+    assert difference is None
+
+
+@pytest.fixture
+def differences(binwalk1, binwalk2):
+    return binwalk1.compare(binwalk2).details
+
+
+@skip_unless_tools_exist('cpio')
+@skip_unless_module_exists('binwalk')
+def test_listing(differences):
+    assert differences[0].source1 == '0.cpio'
+    assert differences[1].source2 == '600.cpio'
+
+    expected_diff = get_data('binwalk_expected_diff')
+    assert differences[0].details[0].unified_diff == expected_diff
+
+
+@skip_unless_tools_exist('cpio')
+@skip_unless_module_exists('binwalk')
+def test_symlink(differences):
+    assert differences[0].details[1].source1 == 'dir/link'
+    assert differences[0].details[1].comment == 'symlink'
+    expected_diff = get_data('symlink_expected_diff')
+    assert differences[0].details[1].unified_diff == expected_diff
+
+
+@skip_unless_tools_exist('cpio')
+@skip_unless_module_exists('binwalk')
+def test_compare_non_existing(monkeypatch, binwalk1):
+    assert_non_existing(monkeypatch, binwalk1)
diff --git a/tests/data/binwalk_expected_diff b/tests/data/binwalk_expected_diff
new file mode 100644
index 0000000..408bebc
--- /dev/null
+++ b/tests/data/binwalk_expected_diff
@@ -0,0 +1,9 @@
+@@ -1,4 +1,4 @@
+-drwxr-xr-x   3     1000     1000        0 2017-09-09 09:59:40.000000 .
+-drwxr-xr-x   2     1000     1000        0 2017-09-09 09:59:40.000000 dir
+-lrwxrwxrwx   1     1000     1000        6 2017-09-09 09:59:40.000000 dir/link -> broken
+--rw-r--r--   1     1000     1000      446 2017-09-09 09:59:40.000000 dir/text
++drwxr-xr-x   3     1000     1000        0 2017-09-09 09:59:51.000000 .
++drwxr-xr-x   2     1000     1000        0 2017-09-09 09:59:51.000000 dir
++lrwxrwxrwx   1     1000     1000       13 2017-09-09 09:59:51.000000 dir/link -> really-broken
++-rw-r--r--   1     1000     1000      671 2017-09-09 09:59:51.000000 dir/text
diff --git a/tests/data/test1.binwalk b/tests/data/test1.binwalk
new file mode 100644
index 0000000..b2a139f
Binary files /dev/null and b/tests/data/test1.binwalk differ
diff --git a/tests/data/test2.binwalk b/tests/data/test2.binwalk
new file mode 100644
index 0000000..e4b1b4b
Binary files /dev/null and b/tests/data/test2.binwalk differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list