[diffoscope] 01/01: Add support for comparing Berkeley DB files. This is currently incomplete because the Berkeley DB libraries do not return the same uid/hash reliably (it returns "random" memory contents) so we must strip those from the human-readable output. (Closes: #890528)

Chris Lamb chris at chris-lamb.co.uk
Thu Feb 15 17:38:46 CET 2018


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch master
in repository diffoscope.

commit 8fbd7e3f373106e82aea3130017c28320c0a35b9
Author: Chris Lamb <lamby at debian.org>
Date:   Thu Feb 15 16:36:07 2018 +0000

    Add support for comparing Berkeley DB files. This is currently incomplete because the Berkeley DB libraries do not return the same uid/hash reliably (it returns "random" memory contents) so we must strip those from the human-readable output. (Closes: #890528)
---
 debian/control                        |   1 +
 diffoscope/comparators/__init__.py    |   1 +
 diffoscope/comparators/berkeley_db.py |  53 +++++++++++++++++++++++++++
 diffoscope/external_tools.py          |   3 ++
 tests/comparators/test_berkeley_db.py |  54 ++++++++++++++++++++++++++++
 tests/data/berkeley_db_expected_diff  |  66 ++++++++++++++++++++++++++++++++++
 tests/data/test1.db                   | Bin 0 -> 32768 bytes
 tests/data/test2.db                   | Bin 0 -> 16384 bytes
 8 files changed, 178 insertions(+)

diff --git a/debian/control b/debian/control
index 4e001eb..5d6bac7 100644
--- a/debian/control
+++ b/debian/control
@@ -15,6 +15,7 @@ Build-Depends:
  binutils-multiarch <!nocheck>,
  caca-utils <!nocheck>,
  colord <!nocheck>,
+ db-util <!nocheck>,
  debhelper (>= 11~),
  default-jdk-headless <!nocheck> | default-jdk <!nocheck>,
  dh-python (>= 2.20160818~),
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 10165f9..a1f7433 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -91,6 +91,7 @@ class ComparatorManager(object):
         ('dtb.DeviceTreeFile',),
         ('ogg.OggFile',),
         ('xsb.XsbFile',),
+        ('berkeley_db.BerkeleyDBFile',),
     )
 
     _singleton = {}
diff --git a/diffoscope/comparators/berkeley_db.py b/diffoscope/comparators/berkeley_db.py
new file mode 100644
index 0000000..4380bd1
--- /dev/null
+++ b/diffoscope/comparators/berkeley_db.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2018 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import re
+
+from diffoscope.tools import tool_required
+from diffoscope.difference import Difference
+
+from .utils.file import File
+from .utils.command import Command
+
+
+class DbDump(Command):
+    @tool_required('db_dump')
+    def cmdline(self):
+        return ('db_dump', '-d', 'a', self.path)
+
+    def filter(self, line):
+        l = line.decode('utf-8')
+        # We must strip some fields as libdb itself does not repeatedly read
+        # its own metadata reliably, even on the same file.
+        for x in ('h_hash: ', 'bt_compare: ', '\tuid: '):
+            if l.startswith(x):
+                return b''
+        return line
+
+
+class BerkeleyDBFile(File):
+    FILE_TYPE_RE = re.compile(r'^Berkeley DB ')
+
+    def compare_details(self, other, source=None):
+        return [Difference.from_command(
+            DbDump,
+            self.path,
+            other.path,
+            source="Berkeley DB file",
+        )]
diff --git a/diffoscope/external_tools.py b/diffoscope/external_tools.py
index e58a1de..eea15ce 100644
--- a/diffoscope/external_tools.py
+++ b/diffoscope/external_tools.py
@@ -24,6 +24,9 @@ EXTERNAL_TOOLS = {
     'apktool': {
         'debian': 'apktool',
     },
+    'db_dump': {
+        'debian': 'db-util',
+    },
     'bsdtar': {
         'debian': 'libarchive-tools',
         'arch': 'libarchive',
diff --git a/tests/comparators/test_berkeley_db.py b/tests/comparators/test_berkeley_db.py
new file mode 100644
index 0000000..342a1ee
--- /dev/null
+++ b/tests/comparators/test_berkeley_db.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2018 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.berkeley_db import BerkeleyDBFile
+
+from ..utils.data import load_fixture, get_data
+from ..utils.tools import skip_unless_tools_exist
+from ..utils.nonexisting import assert_non_existing
+
+db1 = load_fixture('test1.db')
+db2 = load_fixture('test2.db')
+
+
+def test_identification(db1):
+    assert isinstance(db1, BerkeleyDBFile)
+
+
+@pytest.fixture
+def differences(db1, db2):
+    return db1.compare(db2).details
+
+
+def test_no_differences(db1):
+    difference = db1.compare(db1)
+    assert difference is None
+
+
+@skip_unless_tools_exist('db_dump')
+def test_diff(differences):
+    expected_diff = get_data('berkeley_db_expected_diff')
+    assert differences[0].unified_diff == expected_diff
+
+
+@skip_unless_tools_exist('db_dump')
+def test_compare_non_existing(monkeypatch, db1):
+    assert_non_existing(monkeypatch, db1, has_null_source=False)
diff --git a/tests/data/berkeley_db_expected_diff b/tests/data/berkeley_db_expected_diff
new file mode 100644
index 0000000..ab91208
--- /dev/null
+++ b/tests/data/berkeley_db_expected_diff
@@ -0,0 +1,66 @@
+@@ -9,61 +9,29 @@
+ 	magic: 0x53162
+ 	version: 9
+ 	pagesize: 4096
+ 	type: 9
+ 	metaflags 0
+ 	keys: 0	records: 0
+ 	free list: 0
+-	last_pgno: 7
++	last_pgno: 3
+ 	flags: 0x20 (multiple-databases)
+ 	minkey: 2
+ 	root: 1
+ page 1: btree leaf: LSN [0][1]: level 1
+-	prev:    0 next:    0 entries:    6 offset: 4000
+-	[000] 4040 len:  19 data: unstable|main|amd64
+-	[001] 4032 len:   4 data: 00000004
+-	[002] 4072 len:  18 data: unstable|main|i386
+-	[003] 4064 len:   4 data: 00000002
+-	[004] 4008 len:  20 data: unstable|main|source
+-	[005] 4000 len:   4 data: 00000006
++	prev:    0 next:    0 entries:    2 offset: 4064
++	[000] 4072 len:  19 data: compressedfilelists
++	[001] 4064 len:   4 data: 00000002
+ page 2: btree metadata: LSN [0][1]: level 0
+ 	magic: 0x53162
+ 	version: 9
+ 	pagesize: 4096
+ 	type: 9
+ 	metaflags 0
+ 	keys: 0	records: 0
+ 	free list: 0
+ 	last_pgno: 2
+ 	flags: 0x20 (multiple-databases)
+ 	minkey: 2
+ 	root: 3
+ page 3: btree leaf: LSN [0][1]: level 1
+ 	prev:    0 next:    0 entries:    0 offset: 4096
+-page 4: btree metadata: LSN [0][1]: level 0
+-	magic: 0x53162
+-	version: 9
+-	pagesize: 4096
+-	type: 9
+-	metaflags 0
+-	keys: 0	records: 0
+-	free list: 0
+-	last_pgno: 4
+-	flags: 0x20 (multiple-databases)
+-	minkey: 2
+-	root: 5
+-page 5: btree leaf: LSN [0][1]: level 1
+-	prev:    0 next:    0 entries:    0 offset: 4096
+-page 6: btree metadata: LSN [0][1]: level 0
+-	magic: 0x53162
+-	version: 9
+-	pagesize: 4096
+-	type: 9
+-	metaflags 0
+-	keys: 0	records: 0
+-	free list: 0
+-	last_pgno: 6
+-	flags: 0x20 (multiple-databases)
+-	minkey: 2
+-	root: 7
+-page 7: btree leaf: LSN [0][1]: level 1
+-	prev:    0 next:    0 entries:    0 offset: 4096
diff --git a/tests/data/test1.db b/tests/data/test1.db
new file mode 100644
index 0000000..8364ad7
Binary files /dev/null and b/tests/data/test1.db differ
diff --git a/tests/data/test2.db b/tests/data/test2.db
new file mode 100644
index 0000000..8325722
Binary files /dev/null and b/tests/data/test2.db differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list