[diffoscope] 01/01: Add support for comparing Berkeley DB files. This is currently incomplete because the Berkeley DB libraries do not reliably return the same uid/hash (they return "random" memory contents), so we must strip those fields from the human-readable output. (Closes: #890528)
Chris Lamb
chris at chris-lamb.co.uk
Thu Feb 15 17:38:46 CET 2018
This is an automated email from the git hooks/post-receive script.
lamby pushed a commit to branch master
in repository diffoscope.
commit 8fbd7e3f373106e82aea3130017c28320c0a35b9
Author: Chris Lamb <lamby at debian.org>
Date: Thu Feb 15 16:36:07 2018 +0000
Add support for comparing Berkeley DB files. This is currently incomplete because the Berkeley DB libraries do not reliably return the same uid/hash (they return "random" memory contents), so we must strip those fields from the human-readable output. (Closes: #890528)
---
debian/control | 1 +
diffoscope/comparators/__init__.py | 1 +
diffoscope/comparators/berkeley_db.py | 53 +++++++++++++++++++++++++++
diffoscope/external_tools.py | 3 ++
tests/comparators/test_berkeley_db.py | 54 ++++++++++++++++++++++++++++
tests/data/berkeley_db_expected_diff | 66 ++++++++++++++++++++++++++++++++++
tests/data/test1.db | Bin 0 -> 32768 bytes
tests/data/test2.db | Bin 0 -> 16384 bytes
8 files changed, 178 insertions(+)
diff --git a/debian/control b/debian/control
index 4e001eb..5d6bac7 100644
--- a/debian/control
+++ b/debian/control
@@ -15,6 +15,7 @@ Build-Depends:
binutils-multiarch <!nocheck>,
caca-utils <!nocheck>,
colord <!nocheck>,
+ db-util <!nocheck>,
debhelper (>= 11~),
default-jdk-headless <!nocheck> | default-jdk <!nocheck>,
dh-python (>= 2.20160818~),
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 10165f9..a1f7433 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -91,6 +91,7 @@ class ComparatorManager(object):
('dtb.DeviceTreeFile',),
('ogg.OggFile',),
('xsb.XsbFile',),
+ ('berkeley_db.BerkeleyDBFile',),
)
_singleton = {}
diff --git a/diffoscope/comparators/berkeley_db.py b/diffoscope/comparators/berkeley_db.py
new file mode 100644
index 0000000..4380bd1
--- /dev/null
+++ b/diffoscope/comparators/berkeley_db.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2018 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import re
+
+from diffoscope.tools import tool_required
+from diffoscope.difference import Difference
+
+from .utils.file import File
+from .utils.command import Command
+
+
+class DbDump(Command):
+ @tool_required('db_dump')
+ def cmdline(self):
+ return ('db_dump', '-d', 'a', self.path)
+
+ def filter(self, line):
+ l = line.decode('utf-8')
+ # We must strip some fields as libdb itself does not repeatedly read
+ # its own metadata reliably, even on the same file.
+ for x in ('h_hash: ', 'bt_compare: ', '\tuid: '):
+ if l.startswith(x):
+ return b''
+ return line
+
+
+class BerkeleyDBFile(File):
+ FILE_TYPE_RE = re.compile(r'^Berkeley DB ')
+
+ def compare_details(self, other, source=None):
+ return [Difference.from_command(
+ DbDump,
+ self.path,
+ other.path,
+ source="Berkeley DB file",
+ )]
diff --git a/diffoscope/external_tools.py b/diffoscope/external_tools.py
index e58a1de..eea15ce 100644
--- a/diffoscope/external_tools.py
+++ b/diffoscope/external_tools.py
@@ -24,6 +24,9 @@ EXTERNAL_TOOLS = {
'apktool': {
'debian': 'apktool',
},
+ 'db_dump': {
+ 'debian': 'db-util',
+ },
'bsdtar': {
'debian': 'libarchive-tools',
'arch': 'libarchive',
diff --git a/tests/comparators/test_berkeley_db.py b/tests/comparators/test_berkeley_db.py
new file mode 100644
index 0000000..342a1ee
--- /dev/null
+++ b/tests/comparators/test_berkeley_db.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2018 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.berkeley_db import BerkeleyDBFile
+
+from ..utils.data import load_fixture, get_data
+from ..utils.tools import skip_unless_tools_exist
+from ..utils.nonexisting import assert_non_existing
+
+db1 = load_fixture('test1.db')
+db2 = load_fixture('test2.db')
+
+
+def test_identification(db1):
+ assert isinstance(db1, BerkeleyDBFile)
+
+
+@pytest.fixture
+def differences(db1, db2):
+ return db1.compare(db2).details
+
+
+def test_no_differences(db1):
+ difference = db1.compare(db1)
+ assert difference is None
+
+
+@skip_unless_tools_exist('db_dump')
+def test_diff(differences):
+ expected_diff = get_data('berkeley_db_expected_diff')
+ assert differences[0].unified_diff == expected_diff
+
+
+@skip_unless_tools_exist('db_dump')
+def test_compare_non_existing(monkeypatch, db1):
+ assert_non_existing(monkeypatch, db1, has_null_source=False)
diff --git a/tests/data/berkeley_db_expected_diff b/tests/data/berkeley_db_expected_diff
new file mode 100644
index 0000000..ab91208
--- /dev/null
+++ b/tests/data/berkeley_db_expected_diff
@@ -0,0 +1,66 @@
+@@ -9,61 +9,29 @@
+ magic: 0x53162
+ version: 9
+ pagesize: 4096
+ type: 9
+ metaflags 0
+ keys: 0 records: 0
+ free list: 0
+- last_pgno: 7
++ last_pgno: 3
+ flags: 0x20 (multiple-databases)
+ minkey: 2
+ root: 1
+ page 1: btree leaf: LSN [0][1]: level 1
+- prev: 0 next: 0 entries: 6 offset: 4000
+- [000] 4040 len: 19 data: unstable|main|amd64
+- [001] 4032 len: 4 data: 00000004
+- [002] 4072 len: 18 data: unstable|main|i386
+- [003] 4064 len: 4 data: 00000002
+- [004] 4008 len: 20 data: unstable|main|source
+- [005] 4000 len: 4 data: 00000006
++ prev: 0 next: 0 entries: 2 offset: 4064
++ [000] 4072 len: 19 data: compressedfilelists
++ [001] 4064 len: 4 data: 00000002
+ page 2: btree metadata: LSN [0][1]: level 0
+ magic: 0x53162
+ version: 9
+ pagesize: 4096
+ type: 9
+ metaflags 0
+ keys: 0 records: 0
+ free list: 0
+ last_pgno: 2
+ flags: 0x20 (multiple-databases)
+ minkey: 2
+ root: 3
+ page 3: btree leaf: LSN [0][1]: level 1
+ prev: 0 next: 0 entries: 0 offset: 4096
+-page 4: btree metadata: LSN [0][1]: level 0
+- magic: 0x53162
+- version: 9
+- pagesize: 4096
+- type: 9
+- metaflags 0
+- keys: 0 records: 0
+- free list: 0
+- last_pgno: 4
+- flags: 0x20 (multiple-databases)
+- minkey: 2
+- root: 5
+-page 5: btree leaf: LSN [0][1]: level 1
+- prev: 0 next: 0 entries: 0 offset: 4096
+-page 6: btree metadata: LSN [0][1]: level 0
+- magic: 0x53162
+- version: 9
+- pagesize: 4096
+- type: 9
+- metaflags 0
+- keys: 0 records: 0
+- free list: 0
+- last_pgno: 6
+- flags: 0x20 (multiple-databases)
+- minkey: 2
+- root: 7
+-page 7: btree leaf: LSN [0][1]: level 1
+- prev: 0 next: 0 entries: 0 offset: 4096
diff --git a/tests/data/test1.db b/tests/data/test1.db
new file mode 100644
index 0000000..8364ad7
Binary files /dev/null and b/tests/data/test1.db differ
diff --git a/tests/data/test2.db b/tests/data/test2.db
new file mode 100644
index 0000000..8325722
Binary files /dev/null and b/tests/data/test2.db differ
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list