[diffoscope] 01/01: Use libarchive to list files in tar and cpio archives

Jérémy Bobbio lunar at moszumanska.debian.org
Sun Jan 3 21:20:55 CET 2016


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit 3cc336c70f773a3ece79792722b6b4204440a0cd
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Sun Jan 3 19:44:55 2016 +0000

    Use libarchive to list files in tar and cpio archives
    
    The output of cpio and tar is too hard to control properly, so let's
    create a similar enough output using libarchive functions.
    
    We need to monkeypatch the libarchive binding in order to add the
    missing accessors.
    
    Sadly, there's no way in libarchive API to distinguish an archive
    which would have usernames and uids from one that would have only usernames.
    But good enough, I guess.
    
    Closes: #808809
---
 diffoscope/comparators/cpio.py        |  6 +++--
 diffoscope/comparators/deb.py         |  8 ++++---
 diffoscope/comparators/libarchive.py  | 43 +++++++++++++++++++++++++++++++++++
 diffoscope/comparators/tar.py         | 11 ++++-----
 tests/comparators/test_fsimage.py     |  4 ++--
 tests/comparators/test_utils.py       |  2 +-
 tests/data/cpio_listing_expected_diff | 14 ++++++------
 tests/data/ext4_expected_diffs        | 10 ++++----
 tests/data/rpm_listing_expected_diff  |  4 ++--
 tests/data/tar_listing_expected_diff  | 16 ++++++-------
 10 files changed, 81 insertions(+), 37 deletions(-)

diff --git a/diffoscope/comparators/cpio.py b/diffoscope/comparators/cpio.py
index 9c235b3..a257090 100644
--- a/diffoscope/comparators/cpio.py
+++ b/diffoscope/comparators/cpio.py
@@ -21,7 +21,7 @@
 import re
 from diffoscope import tool_required
 from diffoscope.comparators.binary import File
-from diffoscope.comparators.libarchive import LibarchiveContainer
+from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
 from diffoscope.comparators.utils import Command
 from diffoscope.difference import Difference
 
@@ -41,4 +41,6 @@ class CpioFile(File):
         return CpioFile.RE_FILE_TYPE.search(file.magic_file_type)
 
     def compare_details(self, other, source=None):
-        return [Difference.from_command(CpioContent, self.path, other.path, source="file list")]
+        return [Difference.from_text_readers(list_libarchive(self.path),
+                                             list_libarchive(other.path),
+                                             self.path, other.path, source="file list")]
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 733b8b4..9bcfd38 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -22,10 +22,10 @@ import os.path
 from diffoscope import logger
 from diffoscope.difference import Difference
 from diffoscope.comparators.binary import File
-from diffoscope.comparators.libarchive import LibarchiveContainer
+from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
 from diffoscope.comparators.utils import \
     Archive, ArchiveMember, get_ar_content
-from diffoscope.comparators.tar import TarContainer, TarListing
+from diffoscope.comparators.tar import TarContainer
 
 
 class DebContainer(LibarchiveContainer):
@@ -113,4 +113,6 @@ class DebDataTarFile(File):
                isinstance(file.container.source.container.source, DebFile)
 
     def compare_details(self, other, source=None):
-        return [Difference.from_command(TarListing, self.path, other.path)]
+        return [Difference.from_text_readers(list_libarchive(self.path),
+                                        list_libarchive(other.path),
+                                        self.path, other.path, source="file list")]
diff --git a/diffoscope/comparators/libarchive.py b/diffoscope/comparators/libarchive.py
index f67e310..88c908a 100644
--- a/diffoscope/comparators/libarchive.py
+++ b/diffoscope/comparators/libarchive.py
@@ -21,6 +21,7 @@
 from contextlib import contextmanager
 import ctypes
 import os.path
+import time
 import libarchive
 from diffoscope import logger
 from diffoscope.comparators.device import Device
@@ -36,12 +37,54 @@ if not hasattr(libarchive.ffi, 'entry_rdevmajor'):
 if not hasattr(libarchive.ffi, 'entry_rdevminor'):
     libarchive.ffi.ffi('entry_rdevminor', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint)
     libarchive.ArchiveEntry.rdevminor = property(lambda self: libarchive.ffi.entry_rdevminor(self._entry_p))
+# Monkeypatch libarchive-c (<< 2.3)
+if not hasattr(libarchive.ffi, 'entry_nlink'):
+    libarchive.ffi.ffi('entry_nlink', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint)
+    libarchive.ArchiveEntry.nlink = property(lambda self: libarchive.ffi.entry_nlink(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_uid'):
+    libarchive.ffi.ffi('entry_uid', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint32)
+    libarchive.ArchiveEntry.uid = property(lambda self: libarchive.ffi.entry_uid(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_gid'):  # accessor absent from older libarchive-c
+    libarchive.ffi.ffi('entry_gid', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint32)
+    libarchive.ArchiveEntry.gid = property(lambda self: libarchive.ffi.entry_gid(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_mtime_nsec'):
+    libarchive.ffi.ffi('entry_mtime_nsec', [libarchive.ffi.c_archive_entry_p], ctypes.c_long)
+    libarchive.ArchiveEntry.mtime_nsec = property(lambda self: libarchive.ffi.entry_mtime_nsec(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_uname'):
+    libarchive.ffi.ffi('entry_uname', [libarchive.ffi.c_archive_entry_p], ctypes.c_char_p)
+    libarchive.ArchiveEntry.uname = property(lambda self: libarchive.ffi.entry_uname(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_gname'):
+    libarchive.ffi.ffi('entry_gname', [libarchive.ffi.c_archive_entry_p], ctypes.c_char_p)
+    libarchive.ArchiveEntry.gname = property(lambda self: libarchive.ffi.entry_gname(self._entry_p))
 
 # Monkeypatch libarchive-c so we always get pathname as (Unicode) str
 # Otherwise, we'll get sometimes str and sometimes bytes and always pain.
 libarchive.ArchiveEntry.pathname = property(lambda self: libarchive.ffi.entry_pathname(self._entry_p).decode('utf-8', errors='surrogateescape'))
 
 
+def list_libarchive(path):
+    with libarchive.file_reader(path) as archive:
+        for entry in archive:
+            if entry.isblk or entry.ischr:
+                size_or_dev = '{major:>3},{minor:>3}'.format(major=entry.rdevmajor, minor=entry.rdevminor)
+            else:
+                size_or_dev = entry.size
+            mtime = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(entry.mtime)) + '.{:06d}'.format(entry.mtime_nsec // 1000)
+            if entry.issym:
+                name_and_link = '{entry.name} -> {entry.linkname}'.format(entry=entry)
+            else:
+                name_and_link = entry.name
+            if entry.uname:
+                user = '{user:<8} {uid:>7}'.format(user=entry.uname.decode('utf-8', errors='surrogateescape'), uid='({})'.format(entry.uid))
+            else:
+                user = entry.uid
+            if entry.gname:
+                group = '{group:<8} {gid:>7}'.format(group=entry.gname.decode('utf-8', errors='surrogateescape'), gid='({})'.format(entry.gid))
+            else:
+                group = entry.gid
+            yield '{strmode} {entry.nlink:>3} {user:>8} {group:>8} {size_or_dev:>8} {mtime:>8} {name_and_link}\n'.format(strmode=entry.strmode.decode('us-ascii'), entry=entry, user=user, group=group, size_or_dev=size_or_dev, mtime=mtime, name_and_link=name_and_link)
+
+
 class LibarchiveMember(ArchiveMember):
     def __init__(self, archive, entry):
         super().__init__(archive, entry.pathname)
diff --git a/diffoscope/comparators/tar.py b/diffoscope/comparators/tar.py
index 0739817..ab5d8cd 100644
--- a/diffoscope/comparators/tar.py
+++ b/diffoscope/comparators/tar.py
@@ -20,17 +20,12 @@
 import re
 from diffoscope.difference import Difference
 from diffoscope.comparators.binary import File
-from diffoscope.comparators.libarchive import LibarchiveContainer
+from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
 from diffoscope.comparators.utils import Command, tool_required
 
 class TarContainer(LibarchiveContainer):
     pass
 
-class TarListing(Command):
-    @tool_required('tar')
-    def cmdline(self):
-        return ['tar', '--full-time', '-tvf', self.path]
-
 
 class TarFile(File):
     CONTAINER_CLASS = TarContainer
@@ -41,4 +36,6 @@ class TarFile(File):
         return TarFile.RE_FILE_TYPE.search(file.magic_file_type)
 
     def compare_details(self, other, source=None):
-        return [Difference.from_command(TarListing, self.path, other.path)]
+        return [Difference.from_text_readers(list_libarchive(self.path),
+                                        list_libarchive(other.path),
+                                        self.path, other.path, source="file list")]
diff --git a/tests/comparators/test_fsimage.py b/tests/comparators/test_fsimage.py
index d6acc65..d2305a3 100644
--- a/tests/comparators/test_fsimage.py
+++ b/tests/comparators/test_fsimage.py
@@ -74,8 +74,8 @@ def test_differences(differences):
     tarinfo = differences[0].details[0]
     tardiff = differences[0].details[1]
     encodingdiff = tardiff.details[0]
-    assert tarinfo.source1 == 'tar --full-time -tvf {}'
-    assert tarinfo.source2 == 'tar --full-time -tvf {}'
+    assert tarinfo.source1 == 'file list'
+    assert tarinfo.source2 == 'file list'
     assert tardiff.source1 == './date.txt'
     assert tardiff.source2 == './date.txt'
     assert encodingdiff.source1 == 'encoding'
diff --git a/tests/comparators/test_utils.py b/tests/comparators/test_utils.py
index 7aa3326..aad8c2d 100644
--- a/tests/comparators/test_utils.py
+++ b/tests/comparators/test_utils.py
@@ -72,7 +72,7 @@ def test_no_fuzzy_matching(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
     monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
     difference = fuzzy_tar_in_tar1.compare(fuzzy_tar_in_tar2)
     assert len(difference.details) == 1
-    assert difference.details[0].source1 == 'tar --full-time -tvf {}'
+    assert difference.details[0].source1 == 'file list'
 
 @pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
 def test_no_fuzzy_matching_new_file(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
diff --git a/tests/data/cpio_listing_expected_diff b/tests/data/cpio_listing_expected_diff
index c97fd5e..0a9365e 100644
--- a/tests/data/cpio_listing_expected_diff
+++ b/tests/data/cpio_listing_expected_diff
@@ -1,8 +1,8 @@
 @@ -1,4 +1,4 @@
--lrwxrwxrwx   1 1000     1000            6 Jun 24 15:10 dir/link -> broken
-+lrwxrwxrwx   1 1000     1000           13 Jun 24 15:11 dir/link -> really-broken
- crw-r--r--   1 0        0          1,   3 Jun 24 14:47 dir/null
---rw-r--r--   1 1000     1000          446 Jun 24 15:10 dir/text
--drwxr-xr-x   2 1000     1000            0 Jun 24 15:10 dir
-+-rw-r--r--   1 1000     1000          671 Jun 24 15:11 dir/text
-+drwxr-xr-x   2 1000     1000            0 Jun 24 15:11 dir
+-lrwxrwxrwx   1     1000     1000        6 2015-06-24 15:10:12.000000 dir/link -> broken
++lrwxrwxrwx   1     1000     1000       13 2015-06-24 15:11:35.000000 dir/link -> really-broken
+ crw-r--r--   1        0        0    1,  3 2015-06-24 14:47:34.000000 dir/null
+--rw-r--r--   1     1000     1000      446 2015-06-24 15:10:17.000000 dir/text
+-drwxr-xr-x   2     1000     1000        0 2015-06-24 15:10:12.000000 dir
++-rw-r--r--   1     1000     1000      671 2015-06-24 15:11:38.000000 dir/text
++drwxr-xr-x   2     1000     1000        0 2015-06-24 15:11:35.000000 dir
diff --git a/tests/data/ext4_expected_diffs b/tests/data/ext4_expected_diffs
index 3b0ff80..fe7660e 100644
--- a/tests/data/ext4_expected_diffs
+++ b/tests/data/ext4_expected_diffs
@@ -1,9 +1,9 @@
 @@ -1,3 +1,3 @@
--drwxr-xr-x 0/0               0 2015-12-02 16:01:40 ./
-+drwxr-xr-x 0/0               0 2015-12-02 16:03:11 ./
- drwx------ 0/0               0 2015-12-02 16:00:55 ./lost+found/
---rw-rw-rw- 1234/1234        28 2015-12-02 16:01:40 ./date.txt
-+-r--r--r-- 4321/4321        44 2015-12-02 16:03:11 ./date.txt
+-drwxr-xr-x   0        0        0        0 2015-12-02 16:01:40.000000 ./
++drwxr-xr-x   0        0        0        0 2015-12-02 16:03:11.000000 ./
+ drwx------   0        0        0        0 2015-12-02 16:00:55.000000 ./lost+found/
+--rw-rw-rw-   0     1234     1234       28 2015-12-02 16:01:40.000000 ./date.txt
++-r--r--r--   0     4321     4321       44 2015-12-02 16:03:11.000000 ./date.txt
 @@ -1 +1 @@
 -Wed Dec 2 17:01:40 CET 2015
 +jeudi 3 décembre 2015, 06:03:11 (UTC+1400)
diff --git a/tests/data/rpm_listing_expected_diff b/tests/data/rpm_listing_expected_diff
index 25c998c..3e59d7d 100644
--- a/tests/data/rpm_listing_expected_diff
+++ b/tests/data/rpm_listing_expected_diff
@@ -1,3 +1,3 @@
 @@ -1 +1 @@
---rw-r--r--   1 0        0             446 Jun 24 17:55 ./dir/text
-+-rw-r--r--   1 0        0             671 Jun 24 17:55 ./dir/text
+--rw-r--r--   1        0        0      446 2015-06-24 17:55:18.000000 ./dir/text
++-rw-r--r--   1        0        0      671 2015-06-24 17:55:59.000000 ./dir/text
diff --git a/tests/data/tar_listing_expected_diff b/tests/data/tar_listing_expected_diff
index b91dc34..7190741 100644
--- a/tests/data/tar_listing_expected_diff
+++ b/tests/data/tar_listing_expected_diff
@@ -1,9 +1,9 @@
 @@ -1,4 +1,4 @@
--drwxr-xr-x lunar/lunar       0 2015-06-29 15:49:09 dir/
---rw-r--r-- lunar/lunar     446 2015-06-29 15:49:09 dir/text
--crw-r--r-- root/root       1,3 2015-06-29 15:49:09 dir/null
--lrwxrwxrwx lunar/lunar       0 2015-06-29 15:49:09 dir/link -> broken
-+drwxr-xr-x lunar/lunar       0 2015-06-29 15:49:41 dir/
-+-rw-r--r-- lunar/lunar     671 2015-06-29 15:49:41 dir/text
-+crw-r--r-- root/root       1,3 2015-06-29 15:49:41 dir/null
-+lrwxrwxrwx lunar/lunar       0 2015-06-29 15:49:41 dir/link -> really-broken
+-drwxr-xr-x   0 lunar     (1000) lunar     (1000)        0 2015-06-29 15:49:09.000000 dir/
+--rw-r--r--   0 lunar     (1000) lunar     (1000)      446 2015-06-29 15:49:09.000000 dir/text
+-crw-r--r--   0 root         (0) root         (0)    1,  3 2015-06-29 15:49:09.000000 dir/null
+-lrwxrwxrwx   0 lunar     (1000) lunar     (1000)        0 2015-06-29 15:49:09.000000 dir/link -> broken
++drwxr-xr-x   0 lunar     (1000) lunar     (1000)        0 2015-06-29 15:49:41.000000 dir/
++-rw-r--r--   0 lunar     (1000) lunar     (1000)      671 2015-06-29 15:49:41.000000 dir/text
++crw-r--r--   0 root         (0) root         (0)    1,  3 2015-06-29 15:49:41.000000 dir/null
++lrwxrwxrwx   0 lunar     (1000) lunar     (1000)        0 2015-06-29 15:49:41.000000 dir/link -> really-broken

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list