[diffoscope] 01/01: Use libarchive to list files in tar and cpio archives
Jérémy Bobbio
lunar at moszumanska.debian.org
Sun Jan 3 21:20:55 CET 2016
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch master
in repository diffoscope.
commit 3cc336c70f773a3ece79792722b6b4204440a0cd
Author: Jérémy Bobbio <lunar at debian.org>
Date: Sun Jan 3 19:44:55 2016 +0000
Use libarchive to list files in tar and cpio archives
The output of cpio and tar is too hard to control properly, so let's
create a similar enough output using libarchive functions.
We need to monkeypatch the libarchive binding in order to add the
missing accessors.
Sadly, there's no way in the libarchive API to distinguish an archive
that has both usernames and uids from one that has only usernames.
But it's good enough, I guess.
Closes: #808809
---
diffoscope/comparators/cpio.py | 6 +++--
diffoscope/comparators/deb.py | 8 ++++---
diffoscope/comparators/libarchive.py | 43 +++++++++++++++++++++++++++++++++++
diffoscope/comparators/tar.py | 11 ++++-----
tests/comparators/test_fsimage.py | 4 ++--
tests/comparators/test_utils.py | 2 +-
tests/data/cpio_listing_expected_diff | 14 ++++++------
tests/data/ext4_expected_diffs | 10 ++++----
tests/data/rpm_listing_expected_diff | 4 ++--
tests/data/tar_listing_expected_diff | 16 ++++++-------
10 files changed, 81 insertions(+), 37 deletions(-)
diff --git a/diffoscope/comparators/cpio.py b/diffoscope/comparators/cpio.py
index 9c235b3..a257090 100644
--- a/diffoscope/comparators/cpio.py
+++ b/diffoscope/comparators/cpio.py
@@ -21,7 +21,7 @@
import re
from diffoscope import tool_required
from diffoscope.comparators.binary import File
-from diffoscope.comparators.libarchive import LibarchiveContainer
+from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
from diffoscope.comparators.utils import Command
from diffoscope.difference import Difference
@@ -41,4 +41,6 @@ class CpioFile(File):
return CpioFile.RE_FILE_TYPE.search(file.magic_file_type)
def compare_details(self, other, source=None):
- return [Difference.from_command(CpioContent, self.path, other.path, source="file list")]
+ return [Difference.from_text_readers(list_libarchive(self.path),
+ list_libarchive(other.path),
+ self.path, other.path, source="file list")]
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 733b8b4..9bcfd38 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -22,10 +22,10 @@ import os.path
from diffoscope import logger
from diffoscope.difference import Difference
from diffoscope.comparators.binary import File
-from diffoscope.comparators.libarchive import LibarchiveContainer
+from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
from diffoscope.comparators.utils import \
Archive, ArchiveMember, get_ar_content
-from diffoscope.comparators.tar import TarContainer, TarListing
+from diffoscope.comparators.tar import TarContainer
class DebContainer(LibarchiveContainer):
@@ -113,4 +113,6 @@ class DebDataTarFile(File):
isinstance(file.container.source.container.source, DebFile)
def compare_details(self, other, source=None):
- return [Difference.from_command(TarListing, self.path, other.path)]
+ return [Difference.from_text_readers(list_libarchive(self.path),
+ list_libarchive(other.path),
+ self.path, other.path, source="file list")]
diff --git a/diffoscope/comparators/libarchive.py b/diffoscope/comparators/libarchive.py
index f67e310..88c908a 100644
--- a/diffoscope/comparators/libarchive.py
+++ b/diffoscope/comparators/libarchive.py
@@ -21,6 +21,7 @@
from contextlib import contextmanager
import ctypes
import os.path
+import time
import libarchive
from diffoscope import logger
from diffoscope.comparators.device import Device
@@ -36,12 +37,54 @@ if not hasattr(libarchive.ffi, 'entry_rdevmajor'):
if not hasattr(libarchive.ffi, 'entry_rdevminor'):
libarchive.ffi.ffi('entry_rdevminor', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint)
libarchive.ArchiveEntry.rdevminor = property(lambda self: libarchive.ffi.entry_rdevminor(self._entry_p))
+# Monkeypatch libarchive-c (<< 2.3)
+if not hasattr(libarchive.ffi, 'entry_nlink'):
+ libarchive.ffi.ffi('entry_nlink', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint)
+ libarchive.ArchiveEntry.nlink = property(lambda self: libarchive.ffi.entry_nlink(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_uid'):
+ libarchive.ffi.ffi('entry_uid', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint32)
+ libarchive.ArchiveEntry.uid = property(lambda self: libarchive.ffi.entry_uid(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_gid'):
+ libarchive.ffi.ffi('entry_gid', [libarchive.ffi.c_archive_entry_p], ctypes.c_uint32)
+ libarchive.ArchiveEntry.gid = property(lambda self: libarchive.ffi.entry_gid(self._entry_p))  # must call entry_gid; entry_uid here would report the uid as the gid
+if not hasattr(libarchive.ffi, 'entry_mtime_nsec'):
+ libarchive.ffi.ffi('entry_mtime_nsec', [libarchive.ffi.c_archive_entry_p], ctypes.c_long)
+ libarchive.ArchiveEntry.mtime_nsec = property(lambda self: libarchive.ffi.entry_mtime_nsec(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_uname'):
+ libarchive.ffi.ffi('entry_uname', [libarchive.ffi.c_archive_entry_p], ctypes.c_char_p)
+ libarchive.ArchiveEntry.uname = property(lambda self: libarchive.ffi.entry_uname(self._entry_p))
+if not hasattr(libarchive.ffi, 'entry_gname'):
+ libarchive.ffi.ffi('entry_gname', [libarchive.ffi.c_archive_entry_p], ctypes.c_char_p)
+ libarchive.ArchiveEntry.gname = property(lambda self: libarchive.ffi.entry_gname(self._entry_p))
# Monkeypatch libarchive-c so we always get pathname as (Unicode) str
# Otherwise, we'll get sometimes str and sometimes bytes and always pain.
libarchive.ArchiveEntry.pathname = property(lambda self: libarchive.ffi.entry_pathname(self._entry_p).decode('utf-8', errors='surrogateescape'))
+def list_libarchive(path):
+ with libarchive.file_reader(path) as archive:
+ for entry in archive:
+ if entry.isblk or entry.ischr:
+ size_or_dev = '{major:>3},{minor:>3}'.format(major=entry.rdevmajor, minor=entry.rdevminor)
+ else:
+ size_or_dev = entry.size
+ mtime = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(entry.mtime)) + '.{:06d}'.format(entry.mtime_nsec // 1000)
+ if entry.issym:
+ name_and_link = '{entry.name} -> {entry.linkname}'.format(entry=entry)
+ else:
+ name_and_link = entry.name
+ if entry.uname:
+ user = '{user:<8} {uid:>7}'.format(user=entry.uname.decode('utf-8', errors='surrogateescape'), uid='({})'.format(entry.uid))
+ else:
+ user = entry.uid
+ if entry.gname:
+ group = '{group:<8} {gid:>7}'.format(group=entry.gname.decode('utf-8', errors='surrogateescape'), gid='({})'.format(entry.gid))
+ else:
+ group = entry.gid
+ yield '{strmode} {entry.nlink:>3} {user:>8} {group:>8} {size_or_dev:>8} {mtime:>8} {name_and_link}\n'.format(strmode=entry.strmode.decode('us-ascii'), entry=entry, user=user, group=group, size_or_dev=size_or_dev, mtime=mtime, name_and_link=name_and_link)
+
+
class LibarchiveMember(ArchiveMember):
def __init__(self, archive, entry):
super().__init__(archive, entry.pathname)
diff --git a/diffoscope/comparators/tar.py b/diffoscope/comparators/tar.py
index 0739817..ab5d8cd 100644
--- a/diffoscope/comparators/tar.py
+++ b/diffoscope/comparators/tar.py
@@ -20,17 +20,12 @@
import re
from diffoscope.difference import Difference
from diffoscope.comparators.binary import File
-from diffoscope.comparators.libarchive import LibarchiveContainer
+from diffoscope.comparators.libarchive import LibarchiveContainer, list_libarchive
from diffoscope.comparators.utils import Command, tool_required
class TarContainer(LibarchiveContainer):
pass
-class TarListing(Command):
- @tool_required('tar')
- def cmdline(self):
- return ['tar', '--full-time', '-tvf', self.path]
-
class TarFile(File):
CONTAINER_CLASS = TarContainer
@@ -41,4 +36,6 @@ class TarFile(File):
return TarFile.RE_FILE_TYPE.search(file.magic_file_type)
def compare_details(self, other, source=None):
- return [Difference.from_command(TarListing, self.path, other.path)]
+ return [Difference.from_text_readers(list_libarchive(self.path),
+ list_libarchive(other.path),
+ self.path, other.path, source="file list")]
diff --git a/tests/comparators/test_fsimage.py b/tests/comparators/test_fsimage.py
index d6acc65..d2305a3 100644
--- a/tests/comparators/test_fsimage.py
+++ b/tests/comparators/test_fsimage.py
@@ -74,8 +74,8 @@ def test_differences(differences):
tarinfo = differences[0].details[0]
tardiff = differences[0].details[1]
encodingdiff = tardiff.details[0]
- assert tarinfo.source1 == 'tar --full-time -tvf {}'
- assert tarinfo.source2 == 'tar --full-time -tvf {}'
+ assert tarinfo.source1 == 'file list'
+ assert tarinfo.source2 == 'file list'
assert tardiff.source1 == './date.txt'
assert tardiff.source2 == './date.txt'
assert encodingdiff.source1 == 'encoding'
diff --git a/tests/comparators/test_utils.py b/tests/comparators/test_utils.py
index 7aa3326..aad8c2d 100644
--- a/tests/comparators/test_utils.py
+++ b/tests/comparators/test_utils.py
@@ -72,7 +72,7 @@ def test_no_fuzzy_matching(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
difference = fuzzy_tar_in_tar1.compare(fuzzy_tar_in_tar2)
assert len(difference.details) == 1
- assert difference.details[0].source1 == 'tar --full-time -tvf {}'
+ assert difference.details[0].source1 == 'file list'
@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
def test_no_fuzzy_matching_new_file(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
diff --git a/tests/data/cpio_listing_expected_diff b/tests/data/cpio_listing_expected_diff
index c97fd5e..0a9365e 100644
--- a/tests/data/cpio_listing_expected_diff
+++ b/tests/data/cpio_listing_expected_diff
@@ -1,8 +1,8 @@
@@ -1,4 +1,4 @@
--lrwxrwxrwx 1 1000 1000 6 Jun 24 15:10 dir/link -> broken
-+lrwxrwxrwx 1 1000 1000 13 Jun 24 15:11 dir/link -> really-broken
- crw-r--r-- 1 0 0 1, 3 Jun 24 14:47 dir/null
---rw-r--r-- 1 1000 1000 446 Jun 24 15:10 dir/text
--drwxr-xr-x 2 1000 1000 0 Jun 24 15:10 dir
-+-rw-r--r-- 1 1000 1000 671 Jun 24 15:11 dir/text
-+drwxr-xr-x 2 1000 1000 0 Jun 24 15:11 dir
+-lrwxrwxrwx 1 1000 1000 6 2015-06-24 15:10:12.000000 dir/link -> broken
++lrwxrwxrwx 1 1000 1000 13 2015-06-24 15:11:35.000000 dir/link -> really-broken
+ crw-r--r-- 1 0 0 1, 3 2015-06-24 14:47:34.000000 dir/null
+--rw-r--r-- 1 1000 1000 446 2015-06-24 15:10:17.000000 dir/text
+-drwxr-xr-x 2 1000 1000 0 2015-06-24 15:10:12.000000 dir
++-rw-r--r-- 1 1000 1000 671 2015-06-24 15:11:38.000000 dir/text
++drwxr-xr-x 2 1000 1000 0 2015-06-24 15:11:35.000000 dir
diff --git a/tests/data/ext4_expected_diffs b/tests/data/ext4_expected_diffs
index 3b0ff80..fe7660e 100644
--- a/tests/data/ext4_expected_diffs
+++ b/tests/data/ext4_expected_diffs
@@ -1,9 +1,9 @@
@@ -1,3 +1,3 @@
--drwxr-xr-x 0/0 0 2015-12-02 16:01:40 ./
-+drwxr-xr-x 0/0 0 2015-12-02 16:03:11 ./
- drwx------ 0/0 0 2015-12-02 16:00:55 ./lost+found/
---rw-rw-rw- 1234/1234 28 2015-12-02 16:01:40 ./date.txt
-+-r--r--r-- 4321/4321 44 2015-12-02 16:03:11 ./date.txt
+-drwxr-xr-x 0 0 0 0 2015-12-02 16:01:40.000000 ./
++drwxr-xr-x 0 0 0 0 2015-12-02 16:03:11.000000 ./
+ drwx------ 0 0 0 0 2015-12-02 16:00:55.000000 ./lost+found/
+--rw-rw-rw- 0 1234 1234 28 2015-12-02 16:01:40.000000 ./date.txt
++-r--r--r-- 0 4321 4321 44 2015-12-02 16:03:11.000000 ./date.txt
@@ -1 +1 @@
-Wed Dec 2 17:01:40 CET 2015
+jeudi 3 décembre 2015, 06:03:11 (UTC+1400)
diff --git a/tests/data/rpm_listing_expected_diff b/tests/data/rpm_listing_expected_diff
index 25c998c..3e59d7d 100644
--- a/tests/data/rpm_listing_expected_diff
+++ b/tests/data/rpm_listing_expected_diff
@@ -1,3 +1,3 @@
@@ -1 +1 @@
---rw-r--r-- 1 0 0 446 Jun 24 17:55 ./dir/text
-+-rw-r--r-- 1 0 0 671 Jun 24 17:55 ./dir/text
+--rw-r--r-- 1 0 0 446 2015-06-24 17:55:18.000000 ./dir/text
++-rw-r--r-- 1 0 0 671 2015-06-24 17:55:59.000000 ./dir/text
diff --git a/tests/data/tar_listing_expected_diff b/tests/data/tar_listing_expected_diff
index b91dc34..7190741 100644
--- a/tests/data/tar_listing_expected_diff
+++ b/tests/data/tar_listing_expected_diff
@@ -1,9 +1,9 @@
@@ -1,4 +1,4 @@
--drwxr-xr-x lunar/lunar 0 2015-06-29 15:49:09 dir/
---rw-r--r-- lunar/lunar 446 2015-06-29 15:49:09 dir/text
--crw-r--r-- root/root 1,3 2015-06-29 15:49:09 dir/null
--lrwxrwxrwx lunar/lunar 0 2015-06-29 15:49:09 dir/link -> broken
-+drwxr-xr-x lunar/lunar 0 2015-06-29 15:49:41 dir/
-+-rw-r--r-- lunar/lunar 671 2015-06-29 15:49:41 dir/text
-+crw-r--r-- root/root 1,3 2015-06-29 15:49:41 dir/null
-+lrwxrwxrwx lunar/lunar 0 2015-06-29 15:49:41 dir/link -> really-broken
+-drwxr-xr-x 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:09.000000 dir/
+--rw-r--r-- 0 lunar (1000) lunar (1000) 446 2015-06-29 15:49:09.000000 dir/text
+-crw-r--r-- 0 root (0) root (0) 1, 3 2015-06-29 15:49:09.000000 dir/null
+-lrwxrwxrwx 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:09.000000 dir/link -> broken
++drwxr-xr-x 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:41.000000 dir/
++-rw-r--r-- 0 lunar (1000) lunar (1000) 671 2015-06-29 15:49:41.000000 dir/text
++crw-r--r-- 0 root (0) root (0) 1, 3 2015-06-29 15:49:41.000000 dir/null
++lrwxrwxrwx 0 lunar (1000) lunar (1000) 0 2015-06-29 15:49:41.000000 dir/link -> really-broken
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list