[diffoscope] 01/01: comparators: add a test for fallback_recognizes and improve the behaviour

Ximin Luo infinity0 at debian.org
Thu Sep 21 14:41:00 CEST 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository diffoscope.

commit 27be3f4ce9e6202b1472a8705176ddb4865893ac
Author: Ximin Luo <infinity0 at debian.org>
Date:   Thu Sep 21 14:40:38 2017 +0200

    comparators: add a test for fallback_recognizes and improve the behaviour
---
 diffoscope/comparators/cbfs.py              |   4 ++--
 diffoscope/comparators/deb.py               |   8 ++++----
 diffoscope/comparators/device.py            |   4 ++--
 diffoscope/comparators/directory.py         |   4 ++--
 diffoscope/comparators/elf.py               |   4 ++--
 diffoscope/comparators/haskell.py           |   4 ++--
 diffoscope/comparators/missing_file.py      |   4 ++--
 diffoscope/comparators/rdata.py             |   4 ++--
 diffoscope/comparators/symlink.py           |   4 ++--
 diffoscope/comparators/utils/file.py        |  17 +++++++++++++++--
 diffoscope/comparators/utils/specialize.py  |  11 +++++++++++
 diffoscope/comparators/zip.py               |   4 ++--
 tests/comparators/test_gzip.py              |  13 +++++++++++--
 tests/data/debian-bug-876316-control.tar.gz | Bin 0 -> 602 bytes
 14 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/diffoscope/comparators/cbfs.py b/diffoscope/comparators/cbfs.py
index 12838a2..3aa40b6 100644
--- a/diffoscope/comparators/cbfs.py
+++ b/diffoscope/comparators/cbfs.py
@@ -101,8 +101,8 @@ def is_header_valid(buf, size, offset=0):
 class CbfsFile(File):
     CONTAINER_CLASS = CbfsContainer
 
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         size = os.stat(file.path).st_size
         if size < CBFS_HEADER_SIZE or size > CBFS_MAXIMUM_FILE_SIZE:
             return False
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index f2f35a7..7254219 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -125,8 +125,8 @@ class DebFile(File):
 
 
 class Md5sumsFile(File):
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         return isinstance(file, ArchiveMember) and \
                file.name == './md5sums' and \
                isinstance(file.container.source, ArchiveMember) and \
@@ -179,8 +179,8 @@ class DebTarContainer(TarContainer):
 class DebDataTarFile(File):
     CONTAINER_CLASS = DebTarContainer
 
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         return isinstance(file, ArchiveMember) and \
                isinstance(file.container.source, ArchiveMember) and \
                DebContainer.RE_DATA_TAR.match(file.container.source.name) and \
diff --git a/diffoscope/comparators/device.py b/diffoscope/comparators/device.py
index f7bcc38..f1a05e6 100644
--- a/diffoscope/comparators/device.py
+++ b/diffoscope/comparators/device.py
@@ -31,8 +31,8 @@ logger = logging.getLogger(__name__)
 
 
 class Device(File):
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         return file.is_device()
 
     def get_device(self):
diff --git a/diffoscope/comparators/directory.py b/diffoscope/comparators/directory.py
index 44c39dd..fd655a6 100644
--- a/diffoscope/comparators/directory.py
+++ b/diffoscope/comparators/directory.py
@@ -146,8 +146,8 @@ def compare_directories(path1, path2, source=None):
 
 
 class Directory(object):
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         return file.is_directory()
 
     @classmethod
diff --git a/diffoscope/comparators/elf.py b/diffoscope/comparators/elf.py
index 4f80ae0..003d3dc 100644
--- a/diffoscope/comparators/elf.py
+++ b/diffoscope/comparators/elf.py
@@ -312,8 +312,8 @@ class ElfSection(File):
     def fuzzy_hash(self):
         return None
 
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         # No file should be recognized as an elf section
         return False
 
diff --git a/diffoscope/comparators/haskell.py b/diffoscope/comparators/haskell.py
index b27bebb..22e074b 100644
--- a/diffoscope/comparators/haskell.py
+++ b/diffoscope/comparators/haskell.py
@@ -71,8 +71,8 @@ class HiFile(File):
     """
     RE_FILE_EXTENSION = re.compile(r'\.(p_|dyn_)?hi$')
 
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         if not HiFile.RE_FILE_EXTENSION.search(file.name):
             return False
 
diff --git a/diffoscope/comparators/missing_file.py b/diffoscope/comparators/missing_file.py
index 8734f83..71cb4c3 100644
--- a/diffoscope/comparators/missing_file.py
+++ b/diffoscope/comparators/missing_file.py
@@ -34,8 +34,8 @@ class MissingFile(File):
     Represents a missing file when comparing containers.
     """
 
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         if isinstance(file, FilesystemFile) and not os.path.lexists(file.name):
             assert Config().new_file, '%s does not exist' % file.name
             return True
diff --git a/diffoscope/comparators/rdata.py b/diffoscope/comparators/rdata.py
index ac77b8d..833c335 100644
--- a/diffoscope/comparators/rdata.py
+++ b/diffoscope/comparators/rdata.py
@@ -68,8 +68,8 @@ class RdsReader(Command):
 
 
 class RdsFile(File):
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         if check_rds_extension(file) or \
                 file.container and \
                 check_rds_extension(file.container.source):
diff --git a/diffoscope/comparators/symlink.py b/diffoscope/comparators/symlink.py
index 400297e..daca922 100644
--- a/diffoscope/comparators/symlink.py
+++ b/diffoscope/comparators/symlink.py
@@ -29,8 +29,8 @@ logger = logging.getLogger(__name__)
 
 
 class Symlink(File):
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         return file.is_symlink()
 
     @property
diff --git a/diffoscope/comparators/utils/file.py b/diffoscope/comparators/utils/file.py
index c5d85d3..c4b20c4 100644
--- a/diffoscope/comparators/utils/file.py
+++ b/diffoscope/comparators/utils/file.py
@@ -162,14 +162,27 @@ class File(object, metaclass=abc.ABCMeta):
 
         The default test returns True if the file matches these tests:
 
-        cls.FALLBACK_FILE_EXTENSION_SUFFIX AND
-        cls.FALLBACK_FILE_TYPE_HEADER_PREFIX
+        (cls.FALLBACK_FILE_EXTENSION_SUFFIX AND cls.FILE_EXTENSION_SUFFIX) AND
+        (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX AND cls.FILE_TYPE_HEADER_PREFIX)
+
+        We also AND-compare with the non-fallback versions to ensure that
+        subclasses don't "accidentally match" (e.g. IpkFile vs GzipFile).
         """
+        if cls.recognizes.__func__ != File.recognizes.__func__:
+            # If the class has overridden the default recognizes() then the
+            # logic below about AND-comparing with the non-fallback versions is
+            # not valid, they have to re-implement it
+            return False
+
         all_tests = [test for test in (
             (cls.FALLBACK_FILE_EXTENSION_SUFFIX,
              str.endswith, file.name),
+            (cls.FILE_EXTENSION_SUFFIX,
+             str.endswith, file.name),
             (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX,
              bytes.startswith, file.file_header),
+            (cls.FILE_TYPE_HEADER_PREFIX,
+             bytes.startswith, file.file_header),
         ) if test[0]]  # filter out undefined tests, inc. file_type_tests if it's empty
 
         return _run_tests(all, all_tests) if all_tests else False
diff --git a/diffoscope/comparators/utils/specialize.py b/diffoscope/comparators/utils/specialize.py
index 55b8aa7..eac941c 100644
--- a/diffoscope/comparators/utils/specialize.py
+++ b/diffoscope/comparators/utils/specialize.py
@@ -56,3 +56,14 @@ def specialize(file):
 
     logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)
     return file
+
+
+def is_direct_instance(file, cls):
+    # is_direct_instance(<GzipFile>, GzipFile) == True, but
+    # is_direct_instance(<IpkFile>, GzipFile) == False
+    if not isinstance(file, cls):
+        return False
+    for c in ComparatorManager().classes:
+        if c != cls and isinstance(file, c):
+            return False
+    return True
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index 77da401..0815ddc 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -151,8 +151,8 @@ class MozillaZipContainer(ZipContainer):
 class MozillaZipFile(File):
     CONTAINER_CLASS = MozillaZipContainer
 
-    @staticmethod
-    def recognizes(file):
+    @classmethod
+    def recognizes(cls, file):
         # Mozilla-optimized ZIPs start with a 32-bit little endian integer
         # indicating the amount of data to preload, followed by the ZIP
         # central directory (with a PK\x01\x02 signature)
diff --git a/tests/comparators/test_gzip.py b/tests/comparators/test_gzip.py
index 0d4ae19..57489de 100644
--- a/tests/comparators/test_gzip.py
+++ b/tests/comparators/test_gzip.py
@@ -24,17 +24,26 @@ from diffoscope.config import Config
 from diffoscope.comparators.gzip import GzipFile
 from diffoscope.comparators.binary import FilesystemFile
 from diffoscope.comparators.missing_file import MissingFile
-from diffoscope.comparators.utils.specialize import specialize
+from diffoscope.comparators.utils.specialize import specialize, is_direct_instance
 
 from ..utils.data import load_fixture, get_data
 
 
 gzip1 = load_fixture('test1.gz')
 gzip2 = load_fixture('test2.gz')
+gzip3 = load_fixture('debian-bug-876316-control.tar.gz')
 
 
 def test_identification(gzip1):
-    assert isinstance(gzip1, GzipFile)
+    assert is_direct_instance(gzip1, GzipFile)
+
+
+def test_fallback_recognizes(gzip3):
+    # the below always-True assertion is just to document the fact that we
+    # should identify it correctly regardless of any bugs in file(1)
+    assert ("gzip" not in gzip3.magic_file_type or
+            "gzip" in gzip3.magic_file_type)
+    assert is_direct_instance(gzip3, GzipFile)
 
 
 def test_no_differences(gzip1):
diff --git a/tests/data/debian-bug-876316-control.tar.gz b/tests/data/debian-bug-876316-control.tar.gz
new file mode 100644
index 0000000..3f80fb6
Binary files /dev/null and b/tests/data/debian-bug-876316-control.tar.gz differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list