[diffoscope] 01/01: comparators: factor common logic from various comparators into File.recognizes

Ximin Luo infinity0 at debian.org
Tue Jul 25 18:04:03 CEST 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository diffoscope.

commit eaea12310abbe71c931153985a76ceb8206f0b55
Author: Ximin Luo <infinity0 at debian.org>
Date:   Tue Jul 25 18:01:56 2017 +0200

    comparators: factor common logic from various comparators into File.recognizes
---
 diffoscope/comparators/apk.py              |  7 ------
 diffoscope/comparators/elf.py              |  5 -----
 diffoscope/comparators/fontconfig.py       | 11 +--------
 diffoscope/comparators/rdata.py            |  3 +--
 diffoscope/comparators/rust.py             | 14 +++++-------
 diffoscope/comparators/utils/file.py       | 36 +++++++++++++++++++++++++++---
 diffoscope/comparators/utils/specialize.py | 19 +++-------------
 diffoscope/comparators/zip.py              |  6 +----
 8 files changed, 44 insertions(+), 57 deletions(-)

diff --git a/diffoscope/comparators/apk.py b/diffoscope/comparators/apk.py
index 9ac2887..cb2badf 100644
--- a/diffoscope/comparators/apk.py
+++ b/diffoscope/comparators/apk.py
@@ -151,13 +151,6 @@ class ApkFile(File):
     RE_FILE_EXTENSION = re.compile(r'\.apk$')
     CONTAINER_CLASS = ApkContainer
 
-    @staticmethod
-    def recognizes(file):
-        if not ApkFile.RE_FILE_EXTENSION.search(file.name):
-            return False
-        return (ApkFile.RE_FILE_TYPE.search(file.magic_file_type) or
-                file.file_header[:4] == ApkFile.RE_FILE_TYPE_FALLBACK_HEADER)
-
     def compare_details(self, other, source=None):
         zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
                              Difference.from_command(ZipinfoVerbose, self.path, other.path)
diff --git a/diffoscope/comparators/elf.py b/diffoscope/comparators/elf.py
index 17b0f26..f6240cf 100644
--- a/diffoscope/comparators/elf.py
+++ b/diffoscope/comparators/elf.py
@@ -540,11 +540,6 @@ class StaticLibFile(File):
     RE_FILE_TYPE = re.compile(r'\bar archive\b')
     RE_FILE_EXTENSION = re.compile(r'\.a$')
 
-    @staticmethod
-    def recognizes(file):
-        return StaticLibFile.RE_FILE_TYPE.search(file.magic_file_type) and \
-            StaticLibFile.RE_FILE_EXTENSION.search(file.name)
-
     def compare_details(self, other, source=None):
         differences = [Difference.from_text_readers(
             list_libarchive(self.path),
diff --git a/diffoscope/comparators/fontconfig.py b/diffoscope/comparators/fontconfig.py
index dc2b981..887fb6c 100644
--- a/diffoscope/comparators/fontconfig.py
+++ b/diffoscope/comparators/fontconfig.py
@@ -27,18 +27,9 @@ from .utils.command import Command
 
 
 class FontconfigCacheFile(File):
-    MAGIC = struct.pack('<H', 0xFC04)
+    RE_FILE_TYPE_FALLBACK_HEADER = struct.pack('<H', 0xFC04)
     RE_FILE_EXTENSION = re.compile(r'\-le64\.cache-4$')
 
-    @staticmethod
-    def recognizes(file):
-        if not FontconfigCacheFile.RE_FILE_EXTENSION.search(file.name):
-            return False
-
-        with open(file.path, 'rb') as f:
-            return f.read(len(FontconfigCacheFile.MAGIC)) == \
-                FontconfigCacheFile.MAGIC
-
     def compare_details(self, other, source=None):
         return [Difference.from_text(
             describe_cache_file(self.path),
diff --git a/diffoscope/comparators/rdata.py b/diffoscope/comparators/rdata.py
index d64fe2b..6992108 100644
--- a/diffoscope/comparators/rdata.py
+++ b/diffoscope/comparators/rdata.py
@@ -70,8 +70,7 @@ class RdsFile(File):
         if check_rds_extension(file) or \
                 file.container and \
                 check_rds_extension(file.container.source):
-            with open(file.path, 'rb') as f:
-                return f.read(8) == HEADER
+            return file.file_header.startswith(HEADER)
         return False
 
     def compare_details(self, other, source=None):
diff --git a/diffoscope/comparators/rust.py b/diffoscope/comparators/rust.py
index 7ffe63b..9e7a6c9 100644
--- a/diffoscope/comparators/rust.py
+++ b/diffoscope/comparators/rust.py
@@ -18,6 +18,7 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
+import re
 import zlib
 import os.path
 import logging
@@ -25,6 +26,7 @@ import logging
 from diffoscope.difference import Difference
 
 from .utils.archive import Archive
+from .utils.file import File
 
 RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET = 15
 RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET = 23
@@ -55,16 +57,10 @@ class RustObjectContainer(Archive):
         return dest_path
 
 
-class RustObjectFile(object):
+class RustObjectFile(File):
     CONTAINER_CLASS = RustObjectContainer
-
-    @staticmethod
-    def recognizes(file):
-        if not file.name.endswith(".deflate"):
-            return False
-        # See librustc_trans/back/link.rs for details of this format
-        with open(file.path, "rb") as fp:
-            return fp.read(RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET) == b'RUST_OBJECT\x01\x00\x00\x00'
+    RE_FILE_TYPE_FALLBACK_HEADER = b'RUST_OBJECT\x01\x00\x00\x00'
+    RE_FILE_EXTENSION = re.compile(r'\.deflate$')
 
     def compare_details(self, other, source=None):
         return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')]
diff --git a/diffoscope/comparators/utils/file.py b/diffoscope/comparators/utils/file.py
index 4fa996c..bada1a5 100644
--- a/diffoscope/comparators/utils/file.py
+++ b/diffoscope/comparators/utils/file.py
@@ -58,9 +58,6 @@ def path_apparent_size(path=".", visited=None):
 
 
 class File(object, metaclass=abc.ABCMeta):
-    RE_FILE_TYPE = None
-    RE_FILE_EXTENSION = None
-
     if hasattr(magic, 'open'): # use Magic-file-extensions from file
         @classmethod
         def guess_file_type(self, path):
@@ -109,6 +106,39 @@ class File(object, metaclass=abc.ABCMeta):
     def __del__(self):
         self.cleanup()
 
+    RE_FILE_TYPE = None
+    RE_FILE_EXTENSION = None
+    RE_FILE_TYPE_FALLBACK_HEADER = None
+    RE_CLASS = re.compile("").__class__
+
+    @classmethod
+    def recognizes(cls, file):
+        # The structure below allows us to construct a boolean tree of tests
+        # that can be combined with all() and any(). Tests that are not defined
+        # for a class are filtered out, so that we don't get into a "vacuous
+        # truth" situation like a naive all([]) invocation would give.
+
+        def run_tests(tests, fold):
+            return fold(t(x, y) for x, t, y in tests)
+
+        file_type_tests = [test for test in (
+            (cls.RE_FILE_TYPE,
+             cls.RE_CLASS.search, file.magic_file_type),
+            (cls.RE_FILE_TYPE_FALLBACK_HEADER,
+             lambda m, h: h.startswith(m), file.file_header),
+        ) if test[0]] # filter out undefined tests
+
+        all_tests = [test for test in (
+            (cls.RE_FILE_EXTENSION,
+             cls.RE_CLASS.search, file.name),
+            (file_type_tests,
+             run_tests, any),
+        ) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty
+
+        if all_tests:
+            return run_tests(all_tests, all)
+        return False
+
     # This might be different from path and is used to do file extension matching
     @property
     def name(self):
diff --git a/diffoscope/comparators/utils/specialize.py b/diffoscope/comparators/utils/specialize.py
index 998c949..82bc501 100644
--- a/diffoscope/comparators/utils/specialize.py
+++ b/diffoscope/comparators/utils/specialize.py
@@ -32,22 +32,9 @@ def specialize(file):
             return file
 
         # Does this file class match?
-        flag = False
-        if hasattr(cls, 'recognizes'):
-            with profile('recognizes', file):
-                flag = cls.recognizes(file)
-        else:
-            re_tests = [(x, y) for x, y in (
-                (cls.RE_FILE_TYPE, file.magic_file_type),
-                (cls.RE_FILE_EXTENSION, file.name),
-            ) if x]
-
-            # If neither are defined, it's *not* a match.
-            if re_tests:
-                flag = all(x.search(y) for x, y in re_tests)
-
-        if not flag:
-            continue
+        with profile('recognizes', file):
+            if not cls.recognizes(file):
+                continue
 
         # Found a match; perform type magic
         logger.debug("Using %s for %s", cls.__name__, file.name)
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index a3e6fd5..c079a95 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -154,11 +154,7 @@ class MozillaZipFile(File):
         # Mozilla-optimized ZIPs start with a 32-bit little endian integer
         # indicating the amount of data to preload, followed by the ZIP
         # central directory (with a PK\x01\x02 signature)
-        with open(file.path, 'rb') as f:
-            preload = f.read(4)
-            if len(preload) == 4:
-                signature = f.read(4)
-                return signature == b'PK\x01\x02'
+        return file.file_header[4:8] == b'PK\x01\x02'
 
     def compare_details(self, other, source=None):
         zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list