[diffoscope] 01/01: comparators: add a fallback_recognizes to work around file(1) #876316. (Closes: #875282)
Ximin Luo
infinity0 at debian.org
Thu Sep 21 13:50:00 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch master
in repository diffoscope.
commit 7b8b9ae712a4f129db03ced11d8eee3c714a22fe
Author: Ximin Luo <infinity0 at debian.org>
Date: Thu Sep 21 13:49:06 2017 +0200
comparators: add a fallback_recognizes to work around file(1) #876316. (Closes: #875282)
---
diffoscope/comparators/directory.py | 4 +++
diffoscope/comparators/gzip.py | 4 +++
diffoscope/comparators/utils/file.py | 48 ++++++++++++++++++++++++++----
diffoscope/comparators/utils/specialize.py | 36 ++++++++++++++--------
diffoscope/comparators/xz.py | 4 +++
5 files changed, 78 insertions(+), 18 deletions(-)
diff --git a/diffoscope/comparators/directory.py b/diffoscope/comparators/directory.py
index 3b195bc..44c39dd 100644
--- a/diffoscope/comparators/directory.py
+++ b/diffoscope/comparators/directory.py
@@ -150,6 +150,10 @@ class Directory(object):
def recognizes(file):
return file.is_directory()
+ @classmethod
+ def fallback_recognizes(cls, file):
+ return False
+
class FilesystemDirectory(Directory):
def __init__(self, path):
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index 6dd1d0b..31843d6 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -56,5 +56,9 @@ class GzipFile(File):
CONTAINER_CLASS = GzipContainer
FILE_TYPE_RE = re.compile(r'^gzip compressed data\b')
+ # Work around file(1) Debian bug #876316
+ FALLBACK_FILE_EXTENSION_SUFFIX = ".gz"
+ FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\x1f\x8b"
+
def compare_details(self, other, source=None):
return [Difference.from_text(self.magic_file_type, other.magic_file_type, self, other, source='metadata')]
diff --git a/diffoscope/comparators/utils/file.py b/diffoscope/comparators/utils/file.py
index 14d5412..c5d85d3 100644
--- a/diffoscope/comparators/utils/file.py
+++ b/diffoscope/comparators/utils/file.py
@@ -57,6 +57,10 @@ def path_apparent_size(path=".", visited=None):
return sum(visited.values())
+def _run_tests(fold, tests):
+ return fold(t(y, x) for x, t, y in tests)
+
+
class File(object, metaclass=abc.ABCMeta):
if hasattr(magic, 'open'): # use Magic-file-extensions from file
@classmethod
@@ -112,14 +116,26 @@ class File(object, metaclass=abc.ABCMeta):
@classmethod
def recognizes(cls, file):
+ """Check if a file's type matches the one represented by this class.
+
+ The default test returns True if the file matches these tests:
+
+ (cls.FILE_TYPE_RE OR
+ cls.FILE_TYPE_HEADER_PREFIX) AND
+ (cls.FILE_EXTENSION_SUFFIX)
+
+ If any test is None then the test is ignored and effectively deleted
+ from the above definition.
+
+ By default, the tests are all None and the test returns False for all
+ files. Subclasses may override them with specific values, or override
+ this method to implement a totally different test.
+ """
# The structure below allows us to construct a boolean tree of tests
# that can be combined with all() and any(). Tests that are not defined
# for a class are filtered out, so that we don't get into a "vacuous
# truth" situation like a naive all([]) invocation would give.
- def run_tests(fold, tests):
- return fold(t(y, x) for x, t, y in tests)
-
file_type_tests = [test for test in (
(cls.FILE_TYPE_RE,
lambda m, t: t.search(m), file.magic_file_type),
@@ -131,10 +147,32 @@ class File(object, metaclass=abc.ABCMeta):
(cls.FILE_EXTENSION_SUFFIX,
str.endswith, file.name),
(file_type_tests,
- run_tests, any),
+ _run_tests, any),
+ ) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty
+
+ return _run_tests(all, all_tests) if all_tests else False
+
+ FALLBACK_FILE_EXTENSION_SUFFIX = None
+ FALLBACK_FILE_TYPE_HEADER_PREFIX = None
+
+ @classmethod
+ def fallback_recognizes(cls, file):
+ """This is checked if the file could not be identified by recognizes().
+ This helps to work around bugs in file(1), see Debian bug #876316.
+
+ The default test returns True if the file matches these tests:
+
+ cls.FALLBACK_FILE_EXTENSION_SUFFIX AND
+ cls.FALLBACK_FILE_TYPE_HEADER_PREFIX
+ """
+ all_tests = [test for test in (
+ (cls.FALLBACK_FILE_EXTENSION_SUFFIX,
+ str.endswith, file.name),
+ (cls.FALLBACK_FILE_TYPE_HEADER_PREFIX,
+ bytes.startswith, file.file_header),
) if test[0]] # filter out undefined tests, inc. file_type_tests if it's empty
- return run_tests(all, all_tests) if all_tests else False
+ return _run_tests(all, all_tests) if all_tests else False
# This might be different from path and is used to do file extension matching
@property
diff --git a/diffoscope/comparators/utils/specialize.py b/diffoscope/comparators/utils/specialize.py
index 82bc501..55b8aa7 100644
--- a/diffoscope/comparators/utils/specialize.py
+++ b/diffoscope/comparators/utils/specialize.py
@@ -26,23 +26,33 @@ from .. import ComparatorManager
logger = logging.getLogger(__name__)
+def try_recognize(file, cls, recognizes):
+ if isinstance(file, cls):
+ return True
+
+ # Does this file class match?
+ with profile('recognizes', file):
+ #logger.debug("trying %s on %s", cls, file)
+ if not recognizes(file):
+ return False
+
+ # Found a match; perform type magic
+ logger.debug("Using %s for %s", cls.__name__, file.name)
+ new_cls = type(cls.__name__, (cls, type(file)), {})
+ file.__class__ = new_cls
+
+ return True
+
+
def specialize(file):
for cls in ComparatorManager().classes:
- if isinstance(file, cls):
+ if try_recognize(file, cls, cls.recognizes):
return file
- # Does this file class match?
- with profile('recognizes', file):
- if not cls.recognizes(file):
- continue
-
- # Found a match; perform type magic
- logger.debug("Using %s for %s", cls.__name__, file.name)
- new_cls = type(cls.__name__, (cls, type(file)), {})
- file.__class__ = new_cls
-
- return file
+ for cls in ComparatorManager().classes:
+ if try_recognize(file, cls, cls.fallback_recognizes):
+ logger.debug("File recognized by fallback. Magic says: %s", file.magic_file_type)
+ return file
logger.debug("Unidentified file. Magic says: %s", file.magic_file_type)
-
return file
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index da75050..a76408c 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -54,3 +54,7 @@ class XzContainer(Archive):
class XzFile(File):
CONTAINER_CLASS = XzContainer
FILE_TYPE_RE = re.compile(r'^XZ compressed data$')
+
+ # Work around file(1) Debian bug #876316
+ FALLBACK_FILE_EXTENSION_SUFFIX = ".xz"
+ FALLBACK_FILE_TYPE_HEADER_PREFIX = b"\xfd7zXZ\x00"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope mailing list