[diffoscope] 03/03: diffoscope/comparators: Split out as many utilities from (eg.) comparators.binary.

Chris Lamb chris at chris-lamb.co.uk
Wed Dec 28 12:55:49 CET 2016


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch master
in repository diffoscope.

commit d768db1e067047d1d2b697bb2398965c27943872
Author: Chris Lamb <lamby at debian.org>
Date:   Wed Dec 28 11:53:39 2016 +0000

    diffoscope/comparators: Split out as many utilities from (eg.) comparators.binary.
    
    Signed-off-by: Chris Lamb <lamby at debian.org>
---
 diffoscope/comparators/__init__.py                 |  31 +-
 diffoscope/comparators/apk.py                      |   3 +-
 diffoscope/comparators/ar.py                       |   2 +-
 diffoscope/comparators/binary.py                   | 308 +-----------------
 diffoscope/comparators/bzip2.py                    |   5 +-
 diffoscope/comparators/cbfs.py                     |   3 +-
 diffoscope/comparators/cpio.py                     |   2 +-
 diffoscope/comparators/deb.py                      |   6 +-
 diffoscope/comparators/debian.py                   |   2 +-
 diffoscope/comparators/dex.py                      |   3 +-
 diffoscope/comparators/directory.py                |   6 +-
 diffoscope/comparators/elf.py                      |   3 +-
 diffoscope/comparators/fonts.py                    |   2 +-
 diffoscope/comparators/fsimage.py                  |   2 +-
 diffoscope/comparators/gettext.py                  |   2 +-
 diffoscope/comparators/gzip.py                     |   3 +-
 diffoscope/comparators/haskell.py                  |   2 +-
 diffoscope/comparators/icc.py                      |   2 +-
 diffoscope/comparators/image.py                    |   2 +-
 diffoscope/comparators/iso9660.py                  |   2 +-
 diffoscope/comparators/java.py                     |   2 +-
 diffoscope/comparators/javascript.py               |   4 +-
 diffoscope/comparators/llvm.py                     |   2 +-
 diffoscope/comparators/macho.py                    |   2 +-
 diffoscope/comparators/missing_file.py             | 100 ++++++
 diffoscope/comparators/mono.py                     |   2 +-
 diffoscope/comparators/openssh.py                  |   2 +-
 diffoscope/comparators/pdf.py                      |   2 +-
 diffoscope/comparators/png.py                      |   2 +-
 diffoscope/comparators/ppu.py                      |   2 +-
 diffoscope/comparators/ps.py                       |   2 +-
 diffoscope/comparators/rpm.py                      |   2 +-
 diffoscope/comparators/rust.py                     |   3 +-
 diffoscope/comparators/sqlite.py                   |   2 +-
 diffoscope/comparators/squashfs.py                 |   3 +-
 diffoscope/comparators/utils/__init__.py           | 343 ---------------------
 diffoscope/comparators/utils/archive.py            | 141 +++++++++
 diffoscope/comparators/utils/command.py            | 110 +++++++
 diffoscope/comparators/utils/compare.py            |  58 +++-
 diffoscope/comparators/utils/container.py          | 125 ++++++++
 .../comparators/{binary.py => utils/file.py}       | 136 +-------
 .../comparators/utils/{loading.py => filenames.py} |  30 +-
 diffoscope/comparators/utils/libarchive.py         |   2 +-
 .../utils/{loading.py => specialize.py}            |  21 ++
 diffoscope/comparators/xz.py                       |   3 +-
 diffoscope/comparators/zip.py                      |   3 +-
 tests/comparators/test_binary.py                   |   9 +-
 tests/comparators/test_bzip2.py                    |   2 +-
 tests/comparators/test_cbfs.py                     |   2 +-
 tests/comparators/test_deb.py                      |   5 +-
 tests/comparators/test_debian.py                   |   5 +-
 tests/comparators/test_dex.py                      |   2 +-
 tests/comparators/test_elf.py                      |   5 +-
 tests/comparators/test_epub.py                     |   2 +-
 tests/comparators/test_fonts.py                    |   2 +-
 tests/comparators/test_fsimage.py                  |   2 +-
 tests/comparators/test_gettext.py                  |   2 +-
 tests/comparators/test_gzip.py                     |   5 +-
 tests/comparators/test_icc.py                      |   2 +-
 tests/comparators/test_image.py                    |   2 +-
 tests/comparators/test_ipk.py                      |   2 +-
 tests/comparators/test_iso9660.py                  |   2 +-
 tests/comparators/test_java.py                     |   2 +-
 tests/comparators/test_javascript.py               |   2 +-
 tests/comparators/test_macho.py                    |   2 +-
 tests/comparators/test_mono.py                     |   2 +-
 tests/comparators/test_openssh_pub_key.py          |   2 +-
 tests/comparators/test_tar.py                      |   2 +-
 tests/comparators/test_text.py                     |   2 +-
 tests/comparators/test_utils.py                    |   2 +-
 tests/comparators/test_xz.py                       |   2 +-
 tests/comparators/utils.py                         |   5 +-
 72 files changed, 664 insertions(+), 905 deletions(-)

diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 851adc6..692f762 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -18,21 +18,9 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import re
-import sys
-import magic
-import os.path
-import operator
-
-from diffoscope import logger, tool_required
-from diffoscope.config import Config
-from diffoscope.profiling import profile
-from diffoscope.difference import Difference
-
-
 COMPARATORS = (
     ('directory.Directory',),
-    ('binary.MissingFile',),
+    ('missing_file.MissingFile',),
     ('symlink.Symlink',),
     ('device.Device',),
     ('debian.DotChangesFile', 'debian_fallback.DotChangesFile'),
@@ -81,20 +69,3 @@ COMPARATORS = (
     ('git.GitIndexFile',),
     ('openssh.PublicKeyFile',),
 )
-
-
-def specialize(file):
-    for cls in FILE_CLASSES:
-        if isinstance(file, cls):
-            return file
-        with profile('recognizes', file):
-            if cls.recognizes(file):
-                logger.debug("Using %s for %s", cls.__name__, file.name)
-                new_cls = type(cls.__name__, (cls, type(file)), {})
-                file.__class__ = new_cls
-                return file
-    logger.debug('Unidentified file. Magic says: %s', file.magic_file_type)
-    return file
-
-from .utils.loading import import_comparators
-FILE_CLASSES = import_comparators(COMPARATORS)
diff --git a/diffoscope/comparators/apk.py b/diffoscope/comparators/apk.py
index b6c5271..c5e2008 100644
--- a/diffoscope/comparators/apk.py
+++ b/diffoscope/comparators/apk.py
@@ -24,7 +24,8 @@ import subprocess
 from diffoscope import logger, tool_required, get_temporary_directory
 
 from .binary import File
-from .utils import Archive, get_compressed_content_name
+from .utils.archive import Archive
+from .utils.filenames import get_compressed_content_name
 
 class ApkContainer(Archive):
     @property
diff --git a/diffoscope/comparators/ar.py b/diffoscope/comparators/ar.py
index 6c3ad83..aa36807 100644
--- a/diffoscope/comparators/ar.py
+++ b/diffoscope/comparators/ar.py
@@ -23,8 +23,8 @@ import re
 from diffoscope import logger, tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 from .utils.libarchive import LibarchiveContainer, list_libarchive
 
 
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 6ba6c45..47352d0 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -17,244 +17,10 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import io
 import os
-import re
-import abc
 import stat
-import magic
-import binascii
-import subprocess
 
-from diffoscope import tool_required, logger
-from diffoscope.exc import OutputParsingError, RequiredToolNotFound
-from diffoscope.config import Config
-from diffoscope.profiling import profile
-from diffoscope.difference import Difference
-
-try:
-    import tlsh
-except ImportError:
-    tlsh = None
-
-
-# helper function to convert to bytes if necessary
-def maybe_decode(s):
-    if type(s) is bytes:
-        return s.decode('utf-8')
-    else:
-        return s
-
-def hexdump_fallback(path):
-    hexdump = io.StringIO()
-    with open(path, 'rb') as f:
-        for buf in iter(lambda: f.read(32), b''):
-            hexdump.write('%s\n' % binascii.hexlify(buf).decode('us-ascii'))
-    return hexdump.getvalue()
-
-
-def compare_binary_files(file1, file2, source=None):
-    from diffoscope.comparators.utils import Xxd
-
-    try:
-        return Difference.from_command(
-            Xxd, file1.path, file2.path,
-            source=[file1.name, file2.name], has_internal_linenos=True)
-    except RequiredToolNotFound:
-        hexdump1 = hexdump_fallback(file1.path)
-        hexdump2 = hexdump_fallback(file2.path)
-        comment = 'xxd not available in path. Falling back to Python hexlify.\n'
-        return Difference.from_text(hexdump1, hexdump2, file1.name, file2.name, source, comment)
-
-SMALL_FILE_THRESHOLD = 65536 # 64 kiB
-
-
-class File(object, metaclass=abc.ABCMeta):
-    if hasattr(magic, 'open'): # use Magic-file-extensions from file
-        @classmethod
-        def guess_file_type(self, path):
-            if not hasattr(self, '_mimedb'):
-                self._mimedb = magic.open(magic.NONE)
-                self._mimedb.load()
-            return self._mimedb.file(path)
-
-        @classmethod
-        def guess_encoding(self, path):
-            if not hasattr(self, '_mimedb_encoding'):
-                self._mimedb_encoding = magic.open(magic.MAGIC_MIME_ENCODING)
-                self._mimedb_encoding.load()
-            return self._mimedb_encoding.file(path)
-    else: # use python-magic
-        @classmethod
-        def guess_file_type(self, path):
-            if not hasattr(self, '_mimedb'):
-                self._mimedb = magic.Magic()
-            return maybe_decode(self._mimedb.from_file(path))
-
-        @classmethod
-        def guess_encoding(self, path):
-            if not hasattr(self, '_mimedb_encoding'):
-                self._mimedb_encoding = magic.Magic(mime_encoding=True)
-            return maybe_decode(self._mimedb_encoding.from_file(path))
-
-    def __init__(self, container=None):
-        self._container = container
-
-    def __repr__(self):
-        return '<%s %s>' % (self.__class__, self.name)
-
-    # This should return a path that allows to access the file content
-    @property
-    @abc.abstractmethod
-    def path(self):
-        raise NotImplementedError()
-
-    # Remove any temporary data associated with the file. The function
-    # should be idempotent and work during the destructor.
-    def cleanup(self):
-        if hasattr(self, '_as_container'):
-            del self._as_container
-
-    def __del__(self):
-        self.cleanup()
-
-    # This might be different from path and is used to do file extension matching
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def container(self):
-        return self._container
-
-    @property
-    def as_container(self):
-        if not hasattr(self.__class__, 'CONTAINER_CLASS'):
-            if hasattr(self, '_other_file'):
-                return self._other_file.__class__.CONTAINER_CLASS(self)
-            return None
-        if not hasattr(self, '_as_container'):
-            logger.debug('instantiating %s for %s', self.__class__.CONTAINER_CLASS, self)
-            self._as_container = self.__class__.CONTAINER_CLASS(self)
-        logger.debug('returning a %s for %s', self._as_container.__class__, self)
-        return self._as_container
-
-    @property
-    def magic_file_type(self):
-        if not hasattr(self, '_magic_file_type'):
-            self._magic_file_type = File.guess_file_type(self.path)
-        return self._magic_file_type
-
-    if tlsh:
-        @property
-        def fuzzy_hash(self):
-            if not hasattr(self, '_fuzzy_hash'):
-                # tlsh is not meaningful with files smaller than 512 bytes
-                if os.stat(self.path).st_size >= 512:
-                    h = tlsh.Tlsh()
-                    with open(self.path, 'rb') as f:
-                        for buf in iter(lambda: f.read(32768), b''):
-                            h.update(buf)
-                    h.final()
-                    self._fuzzy_hash = h.hexdigest()
-                else:
-                    self._fuzzy_hash = None
-            return self._fuzzy_hash
-
-    @abc.abstractmethod
-    def is_directory():
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def is_symlink():
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def is_device():
-        raise NotImplementedError()
-
-    def compare_bytes(self, other, source=None):
-        return compare_binary_files(self, other, source)
-
-    def _compare_using_details(self, other, source):
-        details = []
-        if hasattr(self, 'compare_details'):
-            details.extend(filter(None, self.compare_details(other, source)))
-        if self.as_container:
-            details.extend(filter(None, self.as_container.compare(other.as_container)))
-        if not details:
-            return None
-        difference = Difference(None, self.name, other.name, source=source)
-        difference.add_details(details)
-        return difference
-
-    def has_same_content_as(self, other):
-        logger.debug('Binary.has_same_content: %s %s', self, other)
-        # try comparing small files directly first
-        try:
-            my_size = os.path.getsize(self.path)
-            other_size = os.path.getsize(other.path)
-        except OSError:
-            # files not readable (e.g. broken symlinks) or something else,
-            # just assume they are different
-            return False
-        if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
-            try:
-                with profile('command', 'cmp (internal)'):
-                    with open(self.path, 'rb') as file1, open(other.path, 'rb') as file2:
-                        return file1.read() == file2.read()
-            except OSError:
-                # one or both files could not be opened for some reason,
-                # assume they are different
-                return False
-
-        return self.cmp_external(other)
-
-    @tool_required('cmp')
-    def cmp_external(self, other):
-        return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
-                                    shell=False, close_fds=True)
-
-
-    # To be specialized directly, or by implementing compare_details
-    def compare(self, other, source=None):
-        if hasattr(self, 'compare_details') or self.as_container:
-            try:
-                difference = self._compare_using_details(other, source)
-                # no differences detected inside? let's at least do a binary diff
-                if difference is None:
-                    difference = self.compare_bytes(other, source=source)
-                    if difference is None:
-                        return None
-                    difference.add_comment("No file format specific differences found inside, yet data differs")
-            except subprocess.CalledProcessError as e:
-                difference = self.compare_bytes(other, source=source)
-                if e.output:
-                    output = re.sub(r'^', '    ', e.output.decode('utf-8', errors='replace'), flags=re.MULTILINE)
-                else:
-                    output = '<none>'
-                cmd = ' '.join(e.cmd)
-                if difference is None:
-                    return None
-                difference.add_comment("Command `%s` exited with %d. Output:\n%s"
-                                       % (cmd, e.returncode, output))
-            except RequiredToolNotFound as e:
-                difference = self.compare_bytes(other, source=source)
-                if difference is None:
-                    return None
-                difference.add_comment(
-                    "'%s' not available in path. Falling back to binary comparison." % e.command)
-                package = e.get_package()
-                if package:
-                    difference.add_comment("Install '%s' to get a better output." % package)
-            except OutputParsingError as e:
-                difference = self.compare_bytes(other, source=source)
-                if difference is None:
-                    return None
-                difference.add_comment("Error parsing output of `%s` for %s" %
-                        (e.command, e.object_class))
-            return difference
-        return self.compare_bytes(other, source)
+from .utils.file import File
 
 
 class FilesystemFile(File):
@@ -275,75 +41,3 @@ class FilesystemFile(File):
     def is_device(self):
         mode = os.lstat(self._name).st_mode
         return stat.S_ISCHR(mode) or stat.S_ISBLK(mode)
-
-
-class MissingFile(File):
-    """Represents a missing file when comparing containers"""
-
-    @staticmethod
-    def recognizes(file):
-        if isinstance(file, FilesystemFile) and not os.path.lexists(file.name):
-            assert Config().new_file, '%s does not exist' % file.name
-            return True
-        return False
-
-    def __init__(self, path, other_file=None):
-        self._name = path
-        self._other_file = other_file
-
-    @property
-    def path(self):
-        return '/dev/null'
-
-    @property
-    def other_file(self):
-        return self._other_file
-
-    @other_file.setter
-    def other_file(self, value):
-        self._other_file = value
-
-    def has_same_content_as(self, other):
-        return False
-
-    def is_directory(self):
-        return False
-
-    def is_symlink(self):
-        return False
-
-    def is_device(self):
-        return False
-
-    def compare(self, other, source=None):
-        # So now that comparators are all object-oriented, we don't have any clue on how to
-        # perform a meaningful comparison right here. So we are good do the comparison backward
-        # (where knowledge of the file format lies) and and then reverse it.
-        if isinstance(other, MissingFile):
-            return Difference(None, self.name, other.name, comment='Trying to compare two non-existing files.')
-        logger.debug('Performing backward comparison')
-        backward_diff = other.compare(self, source)
-        if not backward_diff:
-            return None
-        return backward_diff.get_reverse()
-
-    # Be nice to text comparisons
-    @property
-    def encoding(self):
-        return self._other_file.encoding
-
-    # Be nice to device comparisons
-    def get_device(self):
-        return ''
-
-    # Be nice to metadata comparisons
-    @property
-    def magic_file_type(self):
-        return self._other_file.magic_file_type
-
-    # Be nice to .changes and .dsc comparisons
-    @property
-    def deb822(self):
-        class DummyChanges(dict):
-            get_as_string = lambda self, _: ''
-        return DummyChanges(Files=[], Version='')
diff --git a/diffoscope/comparators/bzip2.py b/diffoscope/comparators/bzip2.py
index bbb2fcf..bcd269a 100644
--- a/diffoscope/comparators/bzip2.py
+++ b/diffoscope/comparators/bzip2.py
@@ -24,8 +24,9 @@ import collections
 
 from diffoscope import logger, tool_required
 
-from .utils import Archive, get_compressed_content_name
-from .binary import File
+from .utils.file import File
+from .utils.archive import Archive
+from .utils.filenames import get_compressed_content_name
 
 
 class Bzip2Container(Archive):
diff --git a/diffoscope/comparators/cbfs.py b/diffoscope/comparators/cbfs.py
index 14ac377..0fa39c5 100644
--- a/diffoscope/comparators/cbfs.py
+++ b/diffoscope/comparators/cbfs.py
@@ -26,8 +26,9 @@ import subprocess
 from diffoscope import logger, tool_required
 from diffoscope.difference import Difference
 
-from .utils import Archive, Command
 from .binary import File
+from .utils.archive import Archive
+from .utils.command import Command
 
 
 class CbfsListing(Command):
diff --git a/diffoscope/comparators/cpio.py b/diffoscope/comparators/cpio.py
index f5060c6..7dac814 100644
--- a/diffoscope/comparators/cpio.py
+++ b/diffoscope/comparators/cpio.py
@@ -23,8 +23,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 from .utils.libarchive import LibarchiveContainer, list_libarchive
 
 
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index b77599c..e0207b7 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -22,11 +22,11 @@ import re
 from diffoscope import logger
 from diffoscope.difference import Difference
 
-from . import specialize
 from .tar import TarContainer
-from .utils import ArchiveMember
-from .binary import File
+from .utils.file import File
+from .utils.archive import ArchiveMember
 from .utils.libarchive import LibarchiveContainer, list_libarchive
+from .utils.specialize import specialize
 
 try:
     from debian import deb822
diff --git a/diffoscope/comparators/debian.py b/diffoscope/comparators/debian.py
index 98c52e4..7dde081 100644
--- a/diffoscope/comparators/debian.py
+++ b/diffoscope/comparators/debian.py
@@ -29,8 +29,8 @@ from diffoscope import logger
 from diffoscope.changes import Changes
 from diffoscope.difference import Difference
 
-from .utils import Container
 from .binary import File
+from .utils.container import Container
 
 
 DOT_CHANGES_FIELDS = [
diff --git a/diffoscope/comparators/dex.py b/diffoscope/comparators/dex.py
index 7ac9208..d8222fe 100644
--- a/diffoscope/comparators/dex.py
+++ b/diffoscope/comparators/dex.py
@@ -25,7 +25,8 @@ import collections
 from diffoscope import logger, tool_required
 
 from .binary import File
-from .utils import Archive, get_compressed_content_name
+from .utils.archive import Archive
+from .utils.filenames import get_compressed_content_name
 
 
 class DexContainer(Archive):
diff --git a/diffoscope/comparators/directory.py b/diffoscope/comparators/directory.py
index 4edbadd..3aa2012 100644
--- a/diffoscope/comparators/directory.py
+++ b/diffoscope/comparators/directory.py
@@ -26,9 +26,9 @@ from diffoscope.exc import RequiredToolNotFound
 from diffoscope.progress import Progress
 from diffoscope.difference import Difference
 
-from .utils import Container, Command
 from .binary import FilesystemFile
-from .utils.compare import compare_files
+from .utils.command import Command
+from .utils.container import Container
 
 
 def list_files(path):
@@ -147,6 +147,8 @@ class FilesystemDirectory(object):
         return False
 
     def compare(self, other, source=None):
+        from .utils.compare import compare_files
+
         differences = []
         try:
             listing_diff = Difference.from_text('\n'.join(list_files(self.path)),
diff --git a/diffoscope/comparators/elf.py b/diffoscope/comparators/elf.py
index 927658e..0b82993 100644
--- a/diffoscope/comparators/elf.py
+++ b/diffoscope/comparators/elf.py
@@ -27,8 +27,9 @@ from diffoscope.exc import OutputParsingError
 from diffoscope.difference import Difference
 
 from .deb import DebFile, get_build_id_map
-from .utils import Command, Container
 from .binary import File
+from .utils.command import Command
+from .utils.container import  Container
 from .utils.libarchive import list_libarchive
 
 
diff --git a/diffoscope/comparators/fonts.py b/diffoscope/comparators/fonts.py
index a8855ad..339383b 100644
--- a/diffoscope/comparators/fonts.py
+++ b/diffoscope/comparators/fonts.py
@@ -22,8 +22,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Showttf(Command):
diff --git a/diffoscope/comparators/fsimage.py b/diffoscope/comparators/fsimage.py
index 22ba92b..91ba8f8 100644
--- a/diffoscope/comparators/fsimage.py
+++ b/diffoscope/comparators/fsimage.py
@@ -24,8 +24,8 @@ import collections
 from diffoscope import logger
 from diffoscope.difference import Difference
 
-from .utils import Archive
 from .binary import File
+from .utils.archive import Archive
 
 try:
     import guestfs
diff --git a/diffoscope/comparators/gettext.py b/diffoscope/comparators/gettext.py
index bd1d936..4885b73 100644
--- a/diffoscope/comparators/gettext.py
+++ b/diffoscope/comparators/gettext.py
@@ -24,8 +24,8 @@ from diffoscope import logger
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Msgunfmt(Command):
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index 61751c1..61a812b 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -25,7 +25,8 @@ import collections
 from diffoscope import logger, tool_required
 from diffoscope.difference import Difference
 
-from .utils import Archive, get_compressed_content_name
+from .utils.archive import Archive
+from .utils.filenames import get_compressed_content_name
 
 
 class GzipContainer(Archive):
diff --git a/diffoscope/comparators/haskell.py b/diffoscope/comparators/haskell.py
index 9e27b67..8d154fa 100644
--- a/diffoscope/comparators/haskell.py
+++ b/diffoscope/comparators/haskell.py
@@ -26,8 +26,8 @@ from diffoscope import tool_required, logger
 from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class ShowIface(Command):
diff --git a/diffoscope/comparators/icc.py b/diffoscope/comparators/icc.py
index 5b80351..028b241 100644
--- a/diffoscope/comparators/icc.py
+++ b/diffoscope/comparators/icc.py
@@ -22,8 +22,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Iccdump(Command):
diff --git a/diffoscope/comparators/image.py b/diffoscope/comparators/image.py
index 9486aed..e7ed8d2 100644
--- a/diffoscope/comparators/image.py
+++ b/diffoscope/comparators/image.py
@@ -22,8 +22,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 re_ansi_escapes = re.compile(r'\x1b[^m]*m')
 
diff --git a/diffoscope/comparators/iso9660.py b/diffoscope/comparators/iso9660.py
index c2bc6f6..82f0da9 100644
--- a/diffoscope/comparators/iso9660.py
+++ b/diffoscope/comparators/iso9660.py
@@ -23,8 +23,8 @@ import subprocess
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 from .utils.libarchive import LibarchiveContainer
 
 
diff --git a/diffoscope/comparators/java.py b/diffoscope/comparators/java.py
index cc734e6..b01e6b5 100644
--- a/diffoscope/comparators/java.py
+++ b/diffoscope/comparators/java.py
@@ -24,8 +24,8 @@ import os.path
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Javap(Command):
diff --git a/diffoscope/comparators/javascript.py b/diffoscope/comparators/javascript.py
index 166d9f4..875d54a 100644
--- a/diffoscope/comparators/javascript.py
+++ b/diffoscope/comparators/javascript.py
@@ -22,8 +22,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
-from .binary import File
+from .utils.file import File
+from .utils.command import Command
 
 
 class JavaScriptBeautify(Command):
diff --git a/diffoscope/comparators/llvm.py b/diffoscope/comparators/llvm.py
index 7987958..759d383 100644
--- a/diffoscope/comparators/llvm.py
+++ b/diffoscope/comparators/llvm.py
@@ -21,8 +21,8 @@
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class LlvmBcAnalyzer(Command):
diff --git a/diffoscope/comparators/macho.py b/diffoscope/comparators/macho.py
index 6453c35..e5dbe52 100644
--- a/diffoscope/comparators/macho.py
+++ b/diffoscope/comparators/macho.py
@@ -24,8 +24,8 @@ import subprocess
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Otool(Command):
diff --git a/diffoscope/comparators/missing_file.py b/diffoscope/comparators/missing_file.py
new file mode 100644
index 0000000..fb9cde8
--- /dev/null
+++ b/diffoscope/comparators/missing_file.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+
+from diffoscope import logger
+from diffoscope.config import Config
+from diffoscope.difference import Difference
+
+from .utils.file import File
+from .binary import FilesystemFile
+
+
+class MissingFile(File):
+    """Represents a missing file when comparing containers"""
+
+    @staticmethod
+    def recognizes(file):
+        if isinstance(file, FilesystemFile) and not os.path.lexists(file.name):
+            assert Config().new_file, '%s does not exist' % file.name
+            return True
+        return False
+
+    def __init__(self, path, other_file=None):
+        self._name = path
+        self._other_file = other_file
+
+    @property
+    def path(self):
+        return '/dev/null'
+
+    @property
+    def other_file(self):
+        return self._other_file
+
+    @other_file.setter
+    def other_file(self, value):
+        self._other_file = value
+
+    def has_same_content_as(self, other):
+        return False
+
+    def is_directory(self):
+        return False
+
+    def is_symlink(self):
+        return False
+
+    def is_device(self):
+        return False
+
+    def compare(self, other, source=None):
+        # So now that comparators are all object-oriented, we don't have any clue on how to
+        # perform a meaningful comparison right here. So we are good to do the comparison backward
+        # (where knowledge of the file format lies) and then reverse it.
+        if isinstance(other, MissingFile):
+            return Difference(None, self.name, other.name, comment='Trying to compare two non-existing files.')
+        logger.debug('Performing backward comparison')
+        backward_diff = other.compare(self, source)
+        if not backward_diff:
+            return None
+        return backward_diff.get_reverse()
+
+    # Be nice to text comparisons
+    @property
+    def encoding(self):
+        return self._other_file.encoding
+
+    # Be nice to device comparisons
+    def get_device(self):
+        return ''
+
+    # Be nice to metadata comparisons
+    @property
+    def magic_file_type(self):
+        return self._other_file.magic_file_type
+
+    # Be nice to .changes and .dsc comparisons
+    @property
+    def deb822(self):
+        class DummyChanges(dict):
+            get_as_string = lambda self, _: ''
+        return DummyChanges(Files=[], Version='')
+
diff --git a/diffoscope/comparators/mono.py b/diffoscope/comparators/mono.py
index d0d0cc9..333eb4c 100644
--- a/diffoscope/comparators/mono.py
+++ b/diffoscope/comparators/mono.py
@@ -23,8 +23,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Pedump(Command):
diff --git a/diffoscope/comparators/openssh.py b/diffoscope/comparators/openssh.py
index 26c521a..522b0d0 100644
--- a/diffoscope/comparators/openssh.py
+++ b/diffoscope/comparators/openssh.py
@@ -22,8 +22,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class SSHKeyList(Command):
diff --git a/diffoscope/comparators/pdf.py b/diffoscope/comparators/pdf.py
index 9514812..ceb4da4 100644
--- a/diffoscope/comparators/pdf.py
+++ b/diffoscope/comparators/pdf.py
@@ -22,8 +22,8 @@ import re
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Pdftotext(Command):
diff --git a/diffoscope/comparators/png.py b/diffoscope/comparators/png.py
index f03ab40..33b1e8e 100644
--- a/diffoscope/comparators/png.py
+++ b/diffoscope/comparators/png.py
@@ -23,8 +23,8 @@ import functools
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Sng(Command):
diff --git a/diffoscope/comparators/ppu.py b/diffoscope/comparators/ppu.py
index 5acc16c..340d1ef 100644
--- a/diffoscope/comparators/ppu.py
+++ b/diffoscope/comparators/ppu.py
@@ -27,8 +27,8 @@ from diffoscope import tool_required, logger
 from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Ppudump(Command):
diff --git a/diffoscope/comparators/ps.py b/diffoscope/comparators/ps.py
index 203e40c..5c64e80 100644
--- a/diffoscope/comparators/ps.py
+++ b/diffoscope/comparators/ps.py
@@ -24,7 +24,7 @@ from diffoscope.exc import RequiredToolNotFound
 from diffoscope.difference import Difference
 
 from .text import TextFile
-from .utils import Command
+from .utils.command import Command
 
 
 class Pstotext(Command):
diff --git a/diffoscope/comparators/rpm.py b/diffoscope/comparators/rpm.py
index 6a8e122..dc271d8 100644
--- a/diffoscope/comparators/rpm.py
+++ b/diffoscope/comparators/rpm.py
@@ -27,8 +27,8 @@ import subprocess
 from diffoscope import logger, tool_required, get_temporary_directory
 from diffoscope.difference import Difference
 
-from .utils import Archive
 from .rpm_fallback import AbstractRpmFile
+from .utils.archive import Archive
 
 
 def convert_header_field(io, header):
diff --git a/diffoscope/comparators/rust.py b/diffoscope/comparators/rust.py
index eb25869..d1accf2 100644
--- a/diffoscope/comparators/rust.py
+++ b/diffoscope/comparators/rust.py
@@ -25,7 +25,8 @@ import collections
 from diffoscope import logger, tool_required
 from diffoscope.difference import Difference
 
-from .utils import Archive, get_compressed_content_name
+from .utils.archive import Archive
+from .utils.filenames import get_compressed_content_name
 
 RLIB_BYTECODE_OBJECT_V1_DATASIZE_OFFSET = 15
 RLIB_BYTECODE_OBJECT_V1_DATA_OFFSET = 23
diff --git a/diffoscope/comparators/sqlite.py b/diffoscope/comparators/sqlite.py
index 74a0fa0..42d8c2a 100644
--- a/diffoscope/comparators/sqlite.py
+++ b/diffoscope/comparators/sqlite.py
@@ -20,8 +20,8 @@
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Command
 from .binary import File
+from .utils.command import Command
 
 
 class Sqlite3Dump(Command):
diff --git a/diffoscope/comparators/squashfs.py b/diffoscope/comparators/squashfs.py
index 92b5219..d5c3d54 100644
--- a/diffoscope/comparators/squashfs.py
+++ b/diffoscope/comparators/squashfs.py
@@ -26,11 +26,12 @@ import collections
 from diffoscope import logger, tool_required
 from diffoscope.difference import Difference
 
-from .utils import Archive, ArchiveMember, Command
 from .binary import File
 from .device import Device
 from .symlink import Symlink
 from .directory import Directory
+from .utils.archive import Archive, ArchiveMember
+from .utils.command import Command
 
 
 class SquashfsSuperblock(Command):
diff --git a/diffoscope/comparators/utils/__init__.py b/diffoscope/comparators/utils/__init__.py
index 40321db..e69de29 100644
--- a/diffoscope/comparators/utils/__init__.py
+++ b/diffoscope/comparators/utils/__init__.py
@@ -1,343 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# diffoscope: in-depth comparison of files, archives, and directories
-#
-# Copyright © 2014-2015 Jérémy Bobbio <lunar at debian.org>
-#
-# diffoscope is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# diffoscope is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
-
-import abc
-import os
-import re
-import io
-import threading
-import itertools
-import subprocess
-import collections
-
-from diffoscope import logger, tool_required, get_temporary_directory
-from diffoscope.config import Config
-from diffoscope.progress import Progress
-from diffoscope.profiling import profile
-
-from .fuzzy import perform_fuzzy_matching
-from .compare import compare_commented_files
-
-from .. import specialize
-from ..binary import File, MissingFile
-
-class Command(object, metaclass=abc.ABCMeta):
-    def __init__(self, path):
-        self._path = path
-        logger.debug('running %s', self.cmdline())
-        self._process = subprocess.Popen(self.cmdline(),
-                                         shell=False, close_fds=True,
-                                         env=self.env(),
-                                         stdin=subprocess.PIPE,
-                                         stdout=subprocess.PIPE,
-                                         stderr=subprocess.PIPE)
-        if hasattr(self, 'feed_stdin'):
-            self._stdin_feeder = threading.Thread(target=self._feed_stdin, args=(self._process.stdin,))
-            self._stdin_feeder.daemon = True
-            self._stdin_feeder.start()
-        else:
-            self._stdin_feeder = None
-            self._process.stdin.close()
-        self._stderr = io.BytesIO()
-        self._stderr_line_count = 0
-        self._stderr_reader = threading.Thread(target=self._read_stderr)
-        self._stderr_reader.daemon = True
-        self._stderr_reader.start()
-
-    @property
-    def path(self):
-        return self._path
-
-    @abc.abstractmethod
-    def cmdline(self):
-        raise NotImplementedError()
-
-    def env(self):
-        return None # inherit parent environment by default
-
-    # Define only if needed. We take care of closing stdin.
-    #def feed_stdin(self, stdin)
-
-    def _feed_stdin(self, stdin):
-        try:
-            self.feed_stdin(stdin)
-        finally:
-            stdin.close()
-
-    def filter(self, line):
-        # Assume command output is utf-8 by default
-        return line
-
-    def poll(self):
-        return self._process.poll()
-
-    def terminate(self):
-        return self._process.terminate()
-
-    def wait(self):
-        if self._stdin_feeder:
-            self._stdin_feeder.join()
-        self._stderr_reader.join()
-        returncode = self._process.wait()
-        logger.debug('done with %s. exit code %d', self.cmdline()[0], returncode)
-        return returncode
-
-    MAX_STDERR_LINES = 50
-
-    def _read_stderr(self):
-        for line in iter(self._process.stderr.readline, b''):
-            self._stderr_line_count += 1
-            if self._stderr_line_count <= Command.MAX_STDERR_LINES:
-                self._stderr.write(line)
-        if self._stderr_line_count > Command.MAX_STDERR_LINES:
-            self._stderr.write('[ {} lines ignored ]\n'.format(self._stderr_line_count - Command.MAX_STDERR_LINES).encode('utf-8'))
-        self._process.stderr.close()
-
-    @property
-    def stderr_content(self):
-        return self._stderr.getvalue().decode('utf-8', errors='replace')
-
-    @property
-    def stderr(self):
-        return self._stderr
-
-    @property
-    def stdout(self):
-        return self._process.stdout
-
-
-def get_compressed_content_name(path, expected_extension):
-    basename = os.path.basename(path)
-    if basename.endswith(expected_extension):
-        name = basename[:-len(expected_extension)]
-    else:
-        name = "%s-content" % basename
-    return name
-
-
-
-NO_COMMENT = None
-
-
-class Container(object, metaclass=abc.ABCMeta):
-    def __new__(cls, source):
-        if isinstance(source, MissingFile):
-            new = super(Container, MissingContainer).__new__(MissingContainer)
-            new.__init__(source)
-            return new
-        else:
-            return super(Container, cls).__new__(cls)
-
-    def __init__(self, source):
-        self._source = source
-
-    @property
-    def source(self):
-        return self._source
-
-    def get_members(self):
-        """Returns a dictionary. The key is what is used to match when comparing containers."""
-        return collections.OrderedDict(self.get_all_members())
-
-    def lookup_file(self, *names):
-        """Try to fetch a specific file by digging in containers."""
-        name, remainings = names[0], names[1:]
-        try:
-            file = self.get_member(name)
-        except KeyError:
-            return None
-        logger.debug('lookup_file(%s) -> %s', names, file)
-        specialize(file)
-        if not remainings:
-            return file
-        container = file.as_container
-        if not container:
-            return None
-        return container.lookup_file(*remainings)
-
-    @abc.abstractmethod
-    def get_member_names(self):
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def get_member(self, member_name):
-        raise NotImplementedError()
-
-    def get_all_members(self):
-        # If your get_member implementation is O(n) then this will be O(n^2) cost
-        # In such cases it is HIGHLY RECOMMENDED to override this as well
-        for name in self.get_member_names():
-            yield name, self.get_member(name)
-
-    def comparisons(self, other):
-        my_members = self.get_members()
-        my_reminders = collections.OrderedDict()
-        other_members = other.get_members()
-
-        with Progress(max(len(my_members), len(other_members))) as p:
-            # keep it sorted like my members
-            while my_members:
-                my_member_name, my_member = my_members.popitem(last=False)
-                if my_member_name in other_members:
-                    yield my_member, other_members.pop(my_member_name), NO_COMMENT
-                    p.step()
-                else:
-                    my_reminders[my_member_name] = my_member
-            my_members = my_reminders
-            for my_name, other_name, score in perform_fuzzy_matching(my_members, other_members):
-                comment = 'Files similar despite different names (difference score: %d)' % score
-                yield my_members.pop(my_name), other_members.pop(other_name), comment
-                p.step(2)
-            if Config().new_file:
-                for my_member in my_members.values():
-                    yield my_member, MissingFile('/dev/null', my_member), NO_COMMENT
-                    p.step()
-                for other_member in other_members.values():
-                    yield MissingFile('/dev/null', other_member), other_member, NO_COMMENT
-                    p.step()
-
-    def compare(self, other, source=None):
-        return itertools.starmap(compare_commented_files, self.comparisons(other))
-
-
-class MissingContainer(Container):
-    def get_member_names(self):
-        return self.source.other_file.as_container.get_member_names()
-
-    def get_member(self, member_name):
-        return MissingFile('/dev/null')
-
-
-class ArchiveMember(File):
-    def __init__(self, container, member_name):
-        super().__init__(container=container)
-        self._name = member_name
-        self._temp_dir = None
-        self._path = None
-
-    @property
-    def path(self):
-        if self._path is None:
-            logger.debug('unpacking %s', self._name)
-            assert self._temp_dir is None
-            self._temp_dir = get_temporary_directory()
-            with profile('container_extract', self.container):
-                self._path = self.container.extract(self._name, self._temp_dir.name)
-        return self._path
-
-    def cleanup(self):
-        if self._path is not None:
-            self._path = None
-        if self._temp_dir is not None:
-            self._temp_dir.cleanup()
-            self._temp_dir = None
-        super().cleanup()
-
-    def is_directory(self):
-        return False
-
-    def is_symlink(self):
-        return False
-
-    def is_device(self):
-        return False
-
-
-class Archive(Container, metaclass=abc.ABCMeta):
-    def __new__(cls, source, *args, **kwargs):
-        if isinstance(source, MissingFile):
-            return super(Container, MissingArchive).__new__(MissingArchive)
-        else:
-            return super(Container, cls).__new__(cls)
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        with profile('open_archive', self):
-            self._archive = self.open_archive()
-
-    def __del__(self):
-        with profile('close_archive', self):
-            self.close_archive()
-
-    @property
-    def archive(self):
-        return self._archive
-
-    @abc.abstractmethod
-    def open_archive(self):
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def close_archive(self):
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def get_member_names(self):
-        raise NotImplementedError()
-
-    @abc.abstractmethod
-    def extract(self, member_name, dest_dir):
-        raise NotImplementedError()
-
-    def get_member(self, member_name):
-        return ArchiveMember(self, member_name)
-
-
-class MissingArchiveLikeObject(object):
-    def getnames(self):
-        return []
-
-    def list(self, *args, **kwargs):
-        return ''
-
-    def close(self):
-        pass
-
-
-class MissingArchive(Archive):
-    @property
-    def source(self):
-        return None
-
-    def open_archive(self):
-        return MissingArchiveLikeObject()
-
-    def close_archive(self):
-        pass
-
-    def get_member_names(self):
-        return []
-
-    def extract(self, member_name, dest_dir):
-        # should never be called
-        raise NotImplementedError()
-
-    def get_member(self, member_name):
-        return MissingFile('/dev/null')
-
-    # Be nice to gzip and the likes
-    @property
-    def path(self):
-        return '/dev/null'
-
-
-class Xxd(Command):
-    @tool_required('xxd')
-    def cmdline(self):
-        return ['xxd', self.path]
diff --git a/diffoscope/comparators/utils/archive.py b/diffoscope/comparators/utils/archive.py
new file mode 100644
index 0000000..201962a
--- /dev/null
+++ b/diffoscope/comparators/utils/archive.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import abc
+
+from diffoscope import logger, get_temporary_directory
+from diffoscope.profiling import profile
+
+from ..missing_file import MissingFile
+
+from .file import File
+from .container import Container
+
+
+class Archive(Container, metaclass=abc.ABCMeta):
+    def __new__(cls, source, *args, **kwargs):
+        if isinstance(source, MissingFile):
+            return super(Container, MissingArchive).__new__(MissingArchive)
+        else:
+            return super(Container, cls).__new__(cls)
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        with profile('open_archive', self):
+            self._archive = self.open_archive()
+
+    def __del__(self):
+        with profile('close_archive', self):
+            self.close_archive()
+
+    @property
+    def archive(self):
+        return self._archive
+
+    @abc.abstractmethod
+    def open_archive(self):
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def close_archive(self):
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def get_member_names(self):
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def extract(self, member_name, dest_dir):
+        raise NotImplementedError()
+
+    def get_member(self, member_name):
+        return ArchiveMember(self, member_name)
+
+
+class ArchiveMember(File):
+    def __init__(self, container, member_name):
+        super().__init__(container=container)
+        self._name = member_name
+        self._temp_dir = None
+        self._path = None
+
+    @property
+    def path(self):
+        if self._path is None:
+            logger.debug('unpacking %s', self._name)
+            assert self._temp_dir is None
+            self._temp_dir = get_temporary_directory()
+            with profile('container_extract', self.container):
+                self._path = self.container.extract(self._name, self._temp_dir.name)
+        return self._path
+
+    def cleanup(self):
+        if self._path is not None:
+            self._path = None
+        if self._temp_dir is not None:
+            self._temp_dir.cleanup()
+            self._temp_dir = None
+        super().cleanup()
+
+    def is_directory(self):
+        return False
+
+    def is_symlink(self):
+        return False
+
+    def is_device(self):
+        return False
+
+
+class MissingArchiveLikeObject(object):
+    def getnames(self):
+        return []
+
+    def list(self, *args, **kwargs):
+        return ''
+
+    def close(self):
+        pass
+
+
+class MissingArchive(Archive):
+    @property
+    def source(self):
+        return None
+
+    def open_archive(self):
+        return MissingArchiveLikeObject()
+
+    def close_archive(self):
+        pass
+
+    def get_member_names(self):
+        return []
+
+    def extract(self, member_name, dest_dir):
+        # should never be called
+        raise NotImplementedError()
+
+    def get_member(self, member_name):
+        return MissingFile('/dev/null')
+
+    # Be nice to gzip and the likes
+    @property
+    def path(self):
+        return '/dev/null'
diff --git a/diffoscope/comparators/utils/command.py b/diffoscope/comparators/utils/command.py
new file mode 100644
index 0000000..89c839b
--- /dev/null
+++ b/diffoscope/comparators/utils/command.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import io
+import abc
+import subprocess
+import threading
+
+from diffoscope import logger
+
+
+class Command(object, metaclass=abc.ABCMeta):
+    def __init__(self, path):
+        self._path = path
+        logger.debug('running %s', self.cmdline())
+        self._process = subprocess.Popen(self.cmdline(),
+                                         shell=False, close_fds=True,
+                                         env=self.env(),
+                                         stdin=subprocess.PIPE,
+                                         stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE)
+        if hasattr(self, 'feed_stdin'):
+            self._stdin_feeder = threading.Thread(target=self._feed_stdin, args=(self._process.stdin,))
+            self._stdin_feeder.daemon = True
+            self._stdin_feeder.start()
+        else:
+            self._stdin_feeder = None
+            self._process.stdin.close()
+        self._stderr = io.BytesIO()
+        self._stderr_line_count = 0
+        self._stderr_reader = threading.Thread(target=self._read_stderr)
+        self._stderr_reader.daemon = True
+        self._stderr_reader.start()
+
+    @property
+    def path(self):
+        return self._path
+
+    @abc.abstractmethod
+    def cmdline(self):
+        raise NotImplementedError()
+
+    def env(self):
+        return None # inherit parent environment by default
+
+    # Define only if needed. We take care of closing stdin.
+    #def feed_stdin(self, stdin)
+
+    def _feed_stdin(self, stdin):
+        try:
+            self.feed_stdin(stdin)
+        finally:
+            stdin.close()
+
+    def filter(self, line):
+        # Assume command output is utf-8 by default
+        return line
+
+    def poll(self):
+        return self._process.poll()
+
+    def terminate(self):
+        return self._process.terminate()
+
+    def wait(self):
+        if self._stdin_feeder:
+            self._stdin_feeder.join()
+        self._stderr_reader.join()
+        returncode = self._process.wait()
+        logger.debug('done with %s. exit code %d', self.cmdline()[0], returncode)
+        return returncode
+
+    MAX_STDERR_LINES = 50
+
+    def _read_stderr(self):
+        for line in iter(self._process.stderr.readline, b''):
+            self._stderr_line_count += 1
+            if self._stderr_line_count <= Command.MAX_STDERR_LINES:
+                self._stderr.write(line)
+        if self._stderr_line_count > Command.MAX_STDERR_LINES:
+            self._stderr.write('[ {} lines ignored ]\n'.format(self._stderr_line_count - Command.MAX_STDERR_LINES).encode('utf-8'))
+        self._process.stderr.close()
+
+    @property
+    def stderr_content(self):
+        return self._stderr.getvalue().decode('utf-8', errors='replace')
+
+    @property
+    def stderr(self):
+        return self._stderr
+
+    @property
+    def stdout(self):
+        return self._process.stdout
diff --git a/diffoscope/comparators/utils/compare.py b/diffoscope/comparators/utils/compare.py
index faa6164..f6316d9 100644
--- a/diffoscope/comparators/utils/compare.py
+++ b/diffoscope/comparators/utils/compare.py
@@ -1,15 +1,49 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import io
 import os
 import sys
+import binascii
 
-from diffoscope import logger
+from diffoscope import logger, tool_required
+from diffoscope.exc import RequiredToolNotFound
 from diffoscope.config import Config
 from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
-from .. import specialize
-from ..binary import MissingFile
+from ..missing_file import MissingFile
+
+from .command import Command
+from .specialize import specialize
+
+try:
+    import tlsh
+except ImportError:
+    tlsh = None
 
 
+class Xxd(Command):
+    @tool_required('xxd')
+    def cmdline(self):
+        return ['xxd', self.path]
+
 def compare_root_paths(path1, path2):
     from ..directory import FilesystemDirectory, FilesystemFile, compare_directories
 
@@ -54,3 +88,21 @@ def bail_if_non_existing(*paths):
             if not os.path.lexists(path):
                 sys.stderr.write('%s: %s: No such file or directory\n' % (sys.argv[0], path))
         sys.exit(2)
+
+def compare_binary_files(file1, file2, source=None):
+    try:
+        return Difference.from_command(
+            Xxd, file1.path, file2.path,
+            source=[file1.name, file2.name], has_internal_linenos=True)
+    except RequiredToolNotFound:
+        hexdump1 = hexdump_fallback(file1.path)
+        hexdump2 = hexdump_fallback(file2.path)
+        comment = 'xxd not available in path. Falling back to Python hexlify.\n'
+        return Difference.from_text(hexdump1, hexdump2, file1.name, file2.name, source, comment)
+
+def hexdump_fallback(path):
+    hexdump = io.StringIO()
+    with open(path, 'rb') as f:
+        for buf in iter(lambda: f.read(32), b''):
+            hexdump.write('%s\n' % binascii.hexlify(buf).decode('us-ascii'))
+    return hexdump.getvalue()
diff --git a/diffoscope/comparators/utils/container.py b/diffoscope/comparators/utils/container.py
new file mode 100644
index 0000000..d2e8d9a
--- /dev/null
+++ b/diffoscope/comparators/utils/container.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import abc
+import itertools
+import collections
+
+from diffoscope import logger
+from diffoscope.config import Config
+from diffoscope.progress import Progress
+
+from ..missing_file import MissingFile
+
+from .fuzzy import perform_fuzzy_matching
+from .specialize import specialize
+
+NO_COMMENT = None
+
+
+
+class Container(object, metaclass=abc.ABCMeta):
+    def __new__(cls, source):
+        if isinstance(source, MissingFile):
+            new = super(Container, MissingContainer).__new__(MissingContainer)
+            new.__init__(source)
+            return new
+        else:
+            return super(Container, cls).__new__(cls)
+
+    def __init__(self, source):
+        self._source = source
+
+    @property
+    def source(self):
+        return self._source
+
+    def get_members(self):
+        """Returns a dictionary. The key is what is used to match when comparing containers."""
+        return collections.OrderedDict(self.get_all_members())
+
+    def lookup_file(self, *names):
+        """Try to fetch a specific file by digging in containers."""
+        name, remainings = names[0], names[1:]
+        try:
+            file = self.get_member(name)
+        except KeyError:
+            return None
+        logger.debug('lookup_file(%s) -> %s', names, file)
+        specialize(file)
+        if not remainings:
+            return file
+        container = file.as_container
+        if not container:
+            return None
+        return container.lookup_file(*remainings)
+
+    @abc.abstractmethod
+    def get_member_names(self):
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def get_member(self, member_name):
+        raise NotImplementedError()
+
+    def get_all_members(self):
+        # If your get_member implementation is O(n) then this will be O(n^2) cost
+        # In such cases it is HIGHLY RECOMMENDED to override this as well
+        for name in self.get_member_names():
+            yield name, self.get_member(name)
+
+    def comparisons(self, other):
+        my_members = self.get_members()
+        my_reminders = collections.OrderedDict()
+        other_members = other.get_members()
+
+        with Progress(max(len(my_members), len(other_members))) as p:
+            # keep it sorted like my members
+            while my_members:
+                my_member_name, my_member = my_members.popitem(last=False)
+                if my_member_name in other_members:
+                    yield my_member, other_members.pop(my_member_name), NO_COMMENT
+                    p.step()
+                else:
+                    my_reminders[my_member_name] = my_member
+            my_members = my_reminders
+            for my_name, other_name, score in perform_fuzzy_matching(my_members, other_members):
+                comment = 'Files similar despite different names (difference score: %d)' % score
+                yield my_members.pop(my_name), other_members.pop(other_name), comment
+                p.step(2)
+            if Config().new_file:
+                for my_member in my_members.values():
+                    yield my_member, MissingFile('/dev/null', my_member), NO_COMMENT
+                    p.step()
+                for other_member in other_members.values():
+                    yield MissingFile('/dev/null', other_member), other_member, NO_COMMENT
+                    p.step()
+
+    def compare(self, other, source=None):
+        from .compare import compare_commented_files
+
+        return itertools.starmap(compare_commented_files, self.comparisons(other))
+
+
+class MissingContainer(Container):
+    def get_member_names(self):
+        return self.source.other_file.as_container.get_member_names()
+
+    def get_member(self, member_name):
+        return MissingFile('/dev/null')
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/utils/file.py
similarity index 70%
copy from diffoscope/comparators/binary.py
copy to diffoscope/comparators/utils/file.py
index 6ba6c45..310db8a 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/utils/file.py
@@ -2,7 +2,7 @@
 #
 # diffoscope: in-depth comparison of files, archives, and directories
 #
-# Copyright © 2014-2015 Jérémy Bobbio <lunar at debian.org>
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
 #
 # diffoscope is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -17,18 +17,14 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import io
 import os
 import re
 import abc
-import stat
 import magic
-import binascii
 import subprocess
 
 from diffoscope import tool_required, logger
-from diffoscope.exc import OutputParsingError, RequiredToolNotFound
-from diffoscope.config import Config
+from diffoscope.exc import RequiredToolNotFound, OutputParsingError
 from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
@@ -37,35 +33,6 @@ try:
 except ImportError:
     tlsh = None
 
-
-# helper function to convert to bytes if necessary
-def maybe_decode(s):
-    if type(s) is bytes:
-        return s.decode('utf-8')
-    else:
-        return s
-
-def hexdump_fallback(path):
-    hexdump = io.StringIO()
-    with open(path, 'rb') as f:
-        for buf in iter(lambda: f.read(32), b''):
-            hexdump.write('%s\n' % binascii.hexlify(buf).decode('us-ascii'))
-    return hexdump.getvalue()
-
-
-def compare_binary_files(file1, file2, source=None):
-    from diffoscope.comparators.utils import Xxd
-
-    try:
-        return Difference.from_command(
-            Xxd, file1.path, file2.path,
-            source=[file1.name, file2.name], has_internal_linenos=True)
-    except RequiredToolNotFound:
-        hexdump1 = hexdump_fallback(file1.path)
-        hexdump2 = hexdump_fallback(file2.path)
-        comment = 'xxd not available in path. Falling back to Python hexlify.\n'
-        return Difference.from_text(hexdump1, hexdump2, file1.name, file2.name, source, comment)
-
 SMALL_FILE_THRESHOLD = 65536 # 64 kiB
 
 
@@ -174,6 +141,8 @@ class File(object, metaclass=abc.ABCMeta):
         raise NotImplementedError()
 
     def compare_bytes(self, other, source=None):
+        from .compare import compare_binary_files
+
         return compare_binary_files(self, other, source)
 
     def _compare_using_details(self, other, source):
@@ -256,94 +225,9 @@ class File(object, metaclass=abc.ABCMeta):
             return difference
         return self.compare_bytes(other, source)
 
-
-class FilesystemFile(File):
-    def __init__(self, path, container=None):
-        super().__init__(container=container)
-        self._name = path
-
-    @property
-    def path(self):
-        return self._name
-
-    def is_directory(self):
-        return not os.path.islink(self._name) and os.path.isdir(self._name)
-
-    def is_symlink(self):
-        return os.path.islink(self._name)
-
-    def is_device(self):
-        mode = os.lstat(self._name).st_mode
-        return stat.S_ISCHR(mode) or stat.S_ISBLK(mode)
-
-
-class MissingFile(File):
-    """Represents a missing file when comparing containers"""
-
-    @staticmethod
-    def recognizes(file):
-        if isinstance(file, FilesystemFile) and not os.path.lexists(file.name):
-            assert Config().new_file, '%s does not exist' % file.name
-            return True
-        return False
-
-    def __init__(self, path, other_file=None):
-        self._name = path
-        self._other_file = other_file
-
-    @property
-    def path(self):
-        return '/dev/null'
-
-    @property
-    def other_file(self):
-        return self._other_file
-
-    @other_file.setter
-    def other_file(self, value):
-        self._other_file = value
-
-    def has_same_content_as(self, other):
-        return False
-
-    def is_directory(self):
-        return False
-
-    def is_symlink(self):
-        return False
-
-    def is_device(self):
-        return False
-
-    def compare(self, other, source=None):
-        # So now that comparators are all object-oriented, we don't have any clue on how to
-        # perform a meaningful comparison right here. So we are good do the comparison backward
-        # (where knowledge of the file format lies) and and then reverse it.
-        if isinstance(other, MissingFile):
-            return Difference(None, self.name, other.name, comment='Trying to compare two non-existing files.')
-        logger.debug('Performing backward comparison')
-        backward_diff = other.compare(self, source)
-        if not backward_diff:
-            return None
-        return backward_diff.get_reverse()
-
-    # Be nice to text comparisons
-    @property
-    def encoding(self):
-        return self._other_file.encoding
-
-    # Be nice to device comparisons
-    def get_device(self):
-        return ''
-
-    # Be nice to metadata comparisons
-    @property
-    def magic_file_type(self):
-        return self._other_file.magic_file_type
-
-    # Be nice to .changes and .dsc comparisons
-    @property
-    def deb822(self):
-        class DummyChanges(dict):
-            get_as_string = lambda self, _: ''
-        return DummyChanges(Files=[], Version='')
+# helper function to decode bytes to str if necessary
+def maybe_decode(s):
+    if type(s) is bytes:
+        return s.decode('utf-8')
+    else:
+        return s
diff --git a/diffoscope/comparators/utils/loading.py b/diffoscope/comparators/utils/filenames.py
similarity index 57%
copy from diffoscope/comparators/utils/loading.py
copy to diffoscope/comparators/utils/filenames.py
index 4c06222..9269c2f 100644
--- a/diffoscope/comparators/utils/loading.py
+++ b/diffoscope/comparators/utils/filenames.py
@@ -17,27 +17,13 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import importlib
+import os
 
-def import_comparators(comparators):
-    result = []
 
-    for xs in comparators:
-        for x in xs:
-            package, klass_name = x.rsplit('.', 1)
-
-            try:
-                mod = importlib.import_module(
-                    'diffoscope.comparators.{}'.format(package)
-                )
-            except ImportError:
-                continue
-
-            result.append(getattr(mod, klass_name))
-            break
-        else:
-            raise ImportError(
-                "Could not import any of {}".format(', '.join(xs))
-            )
-
-    return result
+def get_compressed_content_name(path, expected_extension):
+    basename = os.path.basename(path)
+    if basename.endswith(expected_extension):
+        name = basename[:-len(expected_extension)]
+    else:
+        name = "%s-content" % basename
+    return name
diff --git a/diffoscope/comparators/utils/libarchive.py b/diffoscope/comparators/utils/libarchive.py
index 14dcec1..ed48721 100644
--- a/diffoscope/comparators/utils/libarchive.py
+++ b/diffoscope/comparators/utils/libarchive.py
@@ -29,7 +29,7 @@ from ..device import Device
 from ..symlink import Symlink
 from ..directory import Directory
 
-from . import Archive, ArchiveMember
+from .archive import Archive, ArchiveMember
 
 
 # Monkeypatch libarchive-c (<< 2.2)
diff --git a/diffoscope/comparators/utils/loading.py b/diffoscope/comparators/utils/specialize.py
similarity index 68%
rename from diffoscope/comparators/utils/loading.py
rename to diffoscope/comparators/utils/specialize.py
index 4c06222..b3db931 100644
--- a/diffoscope/comparators/utils/loading.py
+++ b/diffoscope/comparators/utils/specialize.py
@@ -19,6 +19,25 @@
 
 import importlib
 
+from diffoscope import logger
+from diffoscope.profiling import profile
+
+from .. import COMPARATORS
+
+
+def specialize(file):
+    for cls in FILE_CLASSES:
+        if isinstance(file, cls):
+            return file
+        with profile('recognizes', file):
+            if cls.recognizes(file):
+                logger.debug("Using %s for %s", cls.__name__, file.name)
+                new_cls = type(cls.__name__, (cls, type(file)), {})
+                file.__class__ = new_cls
+                return file
+    logger.debug('Unidentified file. Magic says: %s', file.magic_file_type)
+    return file
+
 def import_comparators(comparators):
     result = []
 
@@ -41,3 +60,5 @@ def import_comparators(comparators):
             )
 
     return result
+
+FILE_CLASSES = import_comparators(COMPARATORS)
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index 0fc614d..efb5b4b 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -24,8 +24,9 @@ import collections
 
 from diffoscope import logger, tool_required
 
-from .utils import Archive, get_compressed_content_name
 from .binary import File
+from .utils.archive import Archive
+from .utils.filenames import get_compressed_content_name
 
 
 class XzContainer(Archive):
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index 4b86693..85d5ac2 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -27,9 +27,10 @@ import contextlib
 from diffoscope import tool_required
 from diffoscope.difference import Difference
 
-from .utils import Archive, ArchiveMember, Command
 from .binary import File
 from .directory import Directory
+from .utils.archive import Archive, ArchiveMember
+from .utils.command import Command
 
 
 class Zipinfo(Command):
diff --git a/tests/comparators/test_binary.py b/tests/comparators/test_binary.py
index cc46396..f009dbb 100644
--- a/tests/comparators/test_binary.py
+++ b/tests/comparators/test_binary.py
@@ -20,12 +20,13 @@
 import pytest
 import subprocess
 
-import diffoscope.comparators.binary
-
 from diffoscope import tool_required
 from diffoscope.exc import RequiredToolNotFound
 from diffoscope.difference import Difference
-from diffoscope.comparators.binary import File, FilesystemFile, MissingFile
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.file import File
+from diffoscope.comparators.missing_file import MissingFile
+from diffoscope.comparators.utils.compare import Xxd
 
 from utils import skip_unless_tools_exist, data, load_fixture
 from os import mkdir, symlink
@@ -85,7 +86,7 @@ def test_compare_non_existing_with_xxd(binary1):
 def xxd_not_found(monkeypatch):
     def mock_cmdline(self):
         raise RequiredToolNotFound('xxd')
-    monkeypatch.setattr(diffoscope.comparators.utils.Xxd, 'cmdline', mock_cmdline)
+    monkeypatch.setattr(Xxd, 'cmdline', mock_cmdline)
 
 def test_no_differences_without_xxd(xxd_not_found, binary1):
     difference = binary1.compare_bytes(binary1)
diff --git a/tests/comparators/test_bzip2.py b/tests/comparators/test_bzip2.py
index c0ddf7c..41c6f35 100644
--- a/tests/comparators/test_bzip2.py
+++ b/tests/comparators/test_bzip2.py
@@ -20,9 +20,9 @@
 import shutil
 import pytest
 
-from diffoscope.comparators import specialize
 from diffoscope.comparators.bzip2 import Bzip2File
 from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import skip_unless_tools_exist, data, load_fixture, \
     assert_non_existing
diff --git a/tests/comparators/test_cbfs.py b/tests/comparators/test_cbfs.py
index 8193737..9043559 100644
--- a/tests/comparators/test_cbfs.py
+++ b/tests/comparators/test_cbfs.py
@@ -21,10 +21,10 @@ import struct
 import pytest
 import subprocess
 
-from diffoscope.comparators import specialize
 from diffoscope.presenters.text import output_text
 from diffoscope.comparators.cbfs import CbfsFile
 from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import skip_unless_tools_exist, data, assert_non_existing
 
diff --git a/tests/comparators/test_deb.py b/tests/comparators/test_deb.py
index fd68c92..cbb0302 100644
--- a/tests/comparators/test_deb.py
+++ b/tests/comparators/test_deb.py
@@ -22,9 +22,10 @@ import pytest
 import diffoscope.comparators
 
 from diffoscope.config import Config
-from diffoscope.comparators import specialize
 from diffoscope.comparators.deb import DebFile, Md5sumsFile, DebDataTarFile
-from diffoscope.comparators.binary import FilesystemFile, MissingFile
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.missing_file import MissingFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import data, load_fixture
 
diff --git a/tests/comparators/test_debian.py b/tests/comparators/test_debian.py
index 82122c9..de2d052 100644
--- a/tests/comparators/test_debian.py
+++ b/tests/comparators/test_debian.py
@@ -21,9 +21,10 @@ import shutil
 import pytest
 
 from diffoscope.config import Config
-from diffoscope.comparators import specialize
 from diffoscope.presenters.text import output_text
-from diffoscope.comparators.binary import FilesystemFile, MissingFile
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.missing_file import MissingFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import data, assert_non_existing
 
diff --git a/tests/comparators/test_dex.py b/tests/comparators/test_dex.py
index 9867a93..788f836 100644
--- a/tests/comparators/test_dex.py
+++ b/tests/comparators/test_dex.py
@@ -22,7 +22,7 @@ import subprocess
 
 from diffoscope.config import Config
 from diffoscope.comparators.dex import DexFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture, skip_unless_tool_is_at_least
 from test_java import javap_version
diff --git a/tests/comparators/test_elf.py b/tests/comparators/test_elf.py
index e7bf1e7..362d893 100644
--- a/tests/comparators/test_elf.py
+++ b/tests/comparators/test_elf.py
@@ -21,11 +21,12 @@ import pytest
 import os.path
 
 from diffoscope.config import Config
-from diffoscope.comparators import specialize
 from diffoscope.presenters.text import output_text
 from diffoscope.comparators.elf import ElfFile, StaticLibFile
-from diffoscope.comparators.binary import FilesystemFile, MissingFile
+from diffoscope.comparators.binary import FilesystemFile
 from diffoscope.comparators.directory import FilesystemDirectory
+from diffoscope.comparators.missing_file import MissingFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_epub.py b/tests/comparators/test_epub.py
index 8b55086..1e64d45 100644
--- a/tests/comparators/test_epub.py
+++ b/tests/comparators/test_epub.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.zip import ZipFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_fonts.py b/tests/comparators/test_fonts.py
index 266541f..8bbbb1c 100644
--- a/tests/comparators/test_fonts.py
+++ b/tests/comparators/test_fonts.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.fonts import TtfFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_fsimage.py b/tests/comparators/test_fsimage.py
index 7942442..f917376 100644
--- a/tests/comparators/test_fsimage.py
+++ b/tests/comparators/test_fsimage.py
@@ -20,7 +20,7 @@
 import pytest
 
 from diffoscope.config import Config
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 from diffoscope.comparators.fsimage import FsImageFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
diff --git a/tests/comparators/test_gettext.py b/tests/comparators/test_gettext.py
index ea49362..3071d1e 100644
--- a/tests/comparators/test_gettext.py
+++ b/tests/comparators/test_gettext.py
@@ -21,7 +21,7 @@ import codecs
 import pytest
 
 from diffoscope.config import Config
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 from diffoscope.comparators.gettext import MoFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
diff --git a/tests/comparators/test_gzip.py b/tests/comparators/test_gzip.py
index 6418db4..4c60287 100644
--- a/tests/comparators/test_gzip.py
+++ b/tests/comparators/test_gzip.py
@@ -21,9 +21,10 @@ import shutil
 import pytest
 
 from diffoscope.config import Config
-from diffoscope.comparators import specialize
 from diffoscope.comparators.gzip import GzipFile
-from diffoscope.comparators.binary import FilesystemFile, MissingFile
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.missing_file import MissingFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import data, load_fixture
 
diff --git a/tests/comparators/test_icc.py b/tests/comparators/test_icc.py
index 1cb1748..9628595 100644
--- a/tests/comparators/test_icc.py
+++ b/tests/comparators/test_icc.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.icc import IccFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_image.py b/tests/comparators/test_image.py
index 09f0632..80a7175 100644
--- a/tests/comparators/test_image.py
+++ b/tests/comparators/test_image.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.image import ImageFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_ipk.py b/tests/comparators/test_ipk.py
index 95fc24e..3789622 100644
--- a/tests/comparators/test_ipk.py
+++ b/tests/comparators/test_ipk.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.ipk import IpkFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import data, load_fixture
 
diff --git a/tests/comparators/test_iso9660.py b/tests/comparators/test_iso9660.py
index 0d0f35a..7748027 100644
--- a/tests/comparators/test_iso9660.py
+++ b/tests/comparators/test_iso9660.py
@@ -20,7 +20,7 @@
 import pytest
 
 from diffoscope.config import Config
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 from diffoscope.comparators.iso9660 import Iso9660File
 
 from utils import skip_unless_tools_exist, data, load_fixture
diff --git a/tests/comparators/test_java.py b/tests/comparators/test_java.py
index 35f198a..0a6a941 100644
--- a/tests/comparators/test_java.py
+++ b/tests/comparators/test_java.py
@@ -22,7 +22,7 @@ import subprocess
 
 from diffoscope.config import Config
 from diffoscope.comparators.java import ClassFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture, skip_unless_tool_is_at_least
 
diff --git a/tests/comparators/test_javascript.py b/tests/comparators/test_javascript.py
index c26f7c9..9324988 100644
--- a/tests/comparators/test_javascript.py
+++ b/tests/comparators/test_javascript.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.javascript import JavaScriptFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_macho.py b/tests/comparators/test_macho.py
index bb880f8..b161573 100644
--- a/tests/comparators/test_macho.py
+++ b/tests/comparators/test_macho.py
@@ -23,7 +23,7 @@ import os.path
 
 from diffoscope.config import Config
 from diffoscope.comparators.macho import MachoFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_mono.py b/tests/comparators/test_mono.py
index a848386..c8d3a7d 100644
--- a/tests/comparators/test_mono.py
+++ b/tests/comparators/test_mono.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.mono import MonoExeFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_openssh_pub_key.py b/tests/comparators/test_openssh_pub_key.py
index d99d0b9..6c65293 100644
--- a/tests/comparators/test_openssh_pub_key.py
+++ b/tests/comparators/test_openssh_pub_key.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.openssh import PublicKeyFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import skip_unless_tools_exist, data, load_fixture
 
diff --git a/tests/comparators/test_tar.py b/tests/comparators/test_tar.py
index 6415850..98fab9b 100644
--- a/tests/comparators/test_tar.py
+++ b/tests/comparators/test_tar.py
@@ -21,7 +21,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.comparators.tar import TarFile
-from diffoscope.comparators.binary import MissingFile
+from diffoscope.comparators.missing_file import MissingFile
 
 from utils import data, load_fixture, assert_non_existing
 
diff --git a/tests/comparators/test_text.py b/tests/comparators/test_text.py
index afa0716..bd9b33b 100644
--- a/tests/comparators/test_text.py
+++ b/tests/comparators/test_text.py
@@ -19,8 +19,8 @@
 
 import codecs
 
-from diffoscope.comparators import specialize
 from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import data, load_fixture, assert_non_existing
 
diff --git a/tests/comparators/test_utils.py b/tests/comparators/test_utils.py
index 936cbfd..af90477 100644
--- a/tests/comparators/test_utils.py
+++ b/tests/comparators/test_utils.py
@@ -22,7 +22,7 @@ import pytest
 
 from diffoscope.config import Config
 from diffoscope.difference import Difference
-from diffoscope.comparators.utils import Command
+from diffoscope.comparators.utils.command import Command
 
 from utils import tools_missing, skip_unless_tools_exist, data, load_fixture, \
     skip_unless_tool_is_at_least
diff --git a/tests/comparators/test_xz.py b/tests/comparators/test_xz.py
index 8422740..f0df4e6 100644
--- a/tests/comparators/test_xz.py
+++ b/tests/comparators/test_xz.py
@@ -20,9 +20,9 @@
 import shutil
 import pytest
 
-from diffoscope.comparators import specialize
 from diffoscope.comparators.xz import XzFile
 from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.utils.specialize import specialize
 
 from utils import skip_unless_tools_exist, data, load_fixture, \
     assert_non_existing
diff --git a/tests/comparators/utils.py b/tests/comparators/utils.py
index 90fee76..5a0930f 100644
--- a/tests/comparators/utils.py
+++ b/tests/comparators/utils.py
@@ -26,10 +26,11 @@ from distutils.spawn import find_executable
 from distutils.version import LooseVersion
 
 from diffoscope.config import Config
-from diffoscope.comparators import specialize
 from diffoscope.presenters.html import output_html
 from diffoscope.presenters.text import output_text
-from diffoscope.comparators.binary import FilesystemFile, MissingFile
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.missing_file import MissingFile
+from diffoscope.comparators.utils.specialize import specialize
 
 re_diff_line_numbers = re.compile(r"(^|\n)@@ -(\d+),(\d+) \+(\d+),(\d+) @@(?=\n|$)")
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list