[diffoscope] 01/01: Add support for Mozilla optimized Zip files

Jérémy Bobbio lunar at moszumanska.debian.org
Wed Dec 16 11:28:14 CET 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit 12c67c42a359adeaa510468e6fe9568487c87157
Author: Mike Hommey <mh+diffoscope at glandium.org>
Date:   Wed Dec 16 10:22:17 2015 +0000

    Add support for Mozilla optimized Zip files
    
    The value of the preload field that is present at the very beginning of the
    file is not compared separately at the moment. In the worst case, it will be
    visible in the fallback binary comparison.
    
    See:
    https://developer.mozilla.org/en-US/docs/Mozilla/About_omni.ja_%28formerly_omni.jar%29
    
    Closes: #808002
---
 diffoscope/comparators/__init__.py      |   3 +-
 diffoscope/comparators/zip.py           |  51 ++++++++++++++++++++++++++++++++
 tests/comparators/test_zip.py           |  45 +++++++++++++++++++++++++++-
 tests/data/mozzip_zipinfo_expected_diff |  15 ++++++++++
 tests/data/test1.mozzip                 | Bin 0 -> 409 bytes
 tests/data/test2.mozzip                 | Bin 0 -> 552 bytes
 6 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index ed24f63..d308b6e 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -72,7 +72,7 @@ from diffoscope.comparators.symlink import Symlink
 from diffoscope.comparators.text import TextFile
 from diffoscope.comparators.tar import TarFile
 from diffoscope.comparators.xz import XzFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
 
 
 def bail_if_non_existing(*paths):
@@ -153,6 +153,7 @@ FILE_CLASSES = (
     TarFile,
     XzFile,
     ZipFile,
+    MozillaZipFile,
     ImageFile,
     CbfsFile,
     )
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ecdc77b..42c9a9f 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -111,3 +111,54 @@ class ZipFile(File):
         zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
                              Difference.from_command(ZipinfoVerbose, self.path, other.path)
         return [zipinfo_difference]
+
+
+class MozillaZipCommandMixin(object):
+    def wait(self):
+        # zipinfo emits an error when reading Mozilla-optimized ZIPs,
+        # which is fine to ignore.
+        super(Zipinfo, self).wait()
+        return 0
+
+
+class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo): pass
+
+
+class MozillaZipinfoVerbose(MozillaZipCommandMixin, ZipinfoVerbose): pass
+
+
+class MozillaZipContainer(ZipContainer):
+    def open_archive(self):
+        # This is gross: Monkeypatch zipfile._EndRecData to work with
+        # Mozilla-optimized ZIPs
+        _orig_EndRecData = zipfile._EndRecData
+        def _EndRecData(fh):
+            endrec = _orig_EndRecData(fh)
+            if endrec:
+                endrec[zipfile._ECD_LOCATION] = (endrec[zipfile._ECD_OFFSET] +
+                                                 endrec[zipfile._ECD_SIZE])
+            return endrec
+        zipfile._EndRecData = _EndRecData
+        result = super(MozillaZipContainer, self).open_archive()
+        zipfile._EndRecData = _orig_EndRecData
+        return result
+
+
+class MozillaZipFile(File):
+    CONTAINER_CLASS = MozillaZipContainer
+
+    @staticmethod
+    def recognizes(file):
+        # Mozilla-optimized ZIPs start with a 32-bit little endian integer
+        # indicating the amount of data to preload, followed by the ZIP
+        # central directory (with a PK\x01\x02 signature)
+        with open(file.path, 'rb') as f:
+            preload = f.read(4)
+            if len(preload) == 4:
+                signature = f.read(4)
+                return signature == b'PK\x01\x02'
+
+    def compare_details(self, other, source=None):
+        zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
+                             Difference.from_command(MozillaZipinfoVerbose, self.path, other.path)
+        return [zipinfo_difference]
diff --git a/tests/comparators/test_zip.py b/tests/comparators/test_zip.py
index d921b79..57255bc 100644
--- a/tests/comparators/test_zip.py
+++ b/tests/comparators/test_zip.py
@@ -21,7 +21,7 @@ import os.path
 import pytest
 from diffoscope.comparators import specialize
 from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
 from diffoscope.config import Config
 from conftest import tool_missing
 
@@ -65,3 +65,46 @@ def test_compare_non_existing(monkeypatch, zip1):
     difference = zip1.compare(NonExistingFile('/nonexisting', zip1))
     assert difference.source2 == '/nonexisting'
     assert difference.details[-1].source2 == '/dev/null'
+
+TEST_MOZZIP1_PATH = os.path.join(os.path.dirname(__file__), '../data/test1.mozzip')
+TEST_MOZZIP2_PATH = os.path.join(os.path.dirname(__file__), '../data/test2.mozzip')
+
+@pytest.fixture
+def mozzip1():
+    return specialize(FilesystemFile(TEST_MOZZIP1_PATH))
+
+@pytest.fixture
+def mozzip2():
+    return specialize(FilesystemFile(TEST_MOZZIP2_PATH))
+
+def test_mozzip_identification(mozzip1):
+    assert isinstance(mozzip1, MozillaZipFile)
+
+def test_mozzip_no_differences(mozzip1):
+    difference = mozzip1.compare(mozzip1)
+    assert difference is None
+
+@pytest.fixture
+def mozzip_differences(mozzip1, mozzip2):
+    return mozzip1.compare(mozzip2).details
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_metadata(mozzip_differences):
+    expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/mozzip_zipinfo_expected_diff')).read()
+    diff = mozzip_differences[0].unified_diff
+    assert (diff.replace(TEST_MOZZIP1_PATH, 'test1.mozzip')
+                .replace(TEST_MOZZIP2_PATH, 'test2.mozzip')) == expected_diff
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_compressed_files(mozzip_differences):
+    assert mozzip_differences[1].source1 == 'dir/text'
+    assert mozzip_differences[1].source2 == 'dir/text'
+    expected_diff = open(os.path.join(os.path.dirname(__file__), '../data/text_ascii_expected_diff')).read()
+    assert mozzip_differences[1].unified_diff == expected_diff
+
+@pytest.mark.skipif(tool_missing('zipinfo'), reason='missing zip')
+def test_mozzip_compare_non_existing(monkeypatch, mozzip1):
+    monkeypatch.setattr(Config.general, 'new_file', True)
+    difference = mozzip1.compare(NonExistingFile('/nonexisting', mozzip1))
+    assert difference.source2 == '/nonexisting'
+    assert difference.details[-1].source2 == '/dev/null'
diff --git a/tests/data/mozzip_zipinfo_expected_diff b/tests/data/mozzip_zipinfo_expected_diff
new file mode 100644
index 0000000..9cc8134
--- /dev/null
+++ b/tests/data/mozzip_zipinfo_expected_diff
@@ -0,0 +1,15 @@
+@@ -1,8 +1,8 @@
+-Zip file size: 409 bytes, number of entries: 1
+-warning [test1.mozzip]:  329 extra bytes at beginning or within zipfile
++Zip file size: 552 bytes, number of entries: 1
++warning [test2.mozzip]:  472 extra bytes at beginning or within zipfile
+   (attempting to process anyway)
+-error [test1.mozzip]:  reported length of central directory is
+-  -329 bytes too long (Atari STZip zipfile?  J.H.Holm ZIPSPLIT 1.1
++error [test2.mozzip]:  reported length of central directory is
++  -472 bytes too long (Atari STZip zipfile?  J.H.Holm ZIPSPLIT 1.1
+   zipfile?).  Compensating...
+--rw-r--r--  2.0 unx      446 b- defX 10-Jan-01 00:00 dir/text
+-1 file, 446 bytes uncompressed, 269 bytes compressed:  39.7%
++-rw-r--r--  2.0 unx      671 b- defX 10-Jan-01 00:00 dir/text
++1 file, 671 bytes uncompressed, 412 bytes compressed:  38.6%
diff --git a/tests/data/test1.mozzip b/tests/data/test1.mozzip
new file mode 100644
index 0000000..d43bb74
Binary files /dev/null and b/tests/data/test1.mozzip differ
diff --git a/tests/data/test2.mozzip b/tests/data/test2.mozzip
new file mode 100644
index 0000000..b0b88fb
Binary files /dev/null and b/tests/data/test2.mozzip differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list