[diffoscope] 02/03: Check nested containers while comparing containers content.

Maria Glukhova siamezzze-guest at moszumanska.debian.org
Sun Jan 22 19:01:20 CET 2017


This is an automated email from the git hooks/post-receive script.

siamezzze-guest pushed a commit to branch siamezzze/containers
in repository diffoscope.

commit ac55cece316d6bc0e532d300c825a54083af61e0
Author: Maria Glukhova <siamezzze at gmail.com>
Date:   Sun Jan 22 19:42:34 2017 +0200

    Check nested containers while comparing containers content.
    
    Before matching container contents, check for a nested container
    (a single member that is itself a container) in cases where the
    container always has exactly one member (.gz, .bz2, .xz).
    If both containers have a nested container, always compare them, no
    matter their name or type (for cases like .tar.gz vs .tar.bz2).
    If only one of them has one, unfold it (for cases like .tar.gz vs .zip).
---
 diffoscope/comparators/bzip2.py           |  7 +++++++
 diffoscope/comparators/gzip.py            |  7 +++++++
 diffoscope/comparators/utils/container.py | 18 ++++++++++++++++++
 diffoscope/comparators/xz.py              |  7 +++++++
 4 files changed, 39 insertions(+)

diff --git a/diffoscope/comparators/bzip2.py b/diffoscope/comparators/bzip2.py
index e54b28b..e8fa96e 100644
--- a/diffoscope/comparators/bzip2.py
+++ b/diffoscope/comparators/bzip2.py
@@ -28,6 +28,7 @@ from diffoscope.tools import tool_required
 from .utils.file import File
 from .utils.archive import Archive
 from .utils.filenames import get_compressed_content_name
+from .utils.specialize import specialize
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +46,12 @@ class Bzip2Container(Archive):
     def get_member_names(self):
         return [get_compressed_content_name(self.source.path, '.bz2')]
 
+    def get_nested_container(self):
+        # If the only member of container is also container, return it.
+        only_member = self.get_member(self.get_member_names()[0])
+        specialize(only_member)
+        return only_member.as_container
+
     @tool_required('bzip2')
     def extract(self, member_name, dest_dir):
         dest_path = os.path.join(dest_dir, member_name)
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index f81bac5..2541496 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -30,6 +30,7 @@ from diffoscope.difference import Difference
 from .utils.file import File
 from .utils.archive import Archive
 from .utils.filenames import get_compressed_content_name
+from .utils.specialize import specialize
 
 logger = logging.getLogger(__name__)
 
@@ -47,6 +48,12 @@ class GzipContainer(Archive):
     def get_member_names(self):
         return [get_compressed_content_name(self.source.path, '.gz')]
 
+    def get_nested_container(self):
+        # If the only member of container is also container, return it.
+        only_member = self.get_member(self.get_member_names()[0])
+        specialize(only_member)
+        return only_member.as_container
+
     @tool_required('gzip')
     def extract(self, member_name, dest_dir):
         dest_path = os.path.join(dest_dir, member_name)
diff --git a/diffoscope/comparators/utils/container.py b/diffoscope/comparators/utils/container.py
index 3cd6650..52e2f92 100644
--- a/diffoscope/comparators/utils/container.py
+++ b/diffoscope/comparators/utils/container.py
@@ -85,11 +85,29 @@ class Container(object, metaclass=abc.ABCMeta):
         for name in self.get_member_names():
             yield name, self.get_member(name)
 
+    def get_nested_container(self):
+        return None
+
     def comparisons(self, other):
         my_members = self.get_members()
         my_reminders = collections.OrderedDict()
         other_members = other.get_members()
 
+        my_nested_container = self.get_nested_container()
+        other_nested_container = other.get_nested_container()
+
+        if my_nested_container and other_nested_container:
+            # If both containers contain one sub-container each,
+            # make sure they get compared no matter their name/type.
+            # (not unpacking them here to preserve structure and metadata).
+            yield my_members.popitem()[1], other_members.popitem()[1], NO_COMMENT
+            return
+        # One of the containers has nested container - unpack it.
+        if my_nested_container:
+            my_members = my_nested_container.get_members()
+        if other_nested_container:
+            other_members = other_nested_container.get_members()
+
         with Progress(max(len(my_members), len(other_members))) as p:
             # keep it sorted like my members
             while my_members:
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index d75853a..77edaec 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -28,6 +28,7 @@ from diffoscope.tools import tool_required
 from .utils.file import File
 from .utils.archive import Archive
 from .utils.filenames import get_compressed_content_name
+from .utils.specialize import specialize
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +46,12 @@ class XzContainer(Archive):
     def get_member_names(self):
         return [get_compressed_content_name(self.source.path, '.xz')]
 
+    def get_nested_container(self):
+        # If the only member of container is also container, return it.
+        only_member = self.get_member(self.get_member_names()[0])
+        specialize(only_member)
+        return only_member.as_container
+
     @tool_required('xz')
     def extract(self, member_name, dest_dir):
         dest_path = os.path.join(dest_dir, member_name)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list