[diffoscope] 01/01: Remove unused imports and tidy up Container.comparisons()

Ximin Luo infinity0 at debian.org
Fri May 26 17:20:34 CEST 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch experimental
in repository diffoscope.

commit 7f559e4e9d28f75297589d3ebb6ed31a44777cbc
Author: Ximin Luo <infinity0 at debian.org>
Date:   Fri May 26 17:18:11 2017 +0200

    Remove unused imports and tidy up Container.comparisons()
---
 diffoscope/comparators/bzip2.py           |  1 -
 diffoscope/comparators/debian.py          |  1 -
 diffoscope/comparators/dex.py             |  1 -
 diffoscope/comparators/fsimage.py         |  1 -
 diffoscope/comparators/gzip.py            |  1 -
 diffoscope/comparators/rust.py            |  1 -
 diffoscope/comparators/utils/container.py | 43 ++++++++++++++-----------------
 diffoscope/comparators/utils/fuzzy.py     |  4 +--
 diffoscope/comparators/xz.py              |  1 -
 9 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/diffoscope/comparators/bzip2.py b/diffoscope/comparators/bzip2.py
index 3775c43..c2b8ddf 100644
--- a/diffoscope/comparators/bzip2.py
+++ b/diffoscope/comparators/bzip2.py
@@ -21,7 +21,6 @@ import re
 import os.path
 import logging
 import subprocess
-import collections
 
 from diffoscope.tools import tool_required
 
diff --git a/diffoscope/comparators/debian.py b/diffoscope/comparators/debian.py
index f8df2bf..c0aaf8b 100644
--- a/diffoscope/comparators/debian.py
+++ b/diffoscope/comparators/debian.py
@@ -22,7 +22,6 @@ import os.path
 import hashlib
 import logging
 import functools
-import collections
 
 from debian.deb822 import Dsc
 
diff --git a/diffoscope/comparators/dex.py b/diffoscope/comparators/dex.py
index a26ab42..3b71b3b 100644
--- a/diffoscope/comparators/dex.py
+++ b/diffoscope/comparators/dex.py
@@ -21,7 +21,6 @@ import re
 import os.path
 import logging
 import subprocess
-import collections
 
 from diffoscope.tools import tool_required
 
diff --git a/diffoscope/comparators/fsimage.py b/diffoscope/comparators/fsimage.py
index 5135ab7..29f83e6 100644
--- a/diffoscope/comparators/fsimage.py
+++ b/diffoscope/comparators/fsimage.py
@@ -20,7 +20,6 @@
 import re
 import logging
 import os.path
-import collections
 
 from diffoscope.difference import Difference
 
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index 73126bb..92a9b3e 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -21,7 +21,6 @@ import re
 import os.path
 import logging
 import subprocess
-import collections
 
 from diffoscope.tools import tool_required
 from diffoscope.difference import Difference
diff --git a/diffoscope/comparators/rust.py b/diffoscope/comparators/rust.py
index 28d8dc2..7ffe63b 100644
--- a/diffoscope/comparators/rust.py
+++ b/diffoscope/comparators/rust.py
@@ -21,7 +21,6 @@
 import zlib
 import os.path
 import logging
-import collections
 
 from diffoscope.difference import Difference
 
diff --git a/diffoscope/comparators/utils/container.py b/diffoscope/comparators/utils/container.py
index 0f9832f..7ec4b8d 100644
--- a/diffoscope/comparators/utils/container.py
+++ b/diffoscope/comparators/utils/container.py
@@ -119,39 +119,34 @@ class Container(object, metaclass=abc.ABCMeta):
 
     def comparisons(self, other):
         my_members = OrderedDict(self.get_filtered_members_sizes())
-        my_remainders = OrderedDict()
         other_members = OrderedDict(other.get_filtered_members_sizes())
-        total_size = sum(x[1] for x in my_members.values()) + sum(x[1] for x in other_members.values())
+        total_size = sum(x[1] for x in itertools.chain(my_members.values(), other_members.values()))
         # TODO: progress could be a bit more accurate here, give more weight to fuzzy-hashed files
+        # TODO: merge DirectoryContainer.comparisons() into this
 
         with Progress(total_size) as p:
-            if len(my_members) == 1 and len(other_members) == 1:
-                _, (my_member, my_size) = my_members.popitem()
-                _, (other_member, other_size) = other_members.popitem()
+            def prep_yield(my_name, other_name, comment=NO_COMMENT):
+                my_member, my_size = my_members.pop(my_name)
+                other_member, other_size = other_members.pop(other_name)
                 p.begin_step(my_size + other_size, msg=my_member.progress_name)
-                yield my_member, other_member, NO_COMMENT
+                return my_member, other_member, comment
+
+            # if both containers contain 1 element, compare these
+            if len(my_members) == 1 and len(other_members) == 1:
+                yield prep_yield(next(iter(my_members.keys())),
+                                 next(iter(other_members.keys())))
                 return
 
-            # keep it sorted like my members
-            while my_members:
-                my_member_name, (my_member, my_size) = my_members.popitem(last=False)
-                if my_member_name in other_members:
-                    other_member, other_size = other_members.pop(my_member_name)
-                    p.begin_step(my_size + other_size, msg=my_member.progress_name)
-                    yield my_member, other_member, NO_COMMENT
-                else:
-                    my_remainders[my_member_name] = (my_member, my_size)
-
-            my_members = my_remainders
-            my_members_fuzz = OrderedDict((k, v[0]) for k, v in my_members.items())
-            other_members_fuzz = OrderedDict((k, v[0]) for k, v in other_members.items())
-            for my_name, other_name, score in perform_fuzzy_matching(my_members_fuzz, other_members_fuzz):
-                my_member, my_size = my_members.pop(my_name)
-                other_member, other_size = other_members.pop(other_name)
+            other_names = set(other_members.keys())
+            # keep it sorted like my_members
+            both_names = [name for name in my_members.keys() if name in other_names]
+            for name in both_names:
+                yield prep_yield(name, name)
+
+            for my_name, other_name, score in perform_fuzzy_matching(my_members, other_members):
                 comment = "Files similar despite different names" \
                     " (difference score: {})".format(score)
-                p.begin_step(my_size + other_size, msg=my_name)
-                yield my_member, other_member, comment
+                yield prep_yield(my_name, other_name, comment)
 
             if Config().new_file:
                 for my_member, my_size in my_members.values():
diff --git a/diffoscope/comparators/utils/fuzzy.py b/diffoscope/comparators/utils/fuzzy.py
index c365703..7b4a40a 100644
--- a/diffoscope/comparators/utils/fuzzy.py
+++ b/diffoscope/comparators/utils/fuzzy.py
@@ -37,11 +37,11 @@ def perform_fuzzy_matching(members1, members2):
     # Perform local copies because they will be modified by consumer
     members1 = dict(members1)
     members2 = dict(members2)
-    for name1, file1 in members1.items():
+    for name1, (file1, _) in members1.items():
         if file1.is_directory() or not file1.fuzzy_hash:
             continue
         comparisons = []
-        for name2, file2 in members2.items():
+        for name2, (file2, _) in members2.items():
             if name2 in already_compared or file2.is_directory() or not file2.fuzzy_hash:
                 continue
             comparisons.append((tlsh.diff(file1.fuzzy_hash, file2.fuzzy_hash), name2))
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index fedc848..e2421e6 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -21,7 +21,6 @@ import re
 import os.path
 import logging
 import subprocess
-import collections
 
 from diffoscope.tools import tool_required
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list