[diffoscope] 01/01: Remove unused imports and tidy up Container.comparisons()
Ximin Luo
infinity0 at debian.org
Fri May 26 17:20:34 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch experimental
in repository diffoscope.
commit 7f559e4e9d28f75297589d3ebb6ed31a44777cbc
Author: Ximin Luo <infinity0 at debian.org>
Date: Fri May 26 17:18:11 2017 +0200
Remove unused imports and tidy up Container.comparisons()
---
diffoscope/comparators/bzip2.py | 1 -
diffoscope/comparators/debian.py | 1 -
diffoscope/comparators/dex.py | 1 -
diffoscope/comparators/fsimage.py | 1 -
diffoscope/comparators/gzip.py | 1 -
diffoscope/comparators/rust.py | 1 -
diffoscope/comparators/utils/container.py | 43 ++++++++++++++-----------------
diffoscope/comparators/utils/fuzzy.py | 4 +--
diffoscope/comparators/xz.py | 1 -
9 files changed, 21 insertions(+), 33 deletions(-)
diff --git a/diffoscope/comparators/bzip2.py b/diffoscope/comparators/bzip2.py
index 3775c43..c2b8ddf 100644
--- a/diffoscope/comparators/bzip2.py
+++ b/diffoscope/comparators/bzip2.py
@@ -21,7 +21,6 @@ import re
import os.path
import logging
import subprocess
-import collections
from diffoscope.tools import tool_required
diff --git a/diffoscope/comparators/debian.py b/diffoscope/comparators/debian.py
index f8df2bf..c0aaf8b 100644
--- a/diffoscope/comparators/debian.py
+++ b/diffoscope/comparators/debian.py
@@ -22,7 +22,6 @@ import os.path
import hashlib
import logging
import functools
-import collections
from debian.deb822 import Dsc
diff --git a/diffoscope/comparators/dex.py b/diffoscope/comparators/dex.py
index a26ab42..3b71b3b 100644
--- a/diffoscope/comparators/dex.py
+++ b/diffoscope/comparators/dex.py
@@ -21,7 +21,6 @@ import re
import os.path
import logging
import subprocess
-import collections
from diffoscope.tools import tool_required
diff --git a/diffoscope/comparators/fsimage.py b/diffoscope/comparators/fsimage.py
index 5135ab7..29f83e6 100644
--- a/diffoscope/comparators/fsimage.py
+++ b/diffoscope/comparators/fsimage.py
@@ -20,7 +20,6 @@
import re
import logging
import os.path
-import collections
from diffoscope.difference import Difference
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index 73126bb..92a9b3e 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -21,7 +21,6 @@ import re
import os.path
import logging
import subprocess
-import collections
from diffoscope.tools import tool_required
from diffoscope.difference import Difference
diff --git a/diffoscope/comparators/rust.py b/diffoscope/comparators/rust.py
index 28d8dc2..7ffe63b 100644
--- a/diffoscope/comparators/rust.py
+++ b/diffoscope/comparators/rust.py
@@ -21,7 +21,6 @@
import zlib
import os.path
import logging
-import collections
from diffoscope.difference import Difference
diff --git a/diffoscope/comparators/utils/container.py b/diffoscope/comparators/utils/container.py
index 0f9832f..7ec4b8d 100644
--- a/diffoscope/comparators/utils/container.py
+++ b/diffoscope/comparators/utils/container.py
@@ -119,39 +119,34 @@ class Container(object, metaclass=abc.ABCMeta):
def comparisons(self, other):
my_members = OrderedDict(self.get_filtered_members_sizes())
- my_remainders = OrderedDict()
other_members = OrderedDict(other.get_filtered_members_sizes())
- total_size = sum(x[1] for x in my_members.values()) + sum(x[1] for x in other_members.values())
+ total_size = sum(x[1] for x in itertools.chain(my_members.values(), other_members.values()))
# TODO: progress could be a bit more accurate here, give more weight to fuzzy-hashed files
+ # TODO: merge DirectoryContainer.comparisons() into this
with Progress(total_size) as p:
- if len(my_members) == 1 and len(other_members) == 1:
- _, (my_member, my_size) = my_members.popitem()
- _, (other_member, other_size) = other_members.popitem()
+ def prep_yield(my_name, other_name, comment=NO_COMMENT):
+ my_member, my_size = my_members.pop(my_name)
+ other_member, other_size = other_members.pop(other_name)
p.begin_step(my_size + other_size, msg=my_member.progress_name)
- yield my_member, other_member, NO_COMMENT
+ return my_member, other_member, comment
+
+ # if both containers contain 1 element, compare these
+ if len(my_members) == 1 and len(other_members) == 1:
+ yield prep_yield(next(iter(my_members.keys())),
+ next(iter(other_members.keys())))
return
- # keep it sorted like my members
- while my_members:
- my_member_name, (my_member, my_size) = my_members.popitem(last=False)
- if my_member_name in other_members:
- other_member, other_size = other_members.pop(my_member_name)
- p.begin_step(my_size + other_size, msg=my_member.progress_name)
- yield my_member, other_member, NO_COMMENT
- else:
- my_remainders[my_member_name] = (my_member, my_size)
-
- my_members = my_remainders
- my_members_fuzz = OrderedDict((k, v[0]) for k, v in my_members.items())
- other_members_fuzz = OrderedDict((k, v[0]) for k, v in other_members.items())
- for my_name, other_name, score in perform_fuzzy_matching(my_members_fuzz, other_members_fuzz):
- my_member, my_size = my_members.pop(my_name)
- other_member, other_size = other_members.pop(other_name)
+ other_names = set(other_members.keys())
+ # keep it sorted like my_members
+ both_names = [name for name in my_members.keys() if name in other_names]
+ for name in both_names:
+ yield prep_yield(name, name)
+
+ for my_name, other_name, score in perform_fuzzy_matching(my_members, other_members):
comment = "Files similar despite different names" \
" (difference score: {})".format(score)
- p.begin_step(my_size + other_size, msg=my_name)
- yield my_member, other_member, comment
+ yield prep_yield(my_name, other_name, comment)
if Config().new_file:
for my_member, my_size in my_members.values():
diff --git a/diffoscope/comparators/utils/fuzzy.py b/diffoscope/comparators/utils/fuzzy.py
index c365703..7b4a40a 100644
--- a/diffoscope/comparators/utils/fuzzy.py
+++ b/diffoscope/comparators/utils/fuzzy.py
@@ -37,11 +37,11 @@ def perform_fuzzy_matching(members1, members2):
# Perform local copies because they will be modified by consumer
members1 = dict(members1)
members2 = dict(members2)
- for name1, file1 in members1.items():
+ for name1, (file1, _) in members1.items():
if file1.is_directory() or not file1.fuzzy_hash:
continue
comparisons = []
- for name2, file2 in members2.items():
+ for name2, (file2, _) in members2.items():
if name2 in already_compared or file2.is_directory() or not file2.fuzzy_hash:
continue
comparisons.append((tlsh.diff(file1.fuzzy_hash, file2.fuzzy_hash), name2))
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index fedc848..e2421e6 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -21,7 +21,6 @@ import re
import os.path
import logging
import subprocess
-import collections
from diffoscope.tools import tool_required
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope mailing list