[diffoscope] 04/04: Refactor Container abstract method names to be more obvious in what they do
Ximin Luo
infinity0 at debian.org
Fri May 26 16:55:55 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch experimental
in repository diffoscope.
commit 8a8cb3f681a1e6b32d35d6efe2ec5095fcaa4bd6
Author: Ximin Luo <infinity0 at debian.org>
Date: Fri May 26 16:54:44 2017 +0200
Refactor Container abstract method names to be more obvious in what they do
Add logic to force-compare 1-element containers with each other, and remove
several unnecessary get[_adjusted]_member overrides in subclasses.
---
diffoscope/comparators/ar.py | 11 +++---
diffoscope/comparators/bzip2.py | 3 --
diffoscope/comparators/deb.py | 6 ++--
diffoscope/comparators/debian.py | 8 ++---
diffoscope/comparators/dex.py | 3 --
diffoscope/comparators/fsimage.py | 3 --
diffoscope/comparators/gzip.py | 3 --
diffoscope/comparators/rust.py | 3 --
diffoscope/comparators/utils/container.py | 55 +++++++++++++++++-------------
diffoscope/comparators/utils/libarchive.py | 12 ++++---
diffoscope/comparators/xz.py | 3 --
11 files changed, 50 insertions(+), 60 deletions(-)
diff --git a/diffoscope/comparators/ar.py b/diffoscope/comparators/ar.py
index 361f741..0de0bff 100644
--- a/diffoscope/comparators/ar.py
+++ b/diffoscope/comparators/ar.py
@@ -37,18 +37,17 @@ logger = logging.getLogger(__name__)
# the __.PKGDEF member which is just a text file containing the Go interface.
class ArContainer(LibarchiveContainer):
- def get_members(self):
- members = LibarchiveContainer.get_members(self)
- cls = members.__class__
+ def get_adjusted_members(self):
+ members = list(super().get_adjusted_members())
known_ignores = {
"/" : "this is the symbol table, already accounted for in other output",
"//" : "this is the table for GNU long names, already accounted for in the archive filelist",
}
- filtered_out = cls([p for p in members.items() if p[0] in known_ignores])
+ filtered_out = [p for p in members if p[0] in known_ignores]
if filtered_out:
- for k, v in filtered_out.items():
+ for k, v in filtered_out:
logger.debug("ignored ar member '%s' because %s", k, known_ignores[k])
- return cls([p for p in members.items() if p[0] not in known_ignores])
+ return [p for p in members if p[0] not in known_ignores]
class ArSymbolTableDumper(Command):
@tool_required('nm')
diff --git a/diffoscope/comparators/bzip2.py b/diffoscope/comparators/bzip2.py
index baac954..3775c43 100644
--- a/diffoscope/comparators/bzip2.py
+++ b/diffoscope/comparators/bzip2.py
@@ -38,9 +38,6 @@ class Bzip2Container(Archive):
def close_archive(self):
pass
- def get_members(self):
- return collections.OrderedDict({'bzip2-content': self.get_member(self.get_member_names()[0])})
-
def get_member_names(self):
return [self.get_compressed_content_name('.bz2')]
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 9a0758e..66a6d05 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -40,7 +40,7 @@ logger = logging.getLogger(__name__)
# given container
def get_build_id_map(container):
d = {}
- for member_name, member in container.get_members().items():
+ for member_name, member in container.get_adjusted_members():
# Let's assume the name will end with .deb to avoid looking at
# too many irrelevant files
if not member_name.endswith('.deb'):
@@ -59,7 +59,7 @@ class DebContainer(LibarchiveContainer):
@property
def data_tar(self):
- for name, member in self.get_members().items():
+ for name, member in self.get_adjusted_members():
if DebContainer.RE_DATA_TAR.match(name):
specialize(member)
if name.endswith('.tar'):
@@ -69,7 +69,7 @@ class DebContainer(LibarchiveContainer):
@property
def control_tar(self):
- for name, member in self.get_members().items():
+ for name, member in self.get_adjusted_members():
if DebContainer.RE_CONTROL_TAR.match(name):
specialize(member)
if name.endswith('.tar'):
diff --git a/diffoscope/comparators/debian.py b/diffoscope/comparators/debian.py
index 4658fc2..f8df2bf 100644
--- a/diffoscope/comparators/debian.py
+++ b/diffoscope/comparators/debian.py
@@ -87,11 +87,9 @@ class DebControlContainer(Container):
return re.compile(re.escape(version))
- def get_members(self):
- return collections.OrderedDict([
- (self._trim_version_number(name), self.get_member(name))
- for name in self.get_member_names()
- ])
+ def get_adjusted_members(self):
+ for name in self.get_member_names():
+ yield self._trim_version_number(name), self.get_member(name)
def get_member_names(self):
field = self.source.deb822.get('Files') or \
diff --git a/diffoscope/comparators/dex.py b/diffoscope/comparators/dex.py
index bd393cd..a26ab42 100644
--- a/diffoscope/comparators/dex.py
+++ b/diffoscope/comparators/dex.py
@@ -42,9 +42,6 @@ class DexContainer(Archive):
def close_archive(self):
pass
- def get_members(self):
- return collections.OrderedDict({'dex-content': self.get_member(self.get_member_names()[0])})
-
def get_member_names(self):
return [self.get_compressed_content_name('.dex') + '.jar']
diff --git a/diffoscope/comparators/fsimage.py b/diffoscope/comparators/fsimage.py
index f30bbb7..5135ab7 100644
--- a/diffoscope/comparators/fsimage.py
+++ b/diffoscope/comparators/fsimage.py
@@ -59,9 +59,6 @@ class FsImageContainer(Archive):
self.g.umount_all()
self.g.close()
- def get_members(self):
- return collections.OrderedDict({'fsimage-content': self.get_member(self.get_member_names()[0])})
-
def get_member_names(self):
return [os.path.basename(self.source.path) + '.tar']
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index 4eda027..73126bb 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -40,9 +40,6 @@ class GzipContainer(Archive):
def close_archive(self):
pass
- def get_members(self):
- return collections.OrderedDict({'gzip-content': self.get_member(self.get_member_names()[0])})
-
def get_member_names(self):
return [self.get_compressed_content_name('.gz')]
diff --git a/diffoscope/comparators/rust.py b/diffoscope/comparators/rust.py
index 475fe1e..28d8dc2 100644
--- a/diffoscope/comparators/rust.py
+++ b/diffoscope/comparators/rust.py
@@ -41,9 +41,6 @@ class RustObjectContainer(Archive):
def close_archive(self):
pass
- def get_members(self):
- return collections.OrderedDict({'deflate-content': self.get_member(self.get_member_names()[0])})
-
def get_member_names(self):
return [self.get_compressed_content_name('.deflate')]
diff --git a/diffoscope/comparators/utils/container.py b/diffoscope/comparators/utils/container.py
index b8107d9..0f9832f 100644
--- a/diffoscope/comparators/utils/container.py
+++ b/diffoscope/comparators/utils/container.py
@@ -58,12 +58,32 @@ class Container(object, metaclass=abc.ABCMeta):
def source(self):
return self._source
- def get_members(self):
+ @abc.abstractmethod
+ def get_member_names(self):
+ raise NotImplementedError()
+
+ @abc.abstractmethod
+ def get_member(self, member_name):
+ raise NotImplementedError()
+
+ def get_filtered_members(self):
+ # If your get_member implementation is O(n) then this will be O(n^2)
+ # cost. In such cases it is HIGHLY RECOMMENDED to override this as well
+ for name in filter_excludes(self.get_member_names()):
+ yield name, self.get_member(name)
+
+ def get_adjusted_members(self):
"""
- Returns a dictionary. The key is what is used to match when comparing
- containers.
+ Returns an iterable of pairs. The key is what is used to match when
+ comparing containers. This may be used to e.g. strip off version
+ numbers, hashes, etc, efficiently for known file formats, so that we
+ don't need to use the expensive tlsh "fuzzy-hashing" logic.
+
+ Note that containers with 1 element are already force-compared against
+ other containers with 1 element, so you don't need to override this
+ method for those cases.
"""
- return OrderedDict(self.get_all_members())
+ return self.get_filtered_members()
def lookup_file(self, *names):
"""
@@ -89,32 +109,14 @@ class Container(object, metaclass=abc.ABCMeta):
return container.lookup_file(*remainings)
- @abc.abstractmethod
- def get_member_names(self):
- raise NotImplementedError()
-
- @abc.abstractmethod
- def get_member(self, member_name):
- raise NotImplementedError()
-
- def get_filtered_member_names(self):
- return filter_excludes(self.get_member_names())
-
def get_filtered_members_sizes(self):
- for name in self.get_filtered_member_names():
- member = self.get_member(name)
+ for name, member in self.get_adjusted_members():
if member.is_directory():
size = 4096 # default "size" of a directory
else:
size = path_apparent_size(member.path)
yield name, (member, size)
- def get_all_members(self):
- # If your get_member implementation is O(n) then this will be O(n^2)
- # cost. In such cases it is HIGHLY RECOMMENDED to override this as well
- for name in self.get_filtered_member_names():
- yield name, self.get_member(name)
-
def comparisons(self, other):
my_members = OrderedDict(self.get_filtered_members_sizes())
my_remainders = OrderedDict()
@@ -123,6 +125,13 @@ class Container(object, metaclass=abc.ABCMeta):
# TODO: progress could be a bit more accurate here, give more weight to fuzzy-hashed files
with Progress(total_size) as p:
+ if len(my_members) == 1 and len(other_members) == 1:
+ _, (my_member, my_size) = my_members.popitem()
+ _, (other_member, other_size) = other_members.popitem()
+ p.begin_step(my_size + other_size, msg=my_member.progress_name)
+ yield my_member, other_member, NO_COMMENT
+ return
+
# keep it sorted like my members
while my_members:
my_member_name, (my_member, my_size) = my_members.popitem(last=False)
diff --git a/diffoscope/comparators/utils/libarchive.py b/diffoscope/comparators/utils/libarchive.py
index 7ec2b0f..f427ede 100644
--- a/diffoscope/comparators/utils/libarchive.py
+++ b/diffoscope/comparators/utils/libarchive.py
@@ -173,10 +173,6 @@ class LibarchiveContainer(Archive):
self.ensure_unpacked()
return self._members.keys()
- def extract(self, member_name, dest_dir):
- self.ensure_unpacked()
- return self._members[member_name]
-
def get_member(self, member_name):
with libarchive.file_reader(self.source.path) as archive:
for entry in archive:
@@ -184,11 +180,17 @@ class LibarchiveContainer(Archive):
return self.get_subclass(entry)
raise KeyError('%s not found in archive', member_name)
- def get_all_members(self):
+ def get_filtered_members(self):
with libarchive.file_reader(self.source.path) as archive:
for entry in archive:
+ if any_excluded(entry.pathname):
+ continue
yield entry.pathname, self.get_subclass(entry)
+ def extract(self, member_name, dest_dir):
+ self.ensure_unpacked()
+ return self._members[member_name]
+
def get_subclass(self, entry):
if entry.isdir:
return LibarchiveDirectory(self, entry)
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index ab1d89c..fedc848 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -38,9 +38,6 @@ class XzContainer(Archive):
def close_archive(self):
pass
- def get_members(self):
- return collections.OrderedDict({'xz-content': self.get_member(self.get_member_names()[0])})
-
def get_member_names(self):
return [self.get_compressed_content_name('.xz')]
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list