[diffoscope] 01/01: Greatly improve speed by cutting out O(n^2) lookup in libarchive.py
Ximin Luo
infinity0 at debian.org
Tue Nov 8 13:51:33 CET 2016
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch master
in repository diffoscope.
commit deb8aea1bc82fee35d1fca0c7fcc9cbe4748f2f8
Author: Ximin Luo <infinity0 at debian.org>
Date: Tue Nov 8 13:51:13 2016 +0100
Greatly improve speed by cutting out O(n^2) lookup in libarchive.py
---
diffoscope/comparators/libarchive.py | 13 +++++++++++++
diffoscope/comparators/utils.py | 8 +++++++-
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/diffoscope/comparators/libarchive.py b/diffoscope/comparators/libarchive.py
index aad3969..e2536c1 100644
--- a/diffoscope/comparators/libarchive.py
+++ b/diffoscope/comparators/libarchive.py
@@ -195,3 +195,16 @@ class LibarchiveContainer(Archive):
else:
return LibarchiveMember(self, entry)
raise KeyError('%s not found in archive', member_name)
+
+ def get_all_members(self):
+ with libarchive.file_reader(self.source.path) as archive:
+ for entry in archive:
+ p = entry.pathname
+ if entry.isdir:
+ yield p, LibarchiveDirectory(self, entry)
+ elif entry.issym:
+ yield p, LibarchiveSymlink(self, entry)
+ elif entry.isblk or entry.ischr:
+ yield p, LibarchiveDevice(self, entry)
+ else:
+ yield p, LibarchiveMember(self, entry)
diff --git a/diffoscope/comparators/utils.py b/diffoscope/comparators/utils.py
index 75f40fb..f757f4e 100644
--- a/diffoscope/comparators/utils.py
+++ b/diffoscope/comparators/utils.py
@@ -172,7 +172,7 @@ class Container(object, metaclass=abc.ABCMeta):
def get_members(self):
"""Returns a directory. The key is what is used to match when comparing containers."""
- return collections.OrderedDict([(name, self.get_member(name)) for name in self.get_member_names()])
+ return collections.OrderedDict(self.get_all_members())
def lookup_file(self, *names):
"""Try to fetch a specific file by digging in containers."""
@@ -198,6 +198,12 @@ class Container(object, metaclass=abc.ABCMeta):
def get_member(self, member_name):
raise NotImplementedError()
+ def get_all_members(self):
+ # If your get_member implementation is O(n) then this will be O(n^2) cost
+ # In such cases it is HIGHLY RECOMMENDED to override this as well
+ for name in self.get_member_names():
+ yield name, self.get_member(name)
+
def comparisons(self, other):
my_members = self.get_members()
my_reminders = collections.OrderedDict()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list