[diffoscope] 01/01: Greatly improve speed by cutting out O(n^2) lookup in libarchive.py

Ximin Luo infinity0 at debian.org
Tue Nov 8 13:51:33 CET 2016


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository diffoscope.

commit deb8aea1bc82fee35d1fca0c7fcc9cbe4748f2f8
Author: Ximin Luo <infinity0 at debian.org>
Date:   Tue Nov 8 13:51:13 2016 +0100

    Greatly improve speed by cutting out O(n^2) lookup in libarchive.py
---
 diffoscope/comparators/libarchive.py | 13 +++++++++++++
 diffoscope/comparators/utils.py      |  8 +++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/diffoscope/comparators/libarchive.py b/diffoscope/comparators/libarchive.py
index aad3969..e2536c1 100644
--- a/diffoscope/comparators/libarchive.py
+++ b/diffoscope/comparators/libarchive.py
@@ -195,3 +195,16 @@ class LibarchiveContainer(Archive):
                     else:
                         return LibarchiveMember(self, entry)
         raise KeyError('%s not found in archive', member_name)
+
+    def get_all_members(self):
+        with libarchive.file_reader(self.source.path) as archive:
+            for entry in archive:
+                p = entry.pathname
+                if entry.isdir:
+                    yield p, LibarchiveDirectory(self, entry)
+                elif entry.issym:
+                    yield p, LibarchiveSymlink(self, entry)
+                elif entry.isblk or entry.ischr:
+                    yield p, LibarchiveDevice(self, entry)
+                else:
+                    yield p, LibarchiveMember(self, entry)
diff --git a/diffoscope/comparators/utils.py b/diffoscope/comparators/utils.py
index 75f40fb..f757f4e 100644
--- a/diffoscope/comparators/utils.py
+++ b/diffoscope/comparators/utils.py
@@ -172,7 +172,7 @@ class Container(object, metaclass=abc.ABCMeta):
 
     def get_members(self):
         """Returns a directory. The key is what is used to match when comparing containers."""
-        return collections.OrderedDict([(name, self.get_member(name)) for name in self.get_member_names()])
+        return collections.OrderedDict(self.get_all_members())
 
     def lookup_file(self, *names):
         """Try to fetch a specific file by digging in containers."""
@@ -198,6 +198,12 @@ class Container(object, metaclass=abc.ABCMeta):
     def get_member(self, member_name):
         raise NotImplementedError()
 
+    def get_all_members(self):
+        # If your get_member implementation is O(n) then this will be O(n^2) cost
+        # In such cases it is HIGHLY RECOMMENDED to override this as well
+        for name in self.get_member_names():
+            yield name, self.get_member(name)
+
     def comparisons(self, other):
         my_members = self.get_members()
         my_reminders = collections.OrderedDict()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list