[diffoscope] 03/03: Add various traverse_* methods to Difference

Ximin Luo infinity0 at debian.org
Wed Jun 14 19:10:39 CEST 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch WIP/humungous-diffs
in repository diffoscope.

commit bc16f34848c3400f27f23ffaad3056f58a9df806
Author: Ximin Luo <infinity0 at debian.org>
Date:   Wed Jun 14 19:04:58 2017 +0200

    Add various traverse_* methods to Difference
---
 diffoscope/difference.py | 47 +++++++++++++++++++++++++++++++++++++++++------
 tests/test_difference.py | 40 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index 5693726..0436016 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -17,9 +17,10 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
+import hashlib
+import heapq
 import re
 import signal
-import hashlib
 import logging
 import subprocess
 
@@ -114,13 +115,16 @@ class Difference(object):
 
     def size(self):
         if self._size_cache is None:
-            self._size_cache = (len(self.unified_diff) +
-                len(self.source1) +
-                len(self.source2) +
+            self._size_cache = sum(d.size_self() for d in self.traverse_depth())
+        return self._size_cache
+
+    def size_self(self):
+        """Size, excluding children."""
+        return ((len(self.unified_diff) if self.unified_diff else 0) +
+                (len(self.source1) if self.source1 else 0) +
+                (len(self.source2) if self.source2 else 0) +
                 sum(map(len, self.comments)) +
-                sum(d.size() for d in self._details) +
                 sum(v.size() for v in self._visuals))
-        return self._size_cache
 
     def has_children(self):
         """Whether there are children.
@@ -128,6 +132,37 @@ class Difference(object):
         Useful for e.g. choosing whether to display [+]/[-] controls."""
         return self._unified_diff is not None or self._details or self._visuals
 
+    def traverse_depth(self, depth=-1):
+        yield self
+        if depth != 0:
+            for d in self._details:
+                yield from d.traverse_depth(depth-1)
+
+    def traverse_breadth(self, queue=None):
+        queue = queue if queue is not None else [self]
+        if queue:
+            top = queue.pop(0)
+            yield top
+            queue.extend(top._details)
+            yield from self.traverse_breadth(queue)
+
+    def traverse_heapq(self, scorer, queue=None):
+        """Traverse the difference tree using a priority queue, where each node
+        is scored according to a user-supplied function, and nodes with smaller
+        scores are traversed first (after they have been added to the queue).
+
+        The function `scorer` takes two arguments, a node to score and the
+        score of its parent node (or None if there is no parent). It should
+        return the score of the input node.
+        """
+        queue = queue if queue is not None else [(scorer(self, None), self)]
+        if queue:
+            val, top = heapq.heappop(queue)
+            yield top
+            for d in top._details:
+                heapq.heappush(queue, (scorer(d, val), d))
+            yield from self.traverse_heapq(scorer, queue)
+
     @staticmethod
     def from_feeder(feeder1, feeder2, path1, path2, source=None, comment=None, **kwargs):
         try:
diff --git a/tests/test_difference.py b/tests/test_difference.py
index fbf40d1..3c06c99 100644
--- a/tests/test_difference.py
+++ b/tests/test_difference.py
@@ -18,12 +18,19 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import io
+import itertools
 import pytest
 
 from diffoscope.config import Config
 from diffoscope.difference import Difference
 
 
+def assert_size(diff, size):
+    assert size == diff.size()
+    assert size == sum(d.size_self() for d in diff.traverse_breadth())
+    g = itertools.count()
+    assert size == sum(d.size_self() for d in diff.traverse_heapq(lambda x, _: next(g)))
+
 def assert_algebraic_properties(d, size):
     assert d.equals(d.get_reverse().get_reverse())
     assert d.get_reverse().size() == d.size() == size
@@ -47,11 +54,38 @@ def test_too_long_diff_block_lines(monkeypatch):
 
 def test_size_updates():
     d = Difference("0123456789", "path1", "path2")
-    assert d.size() == 20
+    assert_size(d, 20)
     d.add_details([Difference("0123456789", "path1/a", "path2/a")])
-    assert d.size() == 44
+    assert_size(d, 44)
     d.add_comment("lol1")
-    assert d.size() == 48
+    assert_size(d, 48)
+
+def test_traverse_heapq():
+    d0 = Difference("0", "path1/a", "path2/a")
+    d1 = Difference("012", "path1/b", "path2/b")
+    d2 = Difference("01", "path1/c", "path2/c")
+    d0.add_details([
+        Difference("012345678", "path1/a/1", "path2/a/1"),
+        Difference("0123", "path1/a/2", "path2/a/2"),
+        Difference("012", "path1/a/3", "path2/a/3")])
+    d1.add_details([
+        Difference("01234567", "path1/b/1", "path2/b/1"),
+        Difference("01234", "path1/b/2", "path2/b/2"),
+        Difference("012345", "path1/b/3", "path2/b/3")])
+    d2.add_details([
+        Difference("01", "path1/c/1", "path2/c/1"),
+        Difference("0123456789", "path1/c/2", "path2/c/2"),
+        Difference("0123456", "path1/c/3", "path2/c/3")])
+    diff = Difference("0123456789", "path1", "path2")
+    diff.add_details([d0, d1, d2])
+    # traverse nodes in depth order, but at a given depth traverse the nodes
+    # there from smallest diff (counted non-recursively) to largest
+    def f(node, parscore):
+        depth = parscore[0] + 1 if parscore else 0
+        return depth, node.size_self()
+    assert_size(diff, 284)
+    results = [d.source1[6:] for d in diff.traverse_heapq(f)]
+    assert results == ['', 'a', 'c', 'b', 'c/1', 'a/3', 'a/2', 'b/2', 'b/3', 'c/3', 'b/1', 'a/1', 'c/2']
 
 def test_non_str_arguments_to_source1_source2():
     for x in (

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list