[diffoscope] 03/03: Add various traverse_* methods to Difference
Ximin Luo
infinity0 at debian.org
Wed Jun 14 19:10:39 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch WIP/humungous-diffs
in repository diffoscope.
commit bc16f34848c3400f27f23ffaad3056f58a9df806
Author: Ximin Luo <infinity0 at debian.org>
Date: Wed Jun 14 19:04:58 2017 +0200
Add various traverse_* methods to Difference
---
diffoscope/difference.py | 47 +++++++++++++++++++++++++++++++++++++++++------
tests/test_difference.py | 40 +++++++++++++++++++++++++++++++++++++---
2 files changed, 78 insertions(+), 9 deletions(-)
diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index 5693726..0436016 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -17,9 +17,10 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+import hashlib
+import heapq
import re
import signal
-import hashlib
import logging
import subprocess
@@ -114,13 +115,16 @@ class Difference(object):
def size(self):
if self._size_cache is None:
- self._size_cache = (len(self.unified_diff) +
- len(self.source1) +
- len(self.source2) +
+ self._size_cache = sum(d.size_self() for d in self.traverse_depth())
+ return self._size_cache
+
+ def size_self(self):
+ """Size, excluding children."""
+ return ((len(self.unified_diff) if self.unified_diff else 0) +
+ (len(self.source1) if self.source1 else 0) +
+ (len(self.source2) if self.source2 else 0) +
sum(map(len, self.comments)) +
- sum(d.size() for d in self._details) +
sum(v.size() for v in self._visuals))
- return self._size_cache
def has_children(self):
"""Whether there are children.
@@ -128,6 +132,37 @@ class Difference(object):
Useful for e.g. choosing whether to display [+]/[-] controls."""
return self._unified_diff is not None or self._details or self._visuals
+ def traverse_depth(self, depth=-1):
+ yield self
+ if depth != 0:
+ for d in self._details:
+ yield from d.traverse_depth(depth-1)
+
+ def traverse_breadth(self, queue=None):
+ queue = queue if queue is not None else [self]
+ if queue:
+ top = queue.pop(0)
+ yield top
+ queue.extend(top._details)
+ yield from self.traverse_breadth(queue)
+
+ def traverse_heapq(self, scorer, queue=None):
+ """Traverse the difference tree using a priority queue, where each node
+ is scored according to a user-supplied function, and nodes with smaller
+ scores are traversed first (after they have been added to the queue).
+
+ The function `scorer` takes two arguments, a node to score and the
+ score of its parent node (or None if there is no parent). It should
+ return the score of the input node.
+ """
+ queue = queue if queue is not None else [(scorer(self, None), self)]
+ if queue:
+ val, top = heapq.heappop(queue)
+ yield top
+ for d in top._details:
+ heapq.heappush(queue, (scorer(d, val), d))
+ yield from self.traverse_heapq(scorer, queue)
+
@staticmethod
def from_feeder(feeder1, feeder2, path1, path2, source=None, comment=None, **kwargs):
try:
diff --git a/tests/test_difference.py b/tests/test_difference.py
index fbf40d1..3c06c99 100644
--- a/tests/test_difference.py
+++ b/tests/test_difference.py
@@ -18,12 +18,19 @@
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
import io
+import itertools
import pytest
from diffoscope.config import Config
from diffoscope.difference import Difference
+def assert_size(diff, size):
+ assert size == diff.size()
+ assert size == sum(d.size_self() for d in diff.traverse_breadth())
+ g = itertools.count()
+ assert size == sum(d.size_self() for d in diff.traverse_heapq(lambda x, _: next(g)))
+
def assert_algebraic_properties(d, size):
assert d.equals(d.get_reverse().get_reverse())
assert d.get_reverse().size() == d.size() == size
@@ -47,11 +54,38 @@ def test_too_long_diff_block_lines(monkeypatch):
def test_size_updates():
d = Difference("0123456789", "path1", "path2")
- assert d.size() == 20
+ assert_size(d, 20)
d.add_details([Difference("0123456789", "path1/a", "path2/a")])
- assert d.size() == 44
+ assert_size(d, 44)
d.add_comment("lol1")
- assert d.size() == 48
+ assert_size(d, 48)
+
+def test_traverse_heapq():
+ d0 = Difference("0", "path1/a", "path2/a")
+ d1 = Difference("012", "path1/b", "path2/b")
+ d2 = Difference("01", "path1/c", "path2/c")
+ d0.add_details([
+ Difference("012345678", "path1/a/1", "path2/a/1"),
+ Difference("0123", "path1/a/2", "path2/a/2"),
+ Difference("012", "path1/a/3", "path2/a/3")])
+ d1.add_details([
+ Difference("01234567", "path1/b/1", "path2/b/1"),
+ Difference("01234", "path1/b/2", "path2/b/2"),
+ Difference("012345", "path1/b/3", "path2/b/3")])
+ d2.add_details([
+ Difference("01", "path1/c/1", "path2/c/1"),
+ Difference("0123456789", "path1/c/2", "path2/c/2"),
+ Difference("0123456", "path1/c/3", "path2/c/3")])
+ diff = Difference("0123456789", "path1", "path2")
+ diff.add_details([d0, d1, d2])
+ # traverse nodes in depth order, but at a given depth traverse the nodes
+ # there from smallest diff (counted non-recursively) to largest
+ def f(node, parscore):
+ depth = parscore[0] + 1 if parscore else 0
+ return depth, node.size_self()
+ assert_size(diff, 284)
+ results = [d.source1[6:] for d in diff.traverse_heapq(f)]
+ assert results == ['', 'a', 'c', 'b', 'c/1', 'a/3', 'a/2', 'b/2', 'b/3', 'c/3', 'b/1', 'a/1', 'c/2']
def test_non_str_arguments_to_source1_source2():
for x in (
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list