[diffoscope] 03/05: Add detection of order-only difference in plain text format. (Closes: #848049)

Chris Lamb chris at chris-lamb.co.uk
Sat Dec 24 20:27:28 CET 2016


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch master
in repository diffoscope.

commit 0c6710bdfabfbdd892e45015a01fe3758882470d
Author: Maria Glukhova <siamezzze at gmail.com>
Date:   Sat Dec 24 12:29:57 2016 +0200

    Add detection of order-only difference in plain text format. (Closes: #848049)
    
    Detect if the text files' contents differ only in line ordering, and give an appropriate comment.
    
    Signed-off-by: Chris Lamb <lamby at debian.org>
---
 diffoscope/comparators/text.py      | 14 ++++++++++++++
 tests/comparators/test_text.py      |  8 ++++++++
 tests/data/text_order1              |  7 +++++++
 tests/data/text_order2              |  7 +++++++
 tests/data/text_order_expected_diff | 11 +++++++++++
 5 files changed, 47 insertions(+)

diff --git a/diffoscope/comparators/text.py b/diffoscope/comparators/text.py
index 909ff98..f7f423f 100644
--- a/diffoscope/comparators/text.py
+++ b/diffoscope/comparators/text.py
@@ -24,6 +24,17 @@ from diffoscope.difference import Difference
 from diffoscope.comparators.binary import File
 
 
+def order_only_difference(unified_diff):
+    diff_lines = unified_diff.splitlines()
+    added_lines = [line[1:] for line in diff_lines if line.startswith('+')]
+    removed_lines = [line[1:] for line in diff_lines if line.startswith('-')]
+    # Faster check: does number of lines match?
+    if len(added_lines) != len(removed_lines):
+        return False
+    # Sorting both lists compares them as multisets, i.e. ignoring line order.
+    return sorted(added_lines) == sorted(removed_lines)
+
+
 class TextFile(File):
     RE_FILE_TYPE = re.compile(r'\btext\b')
 
@@ -44,6 +55,9 @@ class TextFile(File):
             with codecs.open(self.path, 'r', encoding=my_encoding) as my_content, \
                  codecs.open(other.path, 'r', encoding=other_encoding) as other_content:
                 difference = Difference.from_text_readers(my_content, other_content, self.name, other.name, source)
+                # Check if difference is only in line order.
+                if difference and order_only_difference(difference.unified_diff):
+                    difference.add_comment("ordering differences only")
                 if my_encoding != other_encoding:
                     if difference is None:
                         difference = Difference(None, self.path, other.path, source)
diff --git a/tests/comparators/test_text.py b/tests/comparators/test_text.py
index 9892826..afa0716 100644
--- a/tests/comparators/test_text.py
+++ b/tests/comparators/test_text.py
@@ -65,3 +65,11 @@ def test_difference_between_iso88591_and_unicode_only(iso8859, tmpdir):
 
 def test_compare_non_existing(monkeypatch, ascii1):
     assert_non_existing(monkeypatch, ascii1, has_null_source=False, has_details=False)
+
+text_order1 = load_fixture(data('text_order1'))
+text_order2 = load_fixture(data('text_order2'))
+
+def test_ordering_differences(text_order1, text_order2):
+    difference = text_order1.compare(text_order2)
+    assert difference.comments == ['ordering differences only']
+    assert difference.unified_diff == open(data('text_order_expected_diff')).read()
diff --git a/tests/data/text_order1 b/tests/data/text_order1
new file mode 100644
index 0000000..9f85b81
--- /dev/null
+++ b/tests/data/text_order1
@@ -0,0 +1,7 @@
+These
+lines
+follow
+in
+some
+order
+.
diff --git a/tests/data/text_order2 b/tests/data/text_order2
new file mode 100644
index 0000000..7890b50
--- /dev/null
+++ b/tests/data/text_order2
@@ -0,0 +1,7 @@
+These
+some
+order
+follow
+in
+lines
+.
diff --git a/tests/data/text_order_expected_diff b/tests/data/text_order_expected_diff
new file mode 100644
index 0000000..2d8b915
--- /dev/null
+++ b/tests/data/text_order_expected_diff
@@ -0,0 +1,11 @@
+@@ -1,7 +1,7 @@
+ These
+-lines
+-follow
+-in
+ some
+ order
++follow
++in
++lines
+ .

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list