[diffoscope] 03/05: Add detection of order-only difference in plain text format. (Closes: #848049)
Chris Lamb
chris at chris-lamb.co.uk
Sat Dec 24 20:27:28 CET 2016
This is an automated email from the git hooks/post-receive script.
lamby pushed a commit to branch master
in repository diffoscope.
commit 0c6710bdfabfbdd892e45015a01fe3758882470d
Author: Maria Glukhova <siamezzze at gmail.com>
Date: Sat Dec 24 12:29:57 2016 +0200
Add detection of order-only difference in plain text format. (Closes: #848049)
Detect if the text files' contents differ only in line ordering, and give an appropriate comment.
Signed-off-by: Chris Lamb <lamby at debian.org>
---
diffoscope/comparators/text.py | 14 ++++++++++++++
tests/comparators/test_text.py | 8 ++++++++
tests/data/text_order1 | 7 +++++++
tests/data/text_order2 | 7 +++++++
tests/data/text_order_expected_diff | 11 +++++++++++
5 files changed, 47 insertions(+)
diff --git a/diffoscope/comparators/text.py b/diffoscope/comparators/text.py
index 909ff98..f7f423f 100644
--- a/diffoscope/comparators/text.py
+++ b/diffoscope/comparators/text.py
@@ -24,6 +24,17 @@ from diffoscope.difference import Difference
from diffoscope.comparators.binary import File
def order_only_difference(unified_diff):
    """Return True if a unified diff merely reorders lines.

    ``unified_diff`` is the text of a unified diff. Every line starting with
    ``+`` is treated as an added line and every line starting with ``-`` as a
    removed line; the marker character is stripped before comparison.

    The difference is considered order-only when the added and removed lines
    form the same multiset but do not occur in the same sequence.

    Returns False when ``unified_diff`` is empty or None, when there are no
    added/removed lines at all, and when the removed and added lines are
    identical in the same order (for example when the only change is the
    end-of-file newline), because nothing was reordered in those cases.
    """
    if not unified_diff:
        return False

    added_lines = []
    removed_lines = []
    for line in unified_diff.splitlines():
        if line.startswith('+'):
            added_lines.append(line[1:])
        elif line.startswith('-'):
            removed_lines.append(line[1:])

    # Cheap rejections before sorting: the counts must match, and the two
    # sequences must actually differ in order (this also covers the case
    # where both lists are empty).
    if len(added_lines) != len(removed_lines) or added_lines == removed_lines:
        return False

    # Sorting both sides compares them as multisets, so duplicated lines
    # must occur the same number of times on each side.
    return sorted(added_lines) == sorted(removed_lines)
+
+
class TextFile(File):
RE_FILE_TYPE = re.compile(r'\btext\b')
@@ -44,6 +55,9 @@ class TextFile(File):
with codecs.open(self.path, 'r', encoding=my_encoding) as my_content, \
codecs.open(other.path, 'r', encoding=other_encoding) as other_content:
difference = Difference.from_text_readers(my_content, other_content, self.name, other.name, source)
+ # Check if difference is only in line order.
+ if difference and order_only_difference(difference.unified_diff):
+ difference.add_comment("ordering differences only")
if my_encoding != other_encoding:
if difference is None:
difference = Difference(None, self.path, other.path, source)
diff --git a/tests/comparators/test_text.py b/tests/comparators/test_text.py
index 9892826..afa0716 100644
--- a/tests/comparators/test_text.py
+++ b/tests/comparators/test_text.py
@@ -65,3 +65,11 @@ def test_difference_between_iso88591_and_unicode_only(iso8859, tmpdir):
def test_compare_non_existing(monkeypatch, ascii1):
assert_non_existing(monkeypatch, ascii1, has_null_source=False, has_details=False)
+
+text_order1 = load_fixture(data('text_order1'))
+text_order2 = load_fixture(data('text_order2'))
+
def test_ordering_differences(text_order1, text_order2):
    """Compare the two ordering fixtures and check the comment and the diff.

    The comparison result must carry exactly the comment
    'ordering differences only', and its unified diff must equal the
    contents of the ``text_order_expected_diff`` data file.
    """
    difference = text_order1.compare(text_order2)
    assert difference.comments == ['ordering differences only']
    # Read the expected diff inside a context manager so the file handle is
    # closed deterministically instead of being left to garbage collection.
    with open(data('text_order_expected_diff')) as expected_file:
        expected_diff = expected_file.read()
    assert difference.unified_diff == expected_diff
diff --git a/tests/data/text_order1 b/tests/data/text_order1
new file mode 100644
index 0000000..9f85b81
--- /dev/null
+++ b/tests/data/text_order1
@@ -0,0 +1,7 @@
+These
+lines
+follow
+in
+some
+order
+.
diff --git a/tests/data/text_order2 b/tests/data/text_order2
new file mode 100644
index 0000000..7890b50
--- /dev/null
+++ b/tests/data/text_order2
@@ -0,0 +1,7 @@
+These
+some
+order
+follow
+in
+lines
+.
diff --git a/tests/data/text_order_expected_diff b/tests/data/text_order_expected_diff
new file mode 100644
index 0000000..2d8b915
--- /dev/null
+++ b/tests/data/text_order_expected_diff
@@ -0,0 +1,11 @@
+@@ -1,7 +1,7 @@
+ These
+-lines
+-follow
+-in
+ some
+ order
++follow
++in
++lines
+ .
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list