[diffoscope] 01/01: Move ydiff/linediff from diffoscope.{difference => diff} to group unified_diff related things together
Ximin Luo
infinity0 at debian.org
Fri Jun 16 17:58:46 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch experimental
in repository diffoscope.
commit 56fcdec5c6948dc5fe91c36d2dff731da39fd610
Author: Ximin Luo <infinity0 at debian.org>
Date: Fri Jun 16 17:55:39 2017 +0200
Move ydiff/linediff from diffoscope.{difference => diff} to group unified_diff related things together
---
diffoscope/diff.py | 252 ++++++++++++++++++++++++++++++++++++
diffoscope/difference.py | 255 -------------------------------------
diffoscope/presenters/html/html.py | 2 +-
3 files changed, 253 insertions(+), 256 deletions(-)
diff --git a/diffoscope/diff.py b/diffoscope/diff.py
index 99b5230..6710361 100644
--- a/diffoscope/diff.py
+++ b/diffoscope/diff.py
@@ -289,3 +289,255 @@ def color_unified_diff(diff):
}[m.group(1)], m.group(0), RESET)
return re_diff_change.sub(repl, diff)
+
+DIFFON = "\x01"
+DIFFOFF = "\x02"
+
+def _linediff_sane(x):
+ r = ""
+ for i in x:
+ j = ord(i)
+ if i not in ['\t', '\n'] and (j < 32):
+ r = r + "."
+ else:
+ r = r + i
+ return r
+
+def linediff(s, t, diffon, diffoff):
+ '''
+ Original line diff algorithm of diff2html. It's character based.
+ '''
+ if len(s):
+ s = ''.join([ _linediff_sane(c) for c in s ])
+ if len(t):
+ t = ''.join([ _linediff_sane(c) for c in t ])
+
+ m, n = len(s), len(t)
+ d = [[(0, 0) for i in range(n+1)] for i in range(m+1)]
+
+
+ d[0][0] = (0, (0, 0))
+ for i in range(m+1)[1:]:
+ d[i][0] = (i,(i-1, 0))
+ for j in range(n+1)[1:]:
+ d[0][j] = (j,(0, j-1))
+
+ for i in range(m+1)[1:]:
+ for j in range(n+1)[1:]:
+ if s[i-1] == t[j-1]:
+ cost = 0
+ else:
+ cost = 1
+ d[i][j] = min((d[i-1][j][0] + 1, (i-1, j)),
+ (d[i][j-1][0] + 1, (i, j-1)),
+ (d[i-1][j-1][0] + cost, (i-1, j-1)))
+
+ l = []
+ coord = (m, n)
+ while coord != (0, 0):
+ l.insert(0, coord)
+ x, y = coord
+ coord = d[x][y][1]
+
+ l1 = []
+ l2 = []
+
+ for coord in l:
+ cx, cy = coord
+ child_val = d[cx][cy][0]
+
+ father_coord = d[cx][cy][1]
+ fx, fy = father_coord
+ father_val = d[fx][fy][0]
+
+ diff = (cx-fx, cy-fy)
+
+ if diff == (0, 1):
+ l1.append("")
+ l2.append(diffon + t[fy] + diffoff)
+ elif diff == (1, 0):
+ l1.append(diffon + s[fx] + diffoff)
+ l2.append("")
+ elif child_val-father_val == 1:
+ l1.append(diffon + s[fx] + diffoff)
+ l2.append(diffon + t[fy] + diffoff)
+ else:
+ l1.append(s[fx])
+ l2.append(t[fy])
+
+ return ''.join(l1).replace(diffoff + diffon, ''), ''.join(l2).replace(diffoff + diffon, '')
+
+
+class SideBySideDiff(object):
+ """Calculates a side-by-side diff from a unified diff."""
+
+ def __init__(self, unified_diff, diffon=DIFFON, diffoff=DIFFOFF):
+ self.unified_diff = unified_diff
+ self.diffon = diffon
+ self.diffoff = diffoff
+ self.reset()
+
+ def reset(self):
+ self.buf = []
+ self.add_cpt = 0
+ self.del_cpt = 0
+ self.line1 = 0
+ self.line2 = 0
+ self.hunk_off1 = 0
+ self.hunk_size1 = 0
+ self.hunk_off2 = 0
+ self.hunk_size2 = 0
+ self._bytes_processed = 0
+
+ @property
+ def bytes_processed(self):
+ return self._bytes_processed
+
+ def empty_buffer(self):
+ if self.del_cpt == 0 or self.add_cpt == 0:
+ for l in self.buf:
+ yield from self.yield_line(l[0], l[1])
+
+ elif self.del_cpt != 0 and self.add_cpt != 0:
+ l0, l1 = [], []
+ for l in self.buf:
+ if l[0] != None:
+ l0.append(l[0])
+ if l[1] != None:
+ l1.append(l[1])
+ max_len = (len(l0) > len(l1)) and len(l0) or len(l1)
+ for i in range(max_len):
+ s0, s1 = "", ""
+ if i < len(l0):
+ s0 = l0[i]
+ if i < len(l1):
+ s1 = l1[i]
+ yield from self.yield_line(s0, s1)
+
+ def yield_line(self, s1, s2):
+ orig1 = s1
+ orig2 = s2
+
+ if s1 == None and s2 == None:
+ type_name = "unmodified"
+ elif s1 == "" and s2 == "":
+ type_name = "unmodified"
+ elif s1 == None or s1 == "":
+ type_name = "added"
+ elif s2 == None or s2 == "":
+ type_name = "deleted"
+ elif orig1 == orig2 and not s1.endswith('lines removed ]') and not s2.endswith('lines removed ]'):
+ type_name = "unmodified"
+ else:
+ type_name = "changed"
+ s1, s2 = linediff(s1, s2, self.diffon, self.diffoff)
+
+ yield "L", (type_name, s1, self.line1, s2, self.line2)
+
+ m = orig1 and re.match(r"^\[ (\d+) lines removed \]$", orig1)
+ if m:
+ self.line1 += int(m.group(1))
+ elif orig1:
+ self.line1 += 1
+ m = orig2 and re.match(r"^\[ (\d+) lines removed \]$", orig2)
+ if m:
+ self.line2 += int(m.group(1))
+ elif orig2:
+ self.line2 += 1
+
+ self.add_cpt = 0
+ self.del_cpt = 0
+ self.buf = []
+
+ def items(self):
+ """Yield the items that form the side-by-side diff.
+
+ Each item is a (type, value) tuple, as follows:
+
+ type == "H", value is a tuple representing a hunk header
+ hunk_offset1, hunk_size1, hunk_offset2, hunk_size2 = value
+ all ints
+
+ type == "L", value is a tuple representing a line of a hunk
+ mode, line1, lineno1, line2, lineno2 = value
+ where mode is one of {"unmodified", "added", "deleted", "changed"}
+ line* are strings
+ lineno* are ints
+
+ type == "C", value is a comment
+ comment = value
+ a string
+ """
+ self.reset()
+
+ for l in self.unified_diff.splitlines():
+ self._bytes_processed += len(l) + 1
+ m = re.match(r'^--- ([^\s]*)', l)
+ if m:
+ yield from self.empty_buffer()
+ continue
+ m = re.match(r'^\+\+\+ ([^\s]*)', l)
+ if m:
+ yield from self.empty_buffer()
+ continue
+
+ m = re.match(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*)", l)
+ if m:
+ yield from self.empty_buffer()
+ hunk_data = map(lambda x:x=="" and 1 or int(x), m.groups())
+ self.hunk_off1, self.hunk_size1, self.hunk_off2, self.hunk_size2 = hunk_data
+ self.line1, self.line2 = self.hunk_off1, self.hunk_off2
+ yield "H", (self.hunk_off1, self.hunk_size1, self.hunk_off2, self.hunk_size2)
+ continue
+
+ if re.match(r'^\[', l):
+ yield from self.empty_buffer()
+ yield "C", l
+
+ if re.match(r"^\\ No newline", l):
+ if self.hunk_size2 == 0:
+ self.buf[-1] = (self.buf[-1][0], self.buf[-1][1] + '\n' + l[2:])
+ else:
+ self.buf[-1] = (buf[-1][0] + '\n' + l[2:], self.buf[-1][1])
+ continue
+
+ if self.hunk_size1 <= 0 and self.hunk_size2 <= 0:
+ yield from self.empty_buffer()
+ continue
+
+ m = re.match(r"^\+\[ (\d+) lines removed \]$", l)
+ if m:
+ self.add_cpt += int(m.group(1))
+ self.hunk_size2 -= int(m.group(1))
+ self.buf.append((None, l[1:]))
+ continue
+
+ if re.match(r"^\+", l):
+ self.add_cpt += 1
+ self.hunk_size2 -= 1
+ self.buf.append((None, l[1:]))
+ continue
+
+ m = re.match(r"^-\[ (\d+) lines removed \]$", l)
+ if m:
+ self.del_cpt += int(m.group(1))
+ self.hunk_size1 -= int(m.group(1))
+ self.buf.append((l[1:], None))
+ continue
+
+ if re.match(r"^-", l):
+ self.del_cpt += 1
+ self.hunk_size1 -= 1
+ self.buf.append((l[1:], None))
+ continue
+
+ if re.match(r"^ ", l) and self.hunk_size1 and self.hunk_size2:
+ yield from self.empty_buffer()
+ self.hunk_size1 -= 1
+ self.hunk_size2 -= 1
+ self.buf.append((l[1:], l[1:]))
+ continue
+
+ yield from self.empty_buffer()
+
+ yield from self.empty_buffer()
diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index ce5b1e9..54cb686 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -17,9 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
-import hashlib
import heapq
-import re
import logging
from . import feeders
@@ -325,256 +323,3 @@ class VisualDifference(object):
def size(self):
return len(self.data_type) + len(self.content) + len(self.source)
-
-
-DIFFON = "\x01"
-DIFFOFF = "\x02"
-
-def _linediff_sane(x):
- r = ""
- for i in x:
- j = ord(i)
- if i not in ['\t', '\n'] and (j < 32):
- r = r + "."
- else:
- r = r + i
- return r
-
-def linediff(s, t, diffon, diffoff):
- '''
- Original line diff algorithm of diff2html. It's character based.
- '''
- if len(s):
- s = ''.join([ _linediff_sane(c) for c in s ])
- if len(t):
- t = ''.join([ _linediff_sane(c) for c in t ])
-
- m, n = len(s), len(t)
- d = [[(0, 0) for i in range(n+1)] for i in range(m+1)]
-
-
- d[0][0] = (0, (0, 0))
- for i in range(m+1)[1:]:
- d[i][0] = (i,(i-1, 0))
- for j in range(n+1)[1:]:
- d[0][j] = (j,(0, j-1))
-
- for i in range(m+1)[1:]:
- for j in range(n+1)[1:]:
- if s[i-1] == t[j-1]:
- cost = 0
- else:
- cost = 1
- d[i][j] = min((d[i-1][j][0] + 1, (i-1, j)),
- (d[i][j-1][0] + 1, (i, j-1)),
- (d[i-1][j-1][0] + cost, (i-1, j-1)))
-
- l = []
- coord = (m, n)
- while coord != (0, 0):
- l.insert(0, coord)
- x, y = coord
- coord = d[x][y][1]
-
- l1 = []
- l2 = []
-
- for coord in l:
- cx, cy = coord
- child_val = d[cx][cy][0]
-
- father_coord = d[cx][cy][1]
- fx, fy = father_coord
- father_val = d[fx][fy][0]
-
- diff = (cx-fx, cy-fy)
-
- if diff == (0, 1):
- l1.append("")
- l2.append(diffon + t[fy] + diffoff)
- elif diff == (1, 0):
- l1.append(diffon + s[fx] + diffoff)
- l2.append("")
- elif child_val-father_val == 1:
- l1.append(diffon + s[fx] + diffoff)
- l2.append(diffon + t[fy] + diffoff)
- else:
- l1.append(s[fx])
- l2.append(t[fy])
-
- return ''.join(l1).replace(diffoff + diffon, ''), ''.join(l2).replace(diffoff + diffon, '')
-
-
-class SideBySideDiff(object):
- """Calculates a side-by-side diff from a unified diff."""
-
- def __init__(self, unified_diff, diffon=DIFFON, diffoff=DIFFOFF):
- self.unified_diff = unified_diff
- self.diffon = diffon
- self.diffoff = diffoff
- self.reset()
-
- def reset(self):
- self.buf = []
- self.add_cpt = 0
- self.del_cpt = 0
- self.line1 = 0
- self.line2 = 0
- self.hunk_off1 = 0
- self.hunk_size1 = 0
- self.hunk_off2 = 0
- self.hunk_size2 = 0
- self._bytes_processed = 0
-
- @property
- def bytes_processed(self):
- return self._bytes_processed
-
- def empty_buffer(self):
- if self.del_cpt == 0 or self.add_cpt == 0:
- for l in self.buf:
- yield from self.yield_line(l[0], l[1])
-
- elif self.del_cpt != 0 and self.add_cpt != 0:
- l0, l1 = [], []
- for l in self.buf:
- if l[0] != None:
- l0.append(l[0])
- if l[1] != None:
- l1.append(l[1])
- max_len = (len(l0) > len(l1)) and len(l0) or len(l1)
- for i in range(max_len):
- s0, s1 = "", ""
- if i < len(l0):
- s0 = l0[i]
- if i < len(l1):
- s1 = l1[i]
- yield from self.yield_line(s0, s1)
-
- def yield_line(self, s1, s2):
- orig1 = s1
- orig2 = s2
-
- if s1 == None and s2 == None:
- type_name = "unmodified"
- elif s1 == "" and s2 == "":
- type_name = "unmodified"
- elif s1 == None or s1 == "":
- type_name = "added"
- elif s2 == None or s2 == "":
- type_name = "deleted"
- elif orig1 == orig2 and not s1.endswith('lines removed ]') and not s2.endswith('lines removed ]'):
- type_name = "unmodified"
- else:
- type_name = "changed"
- s1, s2 = linediff(s1, s2, self.diffon, self.diffoff)
-
- yield "L", (type_name, s1, self.line1, s2, self.line2)
-
- m = orig1 and re.match(r"^\[ (\d+) lines removed \]$", orig1)
- if m:
- self.line1 += int(m.group(1))
- elif orig1:
- self.line1 += 1
- m = orig2 and re.match(r"^\[ (\d+) lines removed \]$", orig2)
- if m:
- self.line2 += int(m.group(1))
- elif orig2:
- self.line2 += 1
-
- self.add_cpt = 0
- self.del_cpt = 0
- self.buf = []
-
- def items(self):
- """Yield the items that form the side-by-side diff.
-
- Each item is a (type, value) tuple, as follows:
-
- type == "H", value is a tuple representing a hunk header
- hunk_offset1, hunk_size1, hunk_offset2, hunk_size2 = value
- all ints
-
- type == "L", value is a tuple representing a line of a hunk
- mode, line1, lineno1, line2, lineno2 = value
- where mode is one of {"unmodified", "added", "deleted", "changed"}
- line* are strings
- lineno* are ints
-
- type == "C", value is a comment
- comment = value
- a string
- """
- self.reset()
-
- for l in self.unified_diff.splitlines():
- self._bytes_processed += len(l) + 1
- m = re.match(r'^--- ([^\s]*)', l)
- if m:
- yield from self.empty_buffer()
- continue
- m = re.match(r'^\+\+\+ ([^\s]*)', l)
- if m:
- yield from self.empty_buffer()
- continue
-
- m = re.match(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*)", l)
- if m:
- yield from self.empty_buffer()
- hunk_data = map(lambda x:x=="" and 1 or int(x), m.groups())
- self.hunk_off1, self.hunk_size1, self.hunk_off2, self.hunk_size2 = hunk_data
- self.line1, self.line2 = self.hunk_off1, self.hunk_off2
- yield "H", (self.hunk_off1, self.hunk_size1, self.hunk_off2, self.hunk_size2)
- continue
-
- if re.match(r'^\[', l):
- yield from self.empty_buffer()
- yield "C", l
-
- if re.match(r"^\\ No newline", l):
- if self.hunk_size2 == 0:
- self.buf[-1] = (self.buf[-1][0], self.buf[-1][1] + '\n' + l[2:])
- else:
- self.buf[-1] = (buf[-1][0] + '\n' + l[2:], self.buf[-1][1])
- continue
-
- if self.hunk_size1 <= 0 and self.hunk_size2 <= 0:
- yield from self.empty_buffer()
- continue
-
- m = re.match(r"^\+\[ (\d+) lines removed \]$", l)
- if m:
- self.add_cpt += int(m.group(1))
- self.hunk_size2 -= int(m.group(1))
- self.buf.append((None, l[1:]))
- continue
-
- if re.match(r"^\+", l):
- self.add_cpt += 1
- self.hunk_size2 -= 1
- self.buf.append((None, l[1:]))
- continue
-
- m = re.match(r"^-\[ (\d+) lines removed \]$", l)
- if m:
- self.del_cpt += int(m.group(1))
- self.hunk_size1 -= int(m.group(1))
- self.buf.append((l[1:], None))
- continue
-
- if re.match(r"^-", l):
- self.del_cpt += 1
- self.hunk_size1 -= 1
- self.buf.append((l[1:], None))
- continue
-
- if re.match(r"^ ", l) and self.hunk_size1 and self.hunk_size2:
- yield from self.empty_buffer()
- self.hunk_size1 -= 1
- self.hunk_size2 -= 1
- self.buf.append((l[1:], l[1:]))
- continue
-
- yield from self.empty_buffer()
-
- yield from self.empty_buffer()
diff --git a/diffoscope/presenters/html/html.py b/diffoscope/presenters/html/html.py
index e7ad83c..bb847a7 100644
--- a/diffoscope/presenters/html/html.py
+++ b/diffoscope/presenters/html/html.py
@@ -43,7 +43,7 @@ import contextlib
from diffoscope import VERSION
from diffoscope.config import Config
-from diffoscope.difference import SideBySideDiff, DIFFON, DIFFOFF
+from diffoscope.diff import SideBySideDiff, DIFFON, DIFFOFF
from ..icon import FAVICON_BASE64
from ..utils import PrintLimitReached, DiffBlockLimitReached, \
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope mailing list