[diffoscope] 01/03: html-dir: In html-dir output, split large diffs across several child pages
Ximin Luo
infinity0 at debian.org
Thu Sep 8 16:42:15 CEST 2016
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch master
in repository diffoscope.
commit 9d804217cd2eadea32fbfe3e6e2874c8a5061493
Author: Ximin Luo <infinity0 at debian.org>
Date: Wed Aug 24 20:47:46 2016 +0200
html-dir: In html-dir output, split large diffs across several child pages
Also add some flags so html-dir behaviour is easier to reason about.
1. Previously, max-report-size would apply to both parent and child pages,
which didn't work well if you disabled this limit.
Instead, we now add an extra --max-report-child-size that remains in effect
even with --no-default-limits.
2. Previously, separate-file-diff-size affected the parent page in a
discontinuous way: if a diff block had N lines, the parent page would contain
all N lines, but if the diff block had N+1 lines, then 0 lines would be shown
on the parent page. This gave a visually counterintuitive result where a larger
diff would have less presence on the parent overview page.
Instead, we now change this to --max-diff-block-lines-parent which has a
smoother behaviour. If the diff block has N+1 lines, then N would be shown on
the parent page and 1 would be shown on the child page. This could be smoothed
out further, but we'll leave this complexity for the future.
---
diffoscope/config.py | 30 +++--
diffoscope/main.py | 47 +++++---
diffoscope/presenters/html.py | 268 ++++++++++++++++++++++++++++++++----------
3 files changed, 259 insertions(+), 86 deletions(-)
diff --git a/diffoscope/config.py b/diffoscope/config.py
index 555cc2b..dbe721f 100644
--- a/diffoscope/config.py
+++ b/diffoscope/config.py
@@ -27,10 +27,11 @@ class classproperty(property):
class Config(object):
def __init__(self):
- self._max_diff_block_lines = 50
+ self._max_diff_block_lines = 1024
+ self._max_diff_block_lines_parent = 50
self._max_diff_input_lines = 2 ** 20 # GNU diff cannot process arbitrary large files :(
self._max_report_size = 2000 * 2 ** 10 # 2000 kB
- self._separate_file_diff_size = 200 * 2 ** 10 # 200kB
+ self._max_report_child_size = 500 * 2 ** 10
self._fuzzy_threshold = 60
self._new_file = False
@@ -40,6 +41,11 @@ class Config(object):
cls._general_config = Config()
return cls._general_config
+ def _check_constraints(self):
+ if self._max_diff_block_lines < self._max_diff_block_lines_parent:
+ raise ValueError("max_diff_block_lines (%s) cannot be smaller than max_diff_block_lines_parent (%s)" %
+ (self._max_diff_block_lines, self._max_diff_block_lines_parent))
+
@property
def max_diff_block_lines(self):
return self._max_diff_block_lines
@@ -47,6 +53,16 @@ class Config(object):
@max_diff_block_lines.setter
def max_diff_block_lines(self, value):
self._max_diff_block_lines = value
+ self._check_constraints()
+
+ @property
+ def max_diff_block_lines_parent(self):
+ return self._max_diff_block_lines_parent
+
+ @max_diff_block_lines_parent.setter
+ def max_diff_block_lines_parent(self, value):
+ self._max_diff_block_lines_parent = value
+ self._check_constraints()
@property
def max_diff_input_lines(self):
@@ -65,12 +81,12 @@ class Config(object):
self._max_report_size = value
@property
- def separate_file_diff_size(self):
- return self._separate_file_diff_size
+ def max_report_child_size(self):
+ return self._max_report_child_size
- @separate_file_diff_size.setter
- def separate_file_diff_size(self, value):
- self._separate_file_diff_size = value
+ @max_report_child_size.setter
+ def max_report_child_size(self, value):
+ self._max_report_child_size = value
@property
def fuzzy_threshold(self):
diff --git a/diffoscope/main.py b/diffoscope/main.py
index bfe3efb..113fdea 100644
--- a/diffoscope/main.py
+++ b/diffoscope/main.py
@@ -67,32 +67,49 @@ def create_parser():
parser.add_argument('--text', metavar='output', dest='text_output',
help='write plain text output to given file (use - for stdout)')
parser.add_argument('--no-default-limits', action='store_true', default=False,
- help='Disable all default limits.')
+ help='Disable most default limits. Note that text '
+ 'output already ignores most of these.')
parser.add_argument('--max-report-size', metavar='BYTES',
dest='max_report_size', type=int,
- help='maximum bytes written in report (default: %d, 0 to disable)' %
+ help='Maximum bytes written in report. In html-dir '
+ 'output, this is the max bytes of the parent page. '
+ '(0 to disable, default: %d)' %
Config.general.max_report_size,
default=None).completer=RangeCompleter(0,
Config.general.max_report_size, 200000)
- parser.add_argument('--separate-file-diff-size', metavar='BYTES',
- dest='separate_file_diff_size', type=int,
- help='diff size to load diff on demand, with --html-dir (default: %(default)s)',
- default=Config.general.separate_file_diff_size).completer=RangeCompleter(0,
- Config.general.separate_file_diff_size, 20000)
- parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines', type=int,
- help='maximum number of lines per diff block (default: %d, 0 to disable)' %
+ parser.add_argument('--max-report-child-size', metavar='BYTES',
+ dest='max_report_child_size', type=int,
+ help='In html-dir output, this is the max bytes of '
+ 'each child page. (0 to disable, default: %(default)s, '
+ 'remaining in effect even with --no-default-limits)',
+ default=Config.general.max_report_child_size).completer=RangeCompleter(0,
+ Config.general.max_report_child_size, 50000)
+ parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines',
+ metavar='LINES', type=int,
+ help='Maximum number of lines output per diff block, '
+ 'across the whole report. (0 to disable, default: %d)' %
Config.general.max_diff_block_lines,
default=None).completer=RangeCompleter(0,
Config.general.max_diff_block_lines, 5)
- parser.add_argument('--max-diff-input-lines', dest='max_diff_input_lines', type=int,
- help='maximum number of lines fed to diff (default: %d, 0 to disable)' %
+ parser.add_argument('--max-diff-block-lines-parent', dest='max_diff_block_lines_parent',
+ metavar='LINES', type=int,
+ help='In --html-dir output, this is maximum number of '
+ 'lines output per diff block on the parent page, '
+ 'before spilling it into child pages. (0 to disable, '
+ 'default: %(default)s, remaining in effect even with '
+ '--no-default-limits)',
+ default=Config.general.max_diff_block_lines_parent).completer=RangeCompleter(0,
+ Config.general.max_diff_block_lines_parent, 200)
+ parser.add_argument('--max-diff-input-lines', dest='max_diff_input_lines',
+ metavar='LINES', type=int,
+ help='Maximum number of lines fed to diff(1). '
+ '(0 to disable, default: %d)' %
Config.general.max_diff_input_lines,
default=None).completer=RangeCompleter(0,
Config.general.max_diff_input_lines, 5000)
parser.add_argument('--fuzzy-threshold', dest='fuzzy_threshold', type=int,
help='threshold for fuzzy-matching '
- '(0 to disable, %d is default, 400 is high fuzziness)' %
- (Config.general.fuzzy_threshold),
+ '(0 to disable, %(default)s is default, 400 is high fuzziness)',
default=Config.general.fuzzy_threshold).completer=RangeCompleter(0,
400, 20)
parser.add_argument('--new-file', dest='new_file', action='store_true',
@@ -174,7 +191,9 @@ def run_diffoscope(parsed_args):
if not tlsh and Config.general.fuzzy_threshold != parsed_args.fuzzy_threshold:
logger.warning('Fuzzy-matching is currently disabled as the “tlsh” module is unavailable.')
maybe_set_limit(Config.general, parsed_args, "max_report_size")
- Config.general.separate_file_diff_size = parsed_args.separate_file_diff_size
+ maybe_set_limit(Config.general, parsed_args, "max_report_child_size")
+ # need to set them in this order due to Config._check_constraints
+ maybe_set_limit(Config.general, parsed_args, "max_diff_block_lines_parent")
maybe_set_limit(Config.general, parsed_args, "max_diff_block_lines")
maybe_set_limit(Config.general, parsed_args, "max_diff_input_lines")
Config.general.fuzzy_threshold = parsed_args.fuzzy_threshold
diff --git a/diffoscope/presenters/html.py b/diffoscope/presenters/html.py
index 2c1f1d6..fbc0a48 100644
--- a/diffoscope/presenters/html.py
+++ b/diffoscope/presenters/html.py
@@ -147,8 +147,14 @@ HEADER = """<!DOCTYPE html>
.diffheader:hover .anchor {
display: inline;
}
- .ondemand {
+ table.diff tr.ondemand td {
+ background: #f99;
text-align: center;
+ padding: 0.5em 0;
+ }
+ table.diff tr.ondemand:hover td {
+ background: #faa;
+ cursor: pointer;
}
</style>
%(css_link)s
@@ -166,23 +172,47 @@ SCRIPTS = """
<script src="%(jquery_url)s"></script>
<script type="text/javascript">
$(function() {
- $("div.ondemand a").on('click', function (){
- var filename = $(this).attr('href');
- var div = $(this).parent();
- div.text('... loading ...');
- div.load(filename + " table", function() {
+ var load_cont = function() {
+ var a = $(this).find("a");
+ var textparts = /^(.*)\((\d+) pieces?(.*)\)$/.exec(a.text());
+ var numleft = Number.parseInt(textparts[2]) - 1;
+ var noun = numleft == 1 ? "piece" : "pieces";
+ var newtext = textparts[1] + "(" + numleft + " " + noun + textparts[3] + ")";
+ var filename = a.attr('href');
+ var td = a.parent();
+ td.text('... loading ...');
+ td.parent().load(filename + " tr", function() {
// http://stackoverflow.com/a/8452751/946226
- $(this).children(':first').unwrap();
+ var elems = $(this).children(':first').unwrap();
+ // set this behaviour for the next link too
+ var td = elems.parent().find(".ondemand td");
+ td.find("a").text(newtext);
+ td.on('click', load_cont);
});
return false;
- });
+ };
+ $(".ondemand td").on('click', load_cont);
});
</script>
"""
+UD_TABLE_HEADER = u"""<table class="diff">
+<colgroup><col style="width: 3em;"/><col style="99%"/>
+<col style="width: 3em;"/><col style="99%"/></colgroup>
+"""
+
+UD_TABLE_FOOTER = u"""<tr class="ondemand"><td colspan="4">
+... <a href="%(filename)s">%(text)s</a> ...
+</td></tr>
+</table>
+"""
+
class PrintLimitReached(Exception):
pass
+class DiffBlockLimitReached(Exception):
+ pass
+
def create_limited_print_func(print_func, max_page_size):
def limited_print_func(s, force=False):
@@ -195,10 +225,24 @@ def create_limited_print_func(print_func, max_page_size):
return limited_print_func
-buf = []
-add_cpt, del_cpt = 0, 0
+buf, add_cpt, del_cpt = [], 0, 0
line1, line2, has_internal_linenos = 0, 0, True
hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+spl_rows, spl_current_page = 0, 0
+spl_print_func, spl_print_ctrl = None, None
+
+
+def new_unified_diff():
+ global buf, add_cpt, del_cpt
+ global line1, line2, has_internal_linenos
+ global hunk_off1, hunk_size1, hunk_off2, hunk_size2
+ global spl_rows, spl_current_page
+ global spl_print_func, spl_print_ctrl
+ buf, add_cpt, del_cpt = [], 0, 0
+ line1, line2, has_internal_linenos = 0, 0, True
+ hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+ spl_rows, spl_current_page = 0, 0
+ spl_print_func, spl_print_ctrl = None, None
def sane(x):
@@ -315,12 +359,13 @@ def convert(s, ponct=0, tag=''):
return t.getvalue()
-def output_hunk(print_func):
- print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
- print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+def output_hunk():
+ spl_print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
+ spl_print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+ row_was_output()
-def output_line(print_func, s1, s2):
+def output_line(s1, s2):
global line1, line2, has_internal_linenos
orig1 = s1
@@ -345,31 +390,32 @@ def output_line(print_func, s1, s2):
type_name = "changed"
s1, s2 = linediff(s1, s2)
- print_func(u'<tr class="diff%s">' % type_name)
+ spl_print_func(u'<tr class="diff%s">' % type_name)
try:
if s1:
if has_internal_linenos:
- print_func(u'<td colspan="2" class="diffpresent">')
+ spl_print_func(u'<td colspan="2" class="diffpresent">')
else:
- print_func(u'<td class="diffline">%d </td>' % line1)
- print_func(u'<td class="diffpresent">')
- print_func(convert(s1, ponct=1, tag='del'))
- print_func(u'</td>')
+ spl_print_func(u'<td class="diffline">%d </td>' % line1)
+ spl_print_func(u'<td class="diffpresent">')
+ spl_print_func(convert(s1, ponct=1, tag='del'))
+ spl_print_func(u'</td>')
else:
- print_func(u'<td colspan="2">\xa0</td>')
+ spl_print_func(u'<td colspan="2">\xa0</td>')
if s2:
if has_internal_linenos:
- print_func(u'<td colspan="2" class="diffpresent">')
+ spl_print_func(u'<td colspan="2" class="diffpresent">')
else:
- print_func(u'<td class="diffline">%d </td>' % line2)
- print_func(u'<td class="diffpresent">')
- print_func(convert(s2, ponct=1, tag='ins'))
- print_func(u'</td>')
+ spl_print_func(u'<td class="diffline">%d </td>' % line2)
+ spl_print_func(u'<td class="diffpresent">')
+ spl_print_func(convert(s2, ponct=1, tag='ins'))
+ spl_print_func(u'</td>')
else:
- print_func(u'<td colspan="2">\xa0</td>')
+ spl_print_func(u'<td colspan="2">\xa0</td>')
finally:
- print_func(u"</tr>\n", force=True)
+ spl_print_func(u"</tr>\n", force=True)
+ row_was_output()
m = orig1 and re.match(r"^\[ (\d+) lines removed \]$", orig1)
if m:
@@ -383,14 +429,14 @@ def output_line(print_func, s1, s2):
line2 += 1
-def empty_buffer(print_func):
+def empty_buffer():
global buf
global add_cpt
global del_cpt
if del_cpt == 0 or add_cpt == 0:
for l in buf:
- output_line(print_func, l[0], l[1])
+ output_line(l[0], l[1])
elif del_cpt != 0 and add_cpt != 0:
l0, l1 = [], []
@@ -406,45 +452,116 @@ def empty_buffer(print_func):
s0 = l0[i]
if i < len(l1):
s1 = l1[i]
- output_line(print_func, s0, s1)
+ output_line(s0, s1)
add_cpt, del_cpt = 0, 0
buf = []
-def output_unified_diff_table(print_func, unified_diff, _has_internal_linenos):
+def spl_print_enter(print_context, rotation_params):
+ # Takes ownership of print_context
+ global spl_print_func, spl_print_ctrl
+ spl_print_ctrl = print_context.__exit__, rotation_params
+ spl_print_func = print_context.__enter__()
+ _, _, css_url = rotation_params
+ # Print file and table headers
+ output_header(css_url, spl_print_func)
+
+def spl_had_entered_child():
+ global spl_print_ctrl, spl_current_page
+ return spl_print_ctrl and spl_print_ctrl[1] and spl_current_page > 0
+
+def spl_print_exit(*exc_info):
+ global spl_print_func, spl_print_ctrl
+ if not spl_had_entered_child(): return False
+ output_footer(spl_print_func)
+ _exit, _ = spl_print_ctrl
+ spl_print_func, spl_print_ctrl = None, None
+ return _exit(*exc_info)
+
+@contextlib.contextmanager
+def spl_file_printer(directory, filename):
+ with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
+ print_func = f.write
+ def recording_print_func(s, force=False):
+ print_func(s)
+ recording_print_func.bytes_written += len(s)
+ recording_print_func.bytes_written = 0
+ yield recording_print_func
+
+def row_was_output():
+ global spl_print_func, spl_print_ctrl, spl_rows, spl_current_page
+ spl_rows += 1
+ _, rotation_params = spl_print_ctrl
+ max_lines = Config.general.max_diff_block_lines
+ max_lines_parent = Config.general.max_diff_block_lines_parent
+ max_report_child_size = Config.general.max_report_child_size
+ if not rotation_params:
+ # plain html (single-file) output, not html-dir; don't need to rotate
+ if spl_rows >= max_lines:
+ raise DiffBlockLimitReached()
+ return
+ else:
+ # html-dir output, perhaps need to rotate
+ directory, mainname, css_url = rotation_params
+ if spl_rows >= max_lines:
+ raise DiffBlockLimitReached()
+
+ if spl_current_page == 0: # on parent page
+ if spl_rows < max_lines_parent:
+ return
+ else: # on child page
+ # TODO: make this stay below the max, instead of going 1 row over the max
+ # will require some backtracking...
+ if spl_print_func.bytes_written < max_report_child_size:
+ return
+
+ spl_current_page += 1
+ filename = "%s-%s.html" % (mainname, spl_current_page)
+
+ if spl_current_page > 1:
+ # previous page was a child, close it
+ spl_print_func(UD_TABLE_FOOTER % {"filename": html.escape(filename), "text": "load diff"}, force=True)
+ spl_print_exit(None, None, None)
+
+ # rotate to the next child page
+ context = spl_file_printer(directory, filename)
+ spl_print_enter(context, rotation_params)
+ spl_print_func(UD_TABLE_HEADER)
+
+
+def output_unified_diff_table(unified_diff, _has_internal_linenos):
global add_cpt, del_cpt
global line1, line2, has_internal_linenos
global hunk_off1, hunk_size1, hunk_off2, hunk_size2
has_internal_linenos = _has_internal_linenos
- print_func(u'<table class="diff">\n')
+ spl_print_func(UD_TABLE_HEADER)
try:
- print_func(u'<colgroup><col style="width: 3em;"/><col style="99%"/>\n')
- print_func(u'<col style="width: 3em;"/><col style="99%"/></colgroup>\n')
-
+ bytes_processed = 0
for l in unified_diff.splitlines():
+ bytes_processed += len(l) + 1
m = re.match(r'^--- ([^\s]*)', l)
if m:
- empty_buffer(print_func)
+ empty_buffer()
continue
m = re.match(r'^\+\+\+ ([^\s]*)', l)
if m:
- empty_buffer(print_func)
+ empty_buffer()
continue
m = re.match(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*)", l)
if m:
- empty_buffer(print_func)
+ empty_buffer()
hunk_data = map(lambda x:x=="" and 1 or int(x), m.groups())
hunk_off1, hunk_size1, hunk_off2, hunk_size2 = hunk_data
line1, line2 = hunk_off1, hunk_off2
- output_hunk(print_func)
+ output_hunk()
continue
if re.match(r'^\[', l):
- empty_buffer(print_func)
- print_func(u'<td colspan="2">%s</td>\n' % l)
+ empty_buffer()
+ spl_print_func(u'<td colspan="2">%s</td>\n' % l)
if re.match(r"^\\ No newline", l):
if hunk_size2 == 0:
@@ -454,7 +571,7 @@ def output_unified_diff_table(print_func, unified_diff, _has_internal_linenos):
continue
if hunk_size1 <= 0 and hunk_size2 <= 0:
- empty_buffer(print_func)
+ empty_buffer()
continue
m = re.match(r"^\+\[ (\d+) lines removed \]$", l)
@@ -484,34 +601,57 @@ def output_unified_diff_table(print_func, unified_diff, _has_internal_linenos):
continue
if re.match(r"^ ", l) and hunk_size1 and hunk_size2:
- empty_buffer(print_func)
+ empty_buffer()
hunk_size1 -= 1
hunk_size2 -= 1
buf.append((l[1:], l[1:]))
continue
- empty_buffer(print_func)
-
- empty_buffer(print_func)
+ empty_buffer()
+
+ empty_buffer()
+ return True
+ except DiffBlockLimitReached:
+ total = len(unified_diff)
+ bytes_left = total - bytes_processed
+ frac = bytes_left / total
+ spl_print_func(
+ u"<tr class='error'>"
+ u"<td colspan='4'>Max diff block lines reached; %s/%s bytes (%.2f%%) of diff not shown."
+ u"</td></tr>" % (bytes_left, total, frac*100), force=True)
+ return False
+ except PrintLimitReached:
+ assert not spl_had_entered_child() # limit reached on the parent page
+ spl_print_func(u"<tr class='error'><td colspan='4'>Max output size reached.</td></tr>", force=True)
+ raise
finally:
- print_func(u"</table>", force=True)
+ spl_print_func(u"</table>", force=True)
-def output_unified_diff(print_func, css_url, directory, unified_diff, has_internal_linenos):
- if directory and len(unified_diff) > Config.general.separate_file_diff_size:
- # open a new file for this table
- filename="%s.html" % hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
- logger.debug('separate html output for diff of size %d', len(unified_diff))
- with file_printer(directory, filename) as new_print_func:
- output_header(css_url, new_print_func)
- output_unified_diff_table(new_print_func, unified_diff, has_internal_linenos)
- output_footer(new_print_func)
-
- print_func("<div class='ondemand'>\n")
- print_func("... <a href='%s'>load diff</a> ...\n" % html.escape(filename))
- print_func("</div>\n")
+def output_unified_diff(print_func, css_url, directory, unified_diff, has_internal_linenos):
+ global spl_print_func, spl_print_ctrl, spl_current_page
+ new_unified_diff()
+ rotation_params = None
+ if directory:
+ mainname = hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+ rotation_params = directory, mainname, css_url
+ try:
+ spl_print_func = print_func
+ spl_print_ctrl = None, rotation_params
+ truncated = not output_unified_diff_table(unified_diff, has_internal_linenos)
+ except:
+ if not spl_print_exit(*sys.exc_info()): raise
else:
- output_unified_diff_table(print_func, unified_diff, has_internal_linenos)
+ spl_print_exit(None, None, None)
+ finally:
+ spl_print_ctrl = None
+ spl_print_func = None
+
+ if spl_current_page > 0:
+ noun = "pieces" if spl_current_page > 1 else "piece"
+ text = "load diff (%s %s%s)" % (spl_current_page, noun, (", truncated" if truncated else ""))
+ print_func(UD_TABLE_FOOTER % {"filename": html.escape("%s-1.html" % mainname), "text": text}, force=True)
+
def output_difference(difference, print_func, css_url, directory, parents):
logger.debug('html output for %s', difference.source1)
@@ -578,9 +718,7 @@ def output_html(difference, css_url=None, print_func=None):
@contextlib.contextmanager
def file_printer(directory, filename):
with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
- print_func = f.write
- print_func = create_limited_print_func(print_func, Config.general.max_report_size)
- yield print_func
+ yield f.write
JQUERY_SYSTEM_LOCATIONS = ['/usr/share/javascript/jquery/jquery.js']
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list