[diffoscope] 01/04: In split-html output, also split individual diffs that are too long
Ximin Luo
infinity0 at debian.org
Thu Aug 25 12:23:58 CEST 2016
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch better-lazy-loading
in repository diffoscope.
commit 5d38805bad35df7f06cb8a8dd0f4b8e764ae7e21
Author: Ximin Luo <infinity0 at debian.org>
Date: Wed Aug 24 20:47:46 2016 +0200
In split-html output, also split individual diffs that are too long
---
diffoscope/presenters/html.py | 218 +++++++++++++++++++++++++++++++-----------
1 file changed, 162 insertions(+), 56 deletions(-)
diff --git a/diffoscope/presenters/html.py b/diffoscope/presenters/html.py
index be8b145..0e887b6 100644
--- a/diffoscope/presenters/html.py
+++ b/diffoscope/presenters/html.py
@@ -168,20 +168,39 @@ SCRIPTS = """
<script src="%(jquery_url)s"></script>
<script type="text/javascript">
$(function() {
- $("div.ondemand a").on('click', function (){
- var filename = $(this).attr('href');
- var div = $(this).parent();
- div.text('... loading ...');
- div.load(filename + " table", function() {
+ var load_cont = function() {
+ var a = $(this);
+ var filename = a.attr('href');
+ var numleft = Number.parseInt(/\((\d+) pieces?\)/.exec(a.text())[1]) - 1
+ var td = a.parent();
+ td.text('... loading ...');
+ td.parent().load(filename + " tr", function() {
// http://stackoverflow.com/a/8452751/946226
- $(this).children(':first').unwrap();
+ var elems = $(this).children(':first').unwrap();
+ // set this behaviour for the next link too
+ var a = elems.parent().find(".ondemand a");
+ var noun = numleft > 1 ? "pieces" : "piece" // be sure the regex matches either
+ a.text(a.text() + " (" + numleft + " " + noun + ")");
+ a.on('click', load_cont);
});
return false;
- });
+ };
+ $(".ondemand a").on('click', load_cont);
});
</script>
"""
+UD_TABLE_HEADER = u"""<table class="diff">
+<colgroup><col style="width: 3em;"/><col style="99%"/>
+<col style="width: 3em;"/><col style="99%"/></colgroup>
+"""
+
+UD_TABLE_FOOTER = u"""<tr class="ondemand"><td colspan="4">
+... <a href="%(filename)s">%(text)s</a>
+</td></tr>
+</table>
+"""
+
class PrintLimitReached(Exception):
pass
@@ -197,10 +216,30 @@ def create_limited_print_func(print_func, max_page_size):
return limited_print_func
-buf = []
-add_cpt, del_cpt = 0, 0
+def estimate_num_rows_per_page(separate_file_diff_size):
+ # each row takes about 1200 bytes in the output, so roughly calculate
+ # the number of rows that 4 * separate_file_diff_size will hold
+ return separate_file_diff_size * 4 // 1200
+
+
+buf, add_cpt, del_cpt = [], 0, 0
line1, line2 = 0, 0
hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+spl_rows, spl_bytes, spl_current_page = 0, 0, 0
+spl_print_func, spl_print_ctrl = None, None
+
+
+def new_unified_diff():
+ global buf, add_cpt, del_cpt
+ global line1, line2, has_internal_linenos
+ global hunk_off1, hunk_size1, hunk_off2, hunk_size2
+ global spl_rows, spl_bytes, spl_current_page
+ global spl_print_func, spl_print_ctrl
+ buf, add_cpt, del_cpt = [], 0, 0
+ line1, line2, has_internal_linenos = 0, 0, True
+ hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+ spl_rows, spl_bytes, spl_current_page = 0, 0, 0
+ spl_print_func, spl_print_ctrl = None, None
def sane(x):
@@ -317,12 +356,13 @@ def convert(s, ponct=0, tag=''):
return t.getvalue()
-def output_hunk(print_func):
- print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
- print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+def output_hunk():
+ spl_print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
+ spl_print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+ row_was_output()
-def output_line(print_func, s1, s2):
+def output_line(s1, s2):
global line1
global line2
@@ -348,25 +388,26 @@ def output_line(print_func, s1, s2):
type_name = "changed"
s1, s2 = linediff(s1, s2)
- print_func(u'<tr class="diff%s">' % type_name)
+ spl_print_func(u'<tr class="diff%s">' % type_name)
try:
if s1:
- print_func(u'<td class="diffline">%d </td>' % line1)
- print_func(u'<td class="diffpresent">')
- print_func(convert(s1, ponct=1, tag='del'))
- print_func(u'</td>')
+ spl_print_func(u'<td class="diffline">%d </td>' % line1)
+ spl_print_func(u'<td class="diffpresent">')
+ spl_print_func(convert(s1, ponct=1, tag='del'))
+ spl_print_func(u'</td>')
else:
- print_func(u'<td colspan="2">\xa0</td>')
+ spl_print_func(u'<td colspan="2">\xa0</td>')
if s2:
- print_func(u'<td class="diffline">%d </td>' % line2)
- print_func(u'<td class="diffpresent">')
- print_func(convert(s2, ponct=1, tag='ins'))
- print_func(u'</td>')
+ spl_print_func(u'<td class="diffline">%d </td>' % line2)
+ spl_print_func(u'<td class="diffpresent">')
+ spl_print_func(convert(s2, ponct=1, tag='ins'))
+ spl_print_func(u'</td>')
else:
- print_func(u'<td colspan="2">\xa0</td>')
+ spl_print_func(u'<td colspan="2">\xa0</td>')
finally:
- print_func(u"</tr>\n", force=True)
+ spl_print_func(u"</tr>\n", force=True)
+ row_was_output()
m = orig1 and re.match(r"^\[ (\d+) lines removed \]$", orig1)
if m:
@@ -380,14 +421,14 @@ def output_line(print_func, s1, s2):
line2 += 1
-def empty_buffer(print_func):
+def empty_buffer():
global buf
global add_cpt
global del_cpt
if del_cpt == 0 or add_cpt == 0:
for l in buf:
- output_line(print_func, l[0], l[1])
+ output_line(l[0], l[1])
elif del_cpt != 0 and add_cpt != 0:
l0, l1 = [], []
@@ -403,44 +444,91 @@ def empty_buffer(print_func):
s0 = l0[i]
if i < len(l1):
s1 = l1[i]
- output_line(print_func, s0, s1)
+ output_line(s0, s1)
add_cpt, del_cpt = 0, 0
buf = []
-def output_unified_diff_table(print_func, unified_diff):
+def spl_print_enter(print_context, rotation_params):
+ # Takes ownership of print_context
+ global spl_print_func, spl_print_ctrl
+ spl_print_ctrl = print_context.__exit__, rotation_params
+ spl_print_func = print_context.__enter__()
+ _, _, css_url, _ = rotation_params
+ # Print file and table headers
+ output_header(css_url, spl_print_func)
+
+def spl_print_exit(*exc_info):
+ global spl_print_func, spl_print_ctrl
+ output_footer(spl_print_func)
+ _exit, _ = spl_print_ctrl
+ spl_print_func, spl_print_ctrl = None, None
+ return _exit(*exc_info)
+
+@contextlib.contextmanager
+def spl_file_printer(directory, filename):
+ with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
+ print_func = f.write
+ max_page_size = Config.general.max_report_size
+ def limited_print_func(s, force=False):
+ global spl_bytes
+ print_func(s)
+ spl_bytes += len(s)
+ if not force and max_page_size > 0 and spl_bytes >= max_page_size:
+ raise PrintLimitReached()
+ yield limited_print_func
+
+def row_was_output():
+ global spl_print_func, spl_print_ctrl, spl_rows, spl_current_page
+ spl_rows += 1
+ if not spl_print_ctrl:
+ return
+ _, rotation_params = spl_print_ctrl
+ directory, mainname, css_url, rows_per_page = rotation_params
+ if spl_rows % rows_per_page != 0:
+ return
+ spl_current_page += 1
+
+ filename = "%s-%s.html" % (mainname, spl_current_page)
+ # close the current page
+ spl_print_func(UD_TABLE_FOOTER % {"filename": escape(filename), "text": "load diff"}, force=True)
+ spl_print_exit(None, None, None)
+ # rotate to the next page
+ context = spl_file_printer(directory, filename)
+ spl_print_enter(context, rotation_params)
+ spl_print_func(UD_TABLE_HEADER)
+
+
+def output_unified_diff_table(unified_diff):
global add_cpt, del_cpt
global line1, line2
global hunk_off1, hunk_size1, hunk_off2, hunk_size2
- print_func(u'<table class="diff">\n')
+ spl_print_func(UD_TABLE_HEADER)
try:
- print_func(u'<colgroup><col style="width: 3em;"/><col style="99%"/>\n')
- print_func(u'<col style="width: 3em;"/><col style="99%"/></colgroup>\n')
-
for l in unified_diff.splitlines():
m = re.match(r'^--- ([^\s]*)', l)
if m:
- empty_buffer(print_func)
+ empty_buffer()
continue
m = re.match(r'^\+\+\+ ([^\s]*)', l)
if m:
- empty_buffer(print_func)
+ empty_buffer()
continue
m = re.match(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*)", l)
if m:
- empty_buffer(print_func)
+ empty_buffer()
hunk_data = map(lambda x:x=="" and 1 or int(x), m.groups())
hunk_off1, hunk_size1, hunk_off2, hunk_size2 = hunk_data
line1, line2 = hunk_off1, hunk_off2
- output_hunk(print_func)
+ output_hunk()
continue
if re.match(r'^\[', l):
- empty_buffer(print_func)
- print_func(u'<td colspan="2">%s</td>\n' % l)
+ empty_buffer()
+ spl_print_func(u'<td colspan="2">%s</td>\n' % l)
if re.match(r"^\\ No newline", l):
if hunk_size2 == 0:
@@ -450,7 +538,7 @@ def output_unified_diff_table(print_func, unified_diff):
continue
if hunk_size1 <= 0 and hunk_size2 <= 0:
- empty_buffer(print_func)
+ empty_buffer()
continue
m = re.match(r"^\+\[ (\d+) lines removed \]$", l)
@@ -480,34 +568,54 @@ def output_unified_diff_table(print_func, unified_diff):
continue
if re.match(r"^ ", l) and hunk_size1 and hunk_size2:
- empty_buffer(print_func)
+ empty_buffer()
hunk_size1 -= 1
hunk_size2 -= 1
buf.append((l[1:], l[1:]))
continue
- empty_buffer(print_func)
+ empty_buffer()
- empty_buffer(print_func)
+ empty_buffer()
finally:
- print_func(u"</table>", force=True)
+ spl_print_func(u"</table>", force=True)
+
def output_unified_diff(print_func, css_url, directory, unified_diff):
+ global spl_print_func, spl_current_page
+ new_unified_diff()
if directory and len(unified_diff) > Config.general.separate_file_diff_size:
# open a new file for this table
- filename="%s.html" % hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+ mainname = hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+ filename="%s.html" % mainname
logger.debug('separate html output for diff of size %d', len(unified_diff))
- with file_printer(directory, filename) as new_print_func:
- output_header(css_url, new_print_func)
- output_unified_diff_table(new_print_func, unified_diff)
- output_footer(new_print_func)
+ num_pages = 0
+ rows_per_page = estimate_num_rows_per_page(Config.general.separate_file_diff_size)
+ rotation_params = directory, mainname, css_url, rows_per_page
+ try:
+ spl_print_enter(spl_file_printer(directory, filename), rotation_params)
+ output_unified_diff_table(unified_diff)
+ except PrintLimitReached:
+ spl_print_func(u"<table><tr class='error'><td colspan='4'>Max output size reached.</td></tr></table>",
+ force=True)
+ spl_print_exit(None, None, None) # swallow
+ except:
+ if not spl_print_exit(*sys.exc_info()): raise
+ else:
+ spl_print_exit(None, None, None)
+ finally:
+ num_pages = spl_current_page + 1
- print_func("<div class='ondemand'>\n")
- print_func("... <a href='%s'>load diff</a> ...\n" % escape(filename))
- print_func("</div>\n")
+ print_func(UD_TABLE_HEADER, force=True)
+ print_func(UD_TABLE_FOOTER % {"filename": escape(filename), "text": "load diff (%s pieces)" % num_pages}, force=True)
else:
- output_unified_diff_table(print_func, unified_diff)
+ try:
+ spl_print_func = print_func
+ output_unified_diff_table(unified_diff)
+ finally:
+ spl_print_func = None
+
def output_difference(difference, print_func, css_url, directory, parents):
logger.debug('html output for %s', difference.source1)
@@ -574,9 +682,7 @@ def output_html(difference, css_url=None, print_func=None):
@contextlib.contextmanager
def file_printer(directory, filename):
with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
- print_func = f.write
- print_func = create_limited_print_func(print_func, Config.general.max_report_size)
- yield print_func
+ yield f.write
JQUERY_SYSTEM_LOCATIONS = ['/usr/share/javascript/jquery/jquery.js']
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope mailing list