[diffoscope] 01/04: In split-html output, also split individual diffs that are too long

Thu Aug 25 12:23:58 CEST 2016

This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch better-lazy-loading
in repository diffoscope.

commit 5d38805bad35df7f06cb8a8dd0f4b8e764ae7e21
Author: Ximin Luo <infinity0 at debian.org>
Date:   Wed Aug 24 20:47:46 2016 +0200

    In split-html output, also split individual diffs that are too long
---
 diffoscope/presenters/html.py | 218 +++++++++++++++++++++++++++++++-----------
 1 file changed, 162 insertions(+), 56 deletions(-)

diff --git a/diffoscope/presenters/html.py b/diffoscope/presenters/html.py
index be8b145..0e887b6 100644
--- a/diffoscope/presenters/html.py
+++ b/diffoscope/presenters/html.py
@@ -168,20 +168,39 @@ SCRIPTS = """
 <script src="%(jquery_url)s"></script>
 <script type="text/javascript">
 $(function() {
-  $("div.ondemand a").on('click', function (){
-    var filename = $(this).attr('href');
-    var div = $(this).parent();
-    div.text('... loading ...');
-    div.load(filename + " table", function() {
+  var load_cont = function() {
+    var a = $(this);
+    var filename = a.attr('href');
+    var numleft = Number.parseInt(/\((\d+) pieces?\)/.exec(a.text())[1]) - 1
+    var td = a.parent();
+    td.text('... loading ...');
+    td.parent().load(filename + " tr", function() {
         // http://stackoverflow.com/a/8452751/946226
-        $(this).children(':first').unwrap();
+        var elems = $(this).children(':first').unwrap();
+        // set this behaviour for the next link too
+        var a = elems.parent().find(".ondemand a");
+        var noun = numleft > 1 ? "pieces" : "piece" // be sure the regex matches either
+        a.text(a.text() + " (" + numleft + " " + noun + ")");
+        a.on('click', load_cont);
     });
     return false;
-  });
+  };
+  $(".ondemand a").on('click', load_cont);
 });
 </script>
 """
 
+UD_TABLE_HEADER = u"""<table class="diff">
+<colgroup><col style="width: 3em;"/><col style="99%"/>
+<col style="width: 3em;"/><col style="99%"/></colgroup>
+"""
+
+UD_TABLE_FOOTER = u"""<tr class="ondemand"><td colspan="4">
+... <a href="%(filename)s">%(text)s</a>
+</td></tr>
+</table>
+"""
+
 class PrintLimitReached(Exception):
     pass
 
@@ -197,10 +216,30 @@ def create_limited_print_func(print_func, max_page_size):
     return limited_print_func
 
 
-buf = []
-add_cpt, del_cpt = 0, 0
+def estimate_num_rows_per_page(separate_file_diff_size):
+    # each row takes about 1200 bytes in the output, so roughly calculate
+    # the number of rows that 4 * separate_file_diff_size will hold
+    return separate_file_diff_size * 4 // 1200
+
+
+buf, add_cpt, del_cpt = [], 0, 0
 line1, line2 = 0, 0
 hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+spl_rows, spl_bytes, spl_current_page = 0, 0, 0
+spl_print_func, spl_print_ctrl = None, None
+
+
+def new_unified_diff():
+    global buf, add_cpt, del_cpt
+    global line1, line2, has_internal_linenos
+    global hunk_off1, hunk_size1, hunk_off2, hunk_size2
+    global spl_rows, spl_bytes, spl_current_page
+    global spl_print_func, spl_print_ctrl
+    buf, add_cpt, del_cpt = [], 0, 0
+    line1, line2, has_internal_linenos = 0, 0, True
+    hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+    spl_rows, spl_bytes, spl_current_page = 0, 0, 0
+    spl_print_func, spl_print_ctrl = None, None
 
 
 def sane(x):
@@ -317,12 +356,13 @@ def convert(s, ponct=0, tag=''):
     return t.getvalue()
 
 
-def output_hunk(print_func):
-    print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
-    print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+def output_hunk():
+    spl_print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
+    spl_print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+    row_was_output()
 
 
-def output_line(print_func, s1, s2):
+def output_line(s1, s2):
     global line1
     global line2
 
@@ -348,25 +388,26 @@ def output_line(print_func, s1, s2):
         type_name = "changed"
         s1, s2 = linediff(s1, s2)
 
-    print_func(u'<tr class="diff%s">' % type_name)
+    spl_print_func(u'<tr class="diff%s">' % type_name)
     try:
         if s1:
-            print_func(u'<td class="diffline">%d </td>' % line1)
-            print_func(u'<td class="diffpresent">')
-            print_func(convert(s1, ponct=1, tag='del'))
-            print_func(u'</td>')
+            spl_print_func(u'<td class="diffline">%d </td>' % line1)
+            spl_print_func(u'<td class="diffpresent">')
+            spl_print_func(convert(s1, ponct=1, tag='del'))
+            spl_print_func(u'</td>')
         else:
-            print_func(u'<td colspan="2">\xa0</td>')
+            spl_print_func(u'<td colspan="2">\xa0</td>')
 
         if s2:
-            print_func(u'<td class="diffline">%d </td>' % line2)
-            print_func(u'<td class="diffpresent">')
-            print_func(convert(s2, ponct=1, tag='ins'))
-            print_func(u'</td>')
+            spl_print_func(u'<td class="diffline">%d </td>' % line2)
+            spl_print_func(u'<td class="diffpresent">')
+            spl_print_func(convert(s2, ponct=1, tag='ins'))
+            spl_print_func(u'</td>')
         else:
-            print_func(u'<td colspan="2">\xa0</td>')
+            spl_print_func(u'<td colspan="2">\xa0</td>')
     finally:
-        print_func(u"</tr>\n", force=True)
+        spl_print_func(u"</tr>\n", force=True)
+        row_was_output()
 
     m = orig1 and re.match(r"^\[ (\d+) lines removed \]$", orig1)
     if m:
@@ -380,14 +421,14 @@ def output_line(print_func, s1, s2):
         line2 += 1
 
 
-def empty_buffer(print_func):
+def empty_buffer():
     global buf
     global add_cpt
     global del_cpt
 
     if del_cpt == 0 or add_cpt == 0:
         for l in buf:
-            output_line(print_func, l[0], l[1])
+            output_line(l[0], l[1])
 
     elif del_cpt != 0 and add_cpt != 0:
         l0, l1 = [], []
@@ -403,44 +444,91 @@ def empty_buffer(print_func):
                 s0 = l0[i]
             if i < len(l1):
                 s1 = l1[i]
-            output_line(print_func, s0, s1)
+            output_line(s0, s1)
 
     add_cpt, del_cpt = 0, 0
     buf = []
 
 
-def output_unified_diff_table(print_func, unified_diff):
+def spl_print_enter(print_context, rotation_params):
+    # Takes ownership of print_context
+    global spl_print_func, spl_print_ctrl
+    spl_print_ctrl = print_context.__exit__, rotation_params
+    spl_print_func = print_context.__enter__()
+    _, _, css_url, _ = rotation_params
+    # Print file and table headers
+    output_header(css_url, spl_print_func)
+
+def spl_print_exit(*exc_info):
+    global spl_print_func, spl_print_ctrl
+    output_footer(spl_print_func)
+    _exit, _ = spl_print_ctrl
+    spl_print_func, spl_print_ctrl = None, None
+    return _exit(*exc_info)
+
+@contextlib.contextmanager
+def spl_file_printer(directory, filename):
+    with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
+        print_func = f.write
+        max_page_size = Config.general.max_report_size
+        def limited_print_func(s, force=False):
+            global spl_bytes
+            print_func(s)
+            spl_bytes += len(s)
+            if not force and max_page_size > 0 and spl_bytes >= max_page_size:
+                raise PrintLimitReached()
+        yield limited_print_func
+
+def row_was_output():
+    global spl_print_func, spl_print_ctrl, spl_rows, spl_current_page
+    spl_rows += 1
+    if not spl_print_ctrl:
+        return
+    _, rotation_params = spl_print_ctrl
+    directory, mainname, css_url, rows_per_page = rotation_params
+    if spl_rows % rows_per_page != 0:
+        return
+    spl_current_page += 1
+
+    filename = "%s-%s.html" % (mainname, spl_current_page)
+    # close the current page
+    spl_print_func(UD_TABLE_FOOTER % {"filename": escape(filename), "text": "load diff"}, force=True)
+    spl_print_exit(None, None, None)
+    # rotate to the next page
+    context = spl_file_printer(directory, filename)
+    spl_print_enter(context, rotation_params)
+    spl_print_func(UD_TABLE_HEADER)
+
+
+def output_unified_diff_table(unified_diff):
     global add_cpt, del_cpt
     global line1, line2
     global hunk_off1, hunk_size1, hunk_off2, hunk_size2
 
-    print_func(u'<table class="diff">\n')
+    spl_print_func(UD_TABLE_HEADER)
     try:
-        print_func(u'<colgroup><col style="width: 3em;"/><col style="99%"/>\n')
-        print_func(u'<col style="width: 3em;"/><col style="99%"/></colgroup>\n')
-
         for l in unified_diff.splitlines():
             m = re.match(r'^--- ([^\s]*)', l)
             if m:
-                empty_buffer(print_func)
+                empty_buffer()
                 continue
             m = re.match(r'^\+\+\+ ([^\s]*)', l)
             if m:
-                empty_buffer(print_func)
+                empty_buffer()
                 continue
 
             m = re.match(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*)", l)
             if m:
-                empty_buffer(print_func)
+                empty_buffer()
                 hunk_data = map(lambda x:x=="" and 1 or int(x), m.groups())
                 hunk_off1, hunk_size1, hunk_off2, hunk_size2 = hunk_data
                 line1, line2 = hunk_off1, hunk_off2
-                output_hunk(print_func)
+                output_hunk()
                 continue
 
             if re.match(r'^\[', l):
-                empty_buffer(print_func)
-                print_func(u'<td colspan="2">%s</td>\n' % l)
+                empty_buffer()
+                spl_print_func(u'<td colspan="2">%s</td>\n' % l)
 
             if re.match(r"^\\ No newline", l):
                 if hunk_size2 == 0:
@@ -450,7 +538,7 @@ def output_unified_diff_table(print_func, unified_diff):
                 continue
 
             if hunk_size1 <= 0 and hunk_size2 <= 0:
-                empty_buffer(print_func)
+                empty_buffer()
                 continue
 
             m = re.match(r"^\+\[ (\d+) lines removed \]$", l)
@@ -480,34 +568,54 @@ def output_unified_diff_table(print_func, unified_diff):
                 continue
 
             if re.match(r"^ ", l) and hunk_size1 and hunk_size2:
-                empty_buffer(print_func)
+                empty_buffer()
                 hunk_size1 -= 1
                 hunk_size2 -= 1
                 buf.append((l[1:], l[1:]))
                 continue
 
-            empty_buffer(print_func)
+            empty_buffer()
 
-        empty_buffer(print_func)
+        empty_buffer()
     finally:
-        print_func(u"</table>", force=True)
+        spl_print_func(u"</table>", force=True)
+
 
 def output_unified_diff(print_func, css_url, directory, unified_diff):
+    global spl_print_func, spl_current_page
+    new_unified_diff()
     if directory and len(unified_diff) > Config.general.separate_file_diff_size:
         # open a new file for this table
-        filename="%s.html" % hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+        mainname = hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+        filename="%s.html" % mainname
         logger.debug('separate html output for diff of size %d', len(unified_diff))
-        with file_printer(directory, filename) as new_print_func:
-            output_header(css_url, new_print_func)
-            output_unified_diff_table(new_print_func, unified_diff)
-            output_footer(new_print_func)
+        num_pages = 0
+        rows_per_page = estimate_num_rows_per_page(Config.general.separate_file_diff_size)
+        rotation_params = directory, mainname, css_url, rows_per_page
+        try:
+            spl_print_enter(spl_file_printer(directory, filename), rotation_params)
+            output_unified_diff_table(unified_diff)
+        except PrintLimitReached:
+            spl_print_func(u"<table><tr class='error'><td colspan='4'>Max output size reached.</td></tr></table>",
+                           force=True)
+            spl_print_exit(None, None, None) # swallow
+        except:
+            if not spl_print_exit(*sys.exc_info()): raise
+        else:
+            spl_print_exit(None, None, None)
+        finally:
+            num_pages = spl_current_page + 1
 
-        print_func("<div class='ondemand'>\n")
-        print_func("... <a href='%s'>load diff</a> ...\n" % escape(filename))
-        print_func("</div>\n")
+        print_func(UD_TABLE_HEADER, force=True)
+        print_func(UD_TABLE_FOOTER % {"filename": escape(filename), "text": "load diff (%s pieces)" % num_pages}, force=True)
 
     else:
-        output_unified_diff_table(print_func, unified_diff)
+        try:
+            spl_print_func = print_func
+            output_unified_diff_table(unified_diff)
+        finally:
+            spl_print_func = None
+
 
 def output_difference(difference, print_func, css_url, directory, parents):
     logger.debug('html output for %s', difference.source1)
@@ -574,9 +682,7 @@ def output_html(difference, css_url=None, print_func=None):
 @contextlib.contextmanager
 def file_printer(directory, filename):
     with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
-        print_func = f.write
-        print_func = create_limited_print_func(print_func, Config.general.max_report_size)
-        yield print_func
+        yield f.write
 
 JQUERY_SYSTEM_LOCATIONS = ['/usr/share/javascript/jquery/jquery.js']
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git