[diffoscope] 02/02: WIP: html-dir: split index pages up if they get too big

Ximin Luo <infinity0 at debian.org>
Mon Jun 19 22:27:21 CEST 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch WIP/humungous-diffs
in repository diffoscope.

commit c82efcf3f730836f78a493f94bd54b6a54698a93
Author: Ximin Luo <infinity0 at debian.org>
Date:   Mon Jun 19 22:26:07 2017 +0200

    WIP: html-dir: split index pages up if they get too big
---
 diffoscope/difference.py                |   6 +-
 diffoscope/presenters/html/html.py      | 228 +++++++++++++++++++++++++++++---
 diffoscope/presenters/html/templates.py |  52 ++++----
 diffoscope/presenters/utils.py          |   5 +
 4 files changed, 246 insertions(+), 45 deletions(-)

diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index 54cb686..bff6c28 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -136,7 +136,7 @@ class Difference(object):
             queue.extend(top._details)
             yield from self.traverse_breadth(queue)
 
-    def traverse_heapq(self, scorer, queue=None):
+    def traverse_heapq(self, scorer, yield_score=False, queue=None):
         """Traverse the difference tree using a priority queue, where each node
         is scored according to a user-supplied function, and nodes with smaller
         scores are traversed first (after they have been added to the queue).
@@ -148,10 +148,10 @@ class Difference(object):
         queue = queue if queue is not None else [(scorer(self, None), self)]
         if queue:
             val, top = heapq.heappop(queue)
-            yield top
+            yield ((top, val) if yield_score else top)
             for d in top._details:
                 heapq.heappush(queue, (scorer(d, val), d))
-            yield from self.traverse_heapq(scorer, queue)
+            yield from self.traverse_heapq(scorer, yield_score, queue)
 
     @staticmethod
     def from_feeder(feeder1, feeder2, path1, path2, source=None, comment=None, **kwargs):
diff --git a/diffoscope/presenters/html/html.py b/diffoscope/presenters/html/html.py
index bb847a7..ae0a677 100644
--- a/diffoscope/presenters/html/html.py
+++ b/diffoscope/presenters/html/html.py
@@ -37,6 +37,7 @@ import re
 import sys
 import html
 import codecs
+import contextlib
 import hashlib
 import logging
 import contextlib
@@ -47,7 +48,7 @@ from diffoscope.diff import SideBySideDiff, DIFFON, DIFFOFF
 
 from ..icon import FAVICON_BASE64
 from ..utils import PrintLimitReached, DiffBlockLimitReached, \
-    create_limited_print_func, Presenter, make_printer
+    create_limited_print_func, Presenter, make_printer, PartialString
 
 from . import templates
 
@@ -106,22 +107,27 @@ def convert(s, ponct=0, tag=''):
 
     return t.getvalue()
 
+def get_visual(visual, anchor, indentstr, indentnum, end_tag=True):
+    logger.debug('including image for %s', visual.source)
+    indent = indentstr * indentnum
+    indent1 = indentstr * (indentnum+1)
+    indent2 = indentstr * (indentnum+2)
+    end_tag = indent + u"</div>" if end_tag else u""
+    return u"""{0}<div class="difference">
+{1}<div class="diffheader">
+{1}<div class="diffcontrol">⊟</div>
+{1}<div><span class="source">{3}</span>
+{2}<a class="anchor" href="#{4}" name="{4}">\xb6</a>
+{1}</div>
+{1}</div>
+{1}<div class="difference"><img src=\"data:{5},{6}\" alt=\"compared images\" /></div>
+{7}""".format(indent, indent1, indent2, html.escape(visual.source), anchor, visual.data_type, visual.content, end_tag)
+
 def output_visual(print_func, visual, parents):
     logger.debug('including image for %s', visual.source)
     sources = parents + [visual.source]
-    print_func(u'<div class="difference">')
-    print_func(u'<div class="diffheader">')
-    print_func(u'<div class="diffcontrol">⊟</div>')
-    print_func(u'<div><span class="source">%s</span>'
-               % html.escape(visual.source))
     anchor = escape_anchor('/'.join(sources[1:]))
-    print_func(
-        u' <a class="anchor" href="#%s" name="%s">\xb6</a>' % (anchor, anchor))
-    print_func(u"</div>")
-    print_func(u"</div>")
-    print_func(u'<div class="difference">'
-               u'<img src=\"data:%s,%s\" alt=\"compared images\" /></div>' %
-               (visual.data_type, visual.content))
+    print_func(get_visual(visual, anchor, "", 0, end_tag=False))
     print_func(u"</div>", force=True)
 
 def escape_anchor(val):
@@ -139,18 +145,27 @@ def escape_anchor(val):
 
     return val
 
-def output_header(css_url, print_func):
+def get_header(css_url):
     if css_url:
         css_link = '<link href="%s" type="text/css" rel="stylesheet" />' % css_url
     else:
         css_link = ''
-    print_func(templates.HEADER % {'title': html.escape(' '.join(sys.argv)),
+    return templates.HEADER % {'title': html.escape(' '.join(sys.argv)),
                          'favicon': FAVICON_BASE64,
                          'css_link': css_link,
-                        })
+                        }
+
+def output_header(css_url, print_func):
+    print_func(get_header(css_url))
+
+def get_footer(jquery_url=None):
+    footer = templates.FOOTER % {'version': VERSION}
+    if jquery_url:
+        return templates.SCRIPTS % {'jquery_url': html.escape(jquery_url)} + footer
+    return footer
 
 def output_footer(print_func):
-    print_func(templates.FOOTER % {'version': VERSION}, force=True)
+    print_func(get_footer(), force=True)
 
 
 @contextlib.contextmanager
@@ -351,10 +366,10 @@ class HTMLPresenter(Presenter):
             anchor = escape_anchor('/'.join(sources[1:]))
             print_func(u' <a class="anchor" href="#%s" name="%s">\xb6</a>' % (anchor, anchor))
             print_func(u"</div>")
+            print_func(u"</div>")
             if difference.comments:
                 print_func(u'<div class="comment">%s</div>'
                            % u'<br />'.join(map(html.escape, difference.comments)))
-            print_func(u"</div>")
             if len(difference.visuals) > 0:
                 for visual in difference.visuals:
                     output_visual(print_func, visual, sources)
@@ -394,7 +409,7 @@ class HTMLPresenter(Presenter):
             )
 
 
-class HTMLDirectoryPresenter(HTMLPresenter):
+class HTML2DirectoryPresenter(HTMLPresenter):
 
     def output_html_directory(self, directory, difference, css_url=None, jquery_url=None):
         """
@@ -450,3 +465,178 @@ class HTMLDirectoryPresenter(HTMLPresenter):
             css_url=parsed_args.css_url,
             jquery_url=parsed_args.jquery_url,
         )
+
+
+PLACEHOLDER = """<div class="ondemand-details">... <a href="%s.html">load details</a> ...</div>
+"""
+PLACEHOLDER_LEN = len(PLACEHOLDER)
+
+
+class HTMLDirectoryPresenter(HTML2DirectoryPresenter):
+
+    def output_anchor(self, path):
+        return escape_anchor('/'.join(path[1:]))
+
+    def output_node_frame(self, difference, path, indentstr, indentnum, body):
+        indent = indentstr * indentnum
+        indent1 = indentstr * (indentnum+1)
+        indent2 = indentstr * (indentnum+2)
+
+        anchor = self.output_anchor(path)
+        dctrl_class, dctrl = ("diffcontrol", u'⊟') if difference.has_visible_children() else ("diffcontrol-nochildren", u'⊡')
+        if difference.source1 == difference.source2:
+            header = u"""{0}<div class="{2}">{3}</div>
+{0}<div><span class="source">{5}</span>
+{1}<a class="anchor" href="#{4}" name="{4}">\xb6</a>
+{0}</div>
+""".format(indent1, indent2, dctrl_class, dctrl, anchor,
+           html.escape(PartialString.escape(difference.source1)))
+        else:
+            header = u"""{0}<div class="{2} diffcontrol-double">{3}</div>
+{0}<div><span class="source">{5}</span> vs.</div>
+{0}<div><span class="source">{6}</span>
+{1}<a class="anchor" href="#{4}" name="{4}">\xb6</a>
+{0}</div>
+""".format(indent1, indent2, dctrl_class, dctrl, anchor,
+           html.escape(PartialString.escape(difference.source1)),
+           html.escape(PartialString.escape(difference.source2)))
+
+        return u"""{0}<div class="diffheader">
+{1}{0}</div>
+{2}""".format(indent1, header, body)
+
+    def output_node(self, difference, path, indentstr, indentnum, css_url, directory):
+        indent = indentstr * indentnum
+        indent1 = indentstr * (indentnum+1)
+        indent2 = indentstr * (indentnum+2)
+        t, cont = PartialString.cont()
+
+        if difference.comments:
+            comments = u'{0}<div class="comment">\n{1}{0}</div>\n'.format(
+                indent1, "".join(u"{0}{1}<br/>\n".format(indent2, html.escape(x)) for x in difference.comments))
+        else:
+            comments = u""
+
+        visuals = u""
+        for visual in difference.visuals:
+            visuals += get_visual(visual, self.output_anchor(path), indentstr, indentnum+1)
+
+        udiff = io.StringIO()
+        if difference.unified_diff:
+            def print_func(x, force=False):
+                udiff.write(x)
+            self.output_unified_diff(print_func, css_url, directory, difference.unified_diff, difference.has_internal_linenos)
+
+        # Construct a PartialString for this node
+        # {3} gets mapped to {-1}, a continuation hole for later child nodes
+        body = u"{0}{1}{2}{3}".format(t.escape(comments), t.escape(visuals), t.escape(udiff.getvalue()), "{-1}")
+        if len(path) == 1:
+            # root node, frame it
+            t = cont(t, self.output_node_frame(difference, path, indentstr, indentnum, body))
+        else:
+            t = cont(t, body)
+
+        # Add holes for child nodes
+        for d in difference.details:
+            # {0} hole, for the child node's contents
+            # {-1} continuation hole, for later child nodes
+            t = cont(t, u"""{0}<div class="difference">
+{1}{0}</div>
+{{-1}}""".format(indent1, self.output_node_frame(d, path + [d.source1], indentstr, indentnum+1, "{0}")), d)
+
+        return cont(t, u"")
+
+    def output_difference(self, difference, jquery_url, css_url, directory):
+        partial_outputs = {} # nodes to their partial output
+        partial_ancestor = {} # child nodes to ancestor nodes
+
+        with contextlib.ExitStack() as xstack:
+            printers = {} # nodes to their printers
+            def maybe_print(node):
+                if partial_outputs[node].holes:
+                    return
+                printers[node](partial_outputs[node].format())
+                del partial_outputs[node]
+                del printers[node]
+
+            def smallest_first(node, parscore):
+                depth = parscore[0] + 1 if parscore else 0
+                parents = [node] + parscore[2] if parscore else []
+                return depth, node.size_self(), parents
+
+            for node, score in difference.traverse_heapq(smallest_first, yield_score=True):
+                ancestor = partial_ancestor.pop(node, None)
+                logger.debug('html output for %s', node.source1)
+                path = score[2] + [node.source1]
+                node_output = self.output_node(node, path, "  ", len(path)-1, css_url, directory)
+                anchor = self.output_anchor(path)
+
+                if ancestor:
+                    logger.debug("output size: %s, %s",
+                        partial_outputs[ancestor].size(PLACEHOLDER_LEN), node_output.size(PLACEHOLDER_LEN))
+                if ancestor and partial_outputs[ancestor].size(PLACEHOLDER_LEN) + node_output.size(PLACEHOLDER_LEN) < 100000: # FIXME limit
+                    # under limit, add it to an existing page
+                    partial_outputs[ancestor] = partial_outputs[ancestor].pformat({node: node_output})
+                    stored = ancestor
+
+                else:
+                    # over limit (or root), new subpage
+                    if ancestor:
+                        partial_outputs[ancestor] = partial_outputs[ancestor].pformat({node: PLACEHOLDER % anchor})
+                        maybe_print(ancestor)
+                        footer = get_footer()
+                    else:
+                        assert node is difference
+                        footer = get_footer(jquery_url)
+                        anchor = "index"
+
+                    partial_outputs[node] = node_output.frame(
+                        get_header(css_url) + u'<div class="difference">\n',
+                        u'</div>\n' + footer)
+                    printers[node] = xstack.enter_context(file_printer(directory, "%s.html" % anchor))
+                    stored = node
+
+                for child in node.details:
+                    partial_ancestor[child] = stored
+
+                maybe_print(stored)
+
+            if partial_outputs:
+                import pprint
+                pprint.pprint(partial_outputs, indent=4)
+            assert not partial_outputs
+
+    def output_html_directory(self, directory, difference, css_url=None, jquery_url=None):
+        """
+        Multi-file presenter. Writes to a directory, and puts large diff tables
+        into files of their own.
+
+        This uses jQuery. By default it uses /usr/share/javascript/jquery/jquery.js
+        (symlinked, so that you can still share the result over HTTP).
+        You can also pass --jquery URL to diffoscope to use a central jQuery copy.
+        """
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        if not os.path.isdir(directory):
+            raise ValueError("%s is not a directory" % directory)
+
+        if not jquery_url:
+            jquery_symlink = os.path.join(directory, "jquery.js")
+            if os.path.exists(jquery_symlink):
+                jquery_url = "./jquery.js"
+            else:
+                if os.path.lexists(jquery_symlink):
+                    os.unlink(jquery_symlink)
+                for path in JQUERY_SYSTEM_LOCATIONS:
+                    if os.path.exists(path):
+                        os.symlink(path, jquery_symlink)
+                        jquery_url = "./jquery.js"
+                        break
+                if not jquery_url:
+                    logger.warning('--jquery was not specified and jQuery was not found in any known location. Disabling on-demand inline loading.')
+                    logger.debug('Locations searched: %s', ', '.join(JQUERY_SYSTEM_LOCATIONS))
+        if jquery_url == 'disable':
+            jquery_url = None
+
+        self.output_difference(difference, jquery_url, css_url, directory)
diff --git a/diffoscope/presenters/html/templates.py b/diffoscope/presenters/html/templates.py
index 713718a..26e262e 100644
--- a/diffoscope/presenters/html/templates.py
+++ b/diffoscope/presenters/html/templates.py
@@ -110,12 +110,12 @@ HEADER = """<!DOCTYPE html>
     .diffoscope .diffheader:hover .anchor {
       display: inline;
     }
-    .diffoscope table.diff tr.ondemand td {
+    .diffoscope table.diff tr.ondemand td, .diffoscope div.ondemand-details {
       background: #f99;
       text-align: center;
       padding: 0.5em 0;
     }
-    .diffoscope table.diff tr.ondemand:hover td {
+    .diffoscope table.diff tr.ondemand:hover td, .diffoscope div.ondemand-details:hover {
       background: #faa;
       cursor: pointer;
     }
@@ -140,41 +140,47 @@ HEADER = """<!DOCTYPE html>
 <body class="diffoscope">
 """
 
-FOOTER = """
-<div class="footer">Generated by <a href="https://diffoscope.org" rel="noopener noreferrer" target="_blank">diffoscope</a> %(version)s</div>
+FOOTER = """<div class="footer">Generated by <a href="https://diffoscope.org" rel="noopener noreferrer" target="_blank">diffoscope</a> %(version)s</div>
 </body>
 </html>
 """
 
-SCRIPTS = """
-<script src="%(jquery_url)s"></script>
+SCRIPTS = """<script src="%(jquery_url)s"></script>
 <script type="text/javascript">
 $(function() {
-  var load_cont = function() {
-    var a = $(this).find("a");
+  // activate "loading" controls
+  var load_cont, load_generic = function(selector, target, getInfo, postLoad) {
+    return function() {
+        var a = $(this).find("a");
+        var filename = a.attr('href');
+        var info = getInfo ? getInfo(a) : null;
+        var button = a.parent();
+        button.text('... loading ...');
+        (target ? target(button) : button).load(filename + " " + selector, function() {
+            // https://stackoverflow.com/a/8452751/946226
+            var elems = $(this).children(':first').unwrap();
+            // set this behaviour for the next link too
+            var td = elems.parent().find(".ondemand td");
+            td.on('click', load_cont);
+            postLoad ? postLoad(td, info) : null;
+        });
+        return false;
+    };
+  };
+  load_cont = load_generic("tr", function(x) { return x.parent(); }, function(a) {
     var textparts = /^(.*)\((\d+) pieces?(.*)\)$/.exec(a.text());
     var numleft = Number.parseInt(textparts[2]) - 1;
     var noun = numleft == 1 ? "piece" : "pieces";
-    var newtext = textparts[1] + "(" + numleft + " " + noun + textparts[3] + ")";
-    var filename = a.attr('href');
-    var td = a.parent();
-    td.text('... loading ...');
-    td.parent().load(filename + " tr", function() {
-        // https://stackoverflow.com/a/8452751/946226
-        var elems = $(this).children(':first').unwrap();
-        // set this behaviour for the next link too
-        var td = elems.parent().find(".ondemand td");
-        td.find("a").text(newtext);
-        td.on('click', load_cont);
-    });
-    return false;
-  };
+    return textparts[1] + "(" + numleft + " " + noun + textparts[3] + ")";
+  }, function(td, info) { td.find("a").text(info); });
   $(".ondemand td").on('click', load_cont);
+  $(".ondemand-details").on('click', load_generic("div.difference > *"));
+  // activate [+]/[-] controls
   var diffcontrols = $(".diffcontrol");
   diffcontrols.on('click', function(evt) {
     var control = $(this);
     var parent = control.parent();
-    var target = $.merge(parent.siblings('table.diff, div.difference'), parent.find('div.comment'));
+    var target = parent.siblings('table.diff, div.difference, div.comment');
     var orig = target;
     if (evt.shiftKey) {
         var gparent = parent.parent();
diff --git a/diffoscope/presenters/utils.py b/diffoscope/presenters/utils.py
index c46773a..ae32819 100644
--- a/diffoscope/presenters/utils.py
+++ b/diffoscope/presenters/utils.py
@@ -249,6 +249,7 @@ class PartialString(object):
         return real_mapping, new_holes
 
     def size(self, hole_size=1):
+        # FIXME: fix for {{ and }} etc
         return self.base_len + hole_size * self.num_holes
 
     def pformat(self, mapping={}):
@@ -296,6 +297,10 @@ class PartialString(object):
             return t.pformat({cont: cls(fmtstr, *(holes + (cont,)))})
         return cls("{0}", cont), cont
 
+    def frame(self, header, footer):
+        frame = self.__class__(self.escape(header) + "{0}" + self.escape(footer), None)
+        return frame.pformat({None: self})
+
 
 if __name__ == "__main__":
     import doctest

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list