[diffoscope] 01/03: Multi-file HTML output

Jérémy Bobbio lunar at moszumanska.debian.org
Mon Dec 14 00:06:24 CET 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit 99c6c515398e99d7d7a028a44a71bfc417ab78c3
Author: Joachim Breitner <mail at joachim-breitner.de>
Date:   Sun Dec 13 13:01:04 2015 +0000

    Multi-file HTML output
    
    In order to make it feasible to create HTML output with large diffs,
    this adds a new presenter mode (--html-dir) which spreads the output
    over multiple files in a directory.
    
    In particular, the table presenting a diff that is larger than the
    number of bytes specified via --separate-file-diff-size (200kB by
    default) is written to a separate file and loaded on demand using
    JavaScript/JQuery.
    
    By default, JQuery is symlinked into the output directory from
    /usr/share/javascript/jquery/jquery.js (install libjs-jquery!), but an
    alternative location (e.g. /javascript/jquery/jquery.js) can be
    specified using the --jquery command line parameter.
    
    Closes: #806891
---
 debian/control                |   3 +-
 diffoscope/__main__.py        |  18 +++++++-
 diffoscope/config.py          |   9 ++++
 diffoscope/presenters/html.py | 100 ++++++++++++++++++++++++++++++++++++++----
 4 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/debian/control b/debian/control
index 29fadd2..63fa2a8 100644
--- a/debian/control
+++ b/debian/control
@@ -31,7 +31,8 @@ Depends: python3-pkg-resources,
          ${misc:Depends},
          ${python3:Depends},
 Recommends: ${diffoscope:Recommends},
-            ${python3:Recommends}
+            ${python3:Recommends},
+Suggests: libjs-jquery
 Breaks: debbindiff (<< 29)
 Replaces: debbindiff (<< 29)
 Description: in-depth comparison of files, archives, and directories
diff --git a/diffoscope/__main__.py b/diffoscope/__main__.py
index 3363186..0ba3f94 100644
--- a/diffoscope/__main__.py
+++ b/diffoscope/__main__.py
@@ -34,6 +34,7 @@ from diffoscope import logger, VERSION, set_locale, clean_all_temp_files
 import diffoscope.comparators
 from diffoscope.config import Config
 from diffoscope.presenters.html import output_html
+from diffoscope.presenters.html import output_html_directory
 from diffoscope.presenters.text import output_text
 
 
@@ -51,6 +52,8 @@ def create_parser():
                         help='Open the python debugger in case of crashes.')
     parser.add_argument('--html', metavar='output', dest='html_output',
                         help='write HTML report to given file (use - for stdout)')
+    parser.add_argument('--html-dir', metavar='output', dest='html_output_directory',
+                        help='write multi-file HTML report to given directory')
     parser.add_argument('--text', metavar='output', dest='text_output',
                         help='write plain text output to given file (use - for stdout)')
     parser.add_argument('--max-report-size', metavar='BYTES',
@@ -58,6 +61,11 @@ def create_parser():
                         help='maximum bytes written in report (default: %d)' %
                         Config.general.max_report_size,
                         default=Config.general.max_report_size)
+    parser.add_argument('--separate-file-diff-size', metavar='BYTES',
+                        dest='separate_file_diff_size', type=int,
+                        help='diff size to load diff on demand, with --html-dir (default: %d)' %
+                        Config.general.separate_file_diff_size,
+                        default=Config.general.separate_file_diff_size)
     parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines', type=int,
                         help='maximum number of lines per diff block (default: %d)' %
                         Config.general.max_diff_block_lines,
@@ -75,6 +83,8 @@ def create_parser():
                         help='treat absent files as empty')
     parser.add_argument('--css', metavar='url', dest='css_url',
                         help='link to an extra CSS for the HTML report')
+    parser.add_argument('--jquery', metavar='url', dest='jquery_url',
+                        help='link to the jquery url, with --html-dir. By default, a symlink to /usr/share/javascript/jquery/jquery.js is created')
     parser.add_argument('file1', help='first file to compare')
     parser.add_argument('file2', help='second file to compare')
     if not tlsh:
@@ -128,6 +138,7 @@ def run_diffoscope(parsed_args):
     Config.general.max_diff_block_lines = parsed_args.max_diff_block_lines
     Config.general.max_diff_input_lines = parsed_args.max_diff_input_lines
     Config.general.max_report_size = parsed_args.max_report_size
+    Config.general.separate_file_diff_size = parsed_args.separate_file_diff_size
     Config.general.fuzzy_threshold = parsed_args.fuzzy_threshold
     Config.general.new_file = parsed_args.new_file
     if parsed_args.debug:
@@ -136,10 +147,15 @@ def run_diffoscope(parsed_args):
     difference = diffoscope.comparators.compare_root_paths(
         parsed_args.file1, parsed_args.file2)
     if difference:
+        # no output desired? print text
+        if not any((parsed_args.text_output, parsed_args.html_output, parsed_args.html_output_directory)):
+            parsed_args.text_output = "-"
         if parsed_args.html_output:
             with make_printer(parsed_args.html_output) as print_func:
                 output_html(difference, css_url=parsed_args.css_url, print_func=print_func)
-        if (parsed_args.text_output and parsed_args.text_output != parsed_args.html_output) or not parsed_args.html_output:
+        if parsed_args.html_output_directory:
+                output_html_directory(parsed_args.html_output_directory, difference, css_url=parsed_args.css_url, jquery_url=parsed_args.jquery_url)
+        if parsed_args.text_output:
             with make_printer(parsed_args.text_output or '-') as print_func:
                 output_text(difference, print_func=print_func)
         return 1
diff --git a/diffoscope/config.py b/diffoscope/config.py
index 5087306..ff65558 100644
--- a/diffoscope/config.py
+++ b/diffoscope/config.py
@@ -30,6 +30,7 @@ class Config(object):
         self._max_diff_block_lines = 50
         self._max_diff_input_lines = 100000 # GNU diff cannot process arbitrary large files :(
         self._max_report_size = 2000 * 2 ** 10 # 2000 kB
+        self._separate_file_diff_size = 200 * 2 ** 10 # 200kB
         self._fuzzy_threshold = 60
         self._new_file = False
 
@@ -64,6 +65,14 @@ class Config(object):
         self._max_report_size = value
 
     @property
+    def separate_file_diff_size(self):
+        return self._separate_file_diff_size
+
+    @separate_file_diff_size.setter
+    def separate_file_diff_size(self, value):
+        self._separate_file_diff_size = value
+
+    @property
     def fuzzy_threshold(self):
         return self._fuzzy_threshold
 
diff --git a/diffoscope/presenters/html.py b/diffoscope/presenters/html.py
index 42f7acd..c0c605a 100644
--- a/diffoscope/presenters/html.py
+++ b/diffoscope/presenters/html.py
@@ -34,6 +34,11 @@
 import cgi
 import re
 import sys
+import os
+import os.path
+import codecs
+import hashlib
+from contextlib import contextmanager
 from xml.sax.saxutils import escape
 from diffoscope import logger, VERSION
 from diffoscope.config import Config
@@ -55,8 +60,8 @@ DIFFOFF = "\x02"
 HEADER = """<!DOCTYPE html>
 <html>
 <head>
-  <meta charset="utf-8">
-  <meta name="generator" content="diffoscope">
+  <meta charset="utf-8" />
+  <meta name="generator" content="diffoscope" />
   <link rel="icon" type="image/png" href="data:image/png;base64,%(favicon)s" />
   <title>%(title)s</title>
   <style>
@@ -138,6 +143,9 @@ HEADER = """<!DOCTYPE html>
     .diffheader:hover .anchor {
       display: inline;
     }
+    .ondemand {
+      text-align: center;
+    }
   </style>
   %(css_link)s
 </head>
@@ -150,6 +158,23 @@ FOOTER = """
 </html>
 """
 
+SCRIPTS = """
+<script src="%(jquery_url)s"></script>
+<script type="text/javascript">
+$(function() {
+  $("div.ondemand a").on('click', function (){
+    var filename = $(this).attr('href');
+    var div = $(this).parent();
+    div.text('... loading ...');
+    div.load(filename + " table", function() {
+        // http://stackoverflow.com/a/8452751/946226
+        $(this).children(':first').unwrap();
+    });
+    return false;
+  });
+});
+</script>
+"""
 
 class PrintLimitReached(Exception):
     pass
@@ -381,7 +406,7 @@ def empty_buffer(print_func):
     buf = []
 
 
-def output_unified_diff(print_func, unified_diff):
+def output_unified_diff_table(print_func, unified_diff):
     global add_cpt, del_cpt
     global line1, line2
     global hunk_off1, hunk_size1, hunk_off2, hunk_size2
@@ -464,8 +489,24 @@ def output_unified_diff(print_func, unified_diff):
     finally:
         print_func(u"</table>", force=True)
 
+def output_unified_diff(print_func, css_url, directory, unified_diff):
+    if directory and len(unified_diff) > Config.general.separate_file_diff_size:
+        # open a new file for this table
+        filename="%s.html" % hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+        logger.debug('separate html output for diff of size %d', len(unified_diff))
+        with file_printer(directory, filename) as new_print_func:
+            output_header(css_url, new_print_func)
+            output_unified_diff_table(new_print_func, unified_diff)
+            output_footer(new_print_func)
+
+        print_func("<div class='ondemand'>\n")
+        print_func("... <a href='%s'>load diff</a> ...\n" % escape(filename))
+        print_func("</div>\n")
+
+    else:
+        output_unified_diff_table(print_func, unified_diff)
 
-def output_difference(difference, print_func, parents):
+def output_difference(difference, print_func, css_url, directory, parents):
     logger.debug('html output for %s', difference.source1)
     sources = parents + [difference.source1]
     print_func(u"<div class='difference'>")
@@ -487,9 +528,9 @@ def output_difference(difference, print_func, parents):
                        % u'<br />'.join(map(escape, difference.comments)))
         print_func(u"</div>")
         if difference.unified_diff:
-            output_unified_diff(print_func, difference.unified_diff)
+            output_unified_diff(print_func, css_url, directory, difference.unified_diff)
         for detail in difference.details:
-            output_difference(detail, print_func, sources)
+            output_difference(detail, print_func, css_url, directory, sources)
     except PrintLimitReached:
         logger.debug('print limit reached')
         raise
@@ -507,16 +548,59 @@ def output_header(css_url, print_func):
                          'css_link': css_link,
                         })
 
+def output_footer(print_func):
+    print_func(FOOTER % {'version': VERSION}, force=True)
+
 
 def output_html(difference, css_url=None, print_func=None):
+    """
+    Default presenter, all in one HTML file
+    """
     if print_func is None:
         print_func = print
     print_func = create_limited_print_func(print_func, Config.general.max_report_size)
     try:
         output_header(css_url, print_func)
-        output_difference(difference, print_func, [])
+        output_difference(difference, print_func, css_url, None, [])
     except PrintLimitReached:
         logger.debug('print limit reached')
         print_func(u"<div class='error'>Max output size reached.</div>",
                    force=True)
-    print_func(FOOTER % {'version': VERSION}, force=True)
+    output_footer(print_func)
+
+@contextmanager
+def file_printer(directory, filename):
+    with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
+        print_func = f.write
+        print_func = create_limited_print_func(print_func, Config.general.max_report_size)
+        yield print_func
+
+def output_html_directory(directory, difference, css_url=None, jquery_url=None):
+    """
+    Multi-file presenter. Writes to a directory, and puts large diff tables
+    into files of their own.
+
+    This uses jQuery. By default it uses /usr/share/javascript/jquery/jquery.js
+    (symlinked, so that you can still share the result over HTTP).
+    You can also pass --jquery URL to diffoscope to use a central jQuery copy.
+    """
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+
+    if not jquery_url:
+        jquery_symlink = os.path.join(directory, "jquery.js")
+        if not os.path.exists(jquery_symlink):
+            os.symlink("/usr/share/javascript/jquery/jquery.js", jquery_symlink)
+        jquery_url = "./jquery.js"
+
+    with file_printer(directory, "index.html") as print_func:
+        print_func = create_limited_print_func(print_func, Config.general.max_report_size)
+        try:
+            output_header(css_url, print_func)
+            output_difference(difference, print_func, css_url, directory, [])
+        except PrintLimitReached:
+            logger.debug('print limit reached')
+            print_func(u"<div class='error'>Max output size reached.</div>",
+                       force=True)
+        print_func(SCRIPTS % {'jquery_url': escape(jquery_url)}, force=True)
+        output_footer(print_func)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list