[diffoscope] 01/03: Multi-file HTML output
Jérémy Bobbio
lunar at moszumanska.debian.org
Mon Dec 14 00:06:24 CET 2015
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch master
in repository diffoscope.
commit 99c6c515398e99d7d7a028a44a71bfc417ab78c3
Author: Joachim Breitner <mail at joachim-breitner.de>
Date: Sun Dec 13 13:01:04 2015 +0000
Multi-file HTML output
In order to make it feasible to create HTML output with large diffs,
this adds a new presenter mode (--html-dir) which spreads the output
over multiple files in a directory.
In particular, the table presenting a diff that is larger than the
number of bytes specified via --separate-file-diff-size (200kB by
default) is written to a separate file and loaded on demand using
JavaScript/jQuery.
By default, jQuery is symlinked into the output directory from
/usr/share/javascript/jquery/jquery.js (install libjs-jquery!), but an
alternative location (e.g. /javascript/jquery/jquery.js) can be
specified using the --jquery command line parameter.
Closes: #806891
---
debian/control | 3 +-
diffoscope/__main__.py | 18 +++++++-
diffoscope/config.py | 9 ++++
diffoscope/presenters/html.py | 100 ++++++++++++++++++++++++++++++++++++++----
4 files changed, 120 insertions(+), 10 deletions(-)
diff --git a/debian/control b/debian/control
index 29fadd2..63fa2a8 100644
--- a/debian/control
+++ b/debian/control
@@ -31,7 +31,8 @@ Depends: python3-pkg-resources,
${misc:Depends},
${python3:Depends},
Recommends: ${diffoscope:Recommends},
- ${python3:Recommends}
+ ${python3:Recommends},
+Suggests: libjs-jquery
Breaks: debbindiff (<< 29)
Replaces: debbindiff (<< 29)
Description: in-depth comparison of files, archives, and directories
diff --git a/diffoscope/__main__.py b/diffoscope/__main__.py
index 3363186..0ba3f94 100644
--- a/diffoscope/__main__.py
+++ b/diffoscope/__main__.py
@@ -34,6 +34,7 @@ from diffoscope import logger, VERSION, set_locale, clean_all_temp_files
import diffoscope.comparators
from diffoscope.config import Config
from diffoscope.presenters.html import output_html
+from diffoscope.presenters.html import output_html_directory
from diffoscope.presenters.text import output_text
@@ -51,6 +52,8 @@ def create_parser():
help='Open the python debugger in case of crashes.')
parser.add_argument('--html', metavar='output', dest='html_output',
help='write HTML report to given file (use - for stdout)')
+ parser.add_argument('--html-dir', metavar='output', dest='html_output_directory',
+ help='write multi-file HTML report to given directory')
parser.add_argument('--text', metavar='output', dest='text_output',
help='write plain text output to given file (use - for stdout)')
parser.add_argument('--max-report-size', metavar='BYTES',
@@ -58,6 +61,11 @@ def create_parser():
help='maximum bytes written in report (default: %d)' %
Config.general.max_report_size,
default=Config.general.max_report_size)
+ parser.add_argument('--separate-file-diff-size', metavar='BYTES',
+ dest='separate_file_diff_size', type=int,
+ help='diff size to load diff on demand, with --html-dir (default: %d)' %
+ Config.general.separate_file_diff_size,
+ default=Config.general.separate_file_diff_size)
parser.add_argument('--max-diff-block-lines', dest='max_diff_block_lines', type=int,
help='maximum number of lines per diff block (default: %d)' %
Config.general.max_diff_block_lines,
@@ -75,6 +83,8 @@ def create_parser():
help='treat absent files as empty')
parser.add_argument('--css', metavar='url', dest='css_url',
help='link to an extra CSS for the HTML report')
+ parser.add_argument('--jquery', metavar='url', dest='jquery_url',
+ help='link to the jquery url, with --html-dir. By default, a symlink to /usr/share/javascript/jquery/jquery.js is created')
parser.add_argument('file1', help='first file to compare')
parser.add_argument('file2', help='second file to compare')
if not tlsh:
@@ -128,6 +138,7 @@ def run_diffoscope(parsed_args):
Config.general.max_diff_block_lines = parsed_args.max_diff_block_lines
Config.general.max_diff_input_lines = parsed_args.max_diff_input_lines
Config.general.max_report_size = parsed_args.max_report_size
+ Config.general.separate_file_diff_size = parsed_args.separate_file_diff_size
Config.general.fuzzy_threshold = parsed_args.fuzzy_threshold
Config.general.new_file = parsed_args.new_file
if parsed_args.debug:
@@ -136,10 +147,15 @@ def run_diffoscope(parsed_args):
difference = diffoscope.comparators.compare_root_paths(
parsed_args.file1, parsed_args.file2)
if difference:
+ # no output desired? print text
+ if not any((parsed_args.text_output, parsed_args.html_output, parsed_args.html_output_directory)):
+ parsed_args.text_output = "-"
if parsed_args.html_output:
with make_printer(parsed_args.html_output) as print_func:
output_html(difference, css_url=parsed_args.css_url, print_func=print_func)
- if (parsed_args.text_output and parsed_args.text_output != parsed_args.html_output) or not parsed_args.html_output:
+ if parsed_args.html_output_directory:
+ output_html_directory(parsed_args.html_output_directory, difference, css_url=parsed_args.css_url, jquery_url=parsed_args.jquery_url)
+ if parsed_args.text_output:
with make_printer(parsed_args.text_output or '-') as print_func:
output_text(difference, print_func=print_func)
return 1
diff --git a/diffoscope/config.py b/diffoscope/config.py
index 5087306..ff65558 100644
--- a/diffoscope/config.py
+++ b/diffoscope/config.py
@@ -30,6 +30,7 @@ class Config(object):
self._max_diff_block_lines = 50
self._max_diff_input_lines = 100000 # GNU diff cannot process arbitrary large files :(
self._max_report_size = 2000 * 2 ** 10 # 2000 kB
+ self._separate_file_diff_size = 200 * 2 ** 10 # 200kB
self._fuzzy_threshold = 60
self._new_file = False
@@ -64,6 +65,14 @@ class Config(object):
self._max_report_size = value
@property
+ def separate_file_diff_size(self):
+ return self._separate_file_diff_size
+
+ @separate_file_diff_size.setter
+ def separate_file_diff_size(self, value):
+ self._separate_file_diff_size = value
+
+ @property
def fuzzy_threshold(self):
return self._fuzzy_threshold
diff --git a/diffoscope/presenters/html.py b/diffoscope/presenters/html.py
index 42f7acd..c0c605a 100644
--- a/diffoscope/presenters/html.py
+++ b/diffoscope/presenters/html.py
@@ -34,6 +34,11 @@
import cgi
import re
import sys
+import os
+import os.path
+import codecs
+import hashlib
+from contextlib import contextmanager
from xml.sax.saxutils import escape
from diffoscope import logger, VERSION
from diffoscope.config import Config
@@ -55,8 +60,8 @@ DIFFOFF = "\x02"
HEADER = """<!DOCTYPE html>
<html>
<head>
- <meta charset="utf-8">
- <meta name="generator" content="diffoscope">
+ <meta charset="utf-8" />
+ <meta name="generator" content="diffoscope" />
<link rel="icon" type="image/png" href="data:image/png;base64,%(favicon)s" />
<title>%(title)s</title>
<style>
@@ -138,6 +143,9 @@ HEADER = """<!DOCTYPE html>
.diffheader:hover .anchor {
display: inline;
}
+ .ondemand {
+ text-align: center;
+ }
</style>
%(css_link)s
</head>
@@ -150,6 +158,23 @@ FOOTER = """
</html>
"""
+SCRIPTS = """
+<script src="%(jquery_url)s"></script>
+<script type="text/javascript">
+$(function() {
+ $("div.ondemand a").on('click', function (){
+ var filename = $(this).attr('href');
+ var div = $(this).parent();
+ div.text('... loading ...');
+ div.load(filename + " table", function() {
+ // http://stackoverflow.com/a/8452751/946226
+ $(this).children(':first').unwrap();
+ });
+ return false;
+ });
+});
+</script>
+"""
class PrintLimitReached(Exception):
pass
@@ -381,7 +406,7 @@ def empty_buffer(print_func):
buf = []
-def output_unified_diff(print_func, unified_diff):
+def output_unified_diff_table(print_func, unified_diff):
global add_cpt, del_cpt
global line1, line2
global hunk_off1, hunk_size1, hunk_off2, hunk_size2
@@ -464,8 +489,24 @@ def output_unified_diff(print_func, unified_diff):
finally:
print_func(u"</table>", force=True)
+def output_unified_diff(print_func, css_url, directory, unified_diff):
+ if directory and len(unified_diff) > Config.general.separate_file_diff_size:
+ # open a new file for this table
+ filename="%s.html" % hashlib.md5(unified_diff.encode('utf-8')).hexdigest()
+ logger.debug('separate html output for diff of size %d', len(unified_diff))
+ with file_printer(directory, filename) as new_print_func:
+ output_header(css_url, new_print_func)
+ output_unified_diff_table(new_print_func, unified_diff)
+ output_footer(new_print_func)
+
+ print_func("<div class='ondemand'>\n")
+ print_func("... <a href='%s'>load diff</a> ...\n" % escape(filename))
+ print_func("</div>\n")
+
+ else:
+ output_unified_diff_table(print_func, unified_diff)
-def output_difference(difference, print_func, parents):
+def output_difference(difference, print_func, css_url, directory, parents):
logger.debug('html output for %s', difference.source1)
sources = parents + [difference.source1]
print_func(u"<div class='difference'>")
@@ -487,9 +528,9 @@ def output_difference(difference, print_func, parents):
% u'<br />'.join(map(escape, difference.comments)))
print_func(u"</div>")
if difference.unified_diff:
- output_unified_diff(print_func, difference.unified_diff)
+ output_unified_diff(print_func, css_url, directory, difference.unified_diff)
for detail in difference.details:
- output_difference(detail, print_func, sources)
+ output_difference(detail, print_func, css_url, directory, sources)
except PrintLimitReached:
logger.debug('print limit reached')
raise
@@ -507,16 +548,59 @@ def output_header(css_url, print_func):
'css_link': css_link,
})
+def output_footer(print_func):
+ print_func(FOOTER % {'version': VERSION}, force=True)
+
def output_html(difference, css_url=None, print_func=None):
+ """
+ Default presenter, all in one HTML file
+ """
if print_func is None:
print_func = print
print_func = create_limited_print_func(print_func, Config.general.max_report_size)
try:
output_header(css_url, print_func)
- output_difference(difference, print_func, [])
+ output_difference(difference, print_func, css_url, None, [])
except PrintLimitReached:
logger.debug('print limit reached')
print_func(u"<div class='error'>Max output size reached.</div>",
force=True)
- print_func(FOOTER % {'version': VERSION}, force=True)
+ output_footer(print_func)
+
+@contextmanager
+def file_printer(directory, filename):
+ with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
+ print_func = f.write
+ print_func = create_limited_print_func(print_func, Config.general.max_report_size)
+ yield print_func
+
+def output_html_directory(directory, difference, css_url=None, jquery_url=None):
+ """
+ Multi-file presenter. Writes to a directory, and puts large diff tables
+ into files of their own.
+
+ This uses jQuery. By default it uses /usr/share/javascript/jquery/jquery.js
+ (symlinked, so that you can still share the result over HTTP).
+ You can also pass --jquery URL to diffoscope to use a central jQuery copy.
+ """
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+
+ if not jquery_url:
+ jquery_symlink = os.path.join(directory, "jquery.js")
+ if not os.path.exists(jquery_symlink):
+ os.symlink("/usr/share/javascript/jquery/jquery.js", jquery_symlink)
+ jquery_url = "./jquery.js"
+
+ with file_printer(directory, "index.html") as print_func:
+ print_func = create_limited_print_func(print_func, Config.general.max_report_size)
+ try:
+ output_header(css_url, print_func)
+ output_difference(difference, print_func, css_url, directory, [])
+ except PrintLimitReached:
+ logger.debug('print limit reached')
+ print_func(u"<div class='error'>Max output size reached.</div>",
+ force=True)
+ print_func(SCRIPTS % {'jquery_url': escape(jquery_url)}, force=True)
+ output_footer(print_func)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list