[diffoscope] 03/04: presenters: html: more intuitive "limit" flags, some backwards-incompatible changes:
Ximin Luo
infinity0 at debian.org
Wed Jul 5 11:44:14 CEST 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch WIP/humungous-diffs
in repository diffoscope.
commit f570ad45adde32c8395b54b63e5398e1829ac576
Author: Ximin Luo <infinity0 at debian.org>
Date: Mon Jun 26 16:00:18 2017 +0200
presenters: html: more intuitive "limit" flags, some backwards-incompatible changes:
--max-report-size:
Old: in --html-dir this limited only the parent page
New: in --html-dir this applies across all pages
--max-diff-block-lines:
Old: in --html-dir 4 * this number applied across all pages (for a given diff block)
New: in --html-dir this applies across all pages (for a given diff block)
--max-page-size:
New flag
Applies to the sole --html page, or the top-level --html-dir page
--max-report-child-size
Renamed to
--max-page-size-child:
No behavioural changes
--max-diff-block-lines-parent
Renamed to
--max-page-diff-block-lines:
Old: Only applied to the top-level --html-dir page
New: Applies to the sole --html page, or the top-level --html-dir page
The reasoning behind these changes is that it's unlikely someone would want to
generate a 500MB single html page, but they might in theory generate a 500MB html
directory split up into several 200KB pages, plus a single 200KB html page as
a summary. The new semantics for these flags allow both to be generated in one
run using the same set of flags.
---
diffoscope/config.py | 39 ++--
diffoscope/main.py | 108 ++++++-----
diffoscope/presenters/html/html.py | 319 +++++++++++++++++++++++---------
diffoscope/presenters/html/templates.py | 16 +-
4 files changed, 319 insertions(+), 163 deletions(-)
diff --git a/diffoscope/config.py b/diffoscope/config.py
index 5259a98..4565573 100644
--- a/diffoscope/config.py
+++ b/diffoscope/config.py
@@ -20,16 +20,21 @@
class Config(object):
- max_diff_block_lines = 256
- max_diff_block_lines_parent = 50
- max_diff_block_lines_saved = float("inf")
- # html-dir output uses ratio * max-diff-block-lines as its limit
- max_diff_block_lines_html_dir_ratio = 4
# GNU diff cannot process arbitrary large files :(
- max_diff_input_lines = 2 ** 20
- max_report_size = 2000 * 2 ** 10 # 2000 kB
+ max_diff_input_lines = 2 ** 22
+ max_diff_block_lines_saved = float("inf")
+
+ # hard limits, restricts single-file and multi-file formats
+ max_report_size = 40 * 2 ** 20 # 40 MB
+ max_diff_block_lines = 2 ** 10 # 1024 lines
+ # structural limits, restricts single-file formats
+ # semi-restricts multi-file formats
+ max_page_size = 400 * 2 ** 10 # 400 kB
+ max_page_size_child = 200 * 2 ** 10 # 200 kB
+ max_page_diff_block_lines = 2 ** 7 # 128 lines
+
max_text_report_size = 0
- max_report_child_size = 500 * 2 ** 10
+
new_file = False
fuzzy_threshold = 60
enforce_constraints = True
@@ -47,15 +52,13 @@ class Config(object):
def __setattr__(self, k, v):
super(Config, self).__setattr__(k, v)
- if self.enforce_constraints:
- self.check_constraints()
+ def check_ge(self, a, b):
+ va = getattr(self, a)
+ vb = getattr(self, b)
+ if va < vb:
+ raise ValueError("{0} ({1}) cannot be smaller than {2} ({3})".format(a, va, b, vb))
def check_constraints(self):
- max_ = self.max_diff_block_lines_html_dir_ratio * \
- self.max_diff_block_lines
- if self.max_diff_block_lines_saved < max_: # noqa
- raise ValueError("max_diff_block_lines_saved "
- "({0.max_diff_block_lines_saved}) cannot be smaller than "
- "{0.max_diff_block_lines_html_dir_ratio} * "
- "max_diff_block_lines ({1})".format(self, max_),
- )
+ self.check_ge("max_diff_block_lines", "max_page_diff_block_lines")
+ self.check_ge("max_report_size", "max_page_size")
+ self.check_ge("max_report_size", "max_page_size_child")
diff --git a/diffoscope/main.py b/diffoscope/main.py
index 8f064dd..ffe020a 100644
--- a/diffoscope/main.py
+++ b/diffoscope/main.py
@@ -78,6 +78,8 @@ def create_parser():
const=True, help='Show an approximate progress bar')
parser.add_argument('--no-progress', dest='progress', action='store_const',
const=False, help='Do not show any progress bar')
+ parser.add_argument('--no-default-limits', action='store_true', default=False,
+ help='Disable most default output limits and diff calculation limits.')
group1 = parser.add_argument_group('output types')
group1.add_argument('--text', metavar='OUTPUT_FILE', dest='text_output',
@@ -113,49 +115,44 @@ def create_parser():
help='Write profiling info to given file (use - for stdout)')
group2 = parser.add_argument_group('output limits')
- group2.add_argument('--no-default-limits', action='store_true', default=False,
- help='Disable most default limits. Note that text '
- 'output already ignores most of these.')
+ # everything marked with default=None below is affected by no-default-limits
group2.add_argument('--max-text-report-size', metavar='BYTES', type=int,
help='Maximum bytes written in --text report. (0 to '
- 'disable)', default=None).completer=RangeCompleter(0,
- Config().max_text_report_size, 200000)
+ 'disable, default: %d)' % Config().max_text_report_size,
+ default=None)
group2.add_argument('--max-report-size', metavar='BYTES', type=int,
- help='Maximum bytes written in report. In html-dir '
- 'output, this is the max bytes of the parent page. '
- '(0 to disable, default: %d)' %
- Config().max_report_size,
- default=None).completer=RangeCompleter(0,
- Config().max_report_size, 200000)
- group2.add_argument('--max-report-child-size', metavar='BYTES', type=int,
- help='In --html-dir output, this is the max bytes of '
- 'each child page (0 to disable, default: %(default)s, '
- 'remaining in effect even with --no-default-limits)',
- default=Config().max_report_child_size).completer=RangeCompleter(0,
- Config().max_report_child_size, 50000)
+ help='Maximum bytes of a report in a given format, '
+ 'across all of its pages. Note that some formats, such '
+ 'as --html, may be restricted by even smaller limits '
+ 'such as --max-page-size. (0 to disable, default: %d)' %
+ Config().max_report_size, default=None).completer=RangeCompleter(
+ Config().max_report_size)
group2.add_argument('--max-diff-block-lines', metavar='LINES', type=int,
- help='Maximum number of lines output per diff block. '
- 'In --html-dir output, we use %d times this number instead, '
- 'taken over all pages. (0 to disable, default: %d)' %
- (Config().max_diff_block_lines_html_dir_ratio,
- Config().max_diff_block_lines),
- default=None).completer=RangeCompleter(0,
- Config().max_diff_block_lines, 5)
- group2.add_argument('--max-diff-block-lines-parent', metavar='LINES', type=int,
- help='In --html-dir output, this is maximum number of '
- 'lines output per diff block on the parent page '
- 'before spilling it into child pages (0 to disable, '
- 'default: %(default)s, remaining in effect even with '
- '--no-default-limits)',
- default=Config().max_diff_block_lines_parent).completer=RangeCompleter(0,
- Config().max_diff_block_lines_parent, 200)
- group2.add_argument('--max-diff-block-lines-saved', metavar='LINES', type=int,
- help='Maximum number of lines saved per diff block. '
- 'Most users should not need this, unless you run out '
- 'of memory. This truncates diff(1) output before even '
- 'trying to emit it in a report. This also affects --text '
- 'output. (0 to disable, default: 0)',
- default=0).completer=RangeCompleter(0, 0, 200)
+ help='Maximum number of lines output per unified-diff '
+ 'block, across all pages. (0 to disable, default: %d)' %
+ Config().max_diff_block_lines, default=None).completer=RangeCompleter(
+ Config().max_diff_block_lines)
+ group2.add_argument('--max-page-size', metavar='BYTES', type=int,
+ help='Maximum bytes of the top-level (--html-dir) or sole '
+ '(--html) page. (default: %(default)s, remains in effect '
+ 'even with --no-default-limits)', default=
+ Config().max_page_size).completer=RangeCompleter(
+ Config().max_page_size)
+ group2.add_argument('--max-page-size-child', metavar='BYTES', type=int,
+ help='In --html-dir output, this is the maximum bytes of '
+ 'each child page (default: %(default)s, remains in '
+ 'effect even with --no-default-limits)', default=
+ Config().max_page_size_child).completer=RangeCompleter(
+ Config().max_page_size_child)
+ group2.add_argument('--max-page-diff-block-lines', metavar='LINES', type=int,
+ help='Maximum number of lines output per unified-diff block '
+ 'on the top-level (--html-dir) or sole (--html) page, before '
+ 'spilling it into child pages (--html-dir) or skipping the '
+ 'rest of the diff block. Child pages are limited instead by '
+ '--max-page-size-child. (default: %(default)s, remains in '
+ 'effect even with --no-default-limits)', default=
+ Config().max_page_diff_block_lines).completer=RangeCompleter(
+ Config().max_page_diff_block_lines)
group3 = parser.add_argument_group('diff calculation')
group3.add_argument('--new-file', action='store_true',
@@ -184,19 +181,25 @@ def create_parser():
group3.add_argument('--fuzzy-threshold', type=int,
help='Threshold for fuzzy-matching '
'(0 to disable, %(default)s is default, 400 is high fuzziness)',
- default=Config().fuzzy_threshold).completer=RangeCompleter(0,
- 400, 20)
+ default=Config().fuzzy_threshold).completer=RangeCompleter(400)
group3.add_argument('--max-diff-input-lines', metavar='LINES', type=int,
help='Maximum number of lines fed to diff(1) '
'(0 to disable, default: %d)' %
Config().max_diff_input_lines,
- default=None).completer=RangeCompleter(0,
- Config().max_diff_input_lines, 5000)
+ default=None).completer=RangeCompleter(
+ Config().max_diff_input_lines)
group3.add_argument('--max-container-depth', metavar='DEPTH', type=int,
help='Maximum depth to recurse into containers. '
'(Cannot be disabled for security reasons, default: '
'%(default)s)',
default=Config().max_container_depth)
+ group3.add_argument('--max-diff-block-lines-saved', metavar='LINES', type=int,
+ help='Maximum number of lines saved per diff block. '
+ 'Most users should not need this, unless you run out '
+ 'of memory. This truncates diff(1) output before emitting '
+ 'it in a report, and affects all types of output, '
+ 'including --text and --json. (0 to disable, default: '
+ '%(default)s)', default=0)
group4 = parser.add_argument_group('information commands')
group4.add_argument('--help', '-h', action='help',
@@ -225,8 +228,12 @@ def create_parser():
class RangeCompleter(object):
- def __init__(self, start, end, step):
- self.choices = range(start, end + 1, step)
+ def __init__(self, start, end=0, divisions=16):
+ if end < start:
+ tmp = end
+ end = start
+ start = tmp
+ self.choices = range(start, end + 1, int((end-start+1)/divisions))
def __call__(self, prefix, **kwargs):
return (str(i) for i in self.choices if str(i).startswith(prefix))
@@ -283,6 +290,7 @@ class ListDebianSubstvarsAction(argparse._StoreTrueAction):
sys.exit(0)
def maybe_set_limit(config, parsed_args, key):
+ # apply limits affected by "no-default-limits"
v = getattr(parsed_args, key)
if v is not None:
setattr(config, key, float("inf") if v == 0 else v)
@@ -300,11 +308,12 @@ def run_diffoscope(parsed_args):
logger.warning('Fuzzy-matching is currently disabled as the "tlsh" module is unavailable.')
maybe_set_limit(Config(), parsed_args, "max_report_size")
maybe_set_limit(Config(), parsed_args, "max_text_report_size")
- maybe_set_limit(Config(), parsed_args, "max_report_child_size")
- # need to set them in this order due to Config._check_constraints
- maybe_set_limit(Config(), parsed_args, "max_diff_block_lines_saved")
- maybe_set_limit(Config(), parsed_args, "max_diff_block_lines_parent")
maybe_set_limit(Config(), parsed_args, "max_diff_block_lines")
+ Config().max_page_size = parsed_args.max_page_size
+ Config().max_page_size_child = parsed_args.max_page_size_child
+ Config().max_page_diff_block_lines = parsed_args.max_page_diff_block_lines
+
+ maybe_set_limit(Config(), parsed_args, "max_diff_block_lines_saved")
maybe_set_limit(Config(), parsed_args, "max_diff_input_lines")
Config().max_container_depth = parsed_args.max_container_depth
Config().fuzzy_threshold = parsed_args.fuzzy_threshold
@@ -313,6 +322,7 @@ def run_diffoscope(parsed_args):
Config().exclude_commands = parsed_args.exclude_commands
Config().exclude_directory_metadata = parsed_args.exclude_directory_metadata
Config().compute_visual_diffs = PresenterManager().compute_visual_diffs()
+ Config().check_constraints()
set_path()
set_locale()
path1, path2 = parsed_args.path1, parsed_args.path2
diff --git a/diffoscope/presenters/html/html.py b/diffoscope/presenters/html/html.py
index 7436288..28be570 100644
--- a/diffoscope/presenters/html/html.py
+++ b/diffoscope/presenters/html/html.py
@@ -71,6 +71,20 @@ re_anchor_prefix = re.compile(r'^[^A-Za-z]')
re_anchor_suffix = re.compile(r'[^A-Za-z-_:\.]')
+def send_and_exhaust(iterator, arg, default):
+ """Send a single value to a coroutine, exhaust it, and return the final
+ element or a default value if it was empty."""
+ # Python's coroutine syntax is still a bit rough when you want to do
+ # slightly more complex stuff. Watch this logic closely.
+ output = default
+ try:
+ output = iterator.send(arg)
+ except StopIteration:
+ pass
+ for output in iterator:
+ pass
+ return output
+
def md5(s):
return hashlib.md5(s.encode('utf-8')).hexdigest()
@@ -167,48 +181,65 @@ def output_node_frame(difference, path, indentstr, indentnum, body):
html.escape(PartialString.escape(difference.source1)),
html.escape(PartialString.escape(difference.source2)))
- return u"""{0[1]}<div class="diffheader">
+ return PartialString.numl(u"""{0[1]}<div class="diffheader">
{1}{0[1]}</div>
-{2}""".format(indent, header, body)
+{2}""", 3).pformatl(indent, header, body)
def output_node(difference, path, indentstr, indentnum, css_url, directory):
+ """Returns a tuple (parent, continuation) where
+
+ - parent is a PartialString representing the body of the node, including
+ its comments, visuals, unified_diff and headers for its children - but
+ not the bodies of the children
+ - continuation is either None or (only in html-dir mode) a function which
+ when called with a single integer arg, the maximum size to print, will
+ print any remaining "split" pages for unified_diff up to the given size.
+ """
indent = tuple(indentstr * (indentnum + x) for x in range(3))
t, cont = PartialString.cont()
+ comments = u""
if difference.comments:
comments = u'{0[1]}<div class="comment">\n{1}{0[1]}</div>\n'.format(
indent, "".join(u"{0[2]}{1}<br/>\n".format(indent, html.escape(x)) for x in difference.comments))
- else:
- comments = u""
visuals = u""
for visual in difference.visuals:
visuals += output_visual(visual, path, indentstr, indentnum+1)
- udiff = io.StringIO()
+ udiff = u""
+ ud_cont = None
if difference.unified_diff:
- def print_func(x, force=False):
- udiff.write(x)
- HTMLPresenter().output_unified_diff(print_func, css_url, directory, difference.unified_diff, difference.has_internal_linenos)
+ ud_cont = HTMLSideBySidePresenter().output_unified_diff(
+ css_url, directory, difference.unified_diff,
+ difference.has_internal_linenos)
+ udiff = next(ud_cont)
+ if isinstance(udiff, PartialString):
+ ud_cont = ud_cont.send
+ udiff = udiff.pformatl(PartialString.of(ud_cont))
+ else:
+ for _ in ud_cont: pass # exhaust the iterator, avoids GeneratorExit
+ udiff = t.escape(udiff)
+ ud_cont = None
- # Construct a PartialString for this node
- # {3} gets mapped to {-1}, a continuation hole for later child nodes
- body = u"{0}{1}{2}{3}".format(t.escape(comments), t.escape(visuals), t.escape(udiff.getvalue()), "{-1}")
+ # PartialString for this node
+ body = PartialString.numl(u"{0}{1}{2}{-1}", 3, cont).pformatl(
+ t.escape(comments), t.escape(visuals), udiff)
if len(path) == 1:
# root node, frame it
- t = cont(t, output_node_frame(difference, path, indentstr, indentnum, body))
- else:
- t = cont(t, body)
+ body = output_node_frame(difference, path, indentstr, indentnum, body)
+ t = cont(t, body)
# Add holes for child nodes
for d in difference.details:
- # {0} hole, for the child node's contents
- # {-1} continuation hole, for later child nodes
- t = cont(t, u"""{0[1]}<div class="difference">
+ child = output_node_frame(d, path + [d], indentstr, indentnum+1, PartialString.of(d))
+ child = PartialString.numl(u"""{0[1]}<div class="difference">
{1}{0[1]}</div>
-{{-1}}""".format(indent, output_node_frame(d, path + [d], indentstr, indentnum+1, "{0}")), d)
+{-1}""", 2, cont).pformatl(indent, child)
+ t = cont(t, child)
- return cont(t, u"")
+ assert len(t.holes) >= len(difference.details) + 1 # there might be extra holes for the unified diff continuation
+ return cont(t, u""), ud_cont
def output_header(css_url):
if css_url:
@@ -233,32 +264,39 @@ def file_printer(directory, filename):
yield f.write
@contextlib.contextmanager
-def spl_file_printer(directory, filename):
+def spl_file_printer(directory, filename, accum):
with codecs.open(os.path.join(directory,filename), 'w', encoding='utf-8') as f:
print_func = f.write
- def recording_print_func(s, force=False):
+ def recording_print_func(s):
print_func(s)
recording_print_func.bytes_written += len(s)
+ accum.bytes_written += len(s)
recording_print_func.bytes_written = 0
yield recording_print_func
-class HTMLPresenter(Presenter):
+class HTMLSideBySidePresenter(object):
supports_visual_diffs = True
def __init__(self):
- self.new_unified_diff()
+ self.max_lines = Config().max_diff_block_lines # only for html-dir
+ self.max_lines_parent = Config().max_page_diff_block_lines
+ self.max_page_size_child = Config().max_page_size_child
def new_unified_diff(self):
self.spl_rows = 0
self.spl_current_page = 0
self.spl_print_func = None
self.spl_print_ctrl = None
+ # the below apply to child pages only, the parent page limit works
+ # differently and is controlled by output_difference later below
+ self.bytes_max_total = 0
+ self.bytes_written = 0
+ self.error_row = None
def output_hunk_header(self, hunk_off1, hunk_size1, hunk_off2, hunk_size2):
self.spl_print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>' % (hunk_off1, hunk_size1))
self.spl_print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n' % (hunk_off2, hunk_size2))
- self.row_was_output()
def output_line(self, has_internal_linenos, type_name, s1, line1, s2, line2):
if s1 and len(s1) > MAX_LINE_SIZE:
@@ -290,8 +328,7 @@ class HTMLPresenter(Presenter):
else:
self.spl_print_func(u'<td colspan="2">\xa0</td>')
finally:
- self.spl_print_func(u"</tr>\n", force=True)
- self.row_was_output()
+ self.spl_print_func(u"</tr>\n")
def spl_print_enter(self, print_context, rotation_params):
# Takes ownership of print_context
@@ -306,54 +343,71 @@ class HTMLPresenter(Presenter):
def spl_print_exit(self, *exc_info):
if not self.spl_had_entered_child(): return False
- self.spl_print_func(output_footer(), force=True)
+ self.spl_print_func(output_footer())
_exit, _ = self.spl_print_ctrl
self.spl_print_func = None
self.spl_print_ctrl = None
return _exit(*exc_info)
- def row_was_output(self):
- self.spl_rows += 1
- _, rotation_params = self.spl_print_ctrl
- max_lines = Config().max_diff_block_lines
- max_lines_parent = Config().max_diff_block_lines_parent
- max_lines_ratio = Config().max_diff_block_lines_html_dir_ratio
- max_report_child_size = Config().max_report_child_size
- if not rotation_params:
+ def check_limits(self):
+ if not self.spl_print_ctrl[1]:
# html-dir single output, don't need to rotate
- if self.spl_rows >= max_lines:
+ if self.spl_rows >= self.max_lines_parent:
raise DiffBlockLimitReached()
- return
+ return False
else:
# html-dir output, perhaps need to rotate
- directory, mainname, css_url = rotation_params
- if self.spl_rows >= max_lines_ratio * max_lines:
+ if self.spl_rows >= self.max_lines:
raise DiffBlockLimitReached()
if self.spl_current_page == 0: # on parent page
- if self.spl_rows < max_lines_parent:
- return
+ if self.spl_rows < self.max_lines_parent:
+ return False
+ logger.debug("new unified-diff subpage, parent page went over %s lines", self.max_lines_parent)
else: # on child page
- # TODO: make this stay below the max, instead of going 1 row over the max
- # will require some backtracking...
- if self.spl_print_func.bytes_written < max_report_child_size:
- return
+ if self.bytes_max_total and self.bytes_written > self.bytes_max_total:
+ raise PrintLimitReached()
+ if self.spl_print_func.bytes_written < self.max_page_size_child:
+ return False
+ logger.debug("new unified-diff subpage, previous subpage went over %s bytes", self.max_page_size_child)
+ return True
+ def new_child_page(self):
+ _, rotation_params = self.spl_print_ctrl
+ directory, mainname, css_url = rotation_params
self.spl_current_page += 1
filename = "%s-%s.html" % (mainname, self.spl_current_page)
if self.spl_current_page > 1:
# previous page was a child, close it
- self.spl_print_func(templates.UD_TABLE_FOOTER % {"filename": html.escape(filename), "text": "load diff"}, force=True)
+ self.spl_print_func(templates.UD_TABLE_FOOTER % {"filename": html.escape(filename), "text": "load diff"})
+ self.spl_print_func(u"</table>\n")
self.spl_print_exit(None, None, None)
# rotate to the next child page
- context = spl_file_printer(directory, filename)
+ context = spl_file_printer(directory, filename, self)
self.spl_print_enter(context, rotation_params)
self.spl_print_func(templates.UD_TABLE_HEADER)
+ def output_limit_reached(self, limit_type, total, bytes_processed):
+ logger.debug('%s print limit reached', limit_type)
+ bytes_left = total - bytes_processed
+ self.error_row = templates.UD_TABLE_LIMIT_FOOTER % {
+ "limit_type": limit_type,
+ "bytes_left": bytes_left,
+ "bytes_total": total,
+ "percent": (bytes_left / total) * 100
+ }
+ self.spl_print_func(self.error_row)
+
def output_unified_diff_table(self, unified_diff, has_internal_linenos):
- self.spl_print_func(templates.UD_TABLE_HEADER)
+ """Output a unified diff <table> possibly over multiple pages.
+
+ It is the caller's responsibility to set up self.spl_* correctly.
+
+ Yields None for each extra child page, and then True or False depending
+ on whether the whole output was truncated.
+ """
try:
ydiff = SideBySideDiff(unified_diff)
for t, args in ydiff.items():
@@ -365,67 +419,135 @@ class HTMLPresenter(Presenter):
self.spl_print_func(u'<td colspan="2">%s</td>\n' % args)
else:
raise AssertionError()
- return True
+ self.spl_rows += 1
+ if not self.check_limits():
+ continue
+ self.new_child_page()
+ new_limit = yield None
+ if new_limit:
+ self.bytes_max_total = new_limit
+ self.bytes_written = 0
+ self.check_limits()
+ wrote_all = True
+ except GeneratorExit:
+ return
except DiffBlockLimitReached:
- total = len(unified_diff)
- bytes_left = total - ydiff.bytes_processed
- frac = bytes_left / total
- self.spl_print_func(
- u'<tr class="error">'
- u'<td colspan="4">Max diff block lines reached; %s/%s bytes (%.2f%%) of diff not shown.'
- u"</td></tr>" % (bytes_left, total, frac*100), force=True)
- logger.debug('diff-block print limit reached')
- return False
+ self.output_limit_reached("diff block lines", len(unified_diff), ydiff.bytes_processed)
+ wrote_all = False
except PrintLimitReached:
- assert not self.spl_had_entered_child() # limit reached on the parent page
- self.spl_print_func(u'<tr class="error"><td colspan="4">Max output size reached.</td></tr>', force=True)
- raise
+ self.output_limit_reached("report size", len(unified_diff), ydiff.bytes_processed)
+ wrote_all = False
finally:
- self.spl_print_func(u"</table>", force=True)
+ # no footer on the last page, just a close tag
+ self.spl_print_func(u"</table>")
+ yield wrote_all
- def output_unified_diff(self, print_func, css_url, directory, unified_diff, has_internal_linenos):
+ def output_unified_diff(self, css_url, directory, unified_diff, has_internal_linenos):
self.new_unified_diff()
rotation_params = None
if directory:
mainname = md5(unified_diff)
rotation_params = directory, mainname, css_url
+
try:
- self.spl_print_func = print_func
+ udiff = io.StringIO()
+ udiff.write(templates.UD_TABLE_HEADER)
+ self.spl_print_func = udiff.write
self.spl_print_ctrl = None, rotation_params
- truncated = not self.output_unified_diff_table(unified_diff, has_internal_linenos)
+
+ it = self.output_unified_diff_table(unified_diff, has_internal_linenos)
+ wrote_all = next(it)
+ if wrote_all is None:
+ assert self.spl_current_page == 1
+ # now pause the iteration and wait for consumer to give us a
+ # size-limit to write the remaining pages with
+ # exhaust the iterator and save the last item in wrote_all
+ new_limit = yield PartialString(PartialString.escape(udiff.getvalue()) + u"{0}</table>\n", None)
+ wrote_all = send_and_exhaust(it, new_limit, wrote_all)
+ else:
+ yield udiff.getvalue()
+ return
+
+ except GeneratorExit:
+ logger.debug("skip extra output for unified diff %s", mainname)
+ it.close()
+ self.spl_print_exit(None, None, None)
+ return
except:
- if not self.spl_print_exit(*sys.exc_info()): raise
+ import traceback
+ traceback.print_exc()
+ if self.spl_print_exit(*sys.exc_info()) is False: raise
else:
self.spl_print_exit(None, None, None)
finally:
self.spl_print_ctrl = None
self.spl_print_func = None
- if self.spl_current_page > 0:
+ truncated = not wrote_all
+ child_rows_written = self.spl_rows - self.max_lines_parent
+ if truncated and not child_rows_written:
+ # if we didn't write any child rows, just output the error message on the parent page
+ parent_last_row = self.error_row
+ else:
noun = "pieces" if self.spl_current_page > 1 else "piece"
text = "load diff (%s %s%s)" % (self.spl_current_page, noun, (", truncated" if truncated else ""))
- print_func(templates.UD_TABLE_FOOTER % {"filename": html.escape("%s-1.html" % mainname), "text": text}, force=True)
+ parent_last_row = templates.UD_TABLE_FOOTER % {"filename": html.escape("%s-1.html" % mainname), "text": text}
+ yield self.bytes_written, parent_last_row
+
+
+class HTMLPresenter(Presenter):
+ supports_visual_diffs = True
+
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.report_printed = 0
+ self.report_limit = Config().max_report_size
+
+ @property
+ def report_remaining(self):
+ return self.report_limit - self.report_printed
+
+ def maybe_print(self, node, printers, outputs, continuations):
+ output = outputs[node]
+ node_cont = continuations[node]
+ if output.holes and set(output.holes) - set(node_cont):
+ return
+
+ # could be slightly more accurate, whatever
+ est_placeholder_len = max(len(templates.UD_TABLE_FOOTER), len(templates.UD_TABLE_LIMIT_FOOTER)) + 40
+ est_size = output.size(est_placeholder_len)
+
+ results = {}
+ for cont in node_cont:
+ remaining = self.report_remaining - est_size
+ printed, result = cont(remaining)
+ self.report_printed += printed
+ results[cont] = result
+
+ out = output.format(results)
+ printer_args = printers[node]
+ with printer_args[0](*printer_args[1:]) as printer:
+ printer(out)
+ self.report_printed += len(out)
+
+ del outputs[node]
+ del printers[node]
+ del continuations[node]
def output_node_placeholder(self, anchor, lazy_load):
if lazy_load:
return templates.DIFFNODE_LAZY_LOAD % anchor
else:
- return '<div class="error">Max report size reached</div>\n'
+ return templates.DIFFNODE_LIMIT
def output_difference(self, target, difference, css_url, jquery_url, single_page=False):
outputs = {} # nodes to their partial output
ancestors = {} # child nodes to ancestor nodes
placeholder_len = len(self.output_node_placeholder("XXXXXXXXXXXXXXXX", not single_page))
-
+ continuations = {} # functions to print unified diff continuations (html-dir only)
printers = {} # nodes to their printers
- def maybe_print(node):
- if outputs[node].holes:
- return
- printer_args = printers[node]
- with printer_args[0](*printer_args[1:]) as printer:
- printer(outputs[node].format())
- del outputs[node]
- del printers[node]
def smallest_first(node, parscore):
depth = parscore[0] + 1 if parscore else 0
@@ -439,34 +561,44 @@ class HTMLPresenter(Presenter):
diff_path = output_diff_path(path)
pagename = md5(diff_path)
logger.debug('html output for %s', diff_path)
- node_output = output_node(node, path, " ", len(path)-1, css_url, None if single_page else target)
+ node_output, node_continuation = output_node(
+ node, path, " ", len(path)-1, css_url, None if single_page else target)
+ add_to_existing = False
if ancestor:
- limit = Config().max_report_child_size
- logger.debug("output size: %s, %s",
- outputs[ancestor].size(placeholder_len), node_output.size(placeholder_len))
- else:
- limit = Config().max_report_size
+ page_limit = Config().max_page_size if ancestor is difference else Config().max_page_size_child
+ page_current = outputs[ancestor].size(placeholder_len)
+ report_current = self.report_printed + sum(p.size(placeholder_len) for p in outputs.values())
+ want_to_add = node_output.size(placeholder_len)
+ logger.debug("report size: %s/%s, page size: %s/%s, want to add %s)", report_current, self.report_limit, page_current, page_limit, want_to_add)
+ if report_current + want_to_add > self.report_limit:
+ make_new_subpage = False
+ elif page_current + want_to_add < page_limit:
+ add_to_existing = True
+ else:
+ make_new_subpage = not single_page
- if ancestor and outputs[ancestor].size(placeholder_len) + node_output.size(placeholder_len) < limit:
+ if add_to_existing:
# under limit, add it to an existing page
outputs[ancestor] = outputs[ancestor].pformat({node: node_output})
stored = ancestor
else:
- # over limit (or root), new subpage
+ # over limit (or root), new subpage or continue/break
if ancestor:
- placeholder = self.output_node_placeholder(pagename, not single_page)
+ placeholder = self.output_node_placeholder(pagename, make_new_subpage)
outputs[ancestor] = outputs[ancestor].pformat({node: placeholder})
- maybe_print(ancestor)
+ self.maybe_print(ancestor, printers, outputs, continuations)
footer = output_footer()
- if single_page:
+ if not make_new_subpage: # we hit a limit, either max-report-size or single-page
if not outputs:
- # already output a single page, don't iterate through any more children
+ # no more holes, don't iterate through any more children
break
else:
+ # more holes to fill up with "limit reached" placeholders
continue
else:
+ # unconditionally write the root node regardless of limits
assert node is difference
footer = output_footer(jquery_url)
pagename = "index"
@@ -474,13 +606,18 @@ class HTMLPresenter(Presenter):
outputs[node] = node_output.frame(
output_header(css_url) + u'<div class="difference">\n',
u'</div>\n' + footer)
+ assert not single_page or node is difference
printers[node] = (make_printer, target) if single_page else (file_printer, target, "%s.html" % pagename)
stored = node
for child in node.details:
ancestors[child] = stored
- maybe_print(stored)
+ conts = continuations.setdefault(stored, [])
+ if node_continuation:
+ conts.append(node_continuation)
+
+ self.maybe_print(stored, printers, outputs, continuations)
if outputs:
import pprint
diff --git a/diffoscope/presenters/html/templates.py b/diffoscope/presenters/html/templates.py
index 595bc58..29ad061 100644
--- a/diffoscope/presenters/html/templates.py
+++ b/diffoscope/presenters/html/templates.py
@@ -17,7 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
-HEADER = """<!DOCTYPE html>
+HEADER = u"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
@@ -147,12 +147,12 @@ HEADER = """<!DOCTYPE html>
<body class="diffoscope">
"""
-FOOTER = """<div class="footer">Generated by <a href="https://diffoscope.org" rel="noopener noreferrer" target="_blank">diffoscope</a> %(version)s</div>
+FOOTER = u"""<div class="footer">Generated by <a href="https://diffoscope.org" rel="noopener noreferrer" target="_blank">diffoscope</a> %(version)s</div>
</body>
</html>
"""
-SCRIPTS = """<script src="%(jquery_url)s"></script>
+SCRIPTS = u"""<script src="%(jquery_url)s"></script>
<script type="text/javascript">
$(function() {
// activate "loading" controls
@@ -209,7 +209,10 @@ $(function() {
</script>
"""
-DIFFNODE_LAZY_LOAD = """<div class="ondemand-details">... <a href="%s.html">load details</a> ...</div>
+DIFFNODE_LAZY_LOAD = u"""<div class="ondemand-details">... <a href="%s.html">load details</a> ...</div>
+"""
+
+DIFFNODE_LIMIT = u"""<div class="error">Max report size reached</div>
"""
UD_TABLE_HEADER = u"""<table class="diff">
@@ -220,5 +223,8 @@ UD_TABLE_HEADER = u"""<table class="diff">
UD_TABLE_FOOTER = u"""<tr class="ondemand"><td colspan="4">
... <a href="%(filename)s">%(text)s</a> ...
</td></tr>
-</table>
"""
+
+UD_TABLE_LIMIT_FOOTER = u"""<tr class="error"><td colspan="4">
+Max %(limit_type)s reached; %(bytes_left)s/%(bytes_total)s bytes (%(percent).2f%%) of diff not shown.
+</td></tr>"""
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list