[diffoscope] 01/01: WIP on simple profiling. Example output: https://gist.github.com/lamby/42379b0a7ee851ba4a4d280d116344cd/raw
Chris Lamb
chris at chris-lamb.co.uk
Thu Dec 22 21:56:04 CET 2016
This is an automated email from the git hooks/post-receive script.
lamby pushed a commit to branch lamby/profiling
in repository diffoscope.
commit c161911b73046cfcadf9947a916bdd950e3068b7
Author: Chris Lamb <lamby at debian.org>
Date: Thu Dec 22 18:59:19 2016 +0000
WIP on simple profiling. Example output: https://gist.github.com/lamby/42379b0a7ee851ba4a4d280d116344cd/raw
---
diffoscope/__init__.py | 5 ++-
diffoscope/comparators/__init__.py | 25 ++++++++-------
diffoscope/comparators/binary.py | 11 +++++--
diffoscope/comparators/haskell.py | 4 ++-
diffoscope/comparators/ppu.py | 4 ++-
diffoscope/difference.py | 10 +++---
diffoscope/main.py | 6 ++++
diffoscope/profiling.py | 66 ++++++++++++++++++++++++++++++++++++++
8 files changed, 109 insertions(+), 22 deletions(-)
diff --git a/diffoscope/__init__.py b/diffoscope/__init__.py
index 7408c7e..4cf4dbf 100644
--- a/diffoscope/__init__.py
+++ b/diffoscope/__init__.py
@@ -27,6 +27,8 @@ import time
from distutils.spawn import find_executable
+from diffoscope.profiling import profile
+
VERSION = "64"
logger = logging.getLogger("diffoscope")
@@ -68,7 +70,8 @@ def tool_required(command):
if find_executable(command):
@functools.wraps(original_function)
def tool_check(*args, **kwargs):
- return original_function(*args, **kwargs)
+ with profile('command', command):
+ return original_function(*args, **kwargs)
else:
@functools.wraps(original_function)
def tool_check(*args, **kwargs):
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 93d839d..e80b6d5 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -27,6 +27,7 @@ import importlib
from diffoscope import logger, tool_required
from diffoscope.config import Config
+from diffoscope.profiling import profile
from diffoscope.difference import Difference
from .binary import NonExistingFile
@@ -133,9 +134,10 @@ def compare_root_paths(path1, path2):
def compare_files(file1, file2, source=None):
logger.debug("Comparing files %s and %s", file1, file2)
- if file1.has_same_content_as(file2):
- logger.debug("has_same_content_as returned True; skipping further comparisons")
- return None
+ with profile('has_same_content_as', file1):
+ if file1.has_same_content_as(file2):
+ logger.debug("has_same_content_as returned True; skipping further comparisons")
+ return None
specialize(file1)
specialize(file2)
if isinstance(file1, NonExistingFile):
@@ -144,7 +146,8 @@ def compare_files(file1, file2, source=None):
file2.other_file = file1
elif file1.__class__.__name__ != file2.__class__.__name__:
return file1.compare_bytes(file2, source)
- return file1.compare(file2, source)
+ with profile('compare_files (cumulative)', file1):
+ return file1.compare(file2, source)
def compare_commented_files(file1, file2, comment=None, source=None):
difference = compare_files(file1, file2, source=source)
@@ -156,16 +159,14 @@ def compare_commented_files(file1, file2, comment=None, source=None):
def specialize(file):
for cls in FILE_CLASSES:
- # Uncomment the below to see which comparisons take ages to run "identify"
- #logger.debug("testing for %s", cls)
if isinstance(file, cls):
- logger.debug("%s is already specialized", file.name)
- return file
- if cls.recognizes(file):
- logger.debug("Using %s for %s", cls.__name__, file.name)
- new_cls = type(cls.__name__, (cls, type(file)), {})
- file.__class__ = new_cls
return file
+ with profile('recognizes', file):
+ if cls.recognizes(file):
+ logger.debug("Using %s for %s", cls.__name__, file.name)
+ new_cls = type(cls.__name__, (cls, type(file)), {})
+ file.__class__ = new_cls
+ return file
logger.debug('Unidentified file. Magic says: %s', file.magic_file_type)
return file
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 792fca6..57d5d57 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -29,6 +29,7 @@ import subprocess
from diffoscope import tool_required, logger
from diffoscope.exc import OutputParsingError, RequiredToolNotFound
from diffoscope.config import Config
+from diffoscope.profiling import profile
from diffoscope.difference import Difference
try:
@@ -187,7 +188,6 @@ class File(object, metaclass=abc.ABCMeta):
difference.add_details(details)
return difference
- @tool_required('cmp')
def has_same_content_as(self, other):
logger.debug('Binary.has_same_content: %s %s', self, other)
# try comparing small files directly first
@@ -200,13 +200,18 @@ class File(object, metaclass=abc.ABCMeta):
return False
if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
try:
- with open(self.path, 'rb') as file1, open(other.path, 'rb') as file2:
- return file1.read() == file2.read()
+ with profile('command', 'cmp (internal)'):
+ with open(self.path, 'rb') as file1, open(other.path, 'rb') as file2:
+ return file1.read() == file2.read()
except OSError:
# one or both files could not be opened for some reason,
# assume they are different
return False
+ return self.cmp_external(other)
+
+ @tool_required('cmp')
+ def cmp_external(self, other):
return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
shell=False, close_fds=True)
diff --git a/diffoscope/comparators/haskell.py b/diffoscope/comparators/haskell.py
index 0e4f9e6..7630941 100644
--- a/diffoscope/comparators/haskell.py
+++ b/diffoscope/comparators/haskell.py
@@ -23,6 +23,7 @@ import platform
import subprocess
from diffoscope import tool_required, logger
+from diffoscope.profiling import profile
from diffoscope.difference import Difference
from diffoscope.comparators.utils import Command
from diffoscope.comparators.binary import File
@@ -50,7 +51,8 @@ class HiFile(File):
return False
if not hasattr(HiFile, 'hi_version'):
try:
- output = subprocess.check_output(['ghc', '--numeric-version'], shell=False)
+ with profile('command', 'ghc'):
+ output = subprocess.check_output(['ghc', '--numeric-version'], shell=False)
major, minor, patch = map(int, output.decode('utf-8').strip().split('.'))
HiFile.hi_version = "%d%02d%d" % (major, minor, patch)
logger.debug('Found .hi version %s', HiFile.hi_version)
diff --git a/diffoscope/comparators/ppu.py b/diffoscope/comparators/ppu.py
index 84609bf..2577816 100644
--- a/diffoscope/comparators/ppu.py
+++ b/diffoscope/comparators/ppu.py
@@ -24,6 +24,7 @@ import re
import subprocess
from diffoscope import tool_required, logger
+from diffoscope.profiling import profile
from diffoscope.difference import Difference
from diffoscope.comparators.utils import Command
from diffoscope.comparators.binary import File
@@ -66,7 +67,8 @@ class PpuFile(File):
ppu_version = f.read(3).decode('ascii', errors='ignore')
if not hasattr(PpuFile, 'ppu_version'):
try:
- subprocess.check_output(['ppudump', '-vh', file.path], shell=False, stderr=subprocess.STDOUT)
+ with profile('command', 'ppudump'):
+ subprocess.check_output(['ppudump', '-vh', file.path], shell=False, stderr=subprocess.STDOUT)
PpuFile.ppu_version = ppu_version
except subprocess.CalledProcessError as e:
error = e.output.decode('utf-8', errors='ignore')
diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index 5f4dab8..2634b55 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -32,6 +32,7 @@ from multiprocessing.dummy import Queue
from diffoscope import logger, tool_required
from diffoscope.exc import RequiredToolNotFound
from diffoscope.config import Config
+from diffoscope.profiling import profile
class DiffParser(object):
@@ -254,10 +255,11 @@ def make_feeder_from_text_reader(in_file, filter=lambda text_buf: text_buf):
def make_feeder_from_command(command):
def feeder(out_file):
- end_nl = make_feeder_from_raw_reader(command.stdout, command.filter)(out_file)
- if command.poll() is None:
- command.terminate()
- returncode = command.wait()
+ with profile('command', command.cmdline()[0]):
+ end_nl = make_feeder_from_raw_reader(command.stdout, command.filter)(out_file)
+ if command.poll() is None:
+ command.terminate()
+ returncode = command.wait()
if returncode not in (0, -signal.SIGTERM):
raise subprocess.CalledProcessError(returncode, command.cmdline(), output=command.stderr.getvalue())
return end_nl
diff --git a/diffoscope/main.py b/diffoscope/main.py
index 456a82d..7775d46 100644
--- a/diffoscope/main.py
+++ b/diffoscope/main.py
@@ -35,6 +35,7 @@ from diffoscope.exc import RequiredToolNotFound
from diffoscope.config import Config
from diffoscope.difference import Difference
from diffoscope.progress import ProgressManager, Progress
+from diffoscope.profiling import ProfileManager
from diffoscope.presenters.html import output_html, output_html_directory, \
JQUERY_SYSTEM_LOCATIONS
from diffoscope.presenters.text import output_text
@@ -92,6 +93,8 @@ def create_parser():
'"disable" to disable JavaScript. When omitted '
'diffoscope will try to create a symlink to a system '
'installation. Known locations: %s' % ', '.join(JQUERY_SYSTEM_LOCATIONS))
+ group1.add_argument('--profile', metavar='OUTPUT_FILE', dest='profile_output',
+ help='Write profiling info to given file (use - for stdout)')
group2 = parser.add_argument_group('output limits')
group2.add_argument('--no-default-limits', action='store_true', default=False,
@@ -270,6 +273,9 @@ def run_diffoscope(parsed_args):
if parsed_args.html_output_directory:
output_html_directory(parsed_args.html_output_directory, difference,
css_url=parsed_args.css_url, jquery_url=parsed_args.jquery_url)
+ if parsed_args.profile_output:
+ with make_printer(parsed_args.profile_output) as print_func:
+ ProfileManager().output(print_func)
return retcode
diff --git a/diffoscope/profiling.py b/diffoscope/profiling.py
new file mode 100644
index 0000000..9b2f2fe
--- /dev/null
+++ b/diffoscope/profiling.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import sys
+import time
+import contextlib
+import collections
+
+@contextlib.contextmanager
+def profile(namespace, key):
+ start = time.time()
+ yield
+ ProfileManager().increment(start, namespace, key)
+
+class ProfileManager(object):
+ _singleton = {}
+
+ def __init__(self):
+ self.__dict__ = self._singleton
+
+ if not self._singleton:
+ self.data = collections.defaultdict(
+ lambda: collections.defaultdict(float),
+ )
+
+ def increment(self, start, namespace, key):
+ if not isinstance(key, str):
+ key = '{}.{}'.format(
+ key.__class__.__module__,
+ key.__class__.__name__,
+ )
+
+ self.data[namespace][key] += time.time() - start
+
+ def output(self, print):
+ title = "Profiling output for: {}".format(' '.join(sys.argv))
+
+ print(title)
+ print("=" * len(title))
+
+ for namespace, keys in sorted(self.data.items(), key=lambda x: x[0]):
+ subtitle = "{} (total: {:.3f}s)".format(
+ namespace,
+ sum(keys.values()),
+ )
+
+ print("\n{}\n{}\n".format(subtitle, "-" * len(subtitle)))
+
+ for value, total in sorted(keys.items(), key=lambda x: x[1], reverse=True):
+ print(" {:10.3f}s {}".format(total, value))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list