[diffoscope] 01/03: WIP on simple profiling. Example output: https://gist.github.com/lamby/42379b0a7ee851ba4a4d280d116344cd/raw

Chris Lamb chris at chris-lamb.co.uk
Mon Jan 9 17:31:23 CET 2017


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch lamby/profiling
in repository diffoscope.

commit c161911b73046cfcadf9947a916bdd950e3068b7
Author: Chris Lamb <lamby at debian.org>
Date:   Thu Dec 22 18:59:19 2016 +0000

    WIP on simple profiling. Example output: https://gist.github.com/lamby/42379b0a7ee851ba4a4d280d116344cd/raw
---
 diffoscope/__init__.py             |  5 ++-
 diffoscope/comparators/__init__.py | 25 ++++++++-------
 diffoscope/comparators/binary.py   | 11 +++++--
 diffoscope/comparators/haskell.py  |  4 ++-
 diffoscope/comparators/ppu.py      |  4 ++-
 diffoscope/difference.py           | 10 +++---
 diffoscope/main.py                 |  6 ++++
 diffoscope/profiling.py            | 66 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 109 insertions(+), 22 deletions(-)

diff --git a/diffoscope/__init__.py b/diffoscope/__init__.py
index 7408c7e..4cf4dbf 100644
--- a/diffoscope/__init__.py
+++ b/diffoscope/__init__.py
@@ -27,6 +27,8 @@ import time
 
 from distutils.spawn import find_executable
 
+from diffoscope.profiling import profile
+
 VERSION = "64"
 
 logger = logging.getLogger("diffoscope")
@@ -68,7 +70,8 @@ def tool_required(command):
         if find_executable(command):
             @functools.wraps(original_function)
             def tool_check(*args, **kwargs):
-                return original_function(*args, **kwargs)
+                with profile('command', command):
+                    return original_function(*args, **kwargs)
         else:
             @functools.wraps(original_function)
             def tool_check(*args, **kwargs):
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 93d839d..e80b6d5 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -27,6 +27,7 @@ import importlib
 
 from diffoscope import logger, tool_required
 from diffoscope.config import Config
+from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
 from .binary import NonExistingFile
@@ -133,9 +134,10 @@ def compare_root_paths(path1, path2):
 
 def compare_files(file1, file2, source=None):
     logger.debug("Comparing files %s and %s", file1, file2)
-    if file1.has_same_content_as(file2):
-        logger.debug("has_same_content_as returned True; skipping further comparisons")
-        return None
+    with profile('has_same_content_as', file1):
+        if file1.has_same_content_as(file2):
+            logger.debug("has_same_content_as returned True; skipping further comparisons")
+            return None
     specialize(file1)
     specialize(file2)
     if isinstance(file1, NonExistingFile):
@@ -144,7 +146,8 @@ def compare_files(file1, file2, source=None):
         file2.other_file = file1
     elif file1.__class__.__name__ != file2.__class__.__name__:
         return file1.compare_bytes(file2, source)
-    return file1.compare(file2, source)
+    with profile('compare_files (cumulative)', file1):
+        return file1.compare(file2, source)
 
 def compare_commented_files(file1, file2, comment=None, source=None):
     difference = compare_files(file1, file2, source=source)
@@ -156,16 +159,14 @@ def compare_commented_files(file1, file2, comment=None, source=None):
 
 def specialize(file):
     for cls in FILE_CLASSES:
-        # Uncomment the below to see which comparisons take ages to run "identify"
-        #logger.debug("testing for %s", cls)
         if isinstance(file, cls):
-            logger.debug("%s is already specialized", file.name)
-            return file
-        if cls.recognizes(file):
-            logger.debug("Using %s for %s", cls.__name__, file.name)
-            new_cls = type(cls.__name__, (cls, type(file)), {})
-            file.__class__ = new_cls
             return file
+        with profile('recognizes', file):
+            if cls.recognizes(file):
+                logger.debug("Using %s for %s", cls.__name__, file.name)
+                new_cls = type(cls.__name__, (cls, type(file)), {})
+                file.__class__ = new_cls
+                return file
     logger.debug('Unidentified file. Magic says: %s', file.magic_file_type)
     return file
 
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 792fca6..57d5d57 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -29,6 +29,7 @@ import subprocess
 from diffoscope import tool_required, logger
 from diffoscope.exc import OutputParsingError, RequiredToolNotFound
 from diffoscope.config import Config
+from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
 try:
@@ -187,7 +188,6 @@ class File(object, metaclass=abc.ABCMeta):
         difference.add_details(details)
         return difference
 
-    @tool_required('cmp')
     def has_same_content_as(self, other):
         logger.debug('Binary.has_same_content: %s %s', self, other)
         # try comparing small files directly first
@@ -200,13 +200,18 @@ class File(object, metaclass=abc.ABCMeta):
             return False
         if my_size == other_size and my_size <= SMALL_FILE_THRESHOLD:
             try:
-                with open(self.path, 'rb') as file1, open(other.path, 'rb') as file2:
-                    return file1.read() == file2.read()
+                with profile('command', 'cmp (internal)'):
+                    with open(self.path, 'rb') as file1, open(other.path, 'rb') as file2:
+                        return file1.read() == file2.read()
             except OSError:
                 # one or both files could not be opened for some reason,
                 # assume they are different
                 return False
 
+        return self.cmp_external(other)
+
+    @tool_required('cmp')
+    def cmp_external(self, other):
         return 0 == subprocess.call(['cmp', '-s', self.path, other.path],
                                     shell=False, close_fds=True)
 
diff --git a/diffoscope/comparators/haskell.py b/diffoscope/comparators/haskell.py
index 0e4f9e6..7630941 100644
--- a/diffoscope/comparators/haskell.py
+++ b/diffoscope/comparators/haskell.py
@@ -23,6 +23,7 @@ import platform
 import subprocess
 
 from diffoscope import tool_required, logger
+from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 from diffoscope.comparators.utils import Command
 from diffoscope.comparators.binary import File
@@ -50,7 +51,8 @@ class HiFile(File):
             return False
         if not hasattr(HiFile, 'hi_version'):
             try:
-                output = subprocess.check_output(['ghc', '--numeric-version'], shell=False)
+                with profile('command', 'ghc'):
+                    output = subprocess.check_output(['ghc', '--numeric-version'], shell=False)
                 major, minor, patch = map(int, output.decode('utf-8').strip().split('.'))
                 HiFile.hi_version = "%d%02d%d" % (major, minor, patch)
                 logger.debug('Found .hi version %s', HiFile.hi_version)
diff --git a/diffoscope/comparators/ppu.py b/diffoscope/comparators/ppu.py
index 84609bf..2577816 100644
--- a/diffoscope/comparators/ppu.py
+++ b/diffoscope/comparators/ppu.py
@@ -24,6 +24,7 @@ import re
 import subprocess
 
 from diffoscope import tool_required, logger
+from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 from diffoscope.comparators.utils import Command
 from diffoscope.comparators.binary import File
@@ -66,7 +67,8 @@ class PpuFile(File):
             ppu_version = f.read(3).decode('ascii', errors='ignore')
         if not hasattr(PpuFile, 'ppu_version'):
             try:
-                subprocess.check_output(['ppudump', '-vh', file.path], shell=False, stderr=subprocess.STDOUT)
+                with profile('command', 'ppudump'):
+                    subprocess.check_output(['ppudump', '-vh', file.path], shell=False, stderr=subprocess.STDOUT)
                 PpuFile.ppu_version = ppu_version
             except subprocess.CalledProcessError as e:
                 error = e.output.decode('utf-8', errors='ignore')
diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index 5f4dab8..2634b55 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -32,6 +32,7 @@ from multiprocessing.dummy import Queue
 from diffoscope import logger, tool_required
 from diffoscope.exc import RequiredToolNotFound
 from diffoscope.config import Config
+from diffoscope.profiling import profile
 
 
 class DiffParser(object):
@@ -254,10 +255,11 @@ def make_feeder_from_text_reader(in_file, filter=lambda text_buf: text_buf):
 
 def make_feeder_from_command(command):
     def feeder(out_file):
-        end_nl = make_feeder_from_raw_reader(command.stdout, command.filter)(out_file)
-        if command.poll() is None:
-            command.terminate()
-        returncode = command.wait()
+        with profile('command', command.cmdline()[0]):
+            end_nl = make_feeder_from_raw_reader(command.stdout, command.filter)(out_file)
+            if command.poll() is None:
+                command.terminate()
+            returncode = command.wait()
         if returncode not in (0, -signal.SIGTERM):
             raise subprocess.CalledProcessError(returncode, command.cmdline(), output=command.stderr.getvalue())
         return end_nl
diff --git a/diffoscope/main.py b/diffoscope/main.py
index 456a82d..7775d46 100644
--- a/diffoscope/main.py
+++ b/diffoscope/main.py
@@ -35,6 +35,7 @@ from diffoscope.exc import RequiredToolNotFound
 from diffoscope.config import Config
 from diffoscope.difference import Difference
 from diffoscope.progress import ProgressManager, Progress
+from diffoscope.profiling import ProfileManager
 from diffoscope.presenters.html import output_html, output_html_directory, \
     JQUERY_SYSTEM_LOCATIONS
 from diffoscope.presenters.text import output_text
@@ -92,6 +93,8 @@ def create_parser():
                         '"disable" to disable JavaScript. When omitted '
                         'diffoscope will try to create a symlink to a system '
                         'installation. Known locations: %s' % ', '.join(JQUERY_SYSTEM_LOCATIONS))
+    group1.add_argument('--profile', metavar='OUTPUT_FILE', dest='profile_output',
+                        help='Write profiling info to given file (use - for stdout)')
 
     group2 = parser.add_argument_group('output limits')
     group2.add_argument('--no-default-limits', action='store_true', default=False,
@@ -270,6 +273,9 @@ def run_diffoscope(parsed_args):
         if parsed_args.html_output_directory:
             output_html_directory(parsed_args.html_output_directory, difference,
                 css_url=parsed_args.css_url, jquery_url=parsed_args.jquery_url)
+    if parsed_args.profile_output:
+        with make_printer(parsed_args.profile_output) as print_func:
+            ProfileManager().output(print_func)
     return retcode
 
 
diff --git a/diffoscope/profiling.py b/diffoscope/profiling.py
new file mode 100644
index 0000000..9b2f2fe
--- /dev/null
+++ b/diffoscope/profiling.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2016 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import sys
+import time
+import contextlib
+import collections
+
+@contextlib.contextmanager
+def profile(namespace, key):
+    start = time.time()
+    yield
+    ProfileManager().increment(start, namespace, key)
+
+class ProfileManager(object):
+    _singleton = {}
+
+    def __init__(self):
+        self.__dict__ = self._singleton
+
+        if not self._singleton:
+            self.data = collections.defaultdict(
+                lambda: collections.defaultdict(float),
+            )
+
+    def increment(self, start, namespace, key):
+        if not isinstance(key, str):
+            key = '{}.{}'.format(
+                key.__class__.__module__,
+                key.__class__.__name__,
+            )
+
+        self.data[namespace][key] += time.time() - start
+
+    def output(self, print):
+        title = "Profiling output for: {}".format(' '.join(sys.argv))
+
+        print(title)
+        print("=" * len(title))
+
+        for namespace, keys in sorted(self.data.items(), key=lambda x: x[0]):
+            subtitle = "{} (total: {:.3f}s)".format(
+                namespace,
+                sum(keys.values()),
+            )
+
+            print("\n{}\n{}\n".format(subtitle, "-" * len(subtitle)))
+
+            for value, total in sorted(keys.items(), key=lambda x: x[1], reverse=True):
+                print("  {:10.3f}s  {}".format(total, value))

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list