[diffoscope] 01/02: diffoscope.diff: Improve FIFO writing robustness.

Mattia Rizzolo mattia at debian.org
Sun Jan 29 09:57:50 CET 2017


This is an automated email from the git hooks/post-receive script.

mattia pushed a commit to branch fifo-refactor
in repository diffoscope.

commit 025554bef0271bf739ba101d3b6d3699b071c09c
Author: Brett Smith <brettcsmith at brettcsmith.org>
Date:   Fri Jan 27 22:42:23 2017 -0500

    diffoscope.diff: Improve FIFO writing robustness.
    
    We used to give input to diff using FIFO objects.
    fec9e97c51b3a8ff226a4b3b2b0563a4a680ac68 changed this to avoid relying on
    /dev/fd, which is a good change for portability.  Unfortunately, the
    implementation has a few issues of its own:
    
    * It feeds diff with normal files, rather than FIFOs.  This could cause
      trouble if diff tries to read farther than the underlying feeder
      processes have written.
    * It uses threads to write to the pseudo-FIFO, but the file object is
      manipulated in multiple threads.  I suspect this is the primary cause of
      the segfaults observed in #852013.
    * fd_from_feeder is decorated as a context manager, but it never yields
      anything, which is not how it's expected to be used.  I'm not sure this is
      causing any problems, but it makes it harder to reason about what's going
      on.
    
    This commit introduces a new FIFOFeeder class.  It is wholly responsible for
    creating and feeding the FIFO, so we don't have to pass file objects across
    the thread boundary and risk segfaults.  It uses context management to tell
    when the FIFO is no longer needed, so it can clean up nicely.
    
    Signed-off-by: Chris Lamb <lamby at debian.org>
    Signed-off-by: Mattia Rizzolo <mattia at debian.org>
---
 diffoscope/diff.py | 103 +++++++++++++++++++++++++++--------------------------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/diffoscope/diff.py b/diffoscope/diff.py
index 011916a..0335a26 100644
--- a/diffoscope/diff.py
+++ b/diffoscope/diff.py
@@ -19,6 +19,9 @@
 
 import re
 import io
+import os
+import errno
+import fcntl
 import hashlib
 import logging
 import threading
@@ -190,54 +193,55 @@ def run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2):
 
     return parser.diff
 
-def feed(feeder, f, end_nl_q):
-    # work-around unified diff limitation: if there's no newlines in both
-    # don't make it a difference
-    try:
-        end_nl = feeder(f)
-        end_nl_q.put(end_nl)
-    finally:
-        f.close()
-
-class ExThread(threading.Thread):
-    """
-    Inspired by https://stackoverflow.com/a/6874161
-    """
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.__status_queue = Queue()
-
-    def run(self, *args, **kwargs):
-        try:
-            super().run(*args, **kwargs)
-        except Exception as ex:
-            #except_type, except_class, tb = sys.exc_info()
-            self.__status_queue.put(ex)
-
-        self.__status_queue.put(None)
+class FIFOFeeder(threading.Thread):
+    def __init__(self, feeder, fifo_path, end_nl_q=None, *, daemon=True):
+        os.mkfifo(fifo_path)
+        super().__init__(daemon=daemon)
+        self.feeder = feeder
+        self.fifo_path = fifo_path
+        self.end_nl_q = Queue() if end_nl_q is None else end_nl_q
+        self._exception = None
+        self._want_join = threading.Event()
 
-    def wait_for_exc_info(self):
-        return self.__status_queue.get()
+    def __enter__(self):
+        self.start()
+        return self
 
-    def join(self):
-        ex = self.wait_for_exc_info()
-        if ex is None:
-            return
-        raise ex
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self.join()
 
- at contextlib.contextmanager
-def fd_from_feeder(feeder, end_nl_q, fifo):
-    f = open(fifo, 'wb')
-    t = ExThread(target=feed, args=(feeder, f, end_nl_q))
+    def run(self):
+        try:
+            # Try to open the FIFO nonblocking, so we can periodically check
+            # if the main thread wants us to wind down.  If it does, there's no
+            # more need for the FIFO, so stop the thread.
+            while True:
+                try:
+                    fifo_fd = os.open(self.fifo_path, os.O_WRONLY | os.O_NONBLOCK)
+                except OSError as error:
+                    if error.errno != errno.ENXIO:
+                        raise
+                    elif self._want_join.is_set():
+                        return
+                else:
+                    break
+
+            # Now clear the fd's nonblocking flag to let writes block normally.
+            fcntl.fcntl(fifo_fd, fcntl.F_SETFL, 0)
+            with open(fifo_fd, 'wb') as fifo:
+                # The queue works around a unified diff limitation: if there's
+                # no newlines in both don't make it a difference
+                end_nl = self.feeder(fifo)
+                self.end_nl_q.put(end_nl)
+        except Exception as error:
+            self._exception = error
 
-    t.daemon = True
-    t.start()
+    def join(self):
+        self._want_join.set()
+        super().join()
+        if self._exception is not None:
+            raise self._exception
 
-    try:
-        t.join()
-    finally:
-        f.close()
 
 def empty_file_feeder():
     def feeder(f):
@@ -273,15 +277,12 @@ def make_feeder_from_raw_reader(in_file, filter=lambda buf: buf):
     return feeder
 
 def diff(feeder1, feeder2):
-    end_nl_q1 = Queue()
-    end_nl_q2 = Queue()
-
     with tempfile.TemporaryDirectory() as tmpdir:
-        fifo1 = '{}/f1'.format(tmpdir)
-        fifo2 = '{}/f2'.format(tmpdir)
-        fd_from_feeder(feeder1, end_nl_q1, fifo1)
-        fd_from_feeder(feeder2, end_nl_q2, fifo2)
-        return run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2)
+        fifo1_path = os.path.join(tmpdir, 'fifo1')
+        fifo2_path = os.path.join(tmpdir, 'fifo2')
+        with FIFOFeeder(feeder1, fifo1_path) as fifo1, \
+             FIFOFeeder(feeder2, fifo2_path) as fifo2:
+            return run_diff(fifo1_path, fifo2_path, fifo1.end_nl_q, fifo2.end_nl_q)
 
 def reverse_unified_diff(diff):
     res = []

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list