[diffoscope] 01/02: diffoscope.diff: Improve FIFO writing robustness.
Mattia Rizzolo
mattia at debian.org
Sun Jan 29 09:57:50 CET 2017
This is an automated email from the git hooks/post-receive script.
mattia pushed a commit to branch fifo-refactor
in repository diffoscope.
commit 025554bef0271bf739ba101d3b6d3699b071c09c
Author: Brett Smith <brettcsmith at brettcsmith.org>
Date: Fri Jan 27 22:42:23 2017 -0500
diffoscope.diff: Improve FIFO writing robustness.
We used to give input to diff using FIFO objects.
fec9e97c51b3a8ff226a4b3b2b0563a4a680ac68 changed this to avoid relying on
/dev/fd, which is a good change for portability. Unfortunately, the
implementation has a few issues of its own:
* It feeds diff with normal files, rather than FIFOs. This could cause
trouble if diff tries to read farther than the underlying feeder
processes have written.
* It uses threads to write to the pseudo-FIFO, but the file object is
manipulated in multiple threads. I suspect this is the primary cause of
the segfaults observed in #852013.
* fd_from_feeder is decorated as a context manager, but it never yields
anything, which is not how it's expected to be used. I'm not sure this is
causing any problems, but it makes it harder to reason about what's going
on.
This commit introduces a new FIFOFeeder class. It is wholly responsible for
creating and feeding the FIFO, so we don't have to pass file objects across
the thread boundary and risk segfaults. It uses context management to tell
when the FIFO is no longer needed, so it can clean up nicely.
Signed-off-by: Chris Lamb <lamby at debian.org>
Signed-off-by: Mattia Rizzolo <mattia at debian.org>
---
diffoscope/diff.py | 103 +++++++++++++++++++++++++++--------------------------
1 file changed, 52 insertions(+), 51 deletions(-)
diff --git a/diffoscope/diff.py b/diffoscope/diff.py
index 011916a..0335a26 100644
--- a/diffoscope/diff.py
+++ b/diffoscope/diff.py
@@ -19,6 +19,9 @@
import re
import io
+import os
+import errno
+import fcntl
import hashlib
import logging
import threading
@@ -190,54 +193,55 @@ def run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2):
return parser.diff
-def feed(feeder, f, end_nl_q):
- # work-around unified diff limitation: if there's no newlines in both
- # don't make it a difference
- try:
- end_nl = feeder(f)
- end_nl_q.put(end_nl)
- finally:
- f.close()
-
-class ExThread(threading.Thread):
- """
- Inspired by https://stackoverflow.com/a/6874161
- """
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.__status_queue = Queue()
-
- def run(self, *args, **kwargs):
- try:
- super().run(*args, **kwargs)
- except Exception as ex:
- #except_type, except_class, tb = sys.exc_info()
- self.__status_queue.put(ex)
-
- self.__status_queue.put(None)
+class FIFOFeeder(threading.Thread):
+ def __init__(self, feeder, fifo_path, end_nl_q=None, *, daemon=True):
+ os.mkfifo(fifo_path)
+ super().__init__(daemon=daemon)
+ self.feeder = feeder
+ self.fifo_path = fifo_path
+ self.end_nl_q = Queue() if end_nl_q is None else end_nl_q
+ self._exception = None
+ self._want_join = threading.Event()
- def wait_for_exc_info(self):
- return self.__status_queue.get()
+ def __enter__(self):
+ self.start()
+ return self
- def join(self):
- ex = self.wait_for_exc_info()
- if ex is None:
- return
- raise ex
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self.join()
- at contextlib.contextmanager
-def fd_from_feeder(feeder, end_nl_q, fifo):
- f = open(fifo, 'wb')
- t = ExThread(target=feed, args=(feeder, f, end_nl_q))
+ def run(self):
+ try:
+ # Try to open the FIFO nonblocking, so we can periodically check
+ # if the main thread wants us to wind down. If it does, there's no
+ # more need for the FIFO, so stop the thread.
+ while True:
+ try:
+ fifo_fd = os.open(self.fifo_path, os.O_WRONLY | os.O_NONBLOCK)
+ except OSError as error:
+ if error.errno != errno.ENXIO:
+ raise
+ elif self._want_join.is_set():
+ return
+ else:
+ break
+
+ # Now clear the fd's nonblocking flag to let writes block normally.
+ fcntl.fcntl(fifo_fd, fcntl.F_SETFL, 0)
+ with open(fifo_fd, 'wb') as fifo:
+ # The queue works around a unified diff limitation: if there's
+ # no newlines in both don't make it a difference
+ end_nl = self.feeder(fifo)
+ self.end_nl_q.put(end_nl)
+ except Exception as error:
+ self._exception = error
- t.daemon = True
- t.start()
+ def join(self):
+ self._want_join.set()
+ super().join()
+ if self._exception is not None:
+ raise self._exception
- try:
- t.join()
- finally:
- f.close()
def empty_file_feeder():
def feeder(f):
@@ -273,15 +277,12 @@ def make_feeder_from_raw_reader(in_file, filter=lambda buf: buf):
return feeder
def diff(feeder1, feeder2):
- end_nl_q1 = Queue()
- end_nl_q2 = Queue()
-
with tempfile.TemporaryDirectory() as tmpdir:
- fifo1 = '{}/f1'.format(tmpdir)
- fifo2 = '{}/f2'.format(tmpdir)
- fd_from_feeder(feeder1, end_nl_q1, fifo1)
- fd_from_feeder(feeder2, end_nl_q2, fifo2)
- return run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2)
+ fifo1_path = os.path.join(tmpdir, 'fifo1')
+ fifo2_path = os.path.join(tmpdir, 'fifo2')
+ with FIFOFeeder(feeder1, fifo1_path) as fifo1, \
+ FIFOFeeder(feeder2, fifo2_path) as fifo2:
+ return run_diff(fifo1_path, fifo2_path, fifo1.end_nl_q, fifo2.end_nl_q)
def reverse_unified_diff(diff):
res = []
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the diffoscope
mailing list