[diffoscope] 01/05: {command, feeders, diff}: replaces subprocess.Popen by .run

Juliana Oliveira jwnx-guest at moszumanska.debian.org
Sun Feb 11 01:32:49 CET 2018


This is an automated email from the git hooks/post-receive script.

jwnx-guest pushed a commit to branch jwnx_subprocess_merge
in repository diffoscope.

commit 80eb9125e6c8e3ffc7f5f650ee67a9af2667249a
Author: Juliana Oliveira <juliana.orod at gmail.com>
Date:   Sat Feb 10 17:20:15 2018 -0200

    {command, feeders, diff}: replaces subprocess.Popen by .run
    
    Popen requires thread/IO micromanaging, and locks some interactions
    with multiprocess lib due to its combined behavior with subprocess.PIPE, being
    unusable with .map function. Subprocess.run simplifies command execution
    by managing threads/IO and is also recommended over Popen since Python
    3.5.
    
    As a result, stderr and stdout were also simplified.
    
    Signed-off-by: Juliana Oliveira <juliana.orod at gmail.com>
---
 diffoscope/comparators/utils/command.py | 68 +++++++++++++++------------------
 diffoscope/comparators/zip.py           |  2 -
 diffoscope/diff.py                      |  9 ++---
 diffoscope/feeders.py                   |  4 +-
 4 files changed, 35 insertions(+), 48 deletions(-)

diff --git a/diffoscope/comparators/utils/command.py b/diffoscope/comparators/utils/command.py
index 1d4cfde..0a1634c 100644
--- a/diffoscope/comparators/utils/command.py
+++ b/diffoscope/comparators/utils/command.py
@@ -28,11 +28,15 @@ logger = logging.getLogger(__name__)
 
 
 class Command(object, metaclass=abc.ABCMeta):
+
+    MAX_STDERR_LINES = 50
+
     def __init__(self, path):
         self._path = path
 
     def start(self):
         logger.debug("Executing %s", ' '.join([shlex.quote(x) for x in self.cmdline()]))
+
         self._stdin = self.stdin()
         # "stdin" used to be a feeder but we didn't need the functionality so
         # it was simplified into the current form. it can be recovered from git
@@ -40,17 +44,15 @@ class Command(object, metaclass=abc.ABCMeta):
         # consider using a shell pipeline ("sh -ec $script") to implement what
         # you need, because that involves much less code - like it or not (I
         # don't) shell is still the most readable option for composing processes
-        self._process = subprocess.Popen(self.cmdline(),
-                                         shell=False, close_fds=True,
-                                         env=self.env(),
-                                         stdin=self._stdin,
-                                         stdout=subprocess.PIPE,
-                                         stderr=subprocess.PIPE)
-        self._stderr = io.BytesIO()
-        self._stderr_line_count = 0
-        self._stderr_reader = threading.Thread(target=self._read_stderr)
-        self._stderr_reader.daemon = True
-        self._stderr_reader.start()
+        self._process = subprocess.run(self.cmdline(),
+                                       shell=False, close_fds=True,
+                                       env=self.env(),
+                                       stdin=self._stdin,
+                                       stdout=subprocess.PIPE,
+                                       stderr=subprocess.PIPE)
+
+
+        self.stderr = self._read_stderr()
 
     @property
     def path(self):
@@ -74,42 +76,32 @@ class Command(object, metaclass=abc.ABCMeta):
         return line
 
     def poll(self):
-        return self._process.poll()
+        pass
 
     def terminate(self):
-        return self._process.terminate()
+        pass
 
     def wait(self):
-        self._stderr_reader.join()
-        returncode = self._process.wait()
-        logger.debug(
-            "%s returned (exit code: %d)",
-            ' '.join([shlex.quote(x) for x in self.cmdline()]),
-            returncode,
-        )
-        if self._stdin:
-            self._stdin.close()
-        return returncode
-
-    MAX_STDERR_LINES = 50
+        return self._process.returncode
 
     def _read_stderr(self):
-        for line in iter(self._process.stderr.readline, b''):
-            self._stderr_line_count += 1
-            if self._stderr_line_count <= Command.MAX_STDERR_LINES:
-                self._stderr.write(line)
-        if self._stderr_line_count > Command.MAX_STDERR_LINES:
-            self._stderr.write('[ {} lines ignored ]\n'.format(self._stderr_line_count - Command.MAX_STDERR_LINES).encode('utf-8'))
-        self._process.stderr.close()
+        buf = ""
+        lines = self._process.stderr.splitlines(True)
+        for index, line in enumerate(lines):
+          if index >= Command.MAX_STDERR_LINES:
+              break
+          buf += line.decode('utf-8', errors='replace')
 
-    @property
-    def stderr_content(self):
-        return self._stderr.getvalue().decode('utf-8', errors='replace')
+        if len(lines) > Command.MAX_STDERR_LINES:
+          buf += '[ {} lines ignored ]\n'.format(
+            len(lines) - Command.MAX_STDERR_LINES)
+
+        return buf
 
     @property
-    def stderr(self):
-        return self._stderr
+    def stderr_content(self):
+        return self.stderr
 
     @property
     def stdout(self):
-        return self._process.stdout
+        return self._process.stdout.splitlines(True)
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index 60d41ac..e626138 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -123,10 +123,8 @@ class MozillaZipCommandMixin(object):
     def wait(self):
         # zipinfo emits an error when reading Mozilla-optimized ZIPs,
         # which is fine to ignore.
-        super(Zipinfo, self).wait()
         return 0
 
-
 class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo):
     pass
 
diff --git a/diffoscope/diff.py b/diffoscope/diff.py
index f6d1552..5664caa 100644
--- a/diffoscope/diff.py
+++ b/diffoscope/diff.py
@@ -67,12 +67,11 @@ class DiffParser(object):
         return self._success
 
     def parse(self):
-        for line in self._output:
-            self._action = self._action(line.decode('utf-8', errors='replace'))
+        for line in self._output.decode('utf-8', errors='replace').splitlines(True):
+            self._action = self._action(line)
 
         self._action('')
         self._success = True
-        self._output.close()
 
     def read_headers(self, line):
         if not line:
@@ -165,15 +164,15 @@ def run_diff(fifo1, fifo2, end_nl_q1, end_nl_q2):
 
     logger.debug("Running %s", ' '.join(cmd))
 
-    p = subprocess.Popen(
+    p = subprocess.run(
         cmd,
         bufsize=1,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
     )
+
     parser = DiffParser(p.stdout, end_nl_q1, end_nl_q2)
     parser.parse()
-    p.wait()
 
     logger.debug(
         "%s: returncode %d, parsed %s",
diff --git a/diffoscope/feeders.py b/diffoscope/feeders.py
index 019ece5..f064214 100644
--- a/diffoscope/feeders.py
+++ b/diffoscope/feeders.py
@@ -86,14 +86,12 @@ def from_command(command):
                 command.filter,
             )
             end_nl = feeder(out_file)
-            if command.poll() is None:
-                command.terminate()
             returncode = command.wait()
         if returncode not in (0, -signal.SIGTERM):
             raise subprocess.CalledProcessError(
                 returncode,
                 command.cmdline(),
-                output=command.stderr.getvalue(),
+                output=command.stderr,
             )
         return end_nl
     return feeder

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list