[Git][reproducible-builds/diffoscope][master] 5 commits: diff: cleanup the fifos when FIFOFEEDER context manager exits

Chris Lamb gitlab at salsa.debian.org
Tue Jan 5 12:24:31 UTC 2021



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
f681cfb5 by Dimitrios Apostolou at 2021-01-05T12:12:32+00:00
diff: cleanup the fifos when FIFOFEEDER context manager exits

These files can be seen in thousands, named "fifo1" and "fifo2", under the
temporary directory while diffoscope still runs. Instead of cleaning up
massively in the end, we now clean up while we go.

- - - - -
c645d403 by Dimitrios Apostolou at 2021-01-05T12:12:32+00:00
tempfiles: remove redundant code, let object destructors clean up

- - - - -
1ee33f4e by Dimitrios Apostolou at 2021-01-05T12:12:32+00:00
tempfiles: clean up temporary directories as you go

Previously the temporary directories where all left in place until the
end of the process, when a massive cleanup was being run. The cause
was the _DIRS variable that was holding references to the
TemporaryDirectory objects, so their destructor was not running.

We now remove the _DIRS list completely (replacing it with _BASEDIR for
storing only the parent temporary directory). As a result, the temporary
directories and their contents are being cleaned up automatically when
the class destructor runs.

This in turn requires modifications to preexisting code:

1. Code that was directly using `get_temporary_directory().name`
   stopped working; the object from `get_temporary_directory()`
   needs to be referenced directly to keep it alive.
2. `get_temporary_directory()` is now starting a `with` block
   (context manager) where it was straightforward.

- - - - -
4225e3e8 by Dimitrios Apostolou at 2021-01-05T12:12:32+00:00
Add docstring for get_temporary_directory

- - - - -
20e3c99e by Dimitrios Apostolou at 2021-01-05T12:12:32+00:00
Do not repeat same code

- - - - -


7 changed files:

- diffoscope/comparators/apk.py
- diffoscope/comparators/binwalk.py
- diffoscope/comparators/rdata.py
- diffoscope/comparators/squashfs.py
- diffoscope/comparators/utils/libarchive.py
- diffoscope/diff.py
- diffoscope/tempfiles.py


Changes:

=====================================
diffoscope/comparators/apk.py
=====================================
@@ -46,8 +46,9 @@ class ApkContainer(Archive):
     @tool_required("zipinfo")
     def open_archive(self):
         self._members = []
+        self._tmpdir = get_temporary_directory()
         self._unpacked = os.path.join(
-            get_temporary_directory().name, os.path.basename(self.source.name)
+            self._tmpdir.name, os.path.basename(self.source.name)
         )
         self._andmanifest = None
         self._andmanifest_orig = None


=====================================
diffoscope/comparators/binwalk.py
=====================================
@@ -42,8 +42,8 @@ else:
     # ensure it does not create (!) unnecessary directories, etc. (re. #903444)
     def fn(self):
         if not hasattr(fn, "_temp_dir"):
-            fn._temp_dir = get_temporary_directory("binwalk").name
-        return fn._temp_dir
+            fn._temp_dir = get_temporary_directory("binwalk")
+        return fn._temp_dir.name
 
     binwalk.core.settings.Settings._get_user_config_dir = fn
 


=====================================
diffoscope/comparators/rdata.py
=====================================
@@ -73,7 +73,7 @@ def check_rds_extension(f):
     return f.name.endswith(".rds") or f.name.endswith(".rdx")
 
 
-def get_module_path_for_rdb(rdb):
+def get_module_path_for_rdb(rdb, temp_dir):
     """
     R's lazyLoad method does not take a filename directly to an .rdb file (eg.
     `/path/to/foo.rdb`) but rather the path without any extension (eg.
@@ -106,7 +106,6 @@ def get_module_path_for_rdb(rdb):
         # Corresponding .rdx does not exist
         return None
 
-    temp_dir = get_temporary_directory().name
     prefix = os.path.join(temp_dir, "temp")
 
     logger.debug("Copying %s and %s to %s", rdx.path, rdb.path, temp_dir)
@@ -114,7 +113,7 @@ def get_module_path_for_rdb(rdb):
     shutil.copy(rdx.path, f"{prefix}.rdx")
 
     # Return the "module" path, ie. without an extension
-    return os.path.join(temp_dir, "temp")
+    return prefix
 
 
 class RdsReader(Command):
@@ -162,10 +161,11 @@ class RdbFile(File):
     FILE_EXTENSION_SUFFIX = {".rdb"}
 
     def compare_details(self, other, source=None):
-        a = get_module_path_for_rdb(self)
-        b = get_module_path_for_rdb(other)
+        with get_temporary_directory() as tmpdir:
+            a = get_module_path_for_rdb(self, tmpdir)
+            b = get_module_path_for_rdb(other, tmpdir)
 
-        if a is None or b is None:
-            return []
+            if a is None or b is None:
+                return []
 
-        return [Difference.from_operation(RdbReader, a, b)]
+            return [Difference.from_operation(RdbReader, a, b)]


=====================================
diffoscope/comparators/squashfs.py
=====================================
@@ -255,7 +255,8 @@ class SquashfsContainer(Archive):
             return
 
         self._members = collections.OrderedDict()
-        self._temp_dir = get_temporary_directory().name
+        self._temp_dir_object = get_temporary_directory()
+        self._temp_dir = self._temp_dir_object.name
 
         logger.debug("Extracting %s to %s", self.source.path, self._temp_dir)
 


=====================================
diffoscope/comparators/utils/libarchive.py
=====================================
@@ -290,7 +290,8 @@ class LibarchiveContainer(Archive):
         if hasattr(self, "_members"):
             return
 
-        tmpdir = get_temporary_directory().name
+        self._tmpdir_object = get_temporary_directory()
+        tmpdir = self._tmpdir_object.name
         self._members = collections.OrderedDict()
 
         logger.debug("Extracting %s to %s", self.source.path, tmpdir)


=====================================
diffoscope/diff.py
=====================================
@@ -213,6 +213,7 @@ class FIFOFeeder(threading.Thread):
         return self
 
     def __exit__(self, exc_type, exc_value, exc_tb):
+        os.remove(self.fifo_path)
         self.join()
 
     def run(self):
@@ -319,14 +320,15 @@ def make_feeder_from_raw_reader(in_file, filterfn=None):
 
 
 def diff(feeder1, feeder2):
-    tmpdir = get_temporary_directory().name
-
-    fifo1_path = os.path.join(tmpdir, "fifo1")
-    fifo2_path = os.path.join(tmpdir, "fifo2")
-    with FIFOFeeder(feeder1, fifo1_path) as fifo1, FIFOFeeder(
-        feeder2, fifo2_path
-    ) as fifo2:
-        return run_diff(fifo1_path, fifo2_path, fifo1.end_nl_q, fifo2.end_nl_q)
+    with get_temporary_directory() as tmpdir:
+        fifo1_path = os.path.join(tmpdir, "fifo1")
+        fifo2_path = os.path.join(tmpdir, "fifo2")
+        with FIFOFeeder(feeder1, fifo1_path) as fifo1, FIFOFeeder(
+            feeder2, fifo2_path
+        ) as fifo2:
+            return run_diff(
+                fifo1_path, fifo2_path, fifo1.end_nl_q, fifo2.end_nl_q
+            )
 
 
 def diff_split_lines(diff, keepends=True):


=====================================
diffoscope/tempfiles.py
=====================================
@@ -21,7 +21,8 @@ import os
 import logging
 import tempfile
 
-_DIRS, _FILES = [], []
+_BASEDIR = None
+_FILES = []
 
 logger = logging.getLogger(__name__)
 
@@ -36,10 +37,24 @@ def get_named_temporary_file(*args, **kwargs):
 
 
 def get_temporary_directory(*args, **kwargs):
+    """
+    Create and return a TemporaryDirectory
+
+    Preferably use a "with" statement to control its lifetime:
+        with get_temporary_directory() as tmpdir:
+
+    The temporary directory is cleaned up at the end of the "with"
+    statement. Otherwise it's cleaned up when the object is garbage collected.
+
+    WARNING, don't do this:
+        tmpdir = get_temporary_directory().name
+
+    This will result to immediate removal of the directory, as there is no
+    reference pointing to the TemporaryDirectory object.
+    """
     kwargs["dir"] = kwargs.pop("dir", _get_base_temporary_directory())
 
     d = tempfile.TemporaryDirectory(*args, **kwargs)
-    _DIRS.append(d)
 
     return d
 
@@ -56,39 +71,18 @@ def clean_all_temp_files():
             logger.exception("Unable to delete %s", x)
     _FILES.clear()
 
-    logger.debug("Cleaning %d temporary directories", len(_DIRS))
-
-    # Reverse so we delete the top-level directory last.
-    for x in reversed(_DIRS):
-        try:
-            x.cleanup()
-        except PermissionError:
-            # Recursively reset the permissions of temporary directories prior
-            # to deletion to ensure that non-writable permissions such as 0555
-            # are removed and do not cause a traceback. (#891363)
-            for dirpath, ys, _ in os.walk(x.name):
-                for y in ys:
-                    os.chmod(os.path.join(dirpath, y), 0o777)
-            # try removing it again now
-            x.cleanup()
-        except FileNotFoundError:
-            pass
-        except:  # noqa
-            logger.exception("Unable to delete %s", x)
-    _DIRS.clear()
-
 
 def _get_base_temporary_directory():
-    if not _DIRS:
-        d = tempfile.TemporaryDirectory(
+    global _BASEDIR
+    if _BASEDIR is None:
+        _BASEDIR = tempfile.TemporaryDirectory(
             dir=tempfile.gettempdir(), prefix="diffoscope_"
         )
+        logger.debug(
+            "Created top-level temporary directory: %s", _BASEDIR.name
+        )
 
-        logger.debug("Created top-level temporary directory: %s", d.name)
-
-        _DIRS.append(d)
-
-    return _DIRS[0].name
+    return _BASEDIR.name
 
 
 def get_tempdir_free_space():



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/56d20d0900328b3f876686720161c11fad344e3d...20e3c99e33b39850c046f9a933318e6761470cd3

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/56d20d0900328b3f876686720161c11fad344e3d...20e3c99e33b39850c046f9a933318e6761470cd3
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20210105/fe3f3a88/attachment.htm>


More information about the rb-commits mailing list