[Git][reproducible-builds/diffoscope][master] 14 commits: Add graceful handling for UNIX sockets and named pipes (cf. issue #293)

Chris Lamb (@lamby) gitlab at salsa.debian.org
Mon Mar 21 15:33:01 UTC 2022



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
14e21db2 by spillner at 2022-03-21T15:29:16+00:00
Add graceful handling for UNIX sockets and named pipes (cf. issue #293)

- - - - -
9b249095 by spillner at 2022-03-21T15:29:16+00:00
Adding other files changed in this patch.

- - - - -
490a4b4c by spillner at 2022-03-21T15:29:16+00:00
Added unit tests for the socket/pipe module.

- - - - -
9c5f7c77 by spillner at 2022-03-21T15:29:16+00:00
Correcting comment, after validating that Linux kernel will indeed nest sockets inside squashfs.

- - - - -
cfbe5587 by spillner at 2022-03-21T15:29:16+00:00
Reformatted to pass black style test.

- - - - -
454d9757 by spillner at 2022-03-21T15:29:16+00:00
Changes to pass black style check in diffoscope/comparators; changed test fixture to automatically clean up filesystem lint.

- - - - -
004072bb by spillner at 2022-03-21T15:29:16+00:00
Renamed is_socketOrFIFO() to is_socket_or_fifo() to improve readability.

- - - - -
f0d830ba by spillner at 2022-03-21T15:29:16+00:00
Corrected copyright notice, reinserted blank line to pass black check for versions 2.11b0 and earlier (now fails 2.12b0)

- - - - -
edf850a3 by spillner at 2022-03-21T15:29:16+00:00
Removed superfluous else: after unconditional return in previous cases.

- - - - -
1be6e2b2 by spillner at 2022-03-21T15:29:16+00:00
Corrected inadvertently duplicated file contents.

- - - - -
5a8398e5 by spillner at 2022-03-21T15:29:16+00:00
Fixed black style reversion after unindenting.

- - - - -
00bd0a07 by spillner at 2022-03-21T15:29:16+00:00
Removed superfluous log message and reformatted comment lines to resolve review comments.

- - - - -
26a50176 by Brent Spillner at 2022-03-21T15:29:16+00:00
Removed blank line to satisfy black versions 21.12b0 and later, including latest salsa CI pipeline.
- - - - -
486f596f by Chris Lamb at 2022-03-21T15:30:49+00:00
Reformat for Black.

- - - - -


18 changed files:

- diffoscope/comparators/__init__.py
- diffoscope/comparators/binary.py
- diffoscope/comparators/cbfs.py
- diffoscope/comparators/debian.py
- diffoscope/comparators/decompile.py
- diffoscope/comparators/elf.py
- diffoscope/comparators/macho.py
- diffoscope/comparators/missing_file.py
- diffoscope/comparators/rpm.py
- + diffoscope/comparators/socket_or_fifo.py
- diffoscope/comparators/squashfs.py
- diffoscope/comparators/utils/archive.py
- diffoscope/comparators/utils/file.py
- diffoscope/comparators/utils/libarchive.py
- diffoscope/config.py
- diffoscope/presenters/text.py
- tests/comparators/test_pcap.py
- + tests/comparators/test_sockets.py


Changes:

=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -36,6 +36,7 @@ class ComparatorManager:
         ("missing_file.MissingFile",),
         ("symlink.Symlink",),
         ("device.Device",),
+        ("socket_or_fifo.SocketOrFIFO",),
         ("debian.DotChangesFile", "debian_fallback.DotChangesFile"),
         ("debian.DotDscFile", "debian_fallback.DotDscFile"),
         ("debian.DotBuildinfoFile", "debian_fallback.DotBuildinfoFile"),


=====================================
diffoscope/comparators/binary.py
=====================================
@@ -41,3 +41,7 @@ class FilesystemFile(File):
     def is_device(self):
         mode = os.lstat(self._name).st_mode
         return stat.S_ISCHR(mode) or stat.S_ISBLK(mode)
+
+    def is_socket_or_fifo(self):
+        mode = os.lstat(self._name).st_mode
+        return stat.S_ISSOCK(mode) or stat.S_ISFIFO(mode)


=====================================
diffoscope/comparators/cbfs.py
=====================================
@@ -97,7 +97,7 @@ CBFS_HEADER_VERSION2 = 0x31313132
 CBFS_HEADER_SIZE = 8 * 4  # 8 * uint32_t
 
 # On 2015-12-15, the largest image produced by coreboot is 16 MiB
-CBFS_MAXIMUM_FILE_SIZE = 24 * 2 ** 20  # 24 MiB
+CBFS_MAXIMUM_FILE_SIZE = 24 * 2**20  # 24 MiB
 
 
 def is_header_valid(buf, size, offset=0):


=====================================
diffoscope/comparators/debian.py
=====================================
@@ -68,6 +68,9 @@ class DebControlMember(File):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
 
 class DebControlContainer(Container):
     def __init__(self, *args, **kwargs):


=====================================
diffoscope/comparators/decompile.py
=====================================
@@ -183,6 +183,9 @@ class AsmFunction(File):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
     if tlsh:
 
         @property


=====================================
diffoscope/comparators/elf.py
=====================================
@@ -329,6 +329,9 @@ class ElfSection(File):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
     def has_same_content_as(self, other):
         # Always force diff of the section
         return False


=====================================
diffoscope/comparators/macho.py
=====================================
@@ -67,6 +67,9 @@ class MachoContainerFile(File, metaclass=abc.ABCMeta):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
     def has_same_content_as(self, other):
         # Always force diff of the container
         return False


=====================================
diffoscope/comparators/missing_file.py
=====================================
@@ -73,6 +73,9 @@ class MissingFile(File, AbstractMissingType):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
     def compare(self, other, source=None):
         # So now that comparators are all object-oriented, we don't have any
         # clue on how to perform a meaningful comparison right here. So we are


=====================================
diffoscope/comparators/rpm.py
=====================================
@@ -39,10 +39,10 @@ logger = logging.getLogger(__name__)
 def convert_header_field(io, header):
     if isinstance(header, list):
         if len(header) == 0:
-            io.write(u"[]")
+            io.write("[]")
         else:
             for item in header:
-                io.write(u"\n - ")
+                io.write("\n - ")
                 convert_header_field(io, item)
         return
 
@@ -70,9 +70,9 @@ def get_rpm_header(path, ts):
         for rpmtag in sorted(rpm.tagnames):
             if rpmtag not in hdr:
                 continue
-            s.write(u"{}: ".format(rpm.tagnames[rpmtag]))
+            s.write("{}: ".format(rpm.tagnames[rpmtag]))
             convert_header_field(s, hdr[rpmtag])
-            s.write(u"\n")
+            s.write("\n")
     return s.getvalue()
 
 


=====================================
diffoscope/comparators/socket_or_fifo.py
=====================================
@@ -0,0 +1,102 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2021 Brent Spillner <s p i l l n e r @ a c m . o r g>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import stat
+import logging
+
+from diffoscope.tempfiles import get_named_temporary_file
+from diffoscope.difference import Difference
+
+from .binary import FilesystemFile
+from .utils.file import File
+
+logger = logging.getLogger(__name__)
+
+
+class SocketOrFIFO(File):
+    DESCRIPTION = "local (UNIX domain) sockets and named pipes (FIFOs)"
+
+    @classmethod
+    def recognizes(cls, file):
+        return file.is_socket_or_fifo()
+
+    def get_type(self):
+        assert isinstance(self, FilesystemFile)
+        st = os.lstat(self.name)
+        return stat.S_IFMT(st.st_mode)
+
+    def has_same_content_as(self, other):
+        try:
+            # (filesystem ID, inode) pair uniquely identifies the socket/pipe
+            # Path comparison allows matching against pipes inside an archive
+            # (i.e. that would be created by extraction), while using .samefile()
+            # lets us match endpoints that might have more than one "canonical"
+            # pathname after a mount -o rebind
+            if self.get_type() != other.get_type():
+                return False
+            if os.path.exists(self.name) and os.path.exists(other.name):
+                return os.path.samefile(self.name, other.name)
+            return os.path.realpath(self.name) == os.path.realpath(other.name)
+        except (AttributeError, OSError):
+            # 'other' is likely something odd that doesn't support stat() and/or
+            # can't supply an fs_uuid/inode pair for samefile()
+            logger.debug(
+                "has_same_content: Not a socket, FIFO, or ordinary file: %s",
+                other,
+            )
+            return False
+
+    def create_placeholder(self):
+        with get_named_temporary_file(mode="w+", delete=False) as f:
+            f.write(format_socket(self.get_type(), self.name))
+            f.flush()
+            return f.name
+
+    @property
+    def path(self):
+        if not hasattr(self, "_placeholder"):
+            self._placeholder = self.create_placeholder()
+        return self._placeholder
+
+    def cleanup(self):
+        if hasattr(self, "_placeholder"):
+            os.remove(self._placeholder)
+            del self._placeholder
+        super().cleanup()
+
+    def compare(self, other, source=None):
+        with open(self.path) as my_content, open(other.path) as other_content:
+            return Difference.from_text_readers(
+                my_content,
+                other_content,
+                self.name,
+                other.name,
+                source=source,
+                comment="socket/FIFO",
+            )
+
+
+def format_socket(mode, filename):
+    if stat.S_ISSOCK(mode):
+        kind = "UNIX domain socket"
+    elif stat.S_ISFIFO(mode):
+        kind = "named pipe (FIFO)"
+    else:
+        kind = "ERROR: problem with an is_socket_or_fifo() predicate"
+    return f"{kind}: {filename}\n"


=====================================
diffoscope/comparators/squashfs.py
=====================================
@@ -33,6 +33,7 @@ from diffoscope.tempfiles import get_temporary_directory
 from .utils.file import File
 from .device import Device
 from .symlink import Symlink
+from .socket_or_fifo import SocketOrFIFO
 from .directory import Directory
 from .utils.archive import Archive, ArchiveMember
 from .utils.command import Command
@@ -72,6 +73,9 @@ class SquashfsMember(ArchiveMember):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
     @property
     def path(self):
         # Use our extracted version and also avoid creating a temporary
@@ -217,6 +221,42 @@ class SquashfsDevice(Device, SquashfsMember):
         return True
 
 
+class SquashfsFIFO(SocketOrFIFO, SquashfsMember):
+    # Example line:
+    # prw-r--r-- root/root  0 2021-08-18 13:37 run/initctl
+    LINE_RE = re.compile(
+        r"^(?P<kind>s|p)\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(?P<member_name>.*)$"
+    )
+
+    KIND_MAP = {"s": stat.S_IFSOCK, "p": stat.S_IFIFO}
+
+    @staticmethod
+    def parse(line):
+        m = SquashfsFIFO.LINE_RE.match(line)
+        if not m:
+            raise SquashfsInvalidLineFormat("invalid line format")
+
+        d = m.groupdict()
+        try:
+            d["mode"] = SquashfsFIFO.KIND_MAP[d["kind"]]
+            del d["kind"]
+        except KeyError:
+            raise SquashfsInvalidLineFormat(
+                f"unknown socket/FIFO kind {d['kind']}"
+            )
+        return d
+
+    def __init__(self, archive, member_name, mode):
+        SquashfsMember.__init__(self, archive, member_name)
+        self._mode = mode
+
+    def get_type(self):
+        return stat.S_IFMT(self._mode)
+
+    def is_socket_or_fifo(self):
+        return True
+
+
 class SquashfsContainer(Archive):
     auto_diff_metadata = False
 
@@ -225,6 +265,8 @@ class SquashfsContainer(Archive):
         "l": SquashfsSymlink,
         "c": SquashfsDevice,
         "b": SquashfsDevice,
+        "p": SquashfsFIFO,
+        "s": SquashfsFIFO,
         "-": SquashfsRegularFile,
     }
 


=====================================
diffoscope/comparators/utils/archive.py
=====================================
@@ -136,6 +136,9 @@ class ArchiveMember(File):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
 
 class MissingArchiveLikeObject(AbstractMissingType):
     def getnames(self):


=====================================
diffoscope/comparators/utils/file.py
=====================================
@@ -327,6 +327,7 @@ class File(metaclass=abc.ABCMeta):
             (self.is_device, "device"),
             (self.is_symlink, "symlink"),
             (self.is_directory, "directory"),
+            (self.is_socket_or_fifo, "socket or FIFO"),
         ):
             if x():
                 return y
@@ -374,6 +375,10 @@ class File(metaclass=abc.ABCMeta):
     def is_device():
         raise NotImplementedError()
 
+    @abc.abstractmethod
+    def is_socket_or_fifo():
+        raise NotImplementedError()
+
     def compare_bytes(self, other, source=None):
         from .compare import compare_binary_files
 


=====================================
diffoscope/comparators/utils/libarchive.py
=====================================
@@ -24,6 +24,7 @@ import ctypes
 import logging
 import libarchive
 import collections
+import stat
 
 from diffoscope.exc import ContainerExtractionError
 from diffoscope.config import Config
@@ -34,6 +35,7 @@ from diffoscope.difference import Difference
 from ..device import Device
 from ..symlink import Symlink
 from ..directory import Directory
+from ..socket_or_fifo import SocketOrFIFO
 
 from .archive import Archive, ArchiveMember
 
@@ -187,6 +189,9 @@ class LibarchiveMember(ArchiveMember):
     def is_device(self):
         return False
 
+    def is_socket_or_fifo(self):
+        return False
+
 
 class LibarchiveDirectory(Directory, LibarchiveMember):
     def __init__(self, archive, entry):
@@ -241,6 +246,18 @@ class LibarchiveDevice(Device, LibarchiveMember):
         return True
 
 
+class LibarchiveFIFO(SocketOrFIFO, LibarchiveMember):
+    def __init__(self, container, entry):
+        LibarchiveMember.__init__(self, container, entry)
+        self._mode = entry.mode
+
+    def get_type(self):
+        return stat.S_IFMT(self._mode)
+
+    def is_socket_or_fifo(self):
+        return True
+
+
 class LibarchiveContainer(Archive):
     def open_archive(self):
         # libarchive is very very stream oriented an not for random access
@@ -283,6 +300,8 @@ class LibarchiveContainer(Archive):
             return LibarchiveSymlink(self, entry)
         if entry.isblk or entry.ischr:
             return LibarchiveDevice(self, entry)
+        if entry.isfifo:
+            return LibarchiveFIFO(self, entry)
 
         return LibarchiveMember(self, entry)
 
@@ -323,7 +342,7 @@ class LibarchiveContainer(Archive):
                 os.makedirs(os.path.dirname(dst), exist_ok=True)
                 try:
                     with open(dst, "wb") as f:
-                        for block in entry.get_blocks(block_size=2 ** 17):
+                        for block in entry.get_blocks(block_size=2**17):
                             f.write(block)
                 except Exception as e:
                     raise ContainerExtractionError(entry.pathname, e)


=====================================
diffoscope/config.py
=====================================
@@ -43,16 +43,16 @@ class Config:
         self.diff_context = 7
 
         # GNU diff cannot process arbitrary large files :(
-        self.max_diff_input_lines = 2 ** 22
+        self.max_diff_input_lines = 2**22
         self.max_diff_block_lines_saved = float("inf")
 
         # hard limits, restricts single-file and multi-file formats
-        self.max_report_size = defaultint(40 * 2 ** 20)  # 40 MB
-        self.max_diff_block_lines = defaultint(2 ** 10)  # 1024 lines
+        self.max_report_size = defaultint(40 * 2**20)  # 40 MB
+        self.max_diff_block_lines = defaultint(2**10)  # 1024 lines
         # structural limits, restricts single-file formats
         # semi-restricts multi-file formats
-        self.max_page_size = defaultint(40 * 2 ** 20)  # 4 MB
-        self.max_page_diff_block_lines = defaultint(2 ** 7)  # 128 lines
+        self.max_page_size = defaultint(40 * 2**20)  # 4 MB
+        self.max_page_diff_block_lines = defaultint(2**7)  # 128 lines
 
         self.max_text_report_size = 0
 


=====================================
diffoscope/presenters/text.py
=====================================
@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)
 
 
 class TextPresenter(Presenter):
-    PREFIX = u"│ "
+    PREFIX = "│ "
     RE_PREFIX = re.compile(r"(^|\n)")
 
     def __init__(self, print_func, color):
@@ -76,13 +76,13 @@ class TextPresenter(Presenter):
             self.output("--- {}".format(difference.source1))
             self.output("+++ {}".format(difference.source2))
         elif difference.source1 == difference.source2:
-            self.output(u"├── {}".format(difference.source1))
+            self.output("├── {}".format(difference.source1))
         else:
-            self.output(u"│   --- {}".format(difference.source1))
-            self.output(u"├── +++ {}".format(difference.source2))
+            self.output("│   --- {}".format(difference.source1))
+            self.output("├── +++ {}".format(difference.source2))
 
         for x in difference.comments:
-            self.output(u"│┄ {}".format(x))
+            self.output("│┄ {}".format(x))
 
         diff = difference.unified_diff
 


=====================================
tests/comparators/test_pcap.py
=====================================
@@ -45,7 +45,7 @@ def differences(pcap1, pcap2):
 @skip_unless_tools_exist("tcpdump")
 def test_diff(differences):
     expected_diff = get_data("pcap_expected_diff")
-    assert differences[0].unified_diff[: 2 ** 13] == expected_diff[: 2 ** 13]
+    assert differences[0].unified_diff[: 2**13] == expected_diff[: 2**13]
 
 
 @skip_unless_tools_exist("tcpdump")


=====================================
tests/comparators/test_sockets.py
=====================================
@@ -0,0 +1,118 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017, 2020 Chris Lamb <lamby at debian.org>
+# Copyright © 2021 Brent Spillner <s p i l l n e r @ a c m . o r g>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import socket
+import pytest
+
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.socket_or_fifo import SocketOrFIFO, format_socket
+from diffoscope.comparators.utils.specialize import specialize
+
+from ..utils.data import get_data, load_fixture
+
+sample_textfile = "text_ascii1"
+sampletext = load_fixture(sample_textfile)
+
+
+def make_socket(path):
+    if os.path.exists(path):
+        os.remove(path)
+    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    sock.bind(path)
+    return specialize(FilesystemFile(path))
+
+
+def make_pipe(path):
+    if os.path.exists(path):
+        os.remove(path)
+    os.mkfifo(path)
+    return specialize(FilesystemFile(path))
+
+
+@pytest.fixture
+def endpoints(tmpdir):
+    def makename(tag):
+        return os.path.join(tmpdir, "test_" + tag)
+
+    test_points = zip(
+        [make_socket, make_socket, make_pipe, make_pipe],
+        map(makename, ["socket1", "socket2", "pipe1", "pipe2"]),
+    )
+    yield [(name, f(name)) for (f, name) in test_points]
+    for (_, name) in test_points:
+        os.remove(name)
+
+
+@pytest.fixture
+def expected_results(endpoints):
+    descriptions = [
+        format_socket(obj.get_type(), path) for (path, obj) in endpoints
+    ]
+    [sock1_desc, sock2_desc, pipe1_desc, pipe2_desc] = descriptions
+
+    # Prefix every line of the sample text file with '+' to predict RHS of the diff
+    sampletext_contents = get_data(sample_textfile)
+    sample_lines = sampletext_contents.count("\n")
+    added_text = "+" + "\n+".join(sampletext_contents.split("\n")[:-1]) + "\n"
+
+    sock_text_diff = (
+        "@@ -1 +1,{} @@\n".format(sample_lines) + "-" + sock1_desc + added_text
+    )
+    pipe_text_diff = (
+        "@@ -1 +1,{} @@\n".format(sample_lines) + "-" + pipe1_desc + added_text
+    )
+    sock_sock_diff = "@@ -1 +1 @@\n" + "-" + sock1_desc + "+" + sock2_desc
+    pipe_pipe_diff = "@@ -1 +1 @@\n" + "-" + pipe1_desc + "+" + pipe2_desc
+    sock_pipe_diff = "@@ -1 +1 @@\n" + "-" + sock1_desc + "+" + pipe1_desc
+    pipe_sock_diff = "@@ -1 +1 @@\n" + "-" + pipe1_desc + "+" + sock1_desc
+    yield (
+        sock_text_diff,
+        pipe_text_diff,
+        sock_sock_diff,
+        pipe_pipe_diff,
+        sock_pipe_diff,
+        pipe_sock_diff,
+    )
+
+
+def test_sockets(endpoints, expected_results, sampletext):
+    (names, objects) = zip(*endpoints)
+    (sock1, sock2, pipe1, pipe2) = objects
+    (
+        sock_text_diff,
+        pipe_text_diff,
+        sock_sock_diff,
+        pipe_pipe_diff,
+        sock_pipe_diff,
+        pipe_sock_diff,
+    ) = expected_results
+
+    assert isinstance(sock1, SocketOrFIFO)
+    assert isinstance(pipe1, SocketOrFIFO)
+
+    assert sock1.compare(sampletext).unified_diff == sock_text_diff
+    assert pipe1.compare(sampletext).unified_diff == pipe_text_diff
+
+    assert sock1.compare(sock1) == None
+    assert pipe1.compare(pipe1) == None
+    assert sock1.compare(sock2).unified_diff == sock_sock_diff
+    assert pipe1.compare(pipe2).unified_diff == pipe_pipe_diff
+    assert sock1.compare(pipe1).unified_diff == sock_pipe_diff
+    assert pipe1.compare(sock1).unified_diff == pipe_sock_diff



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d1bd9b66bd376a410833acad7f0934672db35c4e...486f596f4ab573af82e22d8d35b8137f83bddf2c

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d1bd9b66bd376a410833acad7f0934672db35c4e...486f596f4ab573af82e22d8d35b8137f83bddf2c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20220321/db3bd998/attachment.htm>


More information about the rb-commits mailing list