[Git][reproducible-builds/diffoscope][master] 14 commits: Add graceful handling for UNIX sockets and named pipes (cf. issue #293)
Chris Lamb (@lamby)
gitlab at salsa.debian.org
Mon Mar 21 15:33:01 UTC 2022
Chris Lamb pushed to branch master at Reproducible Builds / diffoscope
Commits:
14e21db2 by spillner at 2022-03-21T15:29:16+00:00
Add graceful handling for UNIX sockets and named pipes (cf. issue #293)
- - - - -
9b249095 by spillner at 2022-03-21T15:29:16+00:00
Adding other files changed in this patch.
- - - - -
490a4b4c by spillner at 2022-03-21T15:29:16+00:00
Added unit tests for the socket/pipe module.
- - - - -
9c5f7c77 by spillner at 2022-03-21T15:29:16+00:00
Correcting comment, after validating that Linux kernel will indeed nest sockets inside squashfs.
- - - - -
cfbe5587 by spillner at 2022-03-21T15:29:16+00:00
Reformatted to pass black style test.
- - - - -
454d9757 by spillner at 2022-03-21T15:29:16+00:00
Changes to pass black style check in diffoscope/comparators; changed test fixture to automatically clean up filesystem lint.
- - - - -
004072bb by spillner at 2022-03-21T15:29:16+00:00
Renamed is_socketOrFIFO() to is_socket_or_fifo() to improve readability.
- - - - -
f0d830ba by spillner at 2022-03-21T15:29:16+00:00
Corrected copyright notice, reinserted blank line to pass black check for versions 21.11b0 and earlier (now fails 21.12b0)
- - - - -
edf850a3 by spillner at 2022-03-21T15:29:16+00:00
Removed superfluous else: after unconditional return in previous cases.
- - - - -
1be6e2b2 by spillner at 2022-03-21T15:29:16+00:00
Corrected inadvertently duplicated file contents.
- - - - -
5a8398e5 by spillner at 2022-03-21T15:29:16+00:00
Fixed black style reversion after unindenting.
- - - - -
00bd0a07 by spillner at 2022-03-21T15:29:16+00:00
Removed superfluous log message and reformatted comment lines to resolve review comments.
- - - - -
26a50176 by Brent Spillner at 2022-03-21T15:29:16+00:00
Removed blank line to satisfy black versions 21.12b0 and later, including latest salsa CI pipeline.
- - - - -
486f596f by Chris Lamb at 2022-03-21T15:30:49+00:00
Reformat for Black.
- - - - -
18 changed files:
- diffoscope/comparators/__init__.py
- diffoscope/comparators/binary.py
- diffoscope/comparators/cbfs.py
- diffoscope/comparators/debian.py
- diffoscope/comparators/decompile.py
- diffoscope/comparators/elf.py
- diffoscope/comparators/macho.py
- diffoscope/comparators/missing_file.py
- diffoscope/comparators/rpm.py
- + diffoscope/comparators/socket_or_fifo.py
- diffoscope/comparators/squashfs.py
- diffoscope/comparators/utils/archive.py
- diffoscope/comparators/utils/file.py
- diffoscope/comparators/utils/libarchive.py
- diffoscope/config.py
- diffoscope/presenters/text.py
- tests/comparators/test_pcap.py
- + tests/comparators/test_sockets.py
Changes:
=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -36,6 +36,7 @@ class ComparatorManager:
("missing_file.MissingFile",),
("symlink.Symlink",),
("device.Device",),
+ ("socket_or_fifo.SocketOrFIFO",),
("debian.DotChangesFile", "debian_fallback.DotChangesFile"),
("debian.DotDscFile", "debian_fallback.DotDscFile"),
("debian.DotBuildinfoFile", "debian_fallback.DotBuildinfoFile"),
=====================================
diffoscope/comparators/binary.py
=====================================
@@ -41,3 +41,7 @@ class FilesystemFile(File):
def is_device(self):
mode = os.lstat(self._name).st_mode
return stat.S_ISCHR(mode) or stat.S_ISBLK(mode)
+
+ def is_socket_or_fifo(self):
+ mode = os.lstat(self._name).st_mode
+ return stat.S_ISSOCK(mode) or stat.S_ISFIFO(mode)
=====================================
diffoscope/comparators/cbfs.py
=====================================
@@ -97,7 +97,7 @@ CBFS_HEADER_VERSION2 = 0x31313132
CBFS_HEADER_SIZE = 8 * 4 # 8 * uint32_t
# On 2015-12-15, the largest image produced by coreboot is 16 MiB
-CBFS_MAXIMUM_FILE_SIZE = 24 * 2 ** 20 # 24 MiB
+CBFS_MAXIMUM_FILE_SIZE = 24 * 2**20 # 24 MiB
def is_header_valid(buf, size, offset=0):
=====================================
diffoscope/comparators/debian.py
=====================================
@@ -68,6 +68,9 @@ class DebControlMember(File):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
class DebControlContainer(Container):
def __init__(self, *args, **kwargs):
=====================================
diffoscope/comparators/decompile.py
=====================================
@@ -183,6 +183,9 @@ class AsmFunction(File):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
if tlsh:
@property
=====================================
diffoscope/comparators/elf.py
=====================================
@@ -329,6 +329,9 @@ class ElfSection(File):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
def has_same_content_as(self, other):
# Always force diff of the section
return False
=====================================
diffoscope/comparators/macho.py
=====================================
@@ -67,6 +67,9 @@ class MachoContainerFile(File, metaclass=abc.ABCMeta):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
def has_same_content_as(self, other):
# Always force diff of the container
return False
=====================================
diffoscope/comparators/missing_file.py
=====================================
@@ -73,6 +73,9 @@ class MissingFile(File, AbstractMissingType):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
def compare(self, other, source=None):
# So now that comparators are all object-oriented, we don't have any
# clue on how to perform a meaningful comparison right here. So we are
=====================================
diffoscope/comparators/rpm.py
=====================================
@@ -39,10 +39,10 @@ logger = logging.getLogger(__name__)
def convert_header_field(io, header):
if isinstance(header, list):
if len(header) == 0:
- io.write(u"[]")
+ io.write("[]")
else:
for item in header:
- io.write(u"\n - ")
+ io.write("\n - ")
convert_header_field(io, item)
return
@@ -70,9 +70,9 @@ def get_rpm_header(path, ts):
for rpmtag in sorted(rpm.tagnames):
if rpmtag not in hdr:
continue
- s.write(u"{}: ".format(rpm.tagnames[rpmtag]))
+ s.write("{}: ".format(rpm.tagnames[rpmtag]))
convert_header_field(s, hdr[rpmtag])
- s.write(u"\n")
+ s.write("\n")
return s.getvalue()
=====================================
diffoscope/comparators/socket_or_fifo.py
=====================================
@@ -0,0 +1,102 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2021 Brent Spillner <s p i l l n e r @ a c m . o r g>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import stat
+import logging
+
+from diffoscope.tempfiles import get_named_temporary_file
+from diffoscope.difference import Difference
+
+from .binary import FilesystemFile
+from .utils.file import File
+
+logger = logging.getLogger(__name__)
+
+
+class SocketOrFIFO(File):
+ DESCRIPTION = "local (UNIX domain) sockets and named pipes (FIFOs)"
+
+ @classmethod
+ def recognizes(cls, file):
+ return file.is_socket_or_fifo()
+
+ def get_type(self):
+ assert isinstance(self, FilesystemFile)
+ st = os.lstat(self.name)
+ return stat.S_IFMT(st.st_mode)
+
+ def has_same_content_as(self, other):
+ try:
+ # (filesystem ID, inode) pair uniquely identifies the socket/pipe
+ # Path comparison allows matching against pipes inside an archive
+ # (i.e. that would be created by extraction), while using .samefile()
+ # lets us match endpoints that might have more than one "canonical"
+ # pathname after a mount -o rebind
+ if self.get_type() != other.get_type():
+ return False
+ if os.path.exists(self.name) and os.path.exists(other.name):
+ return os.path.samefile(self.name, other.name)
+ return os.path.realname(self.name) == os.path.realname(other.name)
+ except (AttributeError, OSError):
+ # 'other' is likely something odd that doesn't support stat() and/or
+ # can't supply an fs_uuid/inode pair for samefile()
+ logger.debug(
+ "has_same_content: Not a socket, FIFO, or ordinary file: %s",
+ other,
+ )
+ return False
+
+ def create_placeholder(self):
+ with get_named_temporary_file(mode="w+", delete=False) as f:
+ f.write(format_socket(self.get_type(), self.name))
+ f.flush()
+ return f.name
+
+ @property
+ def path(self):
+ if not hasattr(self, "_placeholder"):
+ self._placeholder = self.create_placeholder()
+ return self._placeholder
+
+ def cleanup(self):
+ if hasattr(self, "_placeholder"):
+ os.remove(self._placeholder)
+ del self._placeholder
+ super().cleanup()
+
+ def compare(self, other, source=None):
+ with open(self.path) as my_content, open(other.path) as other_content:
+ return Difference.from_text_readers(
+ my_content,
+ other_content,
+ self.name,
+ other.name,
+ source=source,
+ comment="socket/FIFO",
+ )
+
+
+def format_socket(mode, filename):
+ if stat.S_ISSOCK(mode):
+ kind = "UNIX domain socket"
+ elif stat.S_ISFIFO(mode):
+ kind = "named pipe (FIFO)"
+ else:
+ kind = "ERROR: problem with an is_socket_or_fifo() predicate"
+ return f"{kind}: {filename}\n"
=====================================
diffoscope/comparators/squashfs.py
=====================================
@@ -33,6 +33,7 @@ from diffoscope.tempfiles import get_temporary_directory
from .utils.file import File
from .device import Device
from .symlink import Symlink
+from .socket_or_fifo import SocketOrFIFO
from .directory import Directory
from .utils.archive import Archive, ArchiveMember
from .utils.command import Command
@@ -72,6 +73,9 @@ class SquashfsMember(ArchiveMember):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
@property
def path(self):
# Use our extracted version and also avoid creating a temporary
@@ -217,6 +221,42 @@ class SquashfsDevice(Device, SquashfsMember):
return True
+class SquashfsFIFO(SocketOrFIFO, SquashfsMember):
+ # Example line:
+ # prw-r--r-- root/root 0 2021-08-18 13:37 run/initctl
+ LINE_RE = re.compile(
+ r"^(?P<kind>s|p)\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(?P<member_name>.*)$"
+ )
+
+ KIND_MAP = {"s": stat.S_IFSOCK, "p": stat.S_IFIFO}
+
+ @staticmethod
+ def parse(line):
+ m = SquashfsFIFO.LINE_RE.match(line)
+ if not m:
+ raise SquashfsInvalidLineFormat("invalid line format")
+
+ d = m.groupdict()
+ try:
+ d["mode"] = SquashfsFIFO.KIND_MAP[d["kind"]]
+ del d["kind"]
+ except KeyError:
+ raise SquashfsInvalidLineFormat(
+ f"unknown socket/FIFO kind {d['kind']}"
+ )
+ return d
+
+ def __init__(self, archive, member_name, mode):
+ SquashfsMember.__init__(self, archive, member_name)
+ self._mode = mode
+
+ def get_type(self):
+ return stat.S_IFMT(self._mode)
+
+ def is_socket_or_fifo(self):
+ return True
+
+
class SquashfsContainer(Archive):
auto_diff_metadata = False
@@ -225,6 +265,8 @@ class SquashfsContainer(Archive):
"l": SquashfsSymlink,
"c": SquashfsDevice,
"b": SquashfsDevice,
+ "p": SquashfsFIFO,
+ "s": SquashfsFIFO,
"-": SquashfsRegularFile,
}
=====================================
diffoscope/comparators/utils/archive.py
=====================================
@@ -136,6 +136,9 @@ class ArchiveMember(File):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
class MissingArchiveLikeObject(AbstractMissingType):
def getnames(self):
=====================================
diffoscope/comparators/utils/file.py
=====================================
@@ -327,6 +327,7 @@ class File(metaclass=abc.ABCMeta):
(self.is_device, "device"),
(self.is_symlink, "symlink"),
(self.is_directory, "directory"),
+ (self.is_socket_or_fifo, "socket or FIFO"),
):
if x():
return y
@@ -374,6 +375,10 @@ class File(metaclass=abc.ABCMeta):
def is_device():
raise NotImplementedError()
+ @abc.abstractmethod
+ def is_socket_or_fifo():
+ raise NotImplementedError()
+
def compare_bytes(self, other, source=None):
from .compare import compare_binary_files
=====================================
diffoscope/comparators/utils/libarchive.py
=====================================
@@ -24,6 +24,7 @@ import ctypes
import logging
import libarchive
import collections
+import stat
from diffoscope.exc import ContainerExtractionError
from diffoscope.config import Config
@@ -34,6 +35,7 @@ from diffoscope.difference import Difference
from ..device import Device
from ..symlink import Symlink
from ..directory import Directory
+from ..socket_or_fifo import SocketOrFIFO
from .archive import Archive, ArchiveMember
@@ -187,6 +189,9 @@ class LibarchiveMember(ArchiveMember):
def is_device(self):
return False
+ def is_socket_or_fifo(self):
+ return False
+
class LibarchiveDirectory(Directory, LibarchiveMember):
def __init__(self, archive, entry):
@@ -241,6 +246,18 @@ class LibarchiveDevice(Device, LibarchiveMember):
return True
+class LibarchiveFIFO(SocketOrFIFO, LibarchiveMember):
+ def __init__(self, container, entry):
+ LibarchiveMember.__init__(self, container, entry)
+ self._mode = entry.mode
+
+ def get_type(self):
+ return stat.S_IFMT(self._mode)
+
+ def is_socket_or_fifo(self):
+ return True
+
+
class LibarchiveContainer(Archive):
def open_archive(self):
# libarchive is very very stream oriented and not for random access
@@ -283,6 +300,8 @@ class LibarchiveContainer(Archive):
return LibarchiveSymlink(self, entry)
if entry.isblk or entry.ischr:
return LibarchiveDevice(self, entry)
+ if entry.isfifo:
+ return LibarchiveFIFO(self, entry)
return LibarchiveMember(self, entry)
@@ -323,7 +342,7 @@ class LibarchiveContainer(Archive):
os.makedirs(os.path.dirname(dst), exist_ok=True)
try:
with open(dst, "wb") as f:
- for block in entry.get_blocks(block_size=2 ** 17):
+ for block in entry.get_blocks(block_size=2**17):
f.write(block)
except Exception as e:
raise ContainerExtractionError(entry.pathname, e)
=====================================
diffoscope/config.py
=====================================
@@ -43,16 +43,16 @@ class Config:
self.diff_context = 7
# GNU diff cannot process arbitrary large files :(
- self.max_diff_input_lines = 2 ** 22
+ self.max_diff_input_lines = 2**22
self.max_diff_block_lines_saved = float("inf")
# hard limits, restricts single-file and multi-file formats
- self.max_report_size = defaultint(40 * 2 ** 20) # 40 MB
- self.max_diff_block_lines = defaultint(2 ** 10) # 1024 lines
+ self.max_report_size = defaultint(40 * 2**20) # 40 MB
+ self.max_diff_block_lines = defaultint(2**10) # 1024 lines
# structural limits, restricts single-file formats
# semi-restricts multi-file formats
- self.max_page_size = defaultint(40 * 2 ** 20) # 4 MB
- self.max_page_diff_block_lines = defaultint(2 ** 7) # 128 lines
+ self.max_page_size = defaultint(40 * 2**20) # 4 MB
+ self.max_page_diff_block_lines = defaultint(2**7) # 128 lines
self.max_text_report_size = 0
=====================================
diffoscope/presenters/text.py
=====================================
@@ -34,7 +34,7 @@ logger = logging.getLogger(__name__)
class TextPresenter(Presenter):
- PREFIX = u"│ "
+ PREFIX = "│ "
RE_PREFIX = re.compile(r"(^|\n)")
def __init__(self, print_func, color):
@@ -76,13 +76,13 @@ class TextPresenter(Presenter):
self.output("--- {}".format(difference.source1))
self.output("+++ {}".format(difference.source2))
elif difference.source1 == difference.source2:
- self.output(u"├── {}".format(difference.source1))
+ self.output("├── {}".format(difference.source1))
else:
- self.output(u"│ --- {}".format(difference.source1))
- self.output(u"├── +++ {}".format(difference.source2))
+ self.output("│ --- {}".format(difference.source1))
+ self.output("├── +++ {}".format(difference.source2))
for x in difference.comments:
- self.output(u"│┄ {}".format(x))
+ self.output("│┄ {}".format(x))
diff = difference.unified_diff
=====================================
tests/comparators/test_pcap.py
=====================================
@@ -45,7 +45,7 @@ def differences(pcap1, pcap2):
@skip_unless_tools_exist("tcpdump")
def test_diff(differences):
expected_diff = get_data("pcap_expected_diff")
- assert differences[0].unified_diff[: 2 ** 13] == expected_diff[: 2 ** 13]
+ assert differences[0].unified_diff[: 2**13] == expected_diff[: 2**13]
@skip_unless_tools_exist("tcpdump")
=====================================
tests/comparators/test_sockets.py
=====================================
@@ -0,0 +1,118 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017, 2020 Chris Lamb <lamby at debian.org>
+# Copyright © 2021 Brent Spillner <s p i l l n e r @ a c m . o r g>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import socket
+import pytest
+
+from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.socket_or_fifo import SocketOrFIFO, format_socket
+from diffoscope.comparators.utils.specialize import specialize
+
+from ..utils.data import get_data, load_fixture
+
+sample_textfile = "text_ascii1"
+sampletext = load_fixture(sample_textfile)
+
+
+def make_socket(path):
+ if os.path.exists(path):
+ os.remove(path)
+ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ sock.bind(path)
+ return specialize(FilesystemFile(path))
+
+
+def make_pipe(path):
+ if os.path.exists(path):
+ os.remove(path)
+ os.mkfifo(path)
+ return specialize(FilesystemFile(path))
+
+
+@pytest.fixture
+def endpoints(tmpdir):
+ def makename(tag):
+ return os.path.join(tmpdir, "test_" + tag)
+
+ test_points = zip(
+ [make_socket, make_socket, make_pipe, make_pipe],
+ map(makename, ["socket1", "socket2", "pipe1", "pipe2"]),
+ )
+ yield [(name, f(name)) for (f, name) in test_points]
+ for (_, name) in test_points:
+ os.remove(name)
+
+
+@pytest.fixture
+def expected_results(endpoints):
+ descriptions = [
+ format_socket(obj.get_type(), path) for (path, obj) in endpoints
+ ]
+ [sock1_desc, sock2_desc, pipe1_desc, pipe2_desc] = descriptions
+
+ # Prefix every line of the sample text file with '+' to predict RHS of the diff
+ sampletext_contents = get_data(sample_textfile)
+ sample_lines = sampletext_contents.count("\n")
+ added_text = "+" + "\n+".join(sampletext_contents.split("\n")[:-1]) + "\n"
+
+ sock_text_diff = (
+ "@@ -1 +1,{} @@\n".format(sample_lines) + "-" + sock1_desc + added_text
+ )
+ pipe_text_diff = (
+ "@@ -1 +1,{} @@\n".format(sample_lines) + "-" + pipe1_desc + added_text
+ )
+ sock_sock_diff = "@@ -1 +1 @@\n" + "-" + sock1_desc + "+" + sock2_desc
+ pipe_pipe_diff = "@@ -1 +1 @@\n" + "-" + pipe1_desc + "+" + pipe2_desc
+ sock_pipe_diff = "@@ -1 +1 @@\n" + "-" + sock1_desc + "+" + pipe1_desc
+ pipe_sock_diff = "@@ -1 +1 @@\n" + "-" + pipe1_desc + "+" + sock1_desc
+ yield (
+ sock_text_diff,
+ pipe_text_diff,
+ sock_sock_diff,
+ pipe_pipe_diff,
+ sock_pipe_diff,
+ pipe_sock_diff,
+ )
+
+
+def test_sockets(endpoints, expected_results, sampletext):
+ (names, objects) = zip(*endpoints)
+ (sock1, sock2, pipe1, pipe2) = objects
+ (
+ sock_text_diff,
+ pipe_text_diff,
+ sock_sock_diff,
+ pipe_pipe_diff,
+ sock_pipe_diff,
+ pipe_sock_diff,
+ ) = expected_results
+
+ assert isinstance(sock1, SocketOrFIFO)
+ assert isinstance(pipe1, SocketOrFIFO)
+
+ assert sock1.compare(sampletext).unified_diff == sock_text_diff
+ assert pipe1.compare(sampletext).unified_diff == pipe_text_diff
+
+ assert sock1.compare(sock1) == None
+ assert pipe1.compare(pipe1) == None
+ assert sock1.compare(sock2).unified_diff == sock_sock_diff
+ assert pipe1.compare(pipe2).unified_diff == pipe_pipe_diff
+ assert sock1.compare(pipe1).unified_diff == sock_pipe_diff
+ assert pipe1.compare(sock1).unified_diff == pipe_sock_diff
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d1bd9b66bd376a410833acad7f0934672db35c4e...486f596f4ab573af82e22d8d35b8137f83bddf2c
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d1bd9b66bd376a410833acad7f0934672db35c4e...486f596f4ab573af82e22d8d35b8137f83bddf2c
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20220321/db3bd998/attachment.htm>
More information about the rb-commits
mailing list