[Git][reproducible-builds/diffoscope][master] 3 commits: If the equivalent of `file -i` returns text/plain, fallback to comparing as a...

Chris Lamb (@lamby) gitlab at salsa.debian.org
Fri Oct 13 08:07:48 UTC 2023



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
81c68d7b by Chris Lamb at 2023-10-13T08:56:35+01:00
If the equivalent of `file -i` returns text/plain, fall back to comparing as a text file. (Closes: Debian:#1053668)

- - - - -
45de29d3 by Chris Lamb at 2023-10-13T08:57:27+01:00
Update copyright years.

- - - - -
1d8903b8 by Chris Lamb at 2023-10-13T08:59:15+01:00
releasing package diffoscope version 251

- - - - -


5 changed files:

- debian/changelog
- diffoscope/__init__.py
- diffoscope/comparators/utils/file.py
- diffoscope/comparators/utils/specialize.py
- tests/comparators/test_text.py


Changes:

=====================================
debian/changelog
=====================================
@@ -1,8 +1,11 @@
-diffoscope (251) UNRELEASED; urgency=medium
+diffoscope (251) unstable; urgency=medium
 
-  * WIP (generated upon release).
+  * If the equivalent of `file -i` returns text/plain, fall back to comparing
+    this file as a text file. This especially helps when file(1) miscategorises
+    text files as some esoteric type. (Closes: Debian:#1053668)
+  * Update copyright years.
 
- -- Chris Lamb <lamby at debian.org>  Fri, 08 Sep 2023 12:12:06 -0700
+ -- Chris Lamb <lamby at debian.org>  Fri, 13 Oct 2023 08:59:12 +0100
 
 diffoscope (250) unstable; urgency=medium
 


=====================================
diffoscope/__init__.py
=====================================
@@ -17,4 +17,4 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-VERSION = "250"
+VERSION = "251"


=====================================
diffoscope/comparators/utils/file.py
=====================================
@@ -1,7 +1,7 @@
 #
 # diffoscope: in-depth comparison of files, archives, and directories
 #
-# Copyright © 2016-2022 Chris Lamb <lamby at debian.org>
+# Copyright © 2016-2023 Chris Lamb <lamby at debian.org>
 #
 # diffoscope is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -86,6 +86,11 @@ class File(metaclass=abc.ABCMeta):
                 path.encode("utf-8", errors="surrogateescape")
             )
 
+        @classmethod
+        def guess_mime(cls, path):
+            # Not yet implemented
+            return ""
+
         @classmethod
         def guess_encoding(cls, path):
             if not hasattr(cls, "_mimedb_encoding"):
@@ -102,6 +107,12 @@ class File(metaclass=abc.ABCMeta):
                 cls._mimedb = magic.Magic()
             return maybe_decode(cls._mimedb.from_file(path))
 
+        @classmethod
+        def guess_mime(cls, path):
+            if not hasattr(cls, "_mimedb_mime"):
+                cls._mimedb_mime = magic.Magic(mime=True)
+            return maybe_decode(cls._mimedb_mime.from_file(path))
+
         @classmethod
         def guess_encoding(cls, path):
             if not hasattr(cls, "_mimedb_encoding"):
@@ -314,6 +325,12 @@ class File(metaclass=abc.ABCMeta):
             self._magic_file_type = File.guess_file_type(self.path)
         return self._magic_file_type
 
+    @property
+    def magic_mime_type(self):
+        if not hasattr(self, "_magic_mime_type"):
+            self._magic_mime_type = File.guess_mime(self.path)
+        return self._magic_mime_type
+
     @property
     def file_header(self):
         if not hasattr(self, "_file_header"):


=====================================
diffoscope/comparators/utils/specialize.py
=====================================
@@ -19,6 +19,7 @@
 import logging
 
 from diffoscope.profiling import profile
+from diffoscope.comparators.text import TextFile
 
 from ...utils import format_class
 
@@ -89,6 +90,14 @@ def specialize(file):
         file.magic_file_type,
     )
 
+    if file.magic_mime_type == "text/plain":
+        logger.debug(
+            "However, %s is probably text; using %s",
+            file.name,
+            format_class(TextFile, strip="diffoscope.comparators."),
+        )
+        return specialize_as(TextFile, file)
+
     return file
 
 


=====================================
tests/comparators/test_text.py
=====================================
@@ -2,7 +2,7 @@
 # diffoscope: in-depth comparison of files, archives, and directories
 #
 # Copyright © 2015 Jérémy Bobbio <lunar at debian.org>
-# Copyright © 2015-2020 Chris Lamb <lamby at debian.org>
+# Copyright © 2015-2020, 2023 Chris Lamb <lamby at debian.org>
 #
 # diffoscope is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -20,6 +20,7 @@
 import codecs
 
 from diffoscope.comparators.binary import FilesystemFile
+from diffoscope.comparators.text import TextFile
 from diffoscope.comparators.utils.specialize import specialize
 
 from ..utils.data import data, load_fixture, get_data
@@ -95,3 +96,13 @@ def test_ordering_differences(text_order1, text_order2):
     difference = text_order1.compare(text_order2)
     assert difference.comments == ["Ordering differences only"]
     assert difference.unified_diff == get_data("text_order_expected_diff")
+
+
+def test_text_fallback(tmp_path):
+    """
+    If we don't know the exact type but it looks like text, compare it as text.
+    """
+    temp = tmp_path / "temp.msc"
+    temp.write_text("msc{a, b;}")
+    specialized = specialize(FilesystemFile(str(temp)))
+    assert isinstance(specialized, TextFile)



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/ac79ac5c3dfa8a2d1ec0e897e0940c3b997b284d...1d8903b881cf6d35d8af140962c880edb87bfc47

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/ac79ac5c3dfa8a2d1ec0e897e0940c3b997b284d...1d8903b881cf6d35d8af140962c880edb87bfc47
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20231013/9a320a7c/attachment.htm>


More information about the rb-commits mailing list