[Git][reproducible-builds/diffoscope][master] 3 commits: Replace codecs.open with a simplified reimplementation

Chris Lamb (@lamby) gitlab at salsa.debian.org
Tue Sep 2 22:15:25 UTC 2025



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
b19d7e7a by Zbigniew Jędrzejewski-Szmek at 2025-09-02T21:49:11+00:00
Replace codecs.open with a simplified reimplementation

python 3.14 doesn't like codecs.open:

tests/test_readers.py: 1 warning
/builddir/build/BUILD/diffoscope-303-build/diffoscope-303/diffoscope/comparators/text.py:44: DeprecationWarning: codecs.open() is deprecated. Use open() instead.
   ) as my_content, codecs.open(
...

In most scenarios, codecs.open and builtins.open are equivalent.
Unfortunately, they behave differently in one case: when the file is
opened for reading, 'b' is not used in the mode, and an encoding is
specified, whitespace is handled differently.

To avoid the warning, add a trivial reimplementation.


- - - - -
112492ec by Zbigniew Jędrzejewski-Szmek at 2025-09-02T21:49:11+00:00
Use open instead of codecs.open in simple cases

python 3.14 doesn't like codecs.open:

tests/test_readers.py: 1 warning
tests/comparators/test_gettext.py::test_charsets
  /builddir/build/BUILD/diffoscope-303-build/diffoscope-303/tests/comparators/test_gettext.py:61: DeprecationWarning: codecs.open() is deprecated. Use open() instead.
    expected_diff = codecs.open(
tests/comparators/test_text.py::test_difference_in_unicode
  /builddir/build/BUILD/diffoscope-303-build/diffoscope-303/tests/comparators/test_text.py:53: DeprecationWarning: codecs.open() is deprecated. Use open() instead.
    expected_diff = codecs.open(
tests/comparators/test_text.py::test_difference_between_iso88591_and_unicode
  /builddir/build/BUILD/diffoscope-303-build/diffoscope-303/tests/comparators/test_text.py:64: DeprecationWarning: codecs.open() is deprecated. Use open() instead.
    expected_diff = codecs.open(
tests/comparators/test_text.py::test_difference_between_iso88591_and_unicode_only
  /builddir/build/BUILD/diffoscope-303-build/diffoscope-303/tests/comparators/test_text.py:74: DeprecationWarning: codecs.open() is deprecated. Use open() instead.
    codecs.open(data("text_iso8859"), encoding="iso8859-1")
tests/comparators/test_utils.py::test_fuzzy_matching
  /builddir/build/BUILD/diffoscope-303-build/diffoscope-303/tests/comparators/test_utils.py:75: DeprecationWarning: codecs.open() is deprecated. Use open() instead.
    expected_diff = codecs.open(
...

When reading text files and when writing files, plain open() is
equivalent.

- - - - -
8f265167 by Zbigniew Jędrzejewski-Szmek at 2025-09-02T21:49:11+00:00
test_dtb: accept additional 'v' in fdtdump --version

CI fails with:
tests/comparators/test_dtb.py:45: in fdtdump_version
    raise ValueError("Error parsing `fdtdump --version` output")
E   ValueError: Error parsing `fdtdump --version` output
        m          = None
        out        = ('\n'
 '**** fdtdump is a low-level debugging tool, not meant for general use.\n'
 '**** If you want to decompile a dtb, you probably want\n'
 '****     dtc -I dtb -O dts <filename>\n'
 '\n'
 'Version: DTC v1.7.2-1-g9af601c\n')

Maybe Arch overrides the version or something. Let's just skip the 'v'
if it is there.

- - - - -


7 changed files:

- diffoscope/comparators/text.py
- diffoscope/presenters/html/html.py
- diffoscope/presenters/utils.py
- tests/comparators/test_dtb.py
- tests/comparators/test_gettext.py
- tests/comparators/test_text.py
- tests/comparators/test_utils.py


Changes:

=====================================
diffoscope/comparators/text.py
=====================================
@@ -25,6 +25,18 @@ from diffoscope.difference import Difference
 from .utils.file import File
 
 
+def _open_with_codec(filename, encoding):
+    info = codecs.lookup(encoding)
+    # codecs.open added 'b' to the mode if encoding was specified
+    file = open(filename, "rb")
+    srw = codecs.StreamReaderWriter(
+        file, info.streamreader, info.streamwriter, errors="strict"
+    )
+    # Add attributes to simplify introspection
+    srw.encoding = encoding
+    return srw
+
+
 class TextFile(File):
     DESCRIPTION = "text files"
     FILE_TYPE_RE = re.compile(r"\btext\b")
@@ -39,10 +51,10 @@ class TextFile(File):
         my_encoding = self.encoding or "utf-8"
         other_encoding = other.encoding or "utf-8"
         try:
-            with codecs.open(
-                self.path, "r", encoding=my_encoding
-            ) as my_content, codecs.open(
-                other.path, "r", encoding=other_encoding
+            with _open_with_codec(
+                self.path, my_encoding
+            ) as my_content, _open_with_codec(
+                other.path, other_encoding
             ) as other_content:
                 difference = Difference.from_text_readers(
                     my_content, other_content, self.name, other.name, source


=====================================
diffoscope/presenters/html/html.py
=====================================
@@ -31,7 +31,6 @@
 #
 
 import base64
-import codecs
 import collections
 import contextlib
 import hashlib
@@ -337,17 +336,13 @@ def output_footer(jquery_url=None):
 
 @contextlib.contextmanager
 def file_printer(directory, filename):
-    with codecs.open(
-        os.path.join(directory, filename), "w", encoding="utf-8"
-    ) as f:
+    with open(os.path.join(directory, filename), "w", encoding="utf-8") as f:
         yield f.write
 
 
 @contextlib.contextmanager
 def spl_file_printer(directory, filename, accum):
-    with codecs.open(
-        os.path.join(directory, filename), "w", encoding="utf-8"
-    ) as f:
+    with open(os.path.join(directory, filename), "w", encoding="utf-8") as f:
         print_func = f.write
 
         def recording_print_func(s):


=====================================
diffoscope/presenters/utils.py
=====================================
@@ -17,7 +17,6 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import sys
-import codecs
 import collections
 import contextlib
 import string
@@ -93,7 +92,7 @@ def make_printer(path):
     output = sys.stdout
 
     if path != "-":
-        output = codecs.open(path, "w", encoding="utf-8")
+        output = open(path, "w", encoding="utf-8")
 
     def fn(*args, **kwargs):
         kwargs["file"] = output


=====================================
tests/comparators/test_dtb.py
=====================================
@@ -40,7 +40,7 @@ def fdtdump_version():
         ("fdtdump", "--version"), stderr=subprocess.STDOUT
     ).decode("utf-8")
 
-    m = re.search(r"Version: DTC (?P<version>\d+\.\d+\.\d+)", out)
+    m = re.search(r"Version: DTC v?(?P<version>\d+\.\d+\.\d+)", out)
     if m is None:
         raise ValueError("Error parsing `fdtdump --version` output")
 


=====================================
tests/comparators/test_gettext.py
=====================================
@@ -17,7 +17,6 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import codecs
 import pytest
 
 from diffoscope.config import Config
@@ -58,7 +57,7 @@ mo_iso8859_1 = load_fixture("test_iso8859-1.mo")
 @skip_unless_tools_exist("msgunfmt")
 def test_charsets(mo_no_charset, mo_iso8859_1):
     difference = mo_no_charset.compare(mo_iso8859_1)
-    expected_diff = codecs.open(
+    expected_diff = open(
         data("mo_charsets_expected_diff"), encoding="utf-8"
     ).read()
     assert difference.details[0].unified_diff == expected_diff


=====================================
tests/comparators/test_text.py
=====================================
@@ -17,8 +17,6 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import codecs
-
 from diffoscope.comparators.binary import FilesystemFile
 from diffoscope.comparators.text import TextFile
 from diffoscope.comparators.utils.specialize import specialize
@@ -50,7 +48,7 @@ unicode2 = load_fixture("text_unicode2")
 
 def test_difference_in_unicode(unicode1, unicode2):
     difference = unicode1.compare(unicode2)
-    expected_diff = codecs.open(
+    expected_diff = open(
         data("text_unicode_expected_diff"), encoding="utf-8"
     ).read()
     assert difference.unified_diff == expected_diff
@@ -61,7 +59,7 @@ iso8859 = load_fixture("text_iso8859")
 
 def test_difference_between_iso88591_and_unicode(iso8859, unicode1):
     difference = iso8859.compare(unicode1)
-    expected_diff = codecs.open(
+    expected_diff = open(
         data("text_iso8859_expected_diff"), encoding="utf-8"
     ).read()
     assert difference.unified_diff == expected_diff
@@ -71,7 +69,7 @@ def test_difference_between_iso88591_and_unicode_only(iso8859, tmpdir):
     utf8_path = str(tmpdir.join("utf8"))
     with open(utf8_path, "wb") as f:
         f.write(
-            codecs.open(data("text_iso8859"), encoding="iso8859-1")
+            open(data("text_iso8859"), encoding="iso8859-1")
             .read()
             .encode("utf-8")
         )


=====================================
tests/comparators/test_utils.py
=====================================
@@ -17,7 +17,6 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import codecs
 import os
 import pytest
 import threading
@@ -72,7 +71,7 @@ def skip_unless_tool_is_at_least():
 @skip_unless_module_exists("tlsh")
 def test_fuzzy_matching(fuzzy_tar1, fuzzy_tar2):
     differences = fuzzy_tar1.compare(fuzzy_tar2).details
-    expected_diff = codecs.open(
+    expected_diff = open(
         data("text_iso8859_expected_diff"), encoding="utf-8"
     ).read()
     assert differences[1].source1 == "./matching"



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d12518047c5667c2c1c8718f03975826c0a54cd1...8f2651674e4d27d3fc287070af9c86ac1dd0b7c3

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/d12518047c5667c2c1c8718f03975826c0a54cd1...8f2651674e4d27d3fc287070af9c86ac1dd0b7c3
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20250902/6d6d7e28/attachment.htm>


More information about the rb-commits mailing list