[diffoscope] 01/01: comparators/utils/file: fix handling of filenames with non-unicode chars

Mattia Rizzolo mattia at debian.org
Thu May 10 22:10:13 CEST 2018


This is an automated email from the git hooks/post-receive script.

mattia pushed a commit to branch master
in repository diffoscope.

commit 8202dac7f96a785cfb752ffb45eba183e43224e8
Author: Mattia Rizzolo <mattia at debian.org>
Date:   Thu May 10 22:09:04 2018 +0200

    comparators/utils/file: fix handling of filenames with non-unicode chars
    
    Closes: #898022
    Signed-off-by: Mattia Rizzolo <mattia at debian.org>
---
 diffoscope/comparators/utils/file.py |  4 +++-
 tests/test_main.py                   | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/diffoscope/comparators/utils/file.py b/diffoscope/comparators/utils/file.py
index 4fd49ac..b251426 100644
--- a/diffoscope/comparators/utils/file.py
+++ b/diffoscope/comparators/utils/file.py
@@ -68,7 +68,9 @@ class File(object, metaclass=abc.ABCMeta):
             if not hasattr(self, '_mimedb'):
                 self._mimedb = magic.open(magic.NONE)
                 self._mimedb.load()
-            return self._mimedb.file(path)
+            return self._mimedb.file(
+                path.encode('utf-8', errors='surrogateescape')
+            )
 
         @classmethod
         def guess_encoding(self, path):
diff --git a/tests/test_main.py b/tests/test_main.py
index 9ec9068..eb84eef 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -143,3 +143,19 @@ def test_profiling(capsys):
     assert ret == 0
     assert "Profiling output for" in out
     assert err == ''
+
+
+def test_non_unicode_filename(capsys, tmpdir):
+    # Bug reference: https://bugs.debian.org/898022
+    path = str(tmpdir.dirpath()).encode('utf-8')
+    a = os.path.join(path, b'\xf0\x28\x8c\x28')
+    b = os.path.join(path, b'\xf0\x28\x8c\x29')
+    with open(a, 'w'), open(b, 'w'):
+        pass
+
+    # sys.argv does pretty much this decoding to arguments
+    files = [x.decode('utf-8', errors='surrogateescape') for x in (a, b)]
+    ret, out, err = run(capsys, *files)
+
+    assert ret == 0
+    assert out == err == ''

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list