[Git][reproducible-builds/diffoscope][master] Prevent XML entity expansion attacks. Thanks to Florian Wilkens @ SRLabs for...

Chris Lamb (@lamby) gitlab at salsa.debian.org
Mon Jan 13 13:29:31 UTC 2025



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
889597c9 by Chris Lamb at 2025-01-13T13:28:33+00:00
Prevent XML entity expansion attacks. Thanks to Florian Wilkens @ SRLabs for the report. (Closes: #397)

- - - - -


2 changed files:

- diffoscope/comparators/xml.py
- tests/comparators/test_xml.py


Changes:

=====================================
diffoscope/comparators/xml.py
=====================================
@@ -18,6 +18,7 @@
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
 import re
+import pyexpat
 
 from xml.parsers.expat import ExpatError
 
@@ -36,6 +37,29 @@ except ImportError:
     from xml.dom import minidom
 
 
+def is_vulnerable_xml_parser():
+    # We assume defusedxml is safe
+    if defusedxml:
+        return False
+
+    """
+    As described in Python's module documentation, versions of expat <= 2.4.1
+    (released on 2021-05-23) are vulnerable to exponential/quadratic entity
+    expansion while versions <= 2.6.0 (released on 2024-02-06) are vulnerable to
+    large tokens. Since expat is usually provided by the system and not
+    directly bundled with Python, even recent Python installations can still be
+    vulnerable due to old expat versions.
+
+        <https://salsa.debian.org/reproducible-builds/diffoscope/-/issues/397>
+    """
+
+    pyexpat_version = tuple(
+        int(x) for x in pyexpat.EXPAT_VERSION.split("_", 1)[1].split(".")
+    )
+
+    return pyexpat_version <= (2, 6, 0)
+
+
 def _format(node):
     """
     Removes *inplace* spaces from minidom.Document
@@ -103,6 +127,11 @@ class XMLFile(File):
         if not super().recognizes(file) and not file.name.endswith(".xml"):
             return False
 
+        # Check that we aren't about to open an untrusted file with an old, and
+        # potentially vulnerable, version of pyexpat.
+        if is_vulnerable_xml_parser():
+            return False
+
         with open(file.path) as f:
             try:
                 file.parsed = _parse(f)


=====================================
tests/comparators/test_xml.py
=====================================
@@ -20,10 +20,14 @@
 import sys
 import pytest
 
-from diffoscope.comparators.xml import XMLFile
+from diffoscope.comparators.xml import XMLFile, is_vulnerable_xml_parser
 
 from ..utils.data import load_fixture, assert_diff
 
+skip_if_vulnerable_xml_parser = pytest.mark.skipif(
+    is_vulnerable_xml_parser(), reason="Vulnerable XML parser"
+)
+
 
 xml_a = load_fixture("test1.xml")
 xml_b = load_fixture("test2.xml")
@@ -32,14 +36,17 @@ xml_d = load_fixture("test4.xml")
 invalid_xml = load_fixture("test_invalid.xml")
 
 
+@skip_if_vulnerable_xml_parser
 def test_identification(xml_a):
     assert isinstance(xml_a, XMLFile)
 
 
+@skip_if_vulnerable_xml_parser
 def test_invalid(invalid_xml):
     assert not isinstance(invalid_xml, XMLFile)
 
 
+@skip_if_vulnerable_xml_parser
 def test_no_differences(xml_a):
     assert xml_a.compare(xml_a) is None
 
@@ -52,6 +59,7 @@ def differences(xml_a, xml_b):
 @pytest.mark.skipif(
     sys.version_info < (3, 8), reason="requires Python 3.8 or higher"
 )
+@skip_if_vulnerable_xml_parser
 def test_diff(differences):
     assert_diff(differences[0], "test_xml_expected_diff")
 
@@ -59,6 +67,7 @@ def test_diff(differences):
 @pytest.mark.skipif(
     sys.version_info < (3, 8), reason="requires Python 3.8 or higher"
 )
+@skip_if_vulnerable_xml_parser
 def test_ordering_differences(xml_c, xml_d):
     diff = xml_c.compare(xml_d)
     assert diff.details[0].comments == ["Ordering differences only"]



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/889597c91f19dc34d8a4ccc6db213c2ca15d4a21

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/commit/889597c91f19dc34d8a4ccc6db213c2ca15d4a21
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20250113/c18d9bb2/attachment.htm>


More information about the rb-commits mailing list