[Git][reproducible-builds/diffoscope][master] 3 commits: Drop the ALLOWED_TEST_FILES test; it's mostly just annoying.

Chris Lamb (@lamby) gitlab at salsa.debian.org
Fri Oct 28 18:29:47 UTC 2022



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
7b7d28f0 by Chris Lamb at 2022-10-28T10:58:17-07:00
Drop the ALLOWED_TEST_FILES test; it's mostly just annoying.

- - - - -
06fd0c79 by Chris Lamb at 2022-10-28T10:59:35-07:00
Pylint tests/test_source.py.

- - - - -
d647eb75 by Chris Lamb at 2022-10-28T11:29:23-07:00
Add support for comparing the "text" content of HTML files using html2text. (Closes: reproducible-builds/diffoscope#318, Debian:#1022209)

- - - - -


11 changed files:

- debian/control
- debian/tests/control
- diffoscope/comparators/__init__.py
- + diffoscope/comparators/html.py
- diffoscope/external_tools.py
- + tests/comparators/test_html.py
- + tests/data/html_expected_diff
- + tests/data/html_text_expected_diff
- + tests/data/test1.html
- + tests/data/test2.html
- tests/test_source.py


Changes:

=====================================
debian/control
=====================================
@@ -38,6 +38,7 @@ Build-Depends:
  gnupg-utils <!nocheck>,
  hdf5-tools <!nocheck>,
  help2man,
+ html2text <!nocheck>,
  imagemagick <!nocheck>,
  jsbeautifier <!nocheck>,
  libarchive-tools <!nocheck>,


=====================================
debian/tests/control
=====================================
@@ -7,7 +7,7 @@
 #   $ mv debian/tests/control.tmp debian/tests/control
 
 Tests: pytest-with-recommends
-Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, imagemagick, jsbeautifier, libarchive-tools, libxmlb-dev, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, androguard, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf2, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
+Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, html2text, imagemagick, jsbeautifier, libarchive-tools, libxmlb-dev, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, 
 
 Tests: pytest
 Depends: python3-all, diffoscope, python3-pytest, python3-h5py, file, python3-tlsh


=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -54,6 +54,7 @@ class ComparatorManager:
         ("xmlb.XMLBFile",),
         ("openssl.Pkcs7File",),
         ("openssl.MobileProvisionFile",),
+        ("html.HtmlFile",),
         ("text.TextFile",),
         ("bzip2.Bzip2File",),
         ("cpio.CpioFile",),


=====================================
diffoscope/comparators/html.py
=====================================
@@ -0,0 +1,50 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2022 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import re
+
+from diffoscope.difference import Difference
+from diffoscope.exc import RequiredToolNotFound
+from diffoscope.tools import tool_required
+
+from .text import TextFile
+from .utils.command import Command
+
+
+class Htmltotext(Command):
+    @tool_required("html2text")
+    def cmdline(self):
+        return ["html2text", self.path]
+
+
+class HtmlFile(TextFile):
+    DESCRIPTION = "HTML files (.html)"
+    FILE_TYPE_RE = re.compile(r"^HTML document")
+
+    def compare(self, other, source=None):
+        difference = super().compare(other, source)
+
+        # Show text-only differences as a sub-diff.
+        try:
+            text = Difference.from_operation(Htmltotext, self.path, other.path)
+            if text is not None:
+                difference.add_details([text])
+        except RequiredToolNotFound as exc:  # noqa
+            difference.add_comment(exc.get_comment())
+
+        return difference


=====================================
diffoscope/external_tools.py
=====================================
@@ -82,6 +82,7 @@ EXTERNAL_TOOLS = {
     },
     "gzip": {"debian": "gzip", "arch": "gzip", "guix": "gzip"},
     "h5dump": {"debian": "hdf5-tools", "arch": "hdf5", "guix": "hdf5"},
+    "html2text": {"debian": "html2text"},
     "identify": {
         "debian": "imagemagick",
         "arch": "imagemagick",


=====================================
tests/comparators/test_html.py
=====================================
@@ -0,0 +1,46 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2022 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.html import HtmlFile
+
+from ..utils.data import assert_diff, load_fixture
+
+
+html1 = load_fixture("test1.html")
+html2 = load_fixture("test2.html")
+
+
+def test_identification(html1, html2):
+    assert isinstance(html1, HtmlFile)
+    assert isinstance(html2, HtmlFile)
+
+
+def test_no_differences(html1):
+    assert html1.compare(html1) is None
+
+
+@pytest.fixture
+def differences(html1, html2):
+    return html1.compare(html2)
+
+
+def test_diff(differences):
+    assert_diff(differences, "html_expected_diff")
+    assert_diff(differences.details[0], "html_text_expected_diff")


=====================================
tests/data/html_expected_diff
=====================================
@@ -0,0 +1,22 @@
+@@ -1,17 +1,17 @@
+ <html>
+ <head>
+-<title>foo</title>
++<title>bar</title>
+ <style>
+ <!--
+ BODY {
+-BACKGROUND: #000000;
+-COLOR: #FFFFFF;
++BACKGROUND: #FFFFFF;
++COLOR: #000000;
+ -->
+ </style>
+ </head>
+ <body>
+ <p>
+-foo
++bar
+ </p>
+ </body>
+ </html>


=====================================
tests/data/html_text_expected_diff
=====================================
@@ -0,0 +1,3 @@
+@@ -1 +1 @@
+-foo
++bar


=====================================
tests/data/test1.html
=====================================
@@ -0,0 +1,17 @@
+<html>
+<head>
+<title>foo</title>
+<style>
+<!--
+BODY {
+BACKGROUND: #000000;
+COLOR: #FFFFFF;
+-->
+</style>
+</head>
+<body>
+<p>
+foo
+</p>
+</body>
+</html>


=====================================
tests/data/test2.html
=====================================
@@ -0,0 +1,17 @@
+<html>
+<head>
+<title>bar</title>
+<style>
+<!--
+BODY {
+BACKGROUND: #FFFFFF;
+COLOR: #000000;
+-->
+</style>
+</head>
+<body>
+<p>
+bar
+</p>
+</body>
+</html>


=====================================
tests/test_source.py
=====================================
@@ -16,241 +16,16 @@
 # You should have received a copy of the GNU General Public License
 # along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
 
-import os
-import glob
 import subprocess
 
 from .utils.tools import skip_unless_tool_is_at_least
 
-ALLOWED_TEST_FILES = {
-    # Data files we would prefer to generate dynamically
-    "android1.img",
-    "android2.img",
-    "archive1.tar",
-    "archive2.tar",
-    "base-files_157-r45695_ar71xx.ipk",
-    "base-files_157-r45918_ar71xx.ipk",
-    "binary1",
-    "binary2",
-    "bug881937_1.deb",
-    "bug881937_2.deb",
-    "bug903391_1.deb",
-    "bug903391_2.deb",
-    "bug903401_1.deb",
-    "bug903401_2.deb",
-    "bug903565_1.deb",
-    "bug903565_2.deb",
-    "containers",
-    "containers/a.tar.bz2",
-    "containers/a.tar.gz",
-    "containers/a.tar.xz",
-    "containers/b.tar.bz2",
-    "containers/b.tar.gz",
-    "containers/b.tar.xz",
-    "containers/magic_bzip2",
-    "containers/magic_gzip",
-    "containers/magic_xz",
-    "dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb",
-    "dbgsym/add/test-dbgsym_1_amd64.deb",
-    "dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb",
-    "dbgsym/mult/test-dbgsym_1_amd64.deb",
-    "dbgsym/test-dbgsym_1.dsc",
-    "dbgsym/test-dbgsym_1.tar.gz",
-    "debian-bug-876316-control.tar.gz",
-    # Outputs
-    "devicetree1.dtb",
-    "devicetree2.dtb",
-    "elfmix1.not_a",
-    "elfmix2.a",
-    "encrypted1.zip",
-    "encrypted2.zip",
-    "fuzzy-tar-in-tar1.tar",
-    "fuzzy-tar-in-tar2.tar",
-    "fuzzy1.tar",
-    "fuzzy2.tar",
-    "fuzzy3.tar",
-    "hello1.wasm",
-    "hello2.wasm",
-    "no-perms.tar",
-    "quine.gz",
-    "quine.zip",
-    "quine_a.deb",
-    "quine_b.deb",
-    "Samyak-Malayalam1.ttf",
-    "Samyak-Malayalam2.ttf",
-    "test1-le64.cache-4",
-    "test1.a",
-    "test1.apk",
-    "test1.asc",
-    "test1.binwalk",
-    "test1.buildinfo",
-    "test1.bz2",
-    "test1.changes",
-    "Test1.class",
-    "Test2.class",
-    "test1.cpio",
-    "test1.db",
-    "test1.deb",
-    "test1.debsrc.tar.gz",
-    "test1.debug",
-    "test1.dex",
-    "test1.docx",
-    "test1.dsc",
-    "test1.epub",
-    "test1.exe",
-    "test1.ext4",
-    "test1.fat12",
-    "test1.fat16",
-    "test1.fat32",
-    "test1.gif",
-    "test1.git-index",
-    "test1.gnumeric",
-    "test1.gz",
-    "test1.hi",
-    "test1.icc",
-    "test1.ico",
-    "test1.inv",
-    "test1.iso",
-    "test1.jmod",
-    "test1.jpg",
-    "test1.js",
-    "test1.json",
-    "test1.kbx",
-    "test1.lz4",
-    "test1.macho",
-    "test1.mo",
-    "test1.mozzip",
-    "test1.mp3",
-    "test1.o",
-    "test1.odt",
-    "test1.ogg",
-    "test1.pcap",
-    "test1.pdf",
-    "test1.pgp",
-    "test1_signed.pgp",
-    "test1.png",
-    "test1.ppu",
-    "test1.ps",
-    "test1.pyc-renamed",
-    "test1.rdb",
-    "test1.rdx",
-    "test1.rlib",
-    "test1.rpm",
-    "test1.sqlite3",
-    "test1.squashfs",
-    "test1_root.squashfs",
-    "test1.tar",
-    "test1.xml",
-    "test1.xsb",
-    "test1.xz",
-    "test1.zip",
-    "test1_meta.ico",
-    "test1_meta.jpg",
-    "test2-le64.cache-4",
-    "test2.a",
-    "test2.apk",
-    "test2.asc",
-    "test2.binwalk",
-    "test2.buildinfo",
-    "test2.bz2",
-    "test2.changes",
-    "test2.cpio",
-    "test2.db",
-    "test2.deb",
-    "test2.debsrc.tar.gz",
-    "test2.debug",
-    "test2.dex",
-    "test2.docx",
-    "test2.dsc",
-    "test2.epub",
-    "test2.exe",
-    "test2.ext4",
-    "test2.fat12",
-    "test2.gif",
-    "test2.git-index",
-    "test2.gnumeric",
-    "test2.gz",
-    "test2.hi",
-    "test2.icc",
-    "test2.ico",
-    "test2.inv",
-    "test2.iso",
-    "test2.jmod",
-    "test2.jpg",
-    "test2.js",
-    "test2.json",
-    "test2.kbx",
-    "test2.lz4",
-    "test2.macho",
-    "test2.mo",
-    "test2.mozzip",
-    "test2.mp3",
-    "test2.o",
-    "test2.odt",
-    "test2.ogg",
-    "test2.pcap",
-    "test2.pdf",
-    "test2.pgp",
-    "test2_signed.pgp",
-    "test2.png",
-    "test2.ppu",
-    "test2.ps",
-    "test2.pyc-renamed",
-    "test2.rdb",
-    "test2.rdx",
-    "test2.rlib",
-    "test2.rpm",
-    "test2.sqlite3",
-    "test2.squashfs",
-    "test2_root.squashfs",
-    "test2.tar",
-    "test2.xml",
-    "test2.xsb",
-    "test2.xz",
-    "test2.zip",
-    "test2_meta.ico",
-    "test2_meta.jpg",
-    "test3.apk",
-    "test3.changes",
-    "test3.gif",
-    "test3.pdf",
-    "test3.xml",
-    "test3.zip",
-    "test4.changes",
-    "test4.gif",
-    "test4.pdf",
-    "test4.xml",
-    "test5.changes",
-    "test_comment1.zip",
-    "test_comment2.zip",
-    "test_invalid.json",
-    "test_invalid.xml",
-    "test_iso8859-1.mo",
-    "test_no_charset.mo",
-    "test_openssh_pub_key1.pub",
-    "test_openssh_pub_key2.pub",
-    "test_weird_non_unicode_chars1.pdf",
-    "test_weird_non_unicode_chars2.pdf",
-    "text_ascii1",  # used by multiple tests
-    "text_ascii2",  # used by multiple tests
-    "text_iso8859",
-    "text_order1",
-    "text_order2",
-    "text_unicode1",
-    "text_unicode2",
-    "text_unicode_binary_fallback",
-    # Outputs
-    "debian-bug-875281.collapsed-diff.json",
-    "order1a.json",
-    "order1b.json",
-}
-
 
 def black_version():
     try:
         out = subprocess.check_output(("black", "--version"))
-    except subprocess.CalledProcessError as e:
-        out = e.output
+    except subprocess.CalledProcessError as exc:
+        out = exc.output
 
     # black --version format changed starting in 21.11b0. Returning the first
     # token starting with a decimal digit, since its ordinal position may vary.
@@ -270,41 +45,3 @@ def test_code_is_black_clean():
         print(output)
 
     assert not output, output
-
-
-def test_does_not_add_new_test_files():
-    """
-    For a variety of reasons we are now prefering to generate any test data
-    dynamically (via pytest fixtures, etc.) rather than committing and shipping
-    such files.
-
-    Exceptions to this may be appropriate (or even required) but this test
-    ensures that test files that could be dynamically generated are not added
-    "automatically", for example by following previous/older commits.
-    """
-
-    test_dir = os.path.join(os.path.dirname(__file__), "data")
-
-    seen = set()
-
-    for x in glob.iglob(os.path.join(test_dir, "**"), recursive=True):
-        if os.path.isdir(x):
-            continue
-
-        # Strip off common prefix
-        x = x[len(test_dir) + 1 :]
-
-        # Skip some known expected diff filename patterns
-        if (
-            x.endswith("_diff")
-            or x.endswith("_diffs")
-            or x.endswith(".diff")
-            or "_diff_" in x
-            or "diff." in x
-            or x.startswith("output")
-        ):
-            continue
-
-        seen.add(x)
-
-    assert seen - ALLOWED_TEST_FILES - {""} == set()



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/54fbfb98cfc8ddf7e44e17c2a57a342f67958cf3...d647eb7554e3bd51ab8fbe18fc84f885fce4f789

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/54fbfb98cfc8ddf7e44e17c2a57a342f67958cf3...d647eb7554e3bd51ab8fbe18fc84f885fce4f789
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20221028/606573d6/attachment.htm>


More information about the rb-commits mailing list