[Git][reproducible-builds/diffoscope][master] 3 commits: Drop the ALLOWED_TEST_FILES test; it's mostly just annoying.
Chris Lamb (@lamby)
gitlab at salsa.debian.org
Fri Oct 28 18:29:47 UTC 2022
Chris Lamb pushed to branch master at Reproducible Builds / diffoscope
Commits:
7b7d28f0 by Chris Lamb at 2022-10-28T10:58:17-07:00
Drop the ALLOWED_TEST_FILES test; it's mostly just annoying.
- - - - -
06fd0c79 by Chris Lamb at 2022-10-28T10:59:35-07:00
Pylint tests/test_source.py.
- - - - -
d647eb75 by Chris Lamb at 2022-10-28T11:29:23-07:00
Add support for comparing the "text" content of HTML files using html2text. (Closes: reproducible-builds/diffoscope#318, Debian:#1022209)
- - - - -
11 changed files:
- debian/control
- debian/tests/control
- diffoscope/comparators/__init__.py
- + diffoscope/comparators/html.py
- diffoscope/external_tools.py
- + tests/comparators/test_html.py
- + tests/data/html_expected_diff
- + tests/data/html_text_expected_diff
- + tests/data/test1.html
- + tests/data/test2.html
- tests/test_source.py
Changes:
=====================================
debian/control
=====================================
@@ -38,6 +38,7 @@ Build-Depends:
gnupg-utils <!nocheck>,
hdf5-tools <!nocheck>,
help2man,
+ html2text <!nocheck>,
imagemagick <!nocheck>,
jsbeautifier <!nocheck>,
libarchive-tools <!nocheck>,
=====================================
debian/tests/control
=====================================
@@ -7,7 +7,7 @@
# $ mv debian/tests/control.tmp debian/tests/control
Tests: pytest-with-recommends
-Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, imagemagick, jsbeautifier, libarchive-tools, libxmlb-dev, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, androguard, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf2, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
+Depends: python3-all, diffoscope, black, python3-pytest, python3-h5py, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, apksigner, apktool [!ppc64el !s390x], binutils-multiarch, bzip2, caca-utils, colord, coreboot-utils, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, ffmpeg, fontforge-extras, fonttools, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, gnupg-utils, hdf5-tools, html2text, imagemagick, jsbeautifier, libarchive-tools, libxmlb-dev, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, oggvideotools [!s390x], openssh-client, openssl, pgpdump, poppler-utils, procyon-decompiler, python3-pdfminer, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, u-boot-tools, unzip, wabt, xmlbeans, xxd, xz-utils, zip, zstd, androguard, python3-argcomplete, python3-binwalk, python3-defusedxml, python3-distro, python3-guestfs, python3-jsondiff, python3-progressbar, python3-pypdf2, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
Tests: pytest
Depends: python3-all, diffoscope, python3-pytest, python3-h5py, file, python3-tlsh
=====================================
diffoscope/comparators/__init__.py
=====================================
@@ -54,6 +54,7 @@ class ComparatorManager:
("xmlb.XMLBFile",),
("openssl.Pkcs7File",),
("openssl.MobileProvisionFile",),
+ ("html.HtmlFile",),
("text.TextFile",),
("bzip2.Bzip2File",),
("cpio.CpioFile",),
=====================================
diffoscope/comparators/html.py
=====================================
@@ -0,0 +1,50 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2022 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import re
+
+from diffoscope.difference import Difference
+from diffoscope.exc import RequiredToolNotFound
+from diffoscope.tools import tool_required
+
+from .text import TextFile
+from .utils.command import Command
+
+
+class Htmltotext(Command):
+ @tool_required("html2text")
+ def cmdline(self):
+ return ["html2text", self.path]
+
+
+class HtmlFile(TextFile):
+ DESCRIPTION = "HTML files (.html)"
+ FILE_TYPE_RE = re.compile(r"^HTML document")
+
+ def compare(self, other, source=None):
+ difference = super().compare(other, source)
+
+ # Show text-only differences as a sub-diff.
+ try:
+ text = Difference.from_operation(Htmltotext, self.path, other.path)
+ if text is not None:
+ difference.add_details([text])
+ except RequiredToolNotFound as exc: # noqa
+ difference.add_comment(exc.get_comment())
+
+ return difference
=====================================
diffoscope/external_tools.py
=====================================
@@ -82,6 +82,7 @@ EXTERNAL_TOOLS = {
},
"gzip": {"debian": "gzip", "arch": "gzip", "guix": "gzip"},
"h5dump": {"debian": "hdf5-tools", "arch": "hdf5", "guix": "hdf5"},
+ "html2text": {"debian": "html2text"},
"identify": {
"debian": "imagemagick",
"arch": "imagemagick",
=====================================
tests/comparators/test_html.py
=====================================
@@ -0,0 +1,46 @@
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2022 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
+
+import pytest
+
+from diffoscope.comparators.html import HtmlFile
+
+from ..utils.data import assert_diff, load_fixture
+
+
+html1 = load_fixture("test1.html")
+html2 = load_fixture("test2.html")
+
+
+def test_identification(html1, html2):
+ assert isinstance(html1, HtmlFile)
+ assert isinstance(html2, HtmlFile)
+
+
+def test_no_differences(html1):
+ assert html1.compare(html1) is None
+
+
+@pytest.fixture
+def differences(html1, html2):
+ return html1.compare(html2)
+
+
+def test_diff(differences):
+ assert_diff(differences, "html_expected_diff")
+ assert_diff(differences.details[0], "html_text_expected_diff")
=====================================
tests/data/html_expected_diff
=====================================
@@ -0,0 +1,22 @@
+@@ -1,17 +1,17 @@
+ <html>
+ <head>
+-<title>foo</title>
++<title>bar</title>
+ <style>
+ <!--
+ BODY {
+-BACKGROUND: #000000;
+-COLOR: #FFFFFF;
++BACKGROUND: #FFFFFF;
++COLOR: #000000;
+ -->
+ </style>
+ </head>
+ <body>
+ <p>
+-foo
++bar
+ </p>
+ </body>
+ </html>
=====================================
tests/data/html_text_expected_diff
=====================================
@@ -0,0 +1,3 @@
+@@ -1 +1 @@
+-foo
++bar
=====================================
tests/data/test1.html
=====================================
@@ -0,0 +1,17 @@
+<html>
+<head>
+<title>foo</title>
+<style>
+<!--
+BODY {
+BACKGROUND: #000000;
+COLOR: #FFFFFF;
+-->
+</style>
+</head>
+<body>
+<p>
+foo
+</p>
+</body>
+</html>
=====================================
tests/data/test2.html
=====================================
@@ -0,0 +1,17 @@
+<html>
+<head>
+<title>bar</title>
+<style>
+<!--
+BODY {
+BACKGROUND: #FFFFFF;
+COLOR: #000000;
+-->
+</style>
+</head>
+<body>
+<p>
+bar
+</p>
+</body>
+</html>
=====================================
tests/test_source.py
=====================================
@@ -16,241 +16,16 @@
# You should have received a copy of the GNU General Public License
# along with diffoscope. If not, see <https://www.gnu.org/licenses/>.
-import os
-import glob
import subprocess
from .utils.tools import skip_unless_tool_is_at_least
-ALLOWED_TEST_FILES = {
- # Data files we would prefer to generate dynamically
- "android1.img",
- "android2.img",
- "archive1.tar",
- "archive2.tar",
- "base-files_157-r45695_ar71xx.ipk",
- "base-files_157-r45918_ar71xx.ipk",
- "binary1",
- "binary2",
- "bug881937_1.deb",
- "bug881937_2.deb",
- "bug903391_1.deb",
- "bug903391_2.deb",
- "bug903401_1.deb",
- "bug903401_2.deb",
- "bug903565_1.deb",
- "bug903565_2.deb",
- "containers",
- "containers/a.tar.bz2",
- "containers/a.tar.gz",
- "containers/a.tar.xz",
- "containers/b.tar.bz2",
- "containers/b.tar.gz",
- "containers/b.tar.xz",
- "containers/magic_bzip2",
- "containers/magic_gzip",
- "containers/magic_xz",
- "dbgsym/add/test-dbgsym-dbgsym_1_amd64.deb",
- "dbgsym/add/test-dbgsym_1_amd64.deb",
- "dbgsym/mult/test-dbgsym-dbgsym_1_amd64.deb",
- "dbgsym/mult/test-dbgsym_1_amd64.deb",
- "dbgsym/test-dbgsym_1.dsc",
- "dbgsym/test-dbgsym_1.tar.gz",
- "debian-bug-876316-control.tar.gz",
- # Outputs
- "devicetree1.dtb",
- "devicetree2.dtb",
- "elfmix1.not_a",
- "elfmix2.a",
- "encrypted1.zip",
- "encrypted2.zip",
- "fuzzy-tar-in-tar1.tar",
- "fuzzy-tar-in-tar2.tar",
- "fuzzy1.tar",
- "fuzzy2.tar",
- "fuzzy3.tar",
- "hello1.wasm",
- "hello2.wasm",
- "no-perms.tar",
- "quine.gz",
- "quine.zip",
- "quine_a.deb",
- "quine_b.deb",
- "Samyak-Malayalam1.ttf",
- "Samyak-Malayalam2.ttf",
- "test1-le64.cache-4",
- "test1.a",
- "test1.apk",
- "test1.asc",
- "test1.binwalk",
- "test1.buildinfo",
- "test1.bz2",
- "test1.changes",
- "Test1.class",
- "Test2.class",
- "test1.cpio",
- "test1.db",
- "test1.deb",
- "test1.debsrc.tar.gz",
- "test1.debug",
- "test1.dex",
- "test1.docx",
- "test1.dsc",
- "test1.epub",
- "test1.exe",
- "test1.ext4",
- "test1.fat12",
- "test1.fat16",
- "test1.fat32",
- "test1.gif",
- "test1.git-index",
- "test1.gnumeric",
- "test1.gz",
- "test1.hi",
- "test1.icc",
- "test1.ico",
- "test1.inv",
- "test1.iso",
- "test1.jmod",
- "test1.jpg",
- "test1.js",
- "test1.json",
- "test1.kbx",
- "test1.lz4",
- "test1.macho",
- "test1.mo",
- "test1.mozzip",
- "test1.mp3",
- "test1.o",
- "test1.odt",
- "test1.ogg",
- "test1.pcap",
- "test1.pdf",
- "test1.pgp",
- "test1_signed.pgp",
- "test1.png",
- "test1.ppu",
- "test1.ps",
- "test1.pyc-renamed",
- "test1.rdb",
- "test1.rdx",
- "test1.rlib",
- "test1.rpm",
- "test1.sqlite3",
- "test1.squashfs",
- "test1_root.squashfs",
- "test1.tar",
- "test1.xml",
- "test1.xsb",
- "test1.xz",
- "test1.zip",
- "test1_meta.ico",
- "test1_meta.jpg",
- "test2-le64.cache-4",
- "test2.a",
- "test2.apk",
- "test2.asc",
- "test2.binwalk",
- "test2.buildinfo",
- "test2.bz2",
- "test2.changes",
- "test2.cpio",
- "test2.db",
- "test2.deb",
- "test2.debsrc.tar.gz",
- "test2.debug",
- "test2.dex",
- "test2.docx",
- "test2.dsc",
- "test2.epub",
- "test2.exe",
- "test2.ext4",
- "test2.fat12",
- "test2.gif",
- "test2.git-index",
- "test2.gnumeric",
- "test2.gz",
- "test2.hi",
- "test2.icc",
- "test2.ico",
- "test2.inv",
- "test2.iso",
- "test2.jmod",
- "test2.jpg",
- "test2.js",
- "test2.json",
- "test2.kbx",
- "test2.lz4",
- "test2.macho",
- "test2.mo",
- "test2.mozzip",
- "test2.mp3",
- "test2.o",
- "test2.odt",
- "test2.ogg",
- "test2.pcap",
- "test2.pdf",
- "test2.pgp",
- "test2_signed.pgp",
- "test2.png",
- "test2.ppu",
- "test2.ps",
- "test2.pyc-renamed",
- "test2.rdb",
- "test2.rdx",
- "test2.rlib",
- "test2.rpm",
- "test2.sqlite3",
- "test2.squashfs",
- "test2_root.squashfs",
- "test2.tar",
- "test2.xml",
- "test2.xsb",
- "test2.xz",
- "test2.zip",
- "test2_meta.ico",
- "test2_meta.jpg",
- "test3.apk",
- "test3.changes",
- "test3.gif",
- "test3.pdf",
- "test3.xml",
- "test3.zip",
- "test4.changes",
- "test4.gif",
- "test4.pdf",
- "test4.xml",
- "test5.changes",
- "test_comment1.zip",
- "test_comment2.zip",
- "test_invalid.json",
- "test_invalid.xml",
- "test_iso8859-1.mo",
- "test_no_charset.mo",
- "test_openssh_pub_key1.pub",
- "test_openssh_pub_key2.pub",
- "test_weird_non_unicode_chars1.pdf",
- "test_weird_non_unicode_chars2.pdf",
- "text_ascii1", # used by multiple tests
- "text_ascii2", # used by multiple tests
- "text_iso8859",
- "text_order1",
- "text_order2",
- "text_unicode1",
- "text_unicode2",
- "text_unicode_binary_fallback",
- # Outputs
- "debian-bug-875281.collapsed-diff.json",
- "order1a.json",
- "order1b.json",
-}
-
def black_version():
try:
out = subprocess.check_output(("black", "--version"))
- except subprocess.CalledProcessError as e:
- out = e.output
+ except subprocess.CalledProcessError as exc:
+ out = exc.output
# black --version format changed starting in 21.11b0. Returning the first
# token starting with a decimal digit, since its ordinal position may vary.
@@ -270,41 +45,3 @@ def test_code_is_black_clean():
print(output)
assert not output, output
-
-
-def test_does_not_add_new_test_files():
- """
- For a variety of reasons we are now prefering to generate any test data
- dynamically (via pytest fixtures, etc.) rather than committing and shipping
- such files.
-
- Exceptions to this may be appropriate (or even required) but this test
- ensures that test files that could be dynamically generated are not added
- "automatically", for example by following previous/older commits.
- """
-
- test_dir = os.path.join(os.path.dirname(__file__), "data")
-
- seen = set()
-
- for x in glob.iglob(os.path.join(test_dir, "**"), recursive=True):
- if os.path.isdir(x):
- continue
-
- # Strip off common prefix
- x = x[len(test_dir) + 1 :]
-
- # Skip some known expected diff filename patterns
- if (
- x.endswith("_diff")
- or x.endswith("_diffs")
- or x.endswith(".diff")
- or "_diff_" in x
- or "diff." in x
- or x.startswith("output")
- ):
- continue
-
- seen.add(x)
-
- assert seen - ALLOWED_TEST_FILES - {""} == set()
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/54fbfb98cfc8ddf7e44e17c2a57a342f67958cf3...d647eb7554e3bd51ab8fbe18fc84f885fce4f789
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/54fbfb98cfc8ddf7e44e17c2a57a342f67958cf3...d647eb7554e3bd51ab8fbe18fc84f885fce4f789
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20221028/606573d6/attachment.htm>
More information about the rb-commits
mailing list