[Git][reproducible-builds/diffoscope][master] 2 commits: Move to assert_diff in test_pdf.py.
Chris Lamb
gitlab at salsa.debian.org
Tue Apr 27 10:34:46 UTC 2021
Chris Lamb pushed to branch master at Reproducible Builds / diffoscope
Commits:
6be044d3 by Chris Lamb at 2021-04-27T11:28:01+01:00
Move to assert_diff in test_pdf.py.
- - - - -
62c41a2b by Chris Lamb at 2021-04-27T11:33:56+01:00
Add support for showing annotations in PDF files. (Closes: reproducible-builds/diffoscope#249)
- - - - -
6 changed files:
- diffoscope/comparators/pdf.py
- tests/comparators/test_pdf.py
- + tests/data/pdf_annotations_expected_diff
- + tests/data/test3.pdf
- + tests/data/test4.pdf
- tests/test_source.py
Changes:
=====================================
diffoscope/comparators/pdf.py
=====================================
@@ -66,6 +66,16 @@ class PdfFile(File):
difference.add_comment("Document info")
xs.append(difference)
+ difference = Difference.from_text(
+ self.dump_pypdf2_annotations(self),
+ self.dump_pypdf2_annotations(other),
+ self.path,
+ other.path,
+ )
+ if difference:
+ difference.add_comment("Annotations")
+ xs.append(difference)
+
xs.append(Difference.from_operation(Pdftotext, self.path, other.path))
# Don't include verbose dumppdf output unless we won't see any any
@@ -93,3 +103,24 @@ class PdfFile(File):
xs.append("{}: {!r}".format(k.lstrip("/"), v))
return "\n".join(xs)
+
+ @staticmethod
+ def dump_pypdf2_annotations(file):
+ try:
+ pdf = PyPDF2.PdfFileReader(file.path)
+ except PyPDF2.utils.PdfReadError as e:
+ return f"(Could not open file: {e})"
+
+ xs = []
+ for x in range(pdf.getNumPages()):
+ page = pdf.getPage(x)
+
+ try:
+ for annot in page["/Annots"]:
+ subtype = annot.getObject()["/Subtype"]
+ if subtype == "/Text":
+ xs.append(annot.getObject()["/Contents"])
+ except:
+ pass
+
+ return "\n".join(xs)
=====================================
tests/comparators/test_pdf.py
=====================================
@@ -21,13 +21,15 @@ import pytest
from diffoscope.comparators.pdf import PdfFile
-from ..utils.data import load_fixture, get_data
+from ..utils.data import load_fixture, assert_diff
from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
from ..utils.nonexisting import assert_non_existing
pdf1 = load_fixture("test1.pdf")
pdf2 = load_fixture("test2.pdf")
+pdf3 = load_fixture("test3.pdf")
+pdf4 = load_fixture("test4.pdf")
pdf1a = load_fixture("test_weird_non_unicode_chars1.pdf")
pdf2a = load_fixture("test_weird_non_unicode_chars2.pdf")
@@ -54,8 +56,7 @@ def differences(pdf1, pdf2):
@skip_unless_tools_exist("pdftotext")
def test_text_diff(differences):
- expected_diff = get_data("pdf_text_expected_diff")
- assert differences[0].unified_diff == expected_diff
+ assert_diff(differences[0], "pdf_text_expected_diff")
@skip_unless_tools_exist("pdftotext")
@@ -71,5 +72,17 @@ def differences_metadata(pdf1, pdf1a):
@skip_unless_tools_exist("pdftotext")
@skip_unless_module_exists("PyPDF2")
def test_metadata(differences_metadata):
- expected_diff = get_data("pdf_metadata_expected_diff")
- assert differences_metadata[0].unified_diff == expected_diff
+ assert_diff(differences_metadata[0], "pdf_metadata_expected_diff")
+
+
+@pytest.fixture
+def differences_annotations(pdf3, pdf4):
+ return pdf3.compare(pdf4).details
+
+
+@skip_unless_tools_exist("pdftotext")
+@skip_unless_module_exists("PyPDF2")
+def test_annotations(differences_annotations):
+ with open("tests/data/pdf_annotations_expected_diff", "w") as f:
+ f.write(differences_annotations[0].unified_diff)
+ assert_diff(differences_annotations[0], "pdf_annotations_expected_diff")
=====================================
tests/data/pdf_annotations_expected_diff
=====================================
@@ -0,0 +1,3 @@
+@@ -0,0 +1,2 @@
++1: A PDF comment created in Okular
++11: Comment created with evince
=====================================
tests/data/test3.pdf
=====================================
Binary files /dev/null and b/tests/data/test3.pdf differ
=====================================
tests/data/test4.pdf
=====================================
Binary files /dev/null and b/tests/data/test4.pdf differ
=====================================
tests/test_source.py
=====================================
@@ -205,9 +205,11 @@ ALLOWED_TEST_FILES = {
"test3.apk",
"test3.changes",
"test3.gif",
+ "test3.pdf",
"test3.zip",
"test4.changes",
"test4.gif",
+ "test4.pdf",
"test_comment1.zip",
"test_comment2.zip",
"test_invalid.json",
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/a3bfba06da482fa1e9ee1a8733aa0f54d29c8e32...62c41a2b86e214754c7e413d5eb08ba8ff852416
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/a3bfba06da482fa1e9ee1a8733aa0f54d29c8e32...62c41a2b86e214754c7e413d5eb08ba8ff852416
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20210427/4385f629/attachment.htm>
More information about the rb-commits
mailing list