[Git][reproducible-builds/diffoscope][master] 2 commits: Move to assert_diff in test_pdf.py.

Chris Lamb gitlab at salsa.debian.org
Tue Apr 27 10:34:46 UTC 2021



Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
6be044d3 by Chris Lamb at 2021-04-27T11:28:01+01:00
Move to assert_diff in test_pdf.py.

- - - - -
62c41a2b by Chris Lamb at 2021-04-27T11:33:56+01:00
Add support for showing annotations in PDF files. (Closes: reproducible-builds/diffoscope#249)

- - - - -


6 changed files:

- diffoscope/comparators/pdf.py
- tests/comparators/test_pdf.py
- + tests/data/pdf_annotations_expected_diff
- + tests/data/test3.pdf
- + tests/data/test4.pdf
- tests/test_source.py


Changes:

=====================================
diffoscope/comparators/pdf.py
=====================================
@@ -66,6 +66,16 @@ class PdfFile(File):
                 difference.add_comment("Document info")
             xs.append(difference)
 
+            difference = Difference.from_text(
+                self.dump_pypdf2_annotations(self),
+                self.dump_pypdf2_annotations(other),
+                self.path,
+                other.path,
+            )
+            if difference:
+                difference.add_comment("Annotations")
+            xs.append(difference)
+
         xs.append(Difference.from_operation(Pdftotext, self.path, other.path))
 
         # Don't include verbose dumppdf output unless we won't see any any
@@ -93,3 +103,24 @@ class PdfFile(File):
             xs.append("{}: {!r}".format(k.lstrip("/"), v))
 
         return "\n".join(xs)
+
+    @staticmethod
+    def dump_pypdf2_annotations(file):
+        try:
+            pdf = PyPDF2.PdfFileReader(file.path)
+        except PyPDF2.utils.PdfReadError as e:
+            return f"(Could not open file: {e})"
+
+        xs = []
+        for x in range(pdf.getNumPages()):
+            page = pdf.getPage(x)
+
+            try:
+                for annot in page["/Annots"]:
+                    subtype = annot.getObject()["/Subtype"]
+                    if subtype == "/Text":
+                        xs.append(annot.getObject()["/Contents"])
+            except:
+                pass
+
+        return "\n".join(xs)


=====================================
tests/comparators/test_pdf.py
=====================================
@@ -21,13 +21,15 @@ import pytest
 
 from diffoscope.comparators.pdf import PdfFile
 
-from ..utils.data import load_fixture, get_data
+from ..utils.data import load_fixture, assert_diff
 from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
 from ..utils.nonexisting import assert_non_existing
 
 
 pdf1 = load_fixture("test1.pdf")
 pdf2 = load_fixture("test2.pdf")
+pdf3 = load_fixture("test3.pdf")
+pdf4 = load_fixture("test4.pdf")
 pdf1a = load_fixture("test_weird_non_unicode_chars1.pdf")
 pdf2a = load_fixture("test_weird_non_unicode_chars2.pdf")
 
@@ -54,8 +56,7 @@ def differences(pdf1, pdf2):
 
 @skip_unless_tools_exist("pdftotext")
 def test_text_diff(differences):
-    expected_diff = get_data("pdf_text_expected_diff")
-    assert differences[0].unified_diff == expected_diff
+    assert_diff(differences[0], "pdf_text_expected_diff")
 
 
 @skip_unless_tools_exist("pdftotext")
@@ -71,5 +72,17 @@ def differences_metadata(pdf1, pdf1a):
 @skip_unless_tools_exist("pdftotext")
 @skip_unless_module_exists("PyPDF2")
 def test_metadata(differences_metadata):
-    expected_diff = get_data("pdf_metadata_expected_diff")
-    assert differences_metadata[0].unified_diff == expected_diff
+    assert_diff(differences_metadata[0], "pdf_metadata_expected_diff")
+
+
+@pytest.fixture
+def differences_annotations(pdf3, pdf4):
+    return pdf3.compare(pdf4).details
+
+
+@skip_unless_tools_exist("pdftotext")
+@skip_unless_module_exists("PyPDF2")
+def test_annotations(differences_annotations):
+    with open("tests/data/pdf_annotations_expected_diff", "w") as f:
+        f.write(differences_annotations[0].unified_diff)
+    assert_diff(differences_annotations[0], "pdf_annotations_expected_diff")


=====================================
tests/data/pdf_annotations_expected_diff
=====================================
@@ -0,0 +1,3 @@
+@@ -0,0 +1,2 @@
++1: A PDF comment created in Okular
++11: Comment created with evince


=====================================
tests/data/test3.pdf
=====================================
Binary files /dev/null and b/tests/data/test3.pdf differ


=====================================
tests/data/test4.pdf
=====================================
Binary files /dev/null and b/tests/data/test4.pdf differ


=====================================
tests/test_source.py
=====================================
@@ -205,9 +205,11 @@ ALLOWED_TEST_FILES = {
     "test3.apk",
     "test3.changes",
     "test3.gif",
+    "test3.pdf",
     "test3.zip",
     "test4.changes",
     "test4.gif",
+    "test4.pdf",
     "test_comment1.zip",
     "test_comment2.zip",
     "test_invalid.json",



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/a3bfba06da482fa1e9ee1a8733aa0f54d29c8e32...62c41a2b86e214754c7e413d5eb08ba8ff852416

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/-/compare/a3bfba06da482fa1e9ee1a8733aa0f54d29c8e32...62c41a2b86e214754c7e413d5eb08ba8ff852416
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20210427/4385f629/attachment.htm>


More information about the rb-commits mailing list