[Git][reproducible-builds/diffoscope][master] 2 commits: Regenerate debian/tests/control with no material changes to "add" the regeneration comment.

Chris Lamb gitlab at salsa.debian.org
Tue Oct 23 05:07:00 CEST 2018


Chris Lamb pushed to branch master at Reproducible Builds / diffoscope


Commits:
f8fc0baa by Chris Lamb at 2018-10-23T02:59:49Z
Regenerate debian/tests/control with no material changes to "add" the regeneration comment.

Gbp-Dch: ignore

- - - - -
4e7ba71d by Chris Lamb at 2018-10-23T03:06:49Z
Add support for comparing PDF metadata using PyPDF2. (Closes: #911446)

- - - - -


6 changed files:

- debian/control
- debian/tests/control
- diffoscope/comparators/pdf.py
- setup.py
- tests/comparators/test_pdf.py
- + tests/data/pdf_metadata_expected_diff


Changes:

=====================================
debian/control
=====================================
@@ -62,6 +62,7 @@ Build-Depends:
  python3-libarchive-c,
  python3-magic,
  python3-progressbar <!nocheck>,
+ python3-pypdf2 <!nocheck>,
  python3-pytest <!nocheck>,
  python3-pytest-cov <!nocheck>,
  python3-pyxattr <!nocheck>,


=====================================
debian/tests/control
=====================================
@@ -1,8 +1,13 @@
 # DON'T MANUALLY MODIFY!
 # EDIT debian/tests/control.in INSTEAD!
 #
+# To regenerate:
+#
+#   $ debian/rules clean
+#   $ mv debian/tests/control.tmp debian/tests/control
+
 Tests: pytest-with-recommends
-Depends: diffoscope, python3-pytest, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, binutils-multiarch, bzip2, caca-utils, colord, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, fontforge-extras, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, imagemagick, jsbeautifier, libarchive-tools, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, openssh-client, pgpdump, poppler-utils, procyon-decompiler, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, unzip, xmlbeans, xxd | vim-common, xz-utils, python3-distro, python3-argcomplete, python3-progressbar, python3-binwalk, python3-defusedxml, python3-guestfs, python3-jsondiff, python3-debian, python3-pyxattr, python3-rpm, python3-tlsh
+Depends: diffoscope, python3-pytest, file, linux-image-amd64 [amd64] | linux-image-generic [amd64], abootimg, acl, binutils-multiarch, bzip2, caca-utils, colord, db-util, default-jdk-headless | default-jdk | java-sdk, device-tree-compiler, docx2txt, e2fsprogs, enjarify, fontforge-extras, fp-utils [!ppc64el !s390x], genisoimage, gettext, ghc, ghostscript, giflib-tools, gnumeric, gnupg, imagemagick, jsbeautifier, libarchive-tools, llvm, lz4 | liblz4-tool, mono-utils, ocaml-nox, odt2txt, openssh-client, pgpdump, poppler-utils, procyon-decompiler, r-base-core, rpm2cpio, sng, sqlite3, squashfs-tools, tcpdump, unzip, xmlbeans, xxd | vim-common, xz-utils, python3-distro, python3-argcomplete, python3-progressbar, python3-binwalk, python3-defusedxml, python3-guestfs, python3-jsondiff, python3-debian, python3-pypdf2, python3-pyxattr, python3-rpm, python3-tlsh
 
 Tests: pytest
 Depends: diffoscope, python3-pytest, file


=====================================
diffoscope/comparators/pdf.py
=====================================
@@ -25,6 +25,11 @@ from diffoscope.difference import Difference
 from .utils.file import File
 from .utils.command import Command
 
+try:
+    import PyPDF2
+except ImportError:  # noqa
+    PyPDF2 = None
+
 
 class Pdftotext(Command):
     @tool_required('pdftotext')
@@ -37,4 +42,32 @@ class PdfFile(File):
     FILE_TYPE_RE = re.compile(r'^PDF document\b')
 
     def compare_details(self, other, source=None):
-        return [Difference.from_command(Pdftotext, self.path, other.path)]
+        xs = []
+
+        if PyPDF2 is not None:
+            difference = Difference.from_text(
+                self.dump_pypdf2_metadata(self),
+                self.dump_pypdf2_metadata(other),
+                self.path,
+                other.path,
+            )
+            if difference:
+                difference.add_comment("Document info")
+            xs.append(difference)
+
+        xs.append(Difference.from_command(Pdftotext, self.path, other.path))
+
+        return xs
+
+    @staticmethod
+    def dump_pypdf2_metadata(file):
+        try:
+            pdf = PyPDF2.PdfFileReader(file.path)
+        except PyPDF2.utils.PdfReadError:
+            return "(Could not extract metadata)"
+
+        xs = []
+        for k, v in sorted(pdf.getDocumentInfo().items()):
+            xs.append("{}: {!r}".format(k.lstrip('/'), v))
+
+        return "\n".join(xs)


=====================================
setup.py
=====================================
@@ -61,6 +61,7 @@ setup(
             'guestfs',
             'jsondiff',
             'python-debian',
+            'pypdf2',
             'pyxattr',
             'rpm-python',
             'tlsh',


=====================================
tests/comparators/test_pdf.py
=====================================
@@ -22,7 +22,7 @@ import pytest
 from diffoscope.comparators.pdf import PdfFile
 
 from ..utils.data import load_fixture, get_data
-from ..utils.tools import skip_unless_tools_exist
+from ..utils.tools import skip_unless_tools_exist, skip_unless_module_exists
 from ..utils.nonexisting import assert_non_existing
 
 
@@ -61,3 +61,14 @@ def test_text_diff(differences):
 @skip_unless_tools_exist('pdftotext')
 def test_compare_non_existing(monkeypatch, pdf1):
     assert_non_existing(monkeypatch, pdf1, has_null_source=False)
+
+
+@pytest.fixture
+def differences_metadata(pdf1, pdf1a):
+    return pdf1.compare(pdf1a).details
+
+
+@skip_unless_module_exists('PyPDF2')
+def test_metadata(differences_metadata):
+    expected_diff = get_data('pdf_metadata_expected_diff')
+    assert differences_metadata[0].unified_diff == expected_diff


=====================================
tests/data/pdf_metadata_expected_diff
=====================================
@@ -0,0 +1,13 @@
+@@ -1,2 +1,10 @@
+-Creator: 'Prawn'
+-Producer: 'Prawn'
++Author: ''
++CreationDate: 'D:20180428153751Z'
++Creator: 'LaTeX with hyperref package'
++Keywords: ''
++ModDate: 'D:20180428153751Z'
++PTEX.Fullbanner: 'This is pdfTeX, Version 3.14159265-2.6-1.40.19 (TeX Live 2018/Debian) kpathsea version 6.3.0'
++Producer: 'pdfTeX-1.40.19'
++Subject: ''
++Title: ''
++Trapped: '/False'



View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/compare/5574a4e499f3a64d219f1e9d6f541adbe7c790f1...4e7ba71d40384c7bddc3365814cb91fd6e551790

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/diffoscope/compare/5574a4e499f3a64d219f1e9d6f541adbe7c790f1...4e7ba71d40384c7bddc3365814cb91fd6e551790
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20181023/fd4d9900/attachment.html>


More information about the rb-commits mailing list