[diffoscope] 04/04: Add --exclude option. (Closes: #854783)

Chris Lamb chris at chris-lamb.co.uk
Fri Feb 10 11:08:05 CET 2017


This is an automated email from the git hooks/post-receive script.

lamby pushed a commit to branch master
in repository diffoscope.

commit bb2ff3b8855d5c72d0907734dda0004b8d6bed8d
Author: Chris Lamb <lamby at debian.org>
Date:   Fri Feb 10 23:07:09 2017 +1300

    Add --exclude option. (Closes: #854783)
    
    Signed-off-by: Chris Lamb <lamby at debian.org>
---
 diffoscope/comparators/directory.py        |  2 +
 diffoscope/comparators/utils/compare.py    |  6 +++
 diffoscope/comparators/utils/libarchive.py |  5 ++
 diffoscope/config.py                       |  1 +
 diffoscope/excludes.py                     | 42 +++++++++++++++
 diffoscope/main.py                         |  4 ++
 tests/test_excludes.py                     | 85 ++++++++++++++++++++++++++++++
 7 files changed, 145 insertions(+)

diff --git a/diffoscope/comparators/directory.py b/diffoscope/comparators/directory.py
index 59a9029..d30cb64 100644
--- a/diffoscope/comparators/directory.py
+++ b/diffoscope/comparators/directory.py
@@ -25,6 +25,7 @@ import subprocess
 from diffoscope.exc import RequiredToolNotFound
 from diffoscope.tools import tool_required
 from diffoscope.progress import Progress
+from diffoscope.excludes import filter_excludes
 from diffoscope.difference import Difference
 
 from .binary import FilesystemFile
@@ -167,6 +168,7 @@ class FilesystemDirectory(object):
         my_names = my_container.get_member_names()
         other_names = other_container.get_member_names()
         to_compare = set(my_names).intersection(other_names)
+        to_compare = set(filter_excludes(to_compare))
         with Progress(len(to_compare)) as p:
             for name in sorted(to_compare):
                 my_file = my_container.get_member(name)
diff --git a/diffoscope/comparators/utils/compare.py b/diffoscope/comparators/utils/compare.py
index 1f326d5..b069a0e 100644
--- a/diffoscope/comparators/utils/compare.py
+++ b/diffoscope/comparators/utils/compare.py
@@ -26,6 +26,7 @@ import binascii
 from diffoscope.tools import tool_required
 from diffoscope.exc import RequiredToolNotFound
 from diffoscope.config import Config
+from diffoscope.excludes import any_excluded
 from diffoscope.profiling import profile
 from diffoscope.difference import Difference
 
@@ -52,6 +53,8 @@ def compare_root_paths(path1, path2):
 
     if not Config().new_file:
         bail_if_non_existing(path1, path2)
+    if any_excluded(path1, path2):
+        return None
     if os.path.isdir(path1) and os.path.isdir(path2):
         return compare_directories(path1, path2)
     container1 = FilesystemDirectory(os.path.dirname(path1)).as_container
@@ -69,6 +72,9 @@ def compare_files(file1, file2, source=None):
         file2.__class__.__name__,
     )
 
+    if any_excluded(file1.name, file2.name):
+        return None
+
     with profile('has_same_content_as', file1):
         if file1.has_same_content_as(file2):
             logger.debug("has_same_content_as returned True; skipping further comparisons")
diff --git a/diffoscope/comparators/utils/libarchive.py b/diffoscope/comparators/utils/libarchive.py
index 49ae253..9dd0b54 100644
--- a/diffoscope/comparators/utils/libarchive.py
+++ b/diffoscope/comparators/utils/libarchive.py
@@ -25,6 +25,7 @@ import logging
 import libarchive
 import collections
 
+from diffoscope.excludes import any_excluded
 from diffoscope.tempfiles import get_temporary_directory
 
 from ..device import Device
@@ -212,6 +213,10 @@ class LibarchiveContainer(Archive):
                 if entry.isdir:
                     continue
 
+                # Skip excluded files so they are never extracted
+                if any_excluded(entry.pathname):
+                    continue
+
                 # Maintain a mapping of archive path to the extracted path,
                 # avoiding the need to sanitise filenames.
                 dst = os.path.join(tmpdir, '{}'.format(idx))
diff --git a/diffoscope/config.py b/diffoscope/config.py
index 5512f64..025790d 100644
--- a/diffoscope/config.py
+++ b/diffoscope/config.py
@@ -33,6 +33,7 @@ class Config(object):
     new_file = False
     fuzzy_threshold = 60
     enforce_constraints = True
+    excludes = ()
 
     _singleton = {}
 
diff --git a/diffoscope/excludes.py b/diffoscope/excludes.py
new file mode 100644
index 0000000..0bb509a
--- /dev/null
+++ b/diffoscope/excludes.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import fnmatch
+import logging
+
+from diffoscope.config import Config
+
+logger = logging.getLogger(__name__)
+
+
+def filter_excludes(filenames):
+    result = []
+
+    for x in filenames:
+        for y in Config().excludes:
+            if fnmatch.fnmatchcase(x, y):
+                logger.debug("Excluding %s as it matches pattern '%s'", x, y)
+                break
+        else:
+            result.append(x)
+
+    return result
+
+def any_excluded(*filenames):
+    return len(filter_excludes(filenames)) != len(filenames)
diff --git a/diffoscope/main.py b/diffoscope/main.py
index c483728..4ed97d3 100644
--- a/diffoscope/main.py
+++ b/diffoscope/main.py
@@ -158,6 +158,9 @@ def create_parser():
     group3 = parser.add_argument_group('diff calculation')
     group3.add_argument('--new-file', dest='new_file', action='store_true',
                         help='Treat absent files as empty')
+    group3.add_argument('--exclude', dest='excludes', nargs='?',
+                        metavar='PATTERN', action='append', default=[],
+                        help='Exclude files that match %(metavar)s')
     group3.add_argument('--fuzzy-threshold', dest='fuzzy_threshold', type=int,
                         help='Threshold for fuzzy-matching '
                         '(0 to disable, %(default)s is default, 400 is high fuzziness)',
@@ -248,6 +251,7 @@ def run_diffoscope(parsed_args):
     maybe_set_limit(Config(), parsed_args, "max_diff_input_lines")
     Config().fuzzy_threshold = parsed_args.fuzzy_threshold
     Config().new_file = parsed_args.new_file
+    Config().excludes = parsed_args.excludes
     set_locale()
     logger.debug('Starting comparison')
     ProgressManager().setup(parsed_args)
diff --git a/tests/test_excludes.py b/tests/test_excludes.py
new file mode 100644
index 0000000..029a717
--- /dev/null
+++ b/tests/test_excludes.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+#
+# diffoscope: in-depth comparison of files, archives, and directories
+#
+# Copyright © 2017 Chris Lamb <lamby at debian.org>
+#
+# diffoscope is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# diffoscope is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with diffoscope.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import pytest
+
+from diffoscope.main import main
+
+
+def run(capsys, *args):
+    with pytest.raises(SystemExit) as exc:
+        main(args + tuple(
+            os.path.join(os.path.dirname(__file__), 'data', x)
+            for x in ('test1.tar', 'test2.tar')
+        ))
+
+    out, err = capsys.readouterr()
+
+    assert err == ''
+
+    return exc.value.code, out
+
+def test_none(capsys):
+    ret, out = run(capsys)
+
+    assert ret == 1
+    assert '── dir/text' in out
+    assert '── dir/link' in out
+
+def test_all(capsys):
+    ret, out = run(capsys, '--exclude=*')
+
+    assert ret == 0
+    assert out == ''
+
+def test_specific(capsys):
+    ret, out = run(capsys, '--exclude=dir/text')
+
+    assert ret == 1
+    assert '── dir/text' not in out
+    assert '── dir/link' in out
+
+def test_specific_case(capsys):
+    ret, out = run(capsys, '--exclude=dir/TEXT')
+
+    assert ret == 1
+    assert '── dir/text' in out
+    assert '── dir/link' in out
+
+def test_multiple(capsys):
+    ret, out = run(capsys, '--exclude=dir/text', '--exclude=dir/link')
+
+    assert ret == 1
+    assert '── dir/text' not in out
+    assert '── dir/link' not in out
+
+def test_nomatch(capsys):
+    ret, out = run(capsys, '--exclude=nomatch')
+
+    assert ret == 1
+    assert '── dir/text' in out
+    assert '── dir/link' in out
+
+def test_wildcard(capsys):
+    ret, out = run(capsys, '--exclude=*link')
+
+    assert ret == 1
+    assert '── dir/text' in out
+    assert '── dir/link' not in out

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git


More information about the diffoscope mailing list