[rb-general] [PATCH v2] Add --clamp-mtime option to GNU tar

Daniel Kahn Gillmor dkg at fifthhorseman.net
Wed Jan 20 00:28:36 CET 2016


[This is a revision of the patch submitted a few months ago by Jérémy
 Bobbio <lunar at debian.org> in Message-ID:
 <20150604111918.GA28246 at loar>; it applies to the current master
 branch; updates the test suite and documentation as well; and
 hopefully offers clearer justification for why this belongs in GNU
 tar. ]

This feature is desired by important users of tar for reproducible
archive creation (e.g., dpkg-dev -- see
<https://bugs.debian.org/759999#20>).  It is intended in the same
spirit as other features in GNU tar (e.g., --exclude-vcs-ignores) that
offer simple ways to handle common use cases related to software
development and distribution.

The new `--clamp-mtime` option will change the behavior of `--mtime` to
only use the time specified if the file mtime is newer than the given time.
The `--clamp-mtime` option can only be used together with `--mtime`.

This addresses a common scenario: a software distributor wants to ship
compiled binaries, but the package shipped should also include some
files copied directly from the source distribution (e.g., config
files).  At the same time, we'd like the archive in question to be
"reproducible" where possible -- getting a byte-for-byte identical
match upon rebuild.  See <https://reproducible-builds.org/> for more
information.

With this feature, a reproducible tarball can be created by noting the
time just before the build (either via $(date) or some pre-build
artifact like a revision control system or changelog), and then
bounding the mtimes in the resulting archive via --clamp-mtime.

In order to implement the option, we transform `set_mtime_option` from
a bool to an enum with three values: use original file mtime, force all mtimes
to be of the same value, and clamp mtimes (as explained above).

To verify that `--clamp-mtime` is used together with `--mtime`, `mtime_option`
is now initialized to a minimal value as done for `newer_mtime_option`. As
the same macro can now be used for both options, NEWER_OPTION_INITIALIZED
has been renamed to TIME_OPTION_INITIALIZED.

Documentation and the test suite have also been updated.
---
 doc/tar.1          |  5 ++++-
 doc/tar.texi       | 21 +++++++++++++++++++++
 src/common.h       | 17 ++++++++++++-----
 src/create.c       | 15 ++++++++++++++-
 src/list.c         |  2 +-
 src/tar.c          | 23 ++++++++++++++++++++---
 tests/Makefile.am  |  1 +
 tests/testsuite.at |  1 +
 tests/time02.at    | 42 ++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/doc/tar.1 b/doc/tar.1
index 18fdc9d..3f20214 100644
--- a/doc/tar.1
+++ b/doc/tar.1
@@ -13,7 +13,7 @@
 .\"
 .\" You should have received a copy of the GNU General Public License
 .\" along with this program.  If not, see <http://www.gnu.org/licenses/>.
-.TH TAR 1 "December 5, 2015" "TAR" "GNU TAR Manual"
+.TH TAR 1 "January 19, 2016" "TAR" "GNU TAR Manual"
 .SH NAME
 tar \- an archiving utility
 .SH SYNOPSIS
@@ -1023,6 +1023,9 @@ Display progress messages every \fIN\fRth record (default 10).
 \fB\-\-checkpoint\-action\fR=\fIACTION\fR
 Run \fIACTION\fR on each checkpoint.
 .TP
+\fB\-\-clamp\-mtime\fR
+Only set time when the file is more recent than what was given with \-\-mtime.
+.TP
 \fB\-\-full\-time\fR
 Print file time to its full resolution.
 .TP
diff --git a/doc/tar.texi b/doc/tar.texi
index 4f4d106..9b7e08f 100644
--- a/doc/tar.texi
+++ b/doc/tar.texi
@@ -2544,6 +2544,11 @@ complies to UNIX98, was introduced with version
 writing the archive.  This allows you to directly act on archives
 while saving space.  @xref{gzip}.
 
+@opsummary{clamp-mtime}
+@item --clamp-mtime
+
+(See @option{--mtime}.)
+
 @opsummary{confirmation}
 @item --confirmation
 
@@ -2944,6 +2949,11 @@ either a textual date representation (@pxref{Date input formats}) or a
 name of the existing file, starting with @samp{/} or @samp{.}.  In the
 latter case, the modification time of that file is used. @xref{override}.
 
+When @option{--clamp-mtime} is also specified, files with
+modification times earlier than @var{date} will retain their actual
+modification times, and @var{date} will only be used for files whose
+modification times are later than @var{date}.
+
 @opsummary{multi-volume}
 @item --multi-volume
 @itemx -M
@@ -5368,6 +5378,17 @@ tar: Option --mtime: Treating date 'yesterday' as 2006-06-20
 @dots{}
 @end smallexample
 
+@noindent
+When used with @option{--clamp-mtime}, @GNUTAR{} will only set the
+modification date to @var{date} on files whose actual modification
+date is later than @var{date}.  This is to make it easy to build
+reproducible archives given a common timestamp for generated files
+while still retaining the original timestamps of untouched files.
+
+@smallexample
+$ @kbd{tar -c -f archive.tar --clamp-mtime --mtime=@atchar{}$SOURCE_DATE_EPOCH .}
+@end smallexample
+
 @item --owner=@var{user}
 @opindex owner
 
diff --git a/src/common.h b/src/common.h
index 7534748..faf1b6e 100644
--- a/src/common.h
+++ b/src/common.h
@@ -211,13 +211,20 @@ GLOBAL bool multi_volume_option;
    do not get archived (also see after_date_option above).  */
 GLOBAL struct timespec newer_mtime_option;
 
-/* If true, override actual mtime (see below) */
-GLOBAL bool set_mtime_option;
-/* Value to be put in mtime header field instead of the actual mtime */
+enum set_mtime_option_mode
+{
+  USE_FILE_MTIME,
+  FORCE_MTIME,
+  CLAMP_MTIME,
+};
+
+/* Override actual mtime if set to FORCE_MTIME or CLAMP_MTIME */
+GLOBAL enum set_mtime_option_mode set_mtime_option;
+/* Value to use when forcing or clamping the mtime header field. */
 GLOBAL struct timespec mtime_option;
 
-/* Return true if newer_mtime_option is initialized.  */
-#define NEWER_OPTION_INITIALIZED(opt) (0 <= (opt).tv_nsec)
+/* Return true if mtime_option or newer_mtime_option is initialized.  */
+#define TIME_OPTION_INITIALIZED(opt) (0 <= (opt).tv_nsec)
 
 /* Return true if the struct stat ST's M time is less than
    newer_mtime_option.  */
diff --git a/src/create.c b/src/create.c
index b7c19ab..48eda8c 100644
--- a/src/create.c
+++ b/src/create.c
@@ -823,7 +823,20 @@ start_header (struct tar_stat_info *st)
   }
 
   {
-    struct timespec mtime = set_mtime_option ? mtime_option : st->mtime;
+    struct timespec mtime;
+    switch (set_mtime_option)
+      {
+        case FORCE_MTIME:
+          mtime = mtime_option;
+          break;
+        case CLAMP_MTIME:
+          mtime = timespec_cmp (st->mtime, mtime_option) > 0 ? mtime_option : st->mtime;
+          break;
+        default:
+          mtime = st->mtime;
+          break;
+      }
+
     if (archive_format == POSIX_FORMAT)
       {
 	if (MAX_OCTAL_VAL (header->header.mtime) < mtime.tv_sec
diff --git a/src/list.c b/src/list.c
index 70e3375..db87515 100644
--- a/src/list.c
+++ b/src/list.c
@@ -195,7 +195,7 @@ read_and (void (*do_something) (void))
 	  decode_header (current_header, &current_stat_info,
 			 &current_format, 1);
 	  if (! name_match (current_stat_info.file_name)
-	      || (NEWER_OPTION_INITIALIZED (newer_mtime_option)
+	      || (TIME_OPTION_INITIALIZED (newer_mtime_option)
 		  /* FIXME: We get mtime now, and again later; this causes
 		     duplicate diagnostics if header.mtime is bogus.  */
 		  && ((mtime.tv_sec
diff --git a/src/tar.c b/src/tar.c
index bacb142..891ab21 100644
--- a/src/tar.c
+++ b/src/tar.c
@@ -276,6 +276,7 @@ enum
   CHECK_DEVICE_OPTION,
   CHECKPOINT_OPTION,
   CHECKPOINT_ACTION_OPTION,
+  CLAMP_MTIME_OPTION,
   DELAY_DIRECTORY_RESTORE_OPTION,
   HARD_DEREFERENCE_OPTION,
   DELETE_OPTION,
@@ -514,6 +515,8 @@ static struct argp_option options[] = {
    N_("use FILE to map file owner GIDs and names"), GRID+1 },
   {"mtime", MTIME_OPTION, N_("DATE-OR-FILE"), 0,
    N_("set mtime for added files from DATE-OR-FILE"), GRID+1 },
+  {"clamp-mtime", CLAMP_MTIME_OPTION, 0, 0,
+   N_("only set time when the file is more recent than what was given with --mtime"), GRID+1 },
   {"mode", MODE_OPTION, N_("CHANGES"), 0,
    N_("force (symbolic) mode CHANGES for added files"), GRID+1 },
   {"atime-preserve", ATIME_PRESERVE_OPTION,
@@ -1364,6 +1367,10 @@ parse_opt (int key, char *arg, struct argp_state *state)
       set_subcommand_option (CREATE_SUBCOMMAND);
       break;
 
+    case CLAMP_MTIME_OPTION:
+      set_mtime_option = CLAMP_MTIME;
+      break;
+
     case 'd':
       set_subcommand_option (DIFF_SUBCOMMAND);
       break;
@@ -1505,7 +1512,8 @@ parse_opt (int key, char *arg, struct argp_state *state)
 
     case MTIME_OPTION:
       get_date_or_file (args, "--mtime", arg, &mtime_option);
-      set_mtime_option = true;
+      if (set_mtime_option == USE_FILE_MTIME)
+        set_mtime_option = FORCE_MTIME;
       break;
 
     case 'n':
@@ -1521,7 +1529,7 @@ parse_opt (int key, char *arg, struct argp_state *state)
       /* Fall through.  */
 
     case NEWER_MTIME_OPTION:
-      if (NEWER_OPTION_INITIALIZED (newer_mtime_option))
+      if (TIME_OPTION_INITIALIZED (newer_mtime_option))
 	USAGE_ERROR ((0, 0, _("More than one threshold date")));
       get_date_or_file (args,
 			key == NEWER_MTIME_OPTION ? "--newer-mtime"
@@ -2236,6 +2244,8 @@ decode_options (int argc, char **argv)
 
   newer_mtime_option.tv_sec = TYPE_MINIMUM (time_t);
   newer_mtime_option.tv_nsec = -1;
+  mtime_option.tv_sec = TYPE_MINIMUM (time_t);
+  mtime_option.tv_nsec = -1;
   recursion_option = FNM_LEADING_DIR;
   unquote_option = true;
   tar_sparse_major = 1;
@@ -2397,7 +2407,7 @@ decode_options (int argc, char **argv)
 		  _("Multiple archive files require '-M' option")));
 
   if (listed_incremental_option
-      && NEWER_OPTION_INITIALIZED (newer_mtime_option))
+      && TIME_OPTION_INITIALIZED (newer_mtime_option))
     {
       struct option_locus *listed_loc = optloc_lookup (OC_LISTED_INCREMENTAL);
       struct option_locus *newer_loc = optloc_lookup (OC_NEWER);
@@ -2464,6 +2474,13 @@ decode_options (int argc, char **argv)
 	USAGE_ERROR ((0, 0, _("Cannot concatenate compressed archives")));
     }
 
+  if (set_mtime_option == CLAMP_MTIME)
+    {
+      if (!TIME_OPTION_INITIALIZED (mtime_option))
+	USAGE_ERROR ((0, 0,
+		      _("--clamp-mtime needs a date specified using --mtime")));
+    }
+
   /* It is no harm to use --pax-option on non-pax archives in archive
      reading mode. It may even be useful, since it allows to override
      file attributes from tar headers. Therefore I allow such usage.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 05c5959..772c6f0 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -217,6 +217,7 @@ TESTSUITE_AT = \
  spmvp01.at\
  spmvp10.at\
  time01.at\
+ time02.at\
  truncate.at\
  update.at\
  update01.at\
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 42a4027..48e6dd7 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -352,6 +352,7 @@ m4_include([lustar03.at])
 m4_include([old.at])
 
 m4_include([time01.at])
+m4_include([time02.at])
 
 AT_BANNER([Multivolume archives])
 m4_include([multiv01.at])
diff --git a/tests/time02.at b/tests/time02.at
new file mode 100644
index 0000000..d179c17
--- /dev/null
+++ b/tests/time02.at
@@ -0,0 +1,42 @@
+# Test clamping of mtime in GNU tar.  -*- Autotest -*-
+#
+# Copyright 2016 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# written by Daniel Kahn Gillmor
+
+AT_SETUP([time: clamping mtime])
+AT_KEYWORDS([time time02])
+
+AT_TAR_CHECK([
+export TZ=UTC0
+mkdir dir
+
+touch -d 2015-12-01T00:00:00 dir/a >/dev/null 2>&1 || continue
+touch -d 2016-01-01T00:00:00 dir/b >/dev/null 2>&1 || continue
+touch -d 2016-02-01T00:00:00 dir/c >/dev/null 2>&1 || continue
+touch -d 9999-01-01T00:00:00 dir/d >/dev/null 2>&1 || continue
+
+tar -c --mtime 2016-01-15T00:00:00 --clamp-mtime -f archive.tar dir
+tar -d -f archive.tar dir
+],
+[1],
+[
+dir/c: Mod time differs
+dir/d: Mod time differs
+], [], [], [],
+[pax])
+
+AT_CLEANUP
-- 
2.7.0.rc3



More information about the rb-general mailing list