[Git][reproducible-builds/strip-nondeterminism][master] Handle Python "pyzip" files. (Closes: reproducible-builds/strip-nondeterminism#18)
Chris Lamb
gitlab at salsa.debian.org
Tue May 4 10:32:32 UTC 2021
Chris Lamb pushed to branch master at Reproducible Builds / strip-nondeterminism
Commits:
064e276d by Chris Lamb at 2021-05-04T11:31:48+01:00
Handle Python "pyzip" files. (Closes: reproducible-builds/strip-nondeterminism#18)
- - - - -
4 changed files:
- lib/File/StripNondeterminism.pm
- + lib/File/StripNondeterminism/handlers/pyzip.pm
- + t/fixtures/pyzip/pyzip.in
- + t/fixtures/pyzip/pyzip.out
Changes:
=====================================
lib/File/StripNondeterminism.pm
=====================================
@@ -109,6 +109,14 @@ sub get_normalizer_for_file($) {
&& _get_file_type($_) =~ m/Zip archive data|EPUB document/) {
return _handler('zip');
}
+
+ # pyzip - check last due to call to file(1)
+ if (_get_file_type($_) =~ m/python3 script executable \(binary data\)/) {
+ my $handler = _handler('pyzip');
+ return $handler
+ if File::StripNondeterminism::handlers::pyzip::is_pyzip_file($_);
+ }
+
return undef;
}
@@ -124,6 +132,7 @@ our %KNOWN_HANDLERS = (
jmod => 1,
uimage => 1,
png => 1,
+ pyzip => 1,
javaproperties => 1,
zip => 1,
);
=====================================
lib/File/StripNondeterminism/handlers/pyzip.pm
=====================================
@@ -0,0 +1,108 @@
+#
+# Copyright 2021 Chris Lamb <lamby at debian.org>
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism. If not, see <http://www.gnu.org/licenses/>.
+#
+package File::StripNondeterminism::handlers::pyzip;
+
+use strict;
+use warnings;
+
+use File::StripNondeterminism;
+use File::StripNondeterminism::Common qw(copy_data);
+use File::Basename;
+use File::StripNondeterminism::handlers::zip;
+use File::Temp;
+use Fcntl q/SEEK_SET/;
+
+=head1 ABOUT
+
+Python supports running .zip'd .py files:
+
+ $ cat __main__.py
+ #!/usr/bin/python3
+ print("Hello World")
+ $ zip pyzip.zip __main__.py
+ $ head -1 __main__.py | cat - pyzip.zip > pyzip
+ $ chmod a+x pyzip
+
+They require special handling to not mangle the shebang.
+
+=head1 DEPRECATION PLAN
+
+Unclear, as many tools can, after all, generate these .zip files.
+
+=cut
+
+# Return 1 if the first 32 bytes of $filename hold a single "#!" line that is
+# directly followed by a ZIP local file header, else 0 (also if unreadable).
+sub is_pyzip_file {
+    my ($filename) = @_;
+
+    open(my $fh, '<', $filename) or return 0;
+    my $got = read($fh, my $head, 32);
+    close $fh;
+    # [^\n]* pins the match to line one, the only line normalize() strips.
+    return ($got && $head =~ /^#![^\n]*\n\x{50}\x{4b}\x{03}\x{04}/) ? 1 : 0;
+}
+
+# Normalize the zip payload of the pyzip at $filename, keeping its shebang:
+# the first line is set aside, the rest is normalized as a plain .zip, and the
+# two are rejoined.  Returns 1 if rewritten, 0 if unchanged; dies on I/O error.
+sub normalize {
+    my ($filename) = @_;
+    my ($buf, $bytes_read);
+
+    # Create a .zip file without the shebang
+    my $stripped = File::Temp->new(DIR => dirname($filename));
+    open(my $fh, '<', $filename) or die "$filename: open failed: $!";
+
+    # Save the shebang for later
+    my $shebang = <$fh>;
+
+    # Copy through the rest of the file
+    while ($bytes_read = read($fh, $buf, 4096)) {
+        print {$stripped} $buf or die "$filename: write failed: $!";
+    }
+    defined($bytes_read) or die "$filename: read failed: $!";
+    $stripped->close or die "$filename: close failed: $!";
+    close $fh;
+
+    # Normalize the stripped version
+    my $modified = File::StripNondeterminism::handlers::zip::normalize(
+        $stripped->filename);
+
+    # If we didn't change anything, no need to mess around with a new file
+    return 0 if not $modified;
+
+    # Create a file with the existing shebang
+    my $pyzip = File::Temp->new(DIR => dirname($filename));
+    print {$pyzip} $shebang or die "$filename: write failed: $!";
+    open($fh, '<', $stripped->filename) or die "$filename: reopen failed: $!";
+    while ($bytes_read = read($fh, $buf, 4096)) {
+        print {$pyzip} $buf or die "$filename: write failed: $!";
+    }
+    defined($bytes_read) or die "$filename: read failed: $!";
+    close $fh;
+    $pyzip->close or die "$filename: close failed: $!";
+
+    # Copy the result, preserving the attributes of the original
+    copy_data($pyzip->filename, $filename)
+      or die "$filename: unable to overwrite: copy_data: $!";
+    return 1;
+}
+
+1;
=====================================
t/fixtures/pyzip/pyzip.in
=====================================
Binary files /dev/null and b/t/fixtures/pyzip/pyzip.in differ
=====================================
t/fixtures/pyzip/pyzip.out
=====================================
Binary files /dev/null and b/t/fixtures/pyzip/pyzip.out differ
View it on GitLab: https://salsa.debian.org/reproducible-builds/strip-nondeterminism/-/commit/064e276d70b7d1462ac478c9b1adb88c2d59c42b
--
View it on GitLab: https://salsa.debian.org/reproducible-builds/strip-nondeterminism/-/commit/064e276d70b7d1462ac478c9b1adb88c2d59c42b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20210504/cf1ecfcd/attachment.htm>
More information about the rb-commits
mailing list