[Git][reproducible-builds/strip-nondeterminism][master] Handle Python "pyzip" files. (Closes: reproducible-builds/strip-nondeterminism#18)

Chris Lamb gitlab at salsa.debian.org
Tue May 4 10:32:32 UTC 2021



Chris Lamb pushed to branch master at Reproducible Builds / strip-nondeterminism


Commits:
064e276d by Chris Lamb at 2021-05-04T11:31:48+01:00
Handle Python "pyzip" files. (Closes: reproducible-builds/strip-nondeterminism#18)

- - - - -


4 changed files:

- lib/File/StripNondeterminism.pm
- + lib/File/StripNondeterminism/handlers/pyzip.pm
- + t/fixtures/pyzip/pyzip.in
- + t/fixtures/pyzip/pyzip.out


Changes:

=====================================
lib/File/StripNondeterminism.pm
=====================================
@@ -109,6 +109,14 @@ sub get_normalizer_for_file($) {
 		&& _get_file_type($_) =~ m/Zip archive data|EPUB document/) {
 		return _handler('zip');
 	}
+
+	# pyzip - check last due to call to file(1)
+	if (_get_file_type($_) =~ m/python3 script executable \(binary data\)/) {
+		my $handler = _handler('pyzip');
+		return $handler
+		  if File::StripNondeterminism::handlers::pyzip::is_pyzip_file($_);
+	}
+
 	return undef;
 }
 
@@ -124,6 +132,7 @@ our %KNOWN_HANDLERS = (
 	jmod	=> 1,
 	uimage	=> 1,
 	png	=> 1,
+	pyzip	=> 1,
 	javaproperties => 1,
 	zip	=> 1,
 );


=====================================
lib/File/StripNondeterminism/handlers/pyzip.pm
=====================================
@@ -0,0 +1,108 @@
+#
+# Copyright 2021 Chris Lamb <lamby at debian.org>
+#
+# This file is part of strip-nondeterminism.
+#
+# strip-nondeterminism is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# strip-nondeterminism is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with strip-nondeterminism.  If not, see <http://www.gnu.org/licenses/>.
+#
+package File::StripNondeterminism::handlers::pyzip;
+
+use strict;
+use warnings;
+
+use File::StripNondeterminism;
+use File::StripNondeterminism::Common qw(copy_data);
+use File::Basename;
+use File::StripNondeterminism::handlers::zip;
+use File::Temp;
+use Fcntl q/SEEK_SET/;
+
+=head1 ABOUT
+
+Python supports running .zip'd .py files:
+
+	$ cat __main__.py
+	#!/usr/bin/python3
+	print("Hello World")
+	$ zip pyzip.zip __main__.py
+	$ head -1 __main__.py | cat - pyzip.zip > pyzip
+	$ chmod a+x pyzip 
+
+They require special handling to not mangle the shebang.
+
+=head1 DEPRECATION PLAN
+
+Unclear, as many tools can, after all, generate these .zip files.
+
+=cut
+
+# Report whether $filename looks like a Python "pyzip": within the first 32
+# bytes it must start with "#!" and have a newline directly followed by the
+# zip local file header magic ("PK\x03\x04"). False if it cannot be opened.
+sub is_pyzip_file {
+	my ($filename) = @_;
+
+	my ($handle, $head);
+	return open($handle, '<', $filename)
+	  && read($handle, $head, 32)
+	  && $head =~ /^#!.*\n\x{50}\x{4b}\x{03}\x{04}/s;
+}
+
+# Normalize the pyzip $filename: set aside its first (shebang) line, run the
+# zip handler on the rest and, only if that changed something, rewrite the file
+# as shebang + archive. Returns 1 if rewritten, else 0; dies on I/O failure.
+sub normalize {
+	my ($filename) = @_;
+	my ($buf, $bytes_read);
+
+	# Create a .zip file without the shebang
+	my $stripped = File::Temp->new(DIR => dirname($filename));
+	open(my $fh, '<', $filename) or die "$filename: open failed: $!";
+
+	# Save the shebang for later
+	my $shebang = <$fh>;
+
+	# Copy through the rest of the file
+	while ($bytes_read = read($fh, $buf, 4096)) {
+		print {$stripped} $buf or die "$filename: write failed: $!";
+	}
+	defined($bytes_read) or die "$filename: read failed: $!";
+	$stripped->close or die "$filename: write failed: $!";
+	close $fh;
+
+	# Normalize the stripped version
+	my $modified =
+	  File::StripNondeterminism::handlers::zip::normalize($stripped->filename);
+
+	# If we didn't change anything, no need to mess around with a new file
+	return 0 if not $modified;
+
+	# Create a file with the existing shebang
+	my $pyzip = File::Temp->new(DIR => dirname($filename));
+	print {$pyzip} $shebang or die "$filename: write failed: $!";
+	open($fh, '<', $stripped->filename) or die "$filename: reopen failed: $!";
+	while ($bytes_read = read($fh, $buf, 4096)) {
+		print {$pyzip} $buf or die "$filename: write failed: $!";
+	}
+	defined($bytes_read) or die "$filename: read failed: $!";
+	close $fh;
+	$pyzip->close or die "$filename: write failed: $!";
+
+	# Copy the result, preserving the attributes of the original
+	copy_data($pyzip->filename, $filename)
+	  or die "$filename: unable to overwrite: copy_data: $!";
+	return 1;
+}
+
+1;


=====================================
t/fixtures/pyzip/pyzip.in
=====================================
Binary files /dev/null and b/t/fixtures/pyzip/pyzip.in differ


=====================================
t/fixtures/pyzip/pyzip.out
=====================================
Binary files /dev/null and b/t/fixtures/pyzip/pyzip.out differ



View it on GitLab: https://salsa.debian.org/reproducible-builds/strip-nondeterminism/-/commit/064e276d70b7d1462ac478c9b1adb88c2d59c42b

-- 
View it on GitLab: https://salsa.debian.org/reproducible-builds/strip-nondeterminism/-/commit/064e276d70b7d1462ac478c9b1adb88c2d59c42b
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.reproducible-builds.org/pipermail/rb-commits/attachments/20210504/cf1ecfcd/attachment.htm>


More information about the rb-commits mailing list