Bug 1288610 - Add functions for creating deterministic tar archives; r?glandium
I have a need to create tar archives deterministically
and reproducibly. Since we already have similar functionality in
mozpack for producting zip/jar archives, I figured it made sense for
this functionality to live in mozpack.
I made the functionality as simple as possible: we only accept
files from the filesystem and the set of files must be known in
advance. No class to hold/buffer state: just a simple function
that takes a mapping of files and writes to a stream.
MozReview-Commit-ID: If0NTcA7wpc
new file mode 100644
--- /dev/null
+++ b/python/mozbuild/mozpack/archive.py
@@ -0,0 +1,107 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+
+import bz2
+import gzip
+import stat
+import tarfile
+
+
+# 2016-01-01T00:00:00+0000
+DEFAULT_MTIME = 1451606400
+
+
+def create_tar_from_files(fp, files):
+ """Create a tar file deterministically.
+
+ Receives a dict mapping names of files in the archive to local filesystem
+ paths.
+
+ The files will be archived and written to the passed file handle opened
+ for writing.
+
+ Only regular files can be written.
+
+ FUTURE accept mozpack.files classes for writing
+ FUTURE accept a filename argument (or create APIs to write files)
+ """
+ with tarfile.open(name='', mode='w', fileobj=fp, dereference=True) as tf:
+ for archive_path, fs_path in sorted(files.items()):
+ ti = tf.gettarinfo(fs_path, archive_path)
+
+ if not ti.isreg():
+ raise ValueError('not a regular file: %s' % fs_path)
+
+ # Disallow setuid and setgid bits. This is an arbitrary restriction.
+ # However, since we set uid/gid to root:root, setuid and setgid
+ # would be a glaring security hole if the archive were
+ # uncompressed as root.
+ if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+ raise ValueError('cannot add file with setuid or setgid set: '
+ '%s' % fs_path)
+
+ # Set uid, gid, username, and group as deterministic values.
+ ti.uid = 0
+ ti.gid = 0
+ ti.uname = ''
+ ti.gname = ''
+
+ # Set mtime to a constant value.
+ ti.mtime = DEFAULT_MTIME
+
+ with open(fs_path, 'rb') as fh:
+ tf.addfile(ti, fh)
+
+
+def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
+ """Create a tar.gz file deterministically from files.
+
+ This is a glorified wrapper around ``create_tar_from_files`` that
+ adds gzip compression.
+
+ The passed file handle should be opened for writing in binary mode.
+ When the function returns, all data has been written to the handle.
+ """
+ # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
+ # value so output is deterministic.
+ gf = gzip.GzipFile(filename=filename or '', mode='wb', fileobj=fp,
+ compresslevel=compresslevel, mtime=DEFAULT_MTIME)
+ with gf:
+ create_tar_from_files(gf, files)
+
+
+class _BZ2Proxy(object):
+ """File object that proxies writes to a bz2 compressor."""
+ def __init__(self, fp, compresslevel=9):
+ self.fp = fp
+ self.compressor = bz2.BZ2Compressor(compresslevel=compresslevel)
+ self.pos = 0
+
+ def tell(self):
+ return self.pos
+
+ def write(self, data):
+ data = self.compressor.compress(data)
+ self.pos += len(data)
+ self.fp.write(data)
+
+ def close(self):
+ data = self.compressor.flush()
+ self.pos += len(data)
+ self.fp.write(data)
+
+
+def create_tar_bz2_from_files(fp, files, compresslevel=9):
+ """Create a tar.bz2 file deterministically from files.
+
+ This is a glorified wrapper around ``create_tar_from_files`` that
+ adds bzip2 compression.
+
+ This function is similar to ``create_tar_gzip_from_files()``.
+ """
+ proxy = _BZ2Proxy(fp, compresslevel=compresslevel)
+ create_tar_from_files(proxy, files)
+ proxy.close()
new file mode 100644
--- /dev/null
+++ b/python/mozbuild/mozpack/test/test_archive.py
@@ -0,0 +1,190 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+
+import hashlib
+import os
+import shutil
+import stat
+import tarfile
+import tempfile
+import unittest
+
+from mozpack.archive import (
+ DEFAULT_MTIME,
+ create_tar_from_files,
+ create_tar_gz_from_files,
+ create_tar_bz2_from_files,
+)
+
+from mozunit import main
+
+
+MODE_STANDARD = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
+
+
+def file_hash(path):
+ h = hashlib.sha1()
+ with open(path, 'rb') as fh:
+ while True:
+ data = fh.read(8192)
+ if not data:
+ break
+ h.update(data)
+
+ return h.hexdigest()
+
+
+class TestArchive(unittest.TestCase):
+ def _create_files(self, root):
+ files = {}
+ for i in range(10):
+ p = os.path.join(root, b'file%d' % i)
+ with open(p, 'wb') as fh:
+ fh.write(b'file%d' % i)
+ # Need to set permissions or umask may influence testing.
+ os.chmod(p, MODE_STANDARD)
+ files[b'file%d' % i] = p
+
+ return files
+
+ def _verify_basic_tarfile(self, tf):
+ self.assertEqual(len(tf.getmembers()), 10)
+
+ names = ['file%d' % i for i in range(10)]
+ self.assertEqual(tf.getnames(), names)
+
+ for ti in tf.getmembers():
+ self.assertEqual(ti.uid, 0)
+ self.assertEqual(ti.gid, 0)
+ self.assertEqual(ti.uname, '')
+ self.assertEqual(ti.gname, '')
+ self.assertEqual(ti.mode, MODE_STANDARD)
+ self.assertEqual(ti.mtime, DEFAULT_MTIME)
+
+ def test_dirs_refused(self):
+ d = tempfile.mkdtemp()
+ try:
+ tp = os.path.join(d, 'test.tar')
+ with open(tp, 'wb') as fh:
+ with self.assertRaisesRegexp(ValueError, 'not a regular'):
+ create_tar_from_files(fh, {'test': d})
+ finally:
+ shutil.rmtree(d)
+
+ def test_setuid_setgid_refused(self):
+ d = tempfile.mkdtemp()
+ try:
+ uid = os.path.join(d, 'setuid')
+ gid = os.path.join(d, 'setgid')
+ with open(uid, 'a'):
+ pass
+ with open(gid, 'a'):
+ pass
+
+ os.chmod(uid, MODE_STANDARD | stat.S_ISUID)
+ os.chmod(gid, MODE_STANDARD | stat.S_ISGID)
+
+ tp = os.path.join(d, 'test.tar')
+ with open(tp, 'wb') as fh:
+ with self.assertRaisesRegexp(ValueError, 'cannot add file with setuid'):
+ create_tar_from_files(fh, {'test': uid})
+ with self.assertRaisesRegexp(ValueError, 'cannot add file with setuid'):
+ create_tar_from_files(fh, {'test': gid})
+ finally:
+ shutil.rmtree(d)
+
+ def test_create_tar_basic(self):
+ d = tempfile.mkdtemp()
+ try:
+ files = self._create_files(d)
+
+ tp = os.path.join(d, 'test.tar')
+ with open(tp, 'wb') as fh:
+ create_tar_from_files(fh, files)
+
+ # Output should be deterministic.
+ self.assertEqual(file_hash(tp), 'cd16cee6f13391abd94dfa435d2633b61ed727f1')
+
+ with tarfile.open(tp, 'r') as tf:
+ self._verify_basic_tarfile(tf)
+
+ finally:
+ shutil.rmtree(d)
+
+ def test_executable_preserved(self):
+ d = tempfile.mkdtemp()
+ try:
+ p = os.path.join(d, 'exec')
+ with open(p, 'wb') as fh:
+ fh.write('#!/bin/bash\n')
+ os.chmod(p, MODE_STANDARD | stat.S_IXUSR)
+
+ tp = os.path.join(d, 'test.tar')
+ with open(tp, 'wb') as fh:
+ create_tar_from_files(fh, {'exec': p})
+
+ self.assertEqual(file_hash(tp), '357e1b81c0b6cfdfa5d2d118d420025c3c76ee93')
+
+ with tarfile.open(tp, 'r') as tf:
+ m = tf.getmember('exec')
+ self.assertEqual(m.mode, MODE_STANDARD | stat.S_IXUSR)
+
+ finally:
+ shutil.rmtree(d)
+
+ def test_create_tar_gz_basic(self):
+ d = tempfile.mkdtemp()
+ try:
+ files = self._create_files(d)
+
+ gp = os.path.join(d, 'test.tar.gz')
+ with open(gp, 'wb') as fh:
+ create_tar_gz_from_files(fh, files)
+
+ self.assertEqual(file_hash(gp), 'acb602239c1aeb625da5e69336775609516d60f5')
+
+ with tarfile.open(gp, 'r:gz') as tf:
+ self._verify_basic_tarfile(tf)
+
+ finally:
+ shutil.rmtree(d)
+
+ def test_tar_gz_name(self):
+ d = tempfile.mkdtemp()
+ try:
+ files = self._create_files(d)
+
+ gp = os.path.join(d, 'test.tar.gz')
+ with open(gp, 'wb') as fh:
+ create_tar_gz_from_files(fh, files, filename='foobar', compresslevel=1)
+
+ self.assertEqual(file_hash(gp), 'fd099f96480cc1100f37baa8e89a6b820dbbcbd3')
+
+ with tarfile.open(gp, 'r:gz') as tf:
+ self._verify_basic_tarfile(tf)
+
+ finally:
+ shutil.rmtree(d)
+
+ def test_create_tar_bz2_basic(self):
+ d = tempfile.mkdtemp()
+ try:
+ files = self._create_files(d)
+
+ bp = os.path.join(d, 'test.tar.bz2')
+ with open(bp, 'wb') as fh:
+ create_tar_bz2_from_files(fh, files)
+
+ self.assertEqual(file_hash(bp), '1827ad00dfe7acf857b7a1c95ce100361e3f6eea')
+
+ with tarfile.open(bp, 'r:bz2') as tf:
+ self._verify_basic_tarfile(tf)
+ finally:
+ shutil.rmtree(d)
+
+
+if __name__ == '__main__':
+ main()