Bug 1288567 - Use deterministic tar archive generation; r?dustin draft
author Gregory Szorc <gps@mozilla.com>
Fri, 22 Jul 2016 10:29:58 -0700
changeset 392502 557cb3c6ff4869c8e7a575e00da6dea6e800babb
parent 392501 f858dec9674caada90e12c6c7d5489cf4c36b1ca
child 392503 293026da2349c6d42d5d98d30c1e0f5fdf7e294c
push id 24042
push user bmo:gps@mozilla.com
push date Mon, 25 Jul 2016 18:25:42 +0000
reviewers dustin
bugs 1288567
milestone 50.0a1
Bug 1288567 - Use deterministic tar archive generation; r?dustin

We recently implemented code in mozpack for performing deterministic tar file creation. It normalizes things like uids, gids, and mtimes that creep into archives.

MozReview-Commit-ID: 1tn5eXkqACQ
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,17 +1,21 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import hashlib
 import os
-import tarfile
+
+from mozpack.archive import (
+    create_tar_gz_from_files,
+)
+
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 
 def docker_image(name):
     '''Determine the docker image name, including repository and tag, from an
@@ -60,18 +64,27 @@ def create_context_tar(context_dir, out_
     """Create a context tarball.
 
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.
 
     Returns the SHA-256 hex digest of the created archive.
     """
-    with tarfile.open(out_path, 'w:gz') as tar:
-        tar.add(context_dir, arcname=prefix)
+    archive_files = {}
+
+    for root, dirs, files in os.walk(context_dir):
+        for f in files:
+            source_path = os.path.join(root, f)
+            rel = source_path[len(context_dir) + 1:]
+            archive_path = os.path.join(prefix, rel)
+            archive_files[archive_path] = source_path
+
+    with open(out_path, 'wb') as fh:
+        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
 
     h = hashlib.sha256()
     with open(out_path, 'rb') as fh:
         while True:
             data = fh.read(32768)
             if not data:
                 break
             h.update(data)