Bug 1288567 - Extract function for creating context tars; r?dustin draft
author Gregory Szorc <gps@mozilla.com>
Fri, 22 Jul 2016 10:20:06 -0700
changeset 392501 f858dec9674caada90e12c6c7d5489cf4c36b1ca
parent 392500 4f4dba2093aa48f0b456629f6effbd97d6d51042
child 392502 557cb3c6ff4869c8e7a575e00da6dea6e800babb
push id 24042
push user bmo:gps@mozilla.com
push date Mon, 25 Jul 2016 18:25:42 +0000
reviewers dustin
bugs 1288567
milestone 50.0a1
Bug 1288567 - Extract function for creating context tars; r?dustin

Upcoming commits will refactor how context tarballs are created. In preparation for this, we establish a standalone function for creating context tarballs and refactor docker_image.py to use it.

MozReview-Commit-ID: KEW6ppO1vCl
taskcluster/taskgraph/task/docker_image.py
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/taskgraph/task/docker_image.py
+++ b/taskcluster/taskgraph/task/docker_image.py
@@ -4,23 +4,23 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import logging
 import json
 import os
 import re
 import urllib2
-import tarfile
 import time
 
 from . import base
 from taskgraph.util.docker import (
+    create_context_tar,
     docker_image,
-    generate_context_hash
+    generate_context_hash,
 )
 from taskgraph.util.templates import Templates
 
 logger = logging.getLogger(__name__)
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
 INDEX_REGEX = r'index\.(docker\.images\.v1\.(.+)\.(.+)\.hash\.(.+))'
@@ -133,18 +133,17 @@ class DockerImageTask(base.Task):
 
     @classmethod
     def create_context_tar(cls, context_dir, destination, image_name):
         'Creates a tar file of a particular context directory.'
         destination = os.path.abspath(destination)
         if not os.path.exists(os.path.dirname(destination)):
             os.makedirs(os.path.dirname(destination))
 
-        with tarfile.open(destination, 'w:gz') as tar:
-            tar.add(context_dir, arcname=image_name)
+        create_context_tar(context_dir, destination, image_name)
 
     @classmethod
     def from_json(cls, task_dict):
         # Generating index_paths for optimization
         routes = task_dict['task']['routes']
         index_paths = []
         for route in routes:
             index_path_regex = re.compile(INDEX_REGEX)
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,16 +1,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import hashlib
 import os
+import tarfile
 
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
 DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 
 def docker_image(name):
     '''Determine the docker image name, including repository and tag, from an
@@ -48,8 +49,30 @@ def generate_context_hash(image_path):
         relative_filename = filename.replace(GECKO, '')
         with open(filename, 'rb') as f:
             file_hash = hashlib.sha256()
             data = f.read()
             file_hash.update(data)
             context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
 
     return context_hash.hexdigest()
+
+
+def create_context_tar(context_dir, out_path, prefix):
+    """Create a context tarball and return its SHA-256 hex digest.
+
+    The directory ``context_dir`` (expected to contain a Dockerfile; this is
+    not checked here) is added recursively to a gzipped tar file written to
+    ``out_path``, with ``prefix`` used as its name inside the archive.
+
+    The digest is computed by re-reading ``out_path`` after the tar is closed.
+    """
+    with tarfile.open(out_path, 'w:gz') as tar:  # 'w:gz': gzip-compressed write
+        tar.add(context_dir, arcname=prefix)  # named ``prefix`` in the archive
+
+    h = hashlib.sha256()
+    with open(out_path, 'rb') as fh:  # hash the bytes as written to disk
+        while True:
+            data = fh.read(32768)  # fixed-size chunks keep memory use bounded
+            if not data:  # an empty read means end of file
+                break
+            h.update(data)
+    return h.hexdigest()