Bug 1244189: add 'mach taskcluster-load-image'; r?garndt
author Dustin J. Mitchell <dustin@mozilla.com>
Fri, 25 Mar 2016 21:49:05 +0000
changeset 344827 9f35ddd7599bc622a8490133d4fdd1b80e58cb7f
parent 344796 ac482faf3373c1909d75ba89ca748f59bee609a2
child 517055 69d491328275378bc8d4bfbf848acb2d7c14f182
push id 13935
push user dmitchell@mozilla.com
push date Fri, 25 Mar 2016 22:22:17 +0000
reviewers garndt
bugs 1244189
milestone 48.0a1
Bug 1244189: add 'mach taskcluster-load-image'; r?garndt

The command downloads a docker image created by a taskcluster task, which can be helpful when trying to replicate subtle bugs that only occur in-tree.

This also fixes a bug in hashing Dockerfile directories, where the full pathname was taken into account. While this pathname is consistent from decision task to decision task, it is not consistent across developers' home directories. This change omits the directory prefix, which will cause a one-time shift in all directory hashes.

MozReview-Commit-ID: EamQzUGG5qY
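To illustrate the hashing change, here is a minimal standalone sketch (not part of the patch; the checkout roots and file contents are made up) of why hashing tree-relative paths makes the context hash reproducible across checkouts:

    import hashlib

    def context_hash(checkout_root, absolute_files):
        # Mirrors the fixed generate_context_hash(): the checkout prefix is
        # stripped from each path before it is folded into the digest.
        digest = hashlib.sha256()
        for filename in sorted(absolute_files):
            # stand-in for hashing the real file contents
            file_hash = hashlib.sha256(b'FROM ubuntu:14.04\n').hexdigest()
            relative_filename = filename.replace(checkout_root, '')
            # encoded so the sketch also runs under Python 3
            digest.update((file_hash + '\t' + relative_filename + '\n').encode('utf-8'))
        return digest.hexdigest()

    # The same Dockerfile under two different checkouts now hashes identically,
    # because the home-directory prefix no longer participates in the digest.
    assert context_hash('/home/alice/gecko',
                        ['/home/alice/gecko/testing/docker/desktop-build/Dockerfile']) == \
           context_hash('/builds/slave/gecko',
                        ['/builds/slave/gecko/testing/docker/desktop-build/Dockerfile'])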
testing/taskcluster/mach_commands.py
testing/taskcluster/taskcluster_graph/image_builder.py
--- a/testing/taskcluster/mach_commands.py
+++ b/testing/taskcluster/mach_commands.py
@@ -222,16 +222,50 @@ class DecisionTask(object):
             'as_slugid': SlugidJar(),
             'from_now': json_time_from_now,
             'now': current_json_time()
         }.items())
         task = templates.load(params['task'], parameters)
         print(json.dumps(task, indent=4))
 
 @CommandProvider
+class LoadImage(object):
+    @Command('taskcluster-load-image', category="ci",
+        description="Load a pre-built Docker image")
+    @CommandArgument('--task-id',
+        help="Load the image at public/image.tar in this task, rather than "
+             "searching the index")
+    @CommandArgument('image_name', nargs='?',
+        help="Load the image of this name based on the current contents of the tree "
+             "(as built for mozilla-central or mozilla-inbound)")
+    def load_image(self, image_name, task_id):
+        from taskcluster_graph.image_builder import (
+            task_id_for_image,
+            docker_load_from_url
+        )
+
+        if not image_name and not task_id:
+            print("Specify either IMAGE-NAME or TASK-ID")
+            sys.exit(1)
+
+        if not task_id:
+            task_id = task_id_for_image({}, 'mozilla-inbound', image_name, create=False)
+            if not task_id:
+                print("No task found in the TaskCluster index for {}".format(image_name))
+                sys.exit(1)
+
+        print("Task ID: {}".format(task_id))
+
+        ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+        image_name = docker_load_from_url(ARTIFACT_URL.format(task_id, 'public/image.tar'))
+
+        print("Loaded image is named {}".format(image_name))
+
+
+@CommandProvider
 class Graph(object):
     @Command('taskcluster-graph', category="ci",
         description="Create taskcluster task graph")
     @CommandArgument('--base-repository',
         default=os.environ.get('GECKO_BASE_REPOSITORY'),
         help='URL for "base" repository to clone')
     @CommandArgument('--head-repository',
         default=os.environ.get('GECKO_HEAD_REPOSITORY'),
--- a/testing/taskcluster/taskcluster_graph/image_builder.py
+++ b/testing/taskcluster/taskcluster_graph/image_builder.py
@@ -1,11 +1,12 @@
 import hashlib
 import json
 import os
+import subprocess
 import tarfile
 import urllib2
 
 import taskcluster_graph.transform.routes as routes_transform
 import taskcluster_graph.transform.treeherder as treeherder_transform
 from slugid import nice as slugid
 from taskcluster_graph.templates import Templates
 
@@ -28,28 +29,31 @@ def docker_image(name):
 
     version = open(os.path.join(DOCKER_ROOT, name, 'VERSION')).read().strip()
 
     if os.path.isfile(repository_path):
         repository = open(repository_path).read().strip()
 
     return '{}/{}:{}'.format(repository, name, version)
 
-def task_id_for_image(seen_images, project, name):
+def task_id_for_image(seen_images, project, name, create=True):
     if name in seen_images:
         return seen_images[name]['taskId']
 
     context_path = os.path.join('testing', 'docker', name)
     context_hash = generate_context_hash(context_path)
     task_id = get_task_id_for_namespace(project, name, context_hash)
 
     if task_id:
         seen_images[name] = {'taskId': task_id}
         return task_id
 
+    if not create:
+        return None
+
     task_id = slugid()
     seen_images[name] = {
         'taskId': task_id,
         'path': context_path,
         'hash': context_hash
     }
 
     return task_id
@@ -106,24 +110,25 @@ def generate_context_hash(image_path):
     context_hash = hashlib.sha256()
     files = []
 
     for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
         for filename in filenames:
             files.append(os.path.join(dirpath, filename))
 
     for filename in sorted(files):
+        relative_filename = filename.replace(GECKO, '')
         with open(filename, 'rb') as f:
             file_hash = hashlib.sha256()
             while True:
                 data = f.read()
                 if not data:
                     break
                 file_hash.update(data)
-            context_hash.update(file_hash.hexdigest() + '\t' + filename + '\n')
+            context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
 
     return context_hash.hexdigest()
 
 def create_context_tar(context_dir, destination, image_name):
     ''' Creates a tar file of a particular context directory '''
     if not os.path.exists(os.path.dirname(destination)):
         os.makedirs(os.path.dirname(destination))
 
@@ -224,8 +229,41 @@ def normalize_image_details(graph, task,
     )
 
     graph['scopes'].add(define_task)
     graph['scopes'] |= set(image_task['task'].get('scopes', []))
     route_scopes = map(lambda route: 'queue:route:' + route, image_task['task'].get('routes', []))
     graph['scopes'] |= set(route_scopes)
 
     details['required'] = True
+
+def docker_load_from_url(url):
+    """Get a docker image from a `docker save` tarball at the given URL,
+    loading it into the running daemon and returning the image name."""
+
+    # because we need to read this file twice (and one read is not all the way
+    # through), it is difficult to stream it.  So we download to disk and then
+    # read it back.
+    filename = 'temp-docker-image.tar'
+
+    print("Downloading {}".format(url))
+    subprocess.check_call(['curl', '-#', '-L', '-o', filename, url])
+
+    print("Determining image name")
+    tf = tarfile.open(filename)
+    repositories = json.load(tf.extractfile('repositories'))
+    name = repositories.keys()[0]
+    tag = repositories[name].keys()[0]
+    name = '{}:{}'.format(name, tag)
+    print("Image name: {}".format(name))
+
+    print("Loading image into docker")
+    try:
+        subprocess.check_call(['docker', 'load', '-i', filename])
+    except subprocess.CalledProcessError:
+        print("*** `docker load` failed.  You may avoid re-downloading that tarball by fixing the")
+        print("*** problem and running `docker load < {}`.".format(filename))
+        raise
+
+    print("Deleting temporary file")
+    os.unlink(filename)
+
+    return name
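
For context on the name extraction in docker_load_from_url() above: the code assumes the `docker save` tarball contains a top-level `repositories` file mapping image name to tag to layer id. A small standalone sketch of that lookup (the manifest contents below are invented):

    import json

    # Hypothetical contents of the `repositories` file inside the tarball.
    repositories = json.loads(
        '{"quay.io/mozilla/desktop-build": {"0.1.8": "abc123def456"}}')

    # The patch uses repositories.keys()[0], which is equivalent under Python 2.
    name = list(repositories)[0]          # image name, e.g. quay.io/mozilla/desktop-build
    tag = list(repositories[name])[0]     # tag, e.g. 0.1.8
    print('{}:{}'.format(name, tag))      # -> quay.io/mozilla/desktop-build:0.1.8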