Bug 1244189: add 'mach taskcluster-load-image'; r?garndt
The command downloads a docker image created by a taskcluster task, which is
helpful when trying to replicate subtle bugs that only occur
in-tree.
This also fixes a bug in hashing Dockerfile directories, where the full
pathname was taken into account. While this pathname is consistent from
decision task to decision task, it does not match the paths under developers'
home directories. This change omits the directory prefix, which will cause a
one-time shift in all directory hashes.
MozReview-Commit-ID: EamQzUGG5qY
--- a/testing/taskcluster/mach_commands.py
+++ b/testing/taskcluster/mach_commands.py
@@ -222,16 +222,50 @@ class DecisionTask(object):
'as_slugid': SlugidJar(),
'from_now': json_time_from_now,
'now': current_json_time()
}.items())
task = templates.load(params['task'], parameters)
print(json.dumps(task, indent=4))
@CommandProvider
+class LoadImage(object):
+ @Command('taskcluster-load-image', category="ci",
+ description="Load a pre-built Docker image")
+ @CommandArgument('--task-id',
+ help="Load the image at public/image.tar in this task, rather than "
+ "searching the index")
+ @CommandArgument('image_name', nargs='?',
+ help="Load the image of this name based on the current contents of the tree "
+ "(as built for mozilla-central or mozilla-inbound)")
+ def load_image(self, image_name, task_id):
+ from taskcluster_graph.image_builder import (
+ task_id_for_image,
+ docker_load_from_url
+ )
+
+ if not image_name and not task_id:
+ print("Specify either IMAGE-NAME or TASK-ID")
+ sys.exit(1)
+
+ if not task_id:
+ task_id = task_id_for_image({}, 'mozilla-inbound', image_name, create=False)
+ if not task_id:
+ print("No task found in the TaskCluster index for {}".format(image_name))
+ sys.exit(1)
+
+ print("Task ID: {}".format(task_id))
+
+ ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+ image_name = docker_load_from_url(ARTIFACT_URL.format(task_id, 'public/image.tar'))
+
+ print("Loaded image is named {}".format(image_name))
+
+
+@CommandProvider
class Graph(object):
@Command('taskcluster-graph', category="ci",
description="Create taskcluster task graph")
@CommandArgument('--base-repository',
default=os.environ.get('GECKO_BASE_REPOSITORY'),
help='URL for "base" repository to clone')
@CommandArgument('--head-repository',
default=os.environ.get('GECKO_HEAD_REPOSITORY'),
--- a/testing/taskcluster/taskcluster_graph/image_builder.py
+++ b/testing/taskcluster/taskcluster_graph/image_builder.py
@@ -1,11 +1,12 @@
import hashlib
import json
import os
+import subprocess
import tarfile
import urllib2
import taskcluster_graph.transform.routes as routes_transform
import taskcluster_graph.transform.treeherder as treeherder_transform
from slugid import nice as slugid
from taskcluster_graph.templates import Templates
@@ -28,28 +29,31 @@ def docker_image(name):
version = open(os.path.join(DOCKER_ROOT, name, 'VERSION')).read().strip()
if os.path.isfile(repository_path):
repository = open(repository_path).read().strip()
return '{}/{}:{}'.format(repository, name, version)
-def task_id_for_image(seen_images, project, name):
+def task_id_for_image(seen_images, project, name, create=True):
if name in seen_images:
return seen_images[name]['taskId']
context_path = os.path.join('testing', 'docker', name)
context_hash = generate_context_hash(context_path)
task_id = get_task_id_for_namespace(project, name, context_hash)
if task_id:
seen_images[name] = {'taskId': task_id}
return task_id
+ if not create:
+ return None
+
task_id = slugid()
seen_images[name] = {
'taskId': task_id,
'path': context_path,
'hash': context_hash
}
return task_id
@@ -106,24 +110,25 @@ def generate_context_hash(image_path):
context_hash = hashlib.sha256()
files = []
for dirpath, dirnames, filenames in os.walk(os.path.join(GECKO, image_path)):
for filename in filenames:
files.append(os.path.join(dirpath, filename))
for filename in sorted(files):
+ relative_filename = filename.replace(GECKO, '')
with open(filename, 'rb') as f:
file_hash = hashlib.sha256()
while True:
data = f.read()
if not data:
break
file_hash.update(data)
- context_hash.update(file_hash.hexdigest() + '\t' + filename + '\n')
+ context_hash.update(file_hash.hexdigest() + '\t' + relative_filename + '\n')
return context_hash.hexdigest()
def create_context_tar(context_dir, destination, image_name):
''' Creates a tar file of a particular context directory '''
if not os.path.exists(os.path.dirname(destination)):
os.makedirs(os.path.dirname(destination))
@@ -224,8 +229,41 @@ def normalize_image_details(graph, task,
)
graph['scopes'].add(define_task)
graph['scopes'] |= set(image_task['task'].get('scopes', []))
route_scopes = map(lambda route: 'queue:route:' + route, image_task['task'].get('routes', []))
graph['scopes'] |= set(route_scopes)
details['required'] = True
+
+def docker_load_from_url(url):
+ """Get a docker image from a `docker save` tarball at the given URL,
+ loading it into the running daemon and returning the image name."""
+
+ # Because we need to read this file twice (and one read is not all the way
+ # through), it is difficult to stream it. So we download to disk and then
+ # read it back.
+ filename = 'temp-docker-image.tar'
+
+ print("Downloading {}".format(url))
+ subprocess.check_call(['curl', '-#', '-L', '-o', filename, url])
+
+ print("Determining image name")
+ tf = tarfile.open(filename)
+ repositories = json.load(tf.extractfile('repositories'))
+ name = repositories.keys()[0]
+ tag = repositories[name].keys()[0]
+ name = '{}:{}'.format(name, tag)
+ print("Image name: {}".format(name))
+
+ print("Loading image into docker")
+ try:
+ subprocess.check_call(['docker', 'load', '-i', filename])
+ except subprocess.CalledProcessError:
+ print("*** `docker load` failed. You may avoid re-downloading that tarball by fixing the")
+ print("*** problem and running `docker load < {}`.".format(filename))
+ raise
+
+ print("Deleting temporary file")
+ os.unlink(filename)
+
+ return name