Bug 1432390 - Allow deriving docker images from other in-tree images. r?dustin
author      Mike Hommey <mh+mozilla@glandium.org>
date        Thu, 25 Jan 2018 11:07:54 +0900
changeset   747408 71d24ffdbc7d7114e4b08f1631a451045c27ac66
parent      747407 2ffde75bc224df7e9679758052bb2e5891a213ad
child       747484 2eaffe6795e27fd455c9b5739748bf35bf9e6da0
push id     96904
push user   bmo:mh+mozilla@glandium.org
push date   Fri, 26 Jan 2018 01:20:26 +0000
reviewers   dustin
bugs        1432390
milestone   60.0a1
Bug 1432390 - Allow deriving docker images from other in-tree images. r?dustin

So far, the best we've been able to do is upload an image to Docker Hub and
point a derived image's Dockerfile FROM at the version uploaded there. That is
a cumbersome process, and it makes the use of "layered" docker images painful.

This change makes it possible to declare a parent docker image in the
taskcluster/ci/docker-image/kind.yml definitions; the parent image is then
automatically loaded before the image is built. The Dockerfile can reference
the parent image through the DOCKER_IMAGE_PARENT build argument, which
contains the full image name:tag.

Some details are left out for now, such as VOLUMEs. At this point, VOLUMEs
should all be defined in leaf docker images.
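For illustration, a derived image could hypothetically be wired up as follows.
The image names are invented, and only the `parent` key and the
DOCKER_IMAGE_PARENT argument are introduced by this patch:

    # taskcluster/ci/docker-image/kind.yml (sketch)
    jobs:
        example-child:
            symbol: I(ec)
            parent: example-base

    # taskcluster/docker/example-child/Dockerfile (sketch)
    ARG DOCKER_IMAGE_PARENT
    FROM $DOCKER_IMAGE_PARENT

Declaring ARG before FROM is the standard Dockerfile idiom for parameterizing
the base image (supported since docker 17.05).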
taskcluster/docker/image_builder/build-image.sh
taskcluster/taskgraph/transforms/docker_image.py
--- a/taskcluster/docker/image_builder/build-image.sh
+++ b/taskcluster/docker/image_builder/build-image.sh
@@ -20,24 +20,34 @@ test -n "$IMAGE_NAME" || raise_error "IM
 
 # The docker socket is mounted by the taskcluster worker in a way that prevents
 # us changing its permissions to allow the worker user to access it. Create a
 # proxy socket that the worker user can use.
 export DOCKER_SOCKET=/var/run/docker.proxy
 socat UNIX-LISTEN:$DOCKER_SOCKET,fork,group=worker,mode=0775 UNIX-CLIENT:/var/run/docker.sock </dev/null &
 trap "kill $!" EXIT
 
+LOAD_COMMAND=
+if [ -n "$DOCKER_IMAGE_PARENT" ]; then
+    test -n "$DOCKER_IMAGE_PARENT_TASK" || raise_error "DOCKER_IMAGE_PARENT_TASK must be provided."
+    LOAD_COMMAND="\
+      /builds/worker/checkouts/gecko/mach taskcluster-load-image \
+      --task-id \"$DOCKER_IMAGE_PARENT_TASK\" \
+      -t \"$DOCKER_IMAGE_PARENT\" && "
+fi
+
 # Build image
 run-task \
   --vcs-checkout "/builds/worker/checkouts/gecko" \
   --sparse-profile build/sparse-profiles/docker-image \
   -- \
+  sh -x -c "$LOAD_COMMAND \
   /builds/worker/checkouts/gecko/mach taskcluster-build-image \
-  -t "$IMAGE_NAME:$HASH" \
-  "$IMAGE_NAME"
+  -t \"$IMAGE_NAME:$HASH\" \
+  \"$IMAGE_NAME\""
 
 # Create artifact folder (note that this must occur after run-task)
 mkdir -p /builds/worker/workspace/artifacts
 
 # Get image from docker daemon (try up to 10 times)
 # This interacts directly with the docker remote API, see:
 # https://docs.docker.com/engine/reference/api/docker_remote_api_v1.18/
 #
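To make the composed command concrete: when DOCKER_IMAGE_PARENT is set, the
string handed to the inner `sh -x -c` above is equivalent to running (all
variables having already been expanded by the outer shell):

    /builds/worker/checkouts/gecko/mach taskcluster-load-image \
        --task-id "$DOCKER_IMAGE_PARENT_TASK" -t "$DOCKER_IMAGE_PARENT" && \
    /builds/worker/checkouts/gecko/mach taskcluster-build-image \
        -t "$IMAGE_NAME:$HASH" "$IMAGE_NAME"

so the parent image is imported into the docker daemon under its final
name:tag before the build starts, letting the Dockerfile's FROM resolve it
locally instead of pulling from a registry.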
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@@ -2,16 +2,17 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import os
 import re
 
+from collections import deque
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.transforms.task import _run_task_suffix
 from .. import GECKO
 from taskgraph.util.docker import (
     generate_context_hash,
 )
 from taskgraph.util.cached_tasks import add_optimization
 from taskgraph.util.schema import (
@@ -26,16 +27,19 @@ from voluptuous import (
 DIGEST_RE = re.compile('^[0-9a-f]{64}$')
 
 transforms = TransformSequence()
 
 docker_image_schema = Schema({
     # Name of the docker image.
     Required('name'): basestring,
 
+    # Name of the parent docker image.
+    Optional('parent'): basestring,
+
     # Treeherder symbol.
     Required('symbol'): basestring,
 
     # relative path (from config.path) to the file the docker image was defined
     # in.
     Optional('job-from'): basestring,
 
     # Arguments to use for the Dockerfile.
@@ -54,54 +58,84 @@ docker_image_schema = Schema({
 def validate(config, tasks):
     for task in tasks:
         validate_schema(
             docker_image_schema, task,
             "In docker image {!r}:".format(task.get('name', 'unknown')))
         yield task
 
 
+def order_image_tasks(tasks):
+    """Iterate image tasks in an order where parent images come first."""
+    pending = deque(tasks)
+    task_names = {task['name'] for task in pending}
+    emitted = set()
+    while True:
+        try:
+            task = pending.popleft()
+        except IndexError:
+            break
+        parent = task.get('parent')
+        if parent and parent not in emitted:
+            # A parent that is not itself an in-tree image task would
+            # otherwise make this loop spin forever.
+            if parent not in task_names:
+                raise Exception('Missing parent image for {}: {}'.format(
+                    task['name'], parent))
+            pending.append(task)
+            continue
+        emitted.add(task['name'])
+        yield task
+
+
 @transforms.add
 def fill_template(config, tasks):
     available_packages = {}
     for task in config.kind_dependencies_tasks:
         if task.kind != 'packages':
             continue
         name = task.label.replace('packages-', '')
         for route in task.task.get('routes', []):
             if route.startswith('index.') and '.hash.' in route:
                 # Only keep the hash part of the route.
                 h = route.rsplit('.', 1)[1]
                 assert DIGEST_RE.match(h)
                 available_packages[name] = h
                 break
-    for task in tasks:
+
+    context_hashes = {}
+
+    for task in order_image_tasks(tasks):
         image_name = task.pop('name')
         job_symbol = task.pop('symbol')
         args = task.pop('args', {})
         definition = task.pop('definition', image_name)
         packages = task.pop('packages', [])
+        parent = task.pop('parent', None)
 
         for p in packages:
             if p not in available_packages:
                 raise Exception('Missing package job for {}-{}: {}'.format(
                     config.kind, image_name, p))
 
         # Generating the context hash relies on arguments being set, so we
         # set this now, although it's not the final value (it's a
         # task-reference value, see further below). We add the package routes
         # containing a hash to get the overall docker image hash, so changes
         # to packages will be reflected in the docker image hash.
         args['DOCKER_IMAGE_PACKAGES'] = ' '.join('<{}>'.format(p)
                                                  for p in packages)
+        if parent:
+            args['DOCKER_IMAGE_PARENT'] = '{}:{}'.format(parent, context_hashes[parent])
 
         context_path = os.path.join('taskcluster', 'docker', definition)
         context_hash = generate_context_hash(
             GECKO, context_path, image_name, args)
         digest_data = [context_hash]
+        context_hashes[image_name] = context_hash
 
         description = 'Build the docker image {} for use by dependent tasks'.format(
             image_name)
 
         # Adjust the zstandard compression level based on the execution level.
         # We use faster compression for level 1 because we care more about
         # end-to-end times. We use slower/better compression for other levels
         # because images are read more often and it is worth the trade-off to
@@ -187,16 +215,23 @@ def fill_template(config, tasks):
                 worker['env'][k] = v
 
         if packages:
             deps = taskdesc.setdefault('dependencies', {})
             for p in sorted(packages):
                 deps[p] = 'packages-{}'.format(p)
                 digest_data.append(available_packages[p])
 
+        if parent:
+            deps = taskdesc.setdefault('dependencies', {})
+            deps[parent] = 'build-docker-image-{}'.format(parent)
+            worker['env']['DOCKER_IMAGE_PARENT_TASK'] = {
+                'task-reference': '<{}>'.format(parent)
+            }
+
         if len(digest_data) > 1:
             kwargs = {'digest_data': digest_data}
         else:
             kwargs = {'digest': digest_data[0]}
         add_optimization(
             config, taskdesc,
             cache_type="docker-images.v1",
             cache_name=image_name,
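
Taken together, the fill_template changes mean a parent's context hash feeds
into its children's DOCKER_IMAGE_PARENT argument, and therefore into their own
context hashes, so any change to a parent image cascades into rebuilds of all
derived images. The parents-first iteration that makes this possible can be
sketched as a small, self-contained Python example (the image names are made
up; the deferral loop mirrors order_image_tasks above):

    from collections import deque

    def order_parents_first(tasks):
        # Repeatedly defer any task whose parent has not been yielded yet.
        pending = deque(tasks)
        emitted = set()
        while pending:
            task = pending.popleft()
            parent = task.get('parent')
            if parent and parent not in emitted:
                pending.append(task)  # parent not emitted yet; retry later
                continue
            emitted.add(task['name'])
            yield task

    # Hypothetical image definitions, listed child-before-parent on purpose:
    tasks = [
        {'name': 'grandchild', 'parent': 'child'},
        {'name': 'child', 'parent': 'base'},
        {'name': 'base'},
    ]
    print([t['name'] for t in order_parents_first(tasks)])
    # -> ['base', 'child', 'grandchild']

Note that, like the real transform, this sketch assumes the parent relation is
acyclic; a cycle would never drain the queue.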