Bug 1391476 - Capture Docker volumes in docker-worker config; r?dustin draft
authorGregory Szorc <gps@mozilla.com>
Wed, 23 Aug 2017 08:53:56 -0700
changeset 651407 a53676dc0c4f26e7e2cdd36ad4d7039cf7a98923
parent 651406 4a610f3d8a9269bc1b2c977e5e9d3a3b90359ff1
child 651408 4d9331997a1550d71a19cb7dab3e24c2a125b1bc
push id75721
push usergszorc@mozilla.com
push dateWed, 23 Aug 2017 17:48:59 +0000
reviewersdustin
bugs1391476
milestone57.0a1
Bug 1391476 - Capture Docker volumes in docker-worker config; r?dustin Docker volumes are host-mounted filesystems. We typically mount caches at their location. But not always. The reason we define VOLUME in Dockerfiles is we're guaranteed to get a fast host filesystem instead of AUFS when a cache isn't mounted. In this commit, we teach the docker-worker payload builder about the existence of Docker volumes. Docker volumes can be declared inline in the YAML. More conveniently, we automatically parse out VOLUME lines from corresponding in-tree Dockerfile. We'll do useful things with this data in subsequent commits. MozReview-Commit-ID: BNxp8EDEYw
taskcluster/taskgraph/transforms/task.py
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -20,16 +20,17 @@ from mozbuild.util import memoize
 from taskgraph.util.attributes import TRUNK_PROJECTS
 from taskgraph.util.hash import hash_path
 from taskgraph.util.treeherder import split_symbol
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_release_config
 from voluptuous import Any, Required, Optional, Extra
 from taskgraph import GECKO
+from ..util import docker as dockerutil
 
 from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR
 
 
 RUN_TASK = os.path.join(GECKO, 'taskcluster', 'docker', 'recipes', 'run-task')
 
 
 @memoize
@@ -188,16 +189,28 @@ task_description_schema = Schema({
         Required('relengapi-proxy', default=False): bool,
         Required('chain-of-trust', default=False): bool,
         Required('taskcluster-proxy', default=False): bool,
         Required('allow-ptrace', default=False): bool,
         Required('loopback-video', default=False): bool,
         Required('loopback-audio', default=False): bool,
         Required('docker-in-docker', default=False): bool,  # (aka 'dind')
 
+        # Paths to Docker volumes.
+        #
+        # For in-tree Docker images, volumes can be parsed from Dockerfile.
+        # This only works for the Dockerfile itself: if a volume is defined in
+        # a base image, it will need to be declared here. Out-of-tree Docker
+        # images will also require explicit volume annotation.
+        #
+        # Caches are often mounted to the same path as Docker volumes. In this
+        # case, they take precedence over a Docker volume. But a volume still
+        # needs to be declared for the path.
+        Optional('volumes', default=[]): [basestring],
+
         # caches to set up for the task
         Optional('caches'): [{
             # only one type is supported by any of the workers right now
             'type': 'persistent',
 
             # name of the cache, allowing re-use by subsequent tasks naming the
             # same cache
             'name': basestring,
@@ -589,24 +602,37 @@ def index_builder(name):
 @payload_builder('docker-worker')
 def build_docker_worker_payload(config, task, task_def):
     worker = task['worker']
     level = int(config.params['level'])
 
     image = worker['docker-image']
     if isinstance(image, dict):
         if 'in-tree' in image:
+            name = image['in-tree']
             docker_image_task = 'build-docker-image-' + image['in-tree']
             task.setdefault('dependencies', {})['docker-image'] = docker_image_task
 
             image = {
                 "path": "public/image.tar.zst",
                 "taskId": {"task-reference": "<docker-image>"},
                 "type": "task-image",
             }
+
+            # Find VOLUME in Dockerfile.
+            volumes = dockerutil.parse_volumes(name)
+            for v in sorted(volumes):
+                if v in worker['volumes']:
+                    raise Exception('volume %s already defined; '
+                                    'if it is defined in a Dockerfile, '
+                                    'it does not need to be specified in the '
+                                    'worker definition' % v)
+
+                worker['volumes'].append(v)
+
         elif 'indexed' in image:
             image = {
                 "path": "public/image.tar.zst",
                 "namespace": image['indexed'],
                 "type": "indexed-image",
             }
         else:
             raise Exception("unknown docker image type")
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -6,16 +6,17 @@ from __future__ import absolute_import, 
 
 import hashlib
 import os
 import shutil
 import subprocess
 import tarfile
 import tempfile
 
+from mozbuild.util import memoize
 from mozpack.archive import (
     create_tar_gz_from_files,
 )
 from .. import GECKO
 
 
 IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'docker')
 INDEX_PREFIX = 'docker.images.v2'
@@ -158,8 +159,29 @@ def build_from_context(docker_bin, conte
 
         args.append('.')
 
         res = subprocess.call(args, cwd=os.path.join(d, prefix))
         if res:
             raise Exception('error building image')
     finally:
         shutil.rmtree(d)
+
+
+@memoize
+def parse_volumes(image):
+    """Parse VOLUME entries from a Dockerfile for an image."""
+    volumes = set()
+
+    with open(os.path.join(IMAGE_DIR, image, 'Dockerfile'), 'rb') as fh:
+        for line in fh:
+            line = line.strip()
+            if not line.startswith(b'VOLUME '):
+                continue
+
+            v = line.split(None, 1)[1]
+            if v.startswith(b'['):
+                raise ValueError('cannot parse array syntax for VOLUME; '
+                                 'convert to multiple entries')
+
+            volumes |= set(v.split())
+
+    return volumes