Bug 1419638 - Allow to pass arguments to docker when building docker-images. r=dustin draft
authorMike Hommey <mh+mozilla@glandium.org>
Sun, 24 Dec 2017 07:51:29 +0900
changeset 715671 56cbc4041aed566952dbe81eccc8e62cb95fa428
parent 715670 0525a6c2030c363339e616d02790eb1f3aa7d498
child 715672 72335a93d9a1b16af067eccd6fd5dfd7c5bc5c4d
push id94224
push userbmo:mh+mozilla@glandium.org
push dateThu, 04 Jan 2018 11:02:43 +0000
reviewersdustin
bugs1419638
milestone59.0a1
Bug 1419638 - Allow to pass arguments to docker when building docker-images. r=dustin Ideally, we'd simply use the --build-arg docker argument along with ARG in the Dockerfile, but that's only supported from Docker API 1.21, and we're stuck on 1.18 for the moment. So we add another hack to how we handle the Dockerfile, by adding a commented syntax that allows declaring arguments for the Dockerfile. The arguments can be defined in the docker images kind.yml file through the `args` keyword. Under the hood, they are passed down to the docker image task through the environment. The mach taskcluster-build-image command then uses the corresponding values from the environment to generate a "preprocessed" Dockerfile for its context.
taskcluster/mach_commands.py
taskcluster/taskgraph/docker.py
taskcluster/taskgraph/transforms/docker_image.py
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -412,19 +412,19 @@ class TaskClusterImagesProvider(object):
     @CommandArgument('--context-only',
                      help="File name the context tarball should be written to."
                           "with this option it will only build the context.tar.",
                      metavar='context.tar')
     def build_image(self, image_name, context_only):
         from taskgraph.docker import build_image, build_context
         try:
             if context_only is None:
-                build_image(image_name)
+                build_image(image_name, os.environ)
             else:
-                build_context(image_name, context_only)
+                build_context(image_name, context_only, os.environ)
         except Exception:
             traceback.print_exc()
             sys.exit(1)
 
 
 @CommandProvider
 class TaskClusterPartialsData(object):
     @Command('release-history', category="ci",
--- a/taskcluster/taskgraph/docker.py
+++ b/taskcluster/taskgraph/docker.py
@@ -48,32 +48,32 @@ def load_image_by_task_id(task_id, tag=N
     if tag:
         print("Re-tagged as: {}".format(tag))
     else:
         tag = '{}:{}'.format(result['image'], result['tag'])
     print("Try: docker run -ti --rm {} bash".format(tag))
     return True
 
 
-def build_context(name, outputFile):
+def build_context(name, outputFile, args=None):
     """Build a context.tar for image with specified name.
     """
     if not name:
         raise ValueError('must provide a Docker image name')
     if not outputFile:
         raise ValueError('must provide a outputFile')
 
     image_dir = os.path.join(docker.IMAGE_DIR, name)
     if not os.path.isdir(image_dir):
         raise Exception('image directory does not exist: %s' % image_dir)
 
-    docker.create_context_tar(GECKO, image_dir, outputFile, "")
+    docker.create_context_tar(GECKO, image_dir, outputFile, "", args)
 
 
-def build_image(name):
+def build_image(name, args=None):
     """Build a Docker image of specified name.
 
     Output from image building process will be printed to stdout.
     """
     if not name:
         raise ValueError('must provide a Docker image name')
 
     image_dir = os.path.join(docker.IMAGE_DIR, name)
@@ -93,17 +93,17 @@ def build_image(name):
 
     # We obtain a context archive and build from that. Going through the
     # archive creation is important: it normalizes things like file owners
     # and mtimes to increase the chances that image generation is
     # deterministic.
     fd, context_path = tempfile.mkstemp()
     os.close(fd)
     try:
-        docker.create_context_tar(GECKO, image_dir, context_path, name)
+        docker.create_context_tar(GECKO, image_dir, context_path, name, args)
         docker.build_from_context(docker_bin, context_path, name, tag)
     finally:
         os.unlink(context_path)
 
     print('Successfully built %s and tagged with %s' % (name, tag))
 
     if tag.endswith(':latest'):
         print('*' * 50)
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@@ -29,35 +29,40 @@ docker_image_schema = Schema({
     Required('name'): basestring,
 
     # Treeherder symbol.
     Required('symbol'): basestring,
 
     # relative path (from config.path) to the file the docker image was defined
     # in.
     Optional('job-from'): basestring,
+
+    # Arguments to use for the Dockerfile.
+    Optional('args'): {basestring: basestring},
 })
 
 
 @transforms.add
 def validate(config, tasks):
     for task in tasks:
         yield validate_schema(
             docker_image_schema, task,
             "In docker image {!r}:".format(task.get('name', 'unknown')))
 
 
 @transforms.add
 def fill_template(config, tasks):
     for task in tasks:
         image_name = task.pop('name')
         job_symbol = task.pop('symbol')
+        args = task.pop('args', {})
 
         context_path = os.path.join('taskcluster', 'docker', image_name)
-        context_hash = generate_context_hash(GECKO, context_path, image_name)
+        context_hash = generate_context_hash(
+            GECKO, context_path, image_name, args)
 
         description = 'Build the docker image {} for use by dependent tasks'.format(
             image_name)
 
         # Adjust the zstandard compression level based on the execution level.
         # We use faster compression for level 1 because we care more about
         # end-to-end times. We use slower/better compression for other levels
         # because images are read more often and it is worth the trade-off to
@@ -115,16 +120,19 @@ def fill_template(config, tasks):
                 },
                 'chain-of-trust': True,
                 'docker-in-docker': True,
                 'taskcluster-proxy': True,
                 'max-run-time': 7200,
             },
         }
 
+        for k, v in args.items():
+            taskdesc['worker']['env'][k] = v
+
         add_optimization(
             config, taskdesc,
             cache_type="docker-images.v1",
             cache_name=image_name,
             digest=context_hash,
         )
 
         yield taskdesc
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -1,22 +1,24 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import hashlib
 import os
+import re
 import shutil
 import subprocess
 import tarfile
 import tempfile
 
 from mozbuild.util import memoize
+from mozpack.files import GeneratedFile
 from mozpack.archive import (
     create_tar_gz_from_files,
 )
 from .. import GECKO
 
 
 IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'docker')
 
@@ -44,60 +46,79 @@ def docker_image(name, by_tag=False):
     try:
         with open(os.path.join(IMAGE_DIR, name, 'VERSION')) as f:
             tag = f.read().strip()
     except IOError:
         tag = 'latest'
     return '{}/{}:{}'.format(registry, name, tag)
 
 
-def generate_context_hash(topsrcdir, image_path, image_name):
+def generate_context_hash(topsrcdir, image_path, image_name, args=None):
     """Generates a sha256 hash for context directory used to build an image."""
 
     # It is a bit unfortunate we have to create a temp file here - it would
     # be nicer to use an in-memory buffer.
     fd, p = tempfile.mkstemp()
     os.close(fd)
     try:
-        return create_context_tar(topsrcdir, image_path, p, image_name)
+        return create_context_tar(topsrcdir, image_path, p, image_name, args)
     finally:
         os.unlink(p)
 
 
-def create_context_tar(topsrcdir, context_dir, out_path, prefix):
+def create_context_tar(topsrcdir, context_dir, out_path, prefix, args=None):
     """Create a context tarball.
 
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.
 
     We also scan the source Dockerfile for special syntax that influences
     context generation.
 
     If a line in the Dockerfile has the form ``# %include <path>``,
     the relative path specified on that line will be matched against
     files in the source repository and added to the context under the
     path ``topsrcdir/``. If an entry is a directory, we add all files
     under that directory.
 
+    If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of
+    the string ``$<name>`` on subsequent lines are replaced with the value
+    found in the ``args`` argument. Note that ``parse_volumes`` reads the
+    unsubstituted Dockerfile, so VOLUME definitions must not use ``%ARG``s.
+
     Returns the SHA-256 hex digest of the created archive.
     """
     archive_files = {}
+    replace = []
 
     for root, dirs, files in os.walk(context_dir):
         for f in files:
             source_path = os.path.join(root, f)
             rel = source_path[len(context_dir) + 1:]
             archive_path = os.path.join(prefix, rel)
             archive_files[archive_path] = source_path
 
     # Parse Dockerfile for special syntax of extra files to include.
+    content = []
     with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
         for line in fh:
-            line = line.rstrip()
+            if line.startswith('# %ARG'):
+                p = line[len('# %ARG '):].strip()
+                if not args or p not in args:
+                    raise Exception('missing argument: {}'.format(p))
+                replace.append((re.compile(r'\${}\b'.format(p)),
+                                args[p].encode('ascii')))
+                continue
+
+            for regexp, s in replace:
+                line = re.sub(regexp, s, line)
+
+            content.append(line)
+
             if not line.startswith('# %include'):
                 continue
 
             p = line[len('# %include '):].strip()
             if os.path.isabs(p):
                 raise Exception('extra include path cannot be absolute: %s' % p)
 
             fs_path = os.path.normpath(os.path.join(topsrcdir, p))
@@ -113,16 +134,19 @@ def create_context_tar(topsrcdir, contex
                     for f in files:
                         source_path = os.path.join(root, f)
                         archive_path = os.path.join(prefix, 'topsrcdir', p, f)
                         archive_files[archive_path] = source_path
             else:
                 archive_path = os.path.join(prefix, 'topsrcdir', p)
                 archive_files[archive_path] = fs_path
 
+    archive_files[os.path.join(prefix, 'Dockerfile')] = \
+        GeneratedFile(b''.join(content))
+
     with open(out_path, 'wb') as fh:
         create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
 
     h = hashlib.sha256()
     with open(out_path, 'rb') as fh:
         while True:
             data = fh.read(32768)
             if not data:
@@ -168,16 +192,17 @@ def build_from_context(docker_bin, conte
 @memoize
 def parse_volumes(image):
     """Parse VOLUME entries from a Dockerfile for an image."""
     volumes = set()
 
     with open(os.path.join(IMAGE_DIR, image, 'Dockerfile'), 'rb') as fh:
         for line in fh:
             line = line.strip()
+            # We assume VOLUME definitions don't use %ARG substitutions.
             if not line.startswith(b'VOLUME '):
                 continue
 
             v = line.split(None, 1)[1]
             if v.startswith(b'['):
                 raise ValueError('cannot parse array syntax for VOLUME; '
                                  'convert to multiple entries')