Bug 1288567 - Add special Dockerfile syntax to add arbitrary files to context; r?dustin draft
authorGregory Szorc <gps@mozilla.com>
Thu, 21 Jul 2016 16:51:30 -0700
changeset 392507 6124f28061d9fe4be430749a658f6c5a3e78c268
parent 392506 853f5bf425bb70f9a8ca05aa4512eb4d3f7b8efe
child 392508 2a0325ae8ac98efbc661b5e7dd5e43efd3e5d60e
push id24042
push userbmo:gps@mozilla.com
push dateMon, 25 Jul 2016 18:25:42 +0000
reviewersdustin
bugs1288567
milestone50.0a1
Bug 1288567 - Add special Dockerfile syntax to add arbitrary files to context; r?dustin A limitation of traditional docker build context generation is that it only includes files from the same directory as the Dockerfile. When repositories have multiple, related Dockerfiles, this limitation results in file duplication or putting all Dockerfiles in the same directory (which isn't feasible for mozilla-central since they would need to be in the root directory). This commit enhances Dockerfiles to allow *any* file from the repository checkout to be ADDed to the docker build context. Using the syntax "# %include <path>" you are able to include files or directories (relative to the top source directory root) in the generated context archive. Files added this way are available under the "topsrcdir/" path and can be ADDed to Docker images. Since context archive generation is deterministic and the hash of the resulting archive is used to determine when images need to be rebuilt, any extra included file that changes will change the hash of the context archive and force image regeneration. Basic tests for the new feature have been added. MozReview-Commit-ID: 4hPZesJuGQV
taskcluster/docs/docker-images.rst
taskcluster/docs/index.rst
taskcluster/taskgraph/test/test_util_docker.py
taskcluster/taskgraph/util/docker.py
new file mode 100644
--- /dev/null
+++ b/taskcluster/docs/docker-images.rst
@@ -0,0 +1,42 @@
+.. _taskcluster_dockerimages:
+
+=============
+Docker Images
+=============
+
+TaskCluster Docker images are defined in the source directory under
+``testing/docker``. Each directory therein is named after an
+image used as part of the task graph.
+
+Adding Extra Files to Images
+============================
+
+Dockerfile syntax has been extended to allow *any* file from the
+source checkout to be added to the image build *context*. (Traditionally
+you can only ``ADD`` files from the same directory as the Dockerfile.)
+
+Simply add the following syntax as a comment in a Dockerfile::
+
+   # %include <path>
+
+e.g.::
+
+   # %include mach
+   # %include testing/mozharness
+
+The argument to ``# %include`` is a relative path from the root level of
+the source directory. It can be a file or a directory. If a file, only that
+file will be added. If a directory, every file under that directory will be
+added (even files that are untracked or ignored by version control).
+
+Files added using ``# %include`` syntax are available inside the build
+context under the ``topsrcdir/`` path.
+
+Files are added as they exist on disk. e.g. executable flags should be
+preserved. However, the file owner/group is changed to ``root`` and the
+``mtime`` of the file is normalized.
+
+Here is an example Dockerfile snippet::
+
+   # %include mach
+   ADD topsrcdir/mach /home/worker/mach
--- a/taskcluster/docs/index.rst
+++ b/taskcluster/docs/index.rst
@@ -23,8 +23,9 @@ check out the :doc:`how-to section <how-
 
     taskgraph
     parameters
     attributes
     kinds
     transforms
     yaml-templates
     how-tos
+    docker-images
--- a/taskcluster/taskgraph/test/test_util_docker.py
+++ b/taskcluster/taskgraph/test/test_util_docker.py
@@ -72,8 +72,117 @@ class TestDocker(unittest.TestCase):
             # File prefix should be "my_image"
             with tarfile.open(tp, 'r:gz') as tf:
                 self.assertEqual(tf.getnames(), [
                     'my_image/Dockerfile',
                     'my_image/extra',
                 ])
         finally:
             shutil.rmtree(tmp)
+
+    def test_create_context_topsrcdir_files(self):
+        tmp = tempfile.mkdtemp()
+        try:
+            d = os.path.join(tmp, 'test-image')
+            os.mkdir(d)
+            with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+                fh.write(b'# %include extra/file0\n')
+            os.chmod(os.path.join(d, 'Dockerfile'), MODE_STANDARD)
+
+            extra = os.path.join(tmp, 'extra')
+            os.mkdir(extra)
+            with open(os.path.join(extra, 'file0'), 'a'):
+                pass
+            os.chmod(os.path.join(extra, 'file0'), MODE_STANDARD)
+
+            tp = os.path.join(tmp, 'tar')
+            h = docker.create_context_tar(tmp, d, tp, 'test_image')
+            self.assertEqual(h, '20faeb7c134f21187b142b5fadba94ae58865dc929c6c293d8cbc0a087269338')
+
+            with tarfile.open(tp, 'r:gz') as tf:
+                self.assertEqual(tf.getnames(), [
+                    'test_image/Dockerfile',
+                    'test_image/topsrcdir/extra/file0',
+                ])
+        finally:
+            shutil.rmtree(tmp)
+
+    def test_create_context_absolute_path(self):
+        tmp = tempfile.mkdtemp()
+        try:
+            d = os.path.join(tmp, 'test-image')
+            os.mkdir(d)
+
+            # Absolute paths in %include syntax are not allowed.
+            with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+                fh.write(b'# %include /etc/shadow\n')
+
+            with self.assertRaisesRegexp(Exception, 'cannot be absolute'):
+                docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test')
+        finally:
+            shutil.rmtree(tmp)
+
+    def test_create_context_outside_topsrcdir(self):
+        tmp = tempfile.mkdtemp()
+        try:
+            d = os.path.join(tmp, 'test-image')
+            os.mkdir(d)
+
+            with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+                fh.write(b'# %include foo/../../../etc/shadow\n')
+
+            with self.assertRaisesRegexp(Exception, 'path outside topsrcdir'):
+                docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test')
+        finally:
+            shutil.rmtree(tmp)
+
+    def test_create_context_missing_extra(self):
+        tmp = tempfile.mkdtemp()
+        try:
+            d = os.path.join(tmp, 'test-image')
+            os.mkdir(d)
+
+            with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+                fh.write(b'# %include does/not/exist\n')
+
+            with self.assertRaisesRegexp(Exception, 'path does not exist'):
+                docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test')
+        finally:
+            shutil.rmtree(tmp)
+
+    def test_create_context_extra_directory(self):
+        tmp = tempfile.mkdtemp()
+        try:
+            d = os.path.join(tmp, 'test-image')
+            os.mkdir(d)
+
+            with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+                fh.write(b'# %include extra\n')
+                fh.write(b'# %include file0\n')
+            os.chmod(os.path.join(d, 'Dockerfile'), MODE_STANDARD)
+
+            extra = os.path.join(tmp, 'extra')
+            os.mkdir(extra)
+            for i in range(3):
+                p = os.path.join(extra, 'file%d' % i)
+                with open(p, 'wb') as fh:
+                    fh.write(b'file%d' % i)
+                os.chmod(p, MODE_STANDARD)
+
+            with open(os.path.join(tmp, 'file0'), 'a'):
+                pass
+            os.chmod(os.path.join(tmp, 'file0'), MODE_STANDARD)
+
+            tp = os.path.join(tmp, 'tar')
+            h = docker.create_context_tar(tmp, d, tp, 'my_image')
+
+            self.assertEqual(h, 'e5440513ab46ae4c1d056269e1c6715d5da7d4bd673719d360411e35e5b87205')
+
+            with tarfile.open(tp, 'r:gz') as tf:
+                self.assertEqual(tf.getnames(), [
+                    'my_image/Dockerfile',
+                    'my_image/topsrcdir/extra/file0',
+                    'my_image/topsrcdir/extra/file1',
+                    'my_image/topsrcdir/extra/file2',
+                    'my_image/topsrcdir/file0',
+                ])
+        finally:
+            shutil.rmtree(tmp)
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -49,27 +49,65 @@ def generate_context_hash(topsrcdir, ima
 
 def create_context_tar(topsrcdir, context_dir, out_path, prefix):
     """Create a context tarball.
 
     A directory ``context_dir`` containing a Dockerfile will be assembled into
     a gzipped tar file at ``out_path``. Files inside the archive will be
     prefixed by directory ``prefix``.
 
+    We also scan the source Dockerfile for special syntax that influences
+    context generation.
+
+    If a line in the Dockerfile has the form ``# %include <path>``,
+    the relative path specified on that line will be matched against
+    files in the source repository and added to the context under the
+    path ``topsrcdir/``. If an entry is a directory, we add all files
+    under that directory.
+
     Returns the SHA-256 hex digest of the created archive.
     """
     archive_files = {}
 
     for root, dirs, files in os.walk(context_dir):
         for f in files:
             source_path = os.path.join(root, f)
             rel = source_path[len(context_dir) + 1:]
             archive_path = os.path.join(prefix, rel)
             archive_files[archive_path] = source_path
 
+    # Parse Dockerfile for special syntax of extra files to include.
+    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
+        for line in fh:
+            line = line.rstrip()
+            if not line.startswith('# %include'):
+                continue
+
+            p = line[len('# %include '):].strip()
+            if os.path.isabs(p):
+                raise Exception('extra include path cannot be absolute: %s' % p)
+
+            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
+            # Check for filesystem traversal exploits.
+            if not fs_path.startswith(topsrcdir):
+                raise Exception('extra include path outside topsrcdir: %s' % p)
+
+            if not os.path.exists(fs_path):
+                raise Exception('extra include path does not exist: %s' % p)
+
+            if os.path.isdir(fs_path):
+                for root, dirs, files in os.walk(fs_path):
+                    for f in files:
+                        source_path = os.path.join(root, f)
+                        archive_path = os.path.join(prefix, 'topsrcdir', p, f)
+                        archive_files[archive_path] = source_path
+            else:
+                archive_path = os.path.join(prefix, 'topsrcdir', p)
+                archive_files[archive_path] = fs_path
+
     with open(out_path, 'wb') as fh:
         create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
 
     h = hashlib.sha256()
     with open(out_path, 'rb') as fh:
         while True:
             data = fh.read(32768)
             if not data: