Bug 1288567 - Add special Dockerfile syntax to add arbitrary files to context; r?dustin
A limitation of traditional docker build context generation is it
only includes files from the same directory as the Dockerfile. When
repositories have multiple, related Dockerfiles, this limitation
results in file duplication or putting all Dockerfiles in the same
directory (which isn't feasible for mozilla-central since they would
need to be in the root directory).
This commit enhances Dockerfiles to allow *any* file from the
repository checkout to be ADDed to the docker build context.
Using the syntax "# %include <path>" you are able to include files
or directories (relative to the top source directory root) in the
generated context archive. Files added this way are available under the
"topsrcdir/" path and can be ADDed to Docker images.
Since context archive generation is deterministic and the hash of
the resulting archive is used to determine when images need to be
rebuilt, any extra included file that changes will change the hash
of the context archive and force image regeneration.
Basic tests for the new feature have been added.
MozReview-Commit-ID: 4hPZesJuGQV
new file mode 100644
--- /dev/null
+++ b/taskcluster/docs/docker-images.rst
@@ -0,0 +1,42 @@
+.. _taskcluster_dockerimages:
+
+=============
+Docker Images
+=============
+
+TaskCluster Docker images are defined in the source directory under
+``testing/docker``. Each directory therein contains the name of an
+image used as part of the task graph.
+
+Adding Extra Files to Images
+============================
+
+Dockerfile syntax has been extended to allow *any* file from the
+source checkout to be added to the image build *context*. (Traditionally
+you can only ``ADD`` files from the same directory as the Dockerfile.)
+
+Simply add the following syntax as a comment in a Dockerfile::
+
+ # %include <path>
+
+e.g.::
+
+ # %include mach
+ # %include testing/mozharness
+
+The argument to ``# %include`` is a relative path from the root level of
+the source directory. It can be a file or a directory. If a file, only that
+file will be added. If a directory, every file under that directory will be
+added (even files that are untracked or ignored by version control).
+
+Files added using ``# %include`` syntax are available inside the build
+context under the ``topsrcdir/`` path.
+
+Files are added as they exist on disk. e.g. executable flags should be
+preserved. However, the file owner/group is changed to ``root`` and the
+``mtime`` of the file is normalized.
+
+Here is an example Dockerfile snippet::
+
+ # %include mach
+ ADD topsrcdir/mach /home/worker/mach
--- a/taskcluster/docs/index.rst
+++ b/taskcluster/docs/index.rst
@@ -23,8 +23,9 @@ check out the :doc:`how-to section <how-
taskgraph
parameters
attributes
kinds
transforms
yaml-templates
how-tos
+ docker-images
--- a/taskcluster/taskgraph/test/test_util_docker.py
+++ b/taskcluster/taskgraph/test/test_util_docker.py
@@ -72,8 +72,117 @@ class TestDocker(unittest.TestCase):
# File prefix should be "my_image"
with tarfile.open(tp, 'r:gz') as tf:
self.assertEqual(tf.getnames(), [
'my_image/Dockerfile',
'my_image/extra',
])
finally:
shutil.rmtree(tmp)
+
+ def test_create_context_topsrcdir_files(self):
+ tmp = tempfile.mkdtemp()
+ try:
+ d = os.path.join(tmp, 'test-image')
+ os.mkdir(d)
+ with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+ fh.write(b'# %include extra/file0\n')
+ os.chmod(os.path.join(d, 'Dockerfile'), MODE_STANDARD)
+
+ extra = os.path.join(tmp, 'extra')
+ os.mkdir(extra)
+ with open(os.path.join(extra, 'file0'), 'a'):
+ pass
+ os.chmod(os.path.join(extra, 'file0'), MODE_STANDARD)
+
+ tp = os.path.join(tmp, 'tar')
+ h = docker.create_context_tar(tmp, d, tp, 'test_image')
+ self.assertEqual(h, '20faeb7c134f21187b142b5fadba94ae58865dc929c6c293d8cbc0a087269338')
+
+ with tarfile.open(tp, 'r:gz') as tf:
+ self.assertEqual(tf.getnames(), [
+ 'test_image/Dockerfile',
+ 'test_image/topsrcdir/extra/file0',
+ ])
+ finally:
+ shutil.rmtree(tmp)
+
+ def test_create_context_absolute_path(self):
+ tmp = tempfile.mkdtemp()
+ try:
+ d = os.path.join(tmp, 'test-image')
+ os.mkdir(d)
+
+ # Absolute paths in %include syntax are not allowed.
+ with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+ fh.write(b'# %include /etc/shadow\n')
+
+ with self.assertRaisesRegexp(Exception, 'cannot be absolute'):
+ docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test')
+ finally:
+ shutil.rmtree(tmp)
+
+ def test_create_context_outside_topsrcdir(self):
+ tmp = tempfile.mkdtemp()
+ try:
+ d = os.path.join(tmp, 'test-image')
+ os.mkdir(d)
+
+ with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+ fh.write(b'# %include foo/../../../etc/shadow\n')
+
+ with self.assertRaisesRegexp(Exception, 'path outside topsrcdir'):
+ docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test')
+ finally:
+ shutil.rmtree(tmp)
+
+ def test_create_context_missing_extra(self):
+ tmp = tempfile.mkdtemp()
+ try:
+ d = os.path.join(tmp, 'test-image')
+ os.mkdir(d)
+
+ with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+ fh.write(b'# %include does/not/exist\n')
+
+ with self.assertRaisesRegexp(Exception, 'path does not exist'):
+ docker.create_context_tar(tmp, d, os.path.join(tmp, 'tar'), 'test')
+ finally:
+ shutil.rmtree(tmp)
+
+ def test_create_context_extra_directory(self):
+ tmp = tempfile.mkdtemp()
+ try:
+ d = os.path.join(tmp, 'test-image')
+ os.mkdir(d)
+
+ with open(os.path.join(d, 'Dockerfile'), 'wb') as fh:
+ fh.write(b'# %include extra\n')
+ fh.write(b'# %include file0\n')
+ os.chmod(os.path.join(d, 'Dockerfile'), MODE_STANDARD)
+
+ extra = os.path.join(tmp, 'extra')
+ os.mkdir(extra)
+ for i in range(3):
+ p = os.path.join(extra, 'file%d' % i)
+ with open(p, 'wb') as fh:
+ fh.write(b'file%d' % i)
+ os.chmod(p, MODE_STANDARD)
+
+ with open(os.path.join(tmp, 'file0'), 'a'):
+ pass
+ os.chmod(os.path.join(tmp, 'file0'), MODE_STANDARD)
+
+ tp = os.path.join(tmp, 'tar')
+ h = docker.create_context_tar(tmp, d, tp, 'my_image')
+
+ self.assertEqual(h, 'e5440513ab46ae4c1d056269e1c6715d5da7d4bd673719d360411e35e5b87205')
+
+ with tarfile.open(tp, 'r:gz') as tf:
+ self.assertEqual(tf.getnames(), [
+ 'my_image/Dockerfile',
+ 'my_image/topsrcdir/extra/file0',
+ 'my_image/topsrcdir/extra/file1',
+ 'my_image/topsrcdir/extra/file2',
+ 'my_image/topsrcdir/file0',
+ ])
+ finally:
+ shutil.rmtree(tmp)
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -49,27 +49,65 @@ def generate_context_hash(topsrcdir, ima
def create_context_tar(topsrcdir, context_dir, out_path, prefix):
"""Create a context tarball.
A directory ``context_dir`` containing a Dockerfile will be assembled into
a gzipped tar file at ``out_path``. Files inside the archive will be
prefixed by directory ``prefix``.
+ We also scan the source Dockerfile for special syntax that influences
+ context generation.
+
+ If a line in the Dockerfile has the form ``# %include <path>``,
+ the relative path specified on that line will be matched against
+ files in the source repository and added to the context under the
+ path ``topsrcdir/``. If an entry is a directory, we add all files
+ under that directory.
+
Returns the SHA-256 hex digest of the created archive.
"""
archive_files = {}
for root, dirs, files in os.walk(context_dir):
for f in files:
source_path = os.path.join(root, f)
rel = source_path[len(context_dir) + 1:]
archive_path = os.path.join(prefix, rel)
archive_files[archive_path] = source_path
+ # Parse Dockerfile for special syntax of extra files to include.
+ with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
+ for line in fh:
+ line = line.rstrip()
+ if not line.startswith('# %include'):
+ continue
+
+ p = line[len('# %include '):].strip()
+ if os.path.isabs(p):
+ raise Exception('extra include path cannot be absolute: %s' % p)
+
+ fs_path = os.path.normpath(os.path.join(topsrcdir, p))
+ # Check for filesystem traversal exploits.
+ if not fs_path.startswith(topsrcdir):
+ raise Exception('extra include path outside topsrcdir: %s' % p)
+
+ if not os.path.exists(fs_path):
+ raise Exception('extra include path does not exist: %s' % p)
+
+ if os.path.isdir(fs_path):
+ for root, dirs, files in os.walk(fs_path):
+ for f in files:
+ source_path = os.path.join(root, f)
+ archive_path = os.path.join(prefix, 'topsrcdir', p, f)
+ archive_files[archive_path] = source_path
+ else:
+ archive_path = os.path.join(prefix, 'topsrcdir', p)
+ archive_files[archive_path] = fs_path
+
with open(out_path, 'wb') as fh:
create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)
h = hashlib.sha256()
with open(out_path, 'rb') as fh:
while True:
data = fh.read(32768)
if not data: