Bug 1415619: Use `gecko.caches` for indexing docker tasks. (draft)
author:    Tom Prince <mozilla@hocat.ca>
date:      Thu, 09 Nov 2017 17:08:41 -0700
changeset: 699895 6c0666068e506c622c26849aa907367e014f1678
parent:    699894 88bb9924a072faca96861214ab7d06657b5ee23e
child:     699896 a79e62c8e7f45993ad5760e1d607ac933b860e46
child:     699899 de95ec5a42fc5b49d9c0a5cdc0956e3989a8cf40
push id:   89712
push user: bmo:mozilla@hocat.ca
push date: Fri, 17 Nov 2017 21:39:08 +0000
bugs:      1415619
milestone: 59.0a1
Bug 1415619: Use `gecko.caches` for indexing docker tasks.

MozReview-Commit-ID: IN17XmVk7HJ
taskcluster/docs/docker-images.rst
taskcluster/taskgraph/docker.py
taskcluster/taskgraph/transforms/docker_image.py
taskcluster/taskgraph/util/cached_tasks.py
taskcluster/taskgraph/util/docker.py
--- a/taskcluster/docs/docker-images.rst
+++ b/taskcluster/docs/docker-images.rst
@@ -73,19 +73,19 @@ This ensures that the hash is consistent
 in different hashes being generated.
 
 Task Image Index Namespace
 ..........................
 
 Images that are built on push and uploaded as an artifact of a task will be indexed under the
 following namespaces.
 
-* docker.images.v2.level-{level}.{image_name}.latest
-* docker.images.v2.level-{level}.{image_name}.pushdate.{year}.{month}-{day}-{pushtime}
-* docker.images.v2.level-{level}.{image_name}.hash.{context_hash}
+* gecko.cache.level-{level}.docker-images.v1.{name}.hash.{digest}
+* gecko.cache.level-{level}.docker-images.v1.{name}.latest
+* gecko.cache.level-{level}.docker-images.v1.{name}.pushdate.{year}.{month}-{day}-{pushtime}
 
 Images can be browsed by pushdate and context hash, and the 'latest' namespace
 always points to the most recently built image.  This functions similarly to the
 'latest' tag for docker images that are pushed to a registry.
 
 Docker Registry Images (prebuilt)
 :::::::::::::::::::::::::::::::::
 
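For illustration, a minimal sketch of how the namespaces above expand for a
concrete image. The image name and digest are placeholder values, and the
template is assumed to match TARGET_CACHE_INDEX in
taskcluster/taskgraph/util/cached_tasks.py:

    # Sketch only: 'debian7-build' and the digest are hypothetical values.
    template = 'gecko.cache.level-{level}.docker-images.v1.{name}.hash.{digest}'
    route = template.format(level=3, name='debian7-build', digest='0f3a' * 16)
    # route == 'gecko.cache.level-3.docker-images.v1.debian7-build.hash.0f3a0f3a...'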
--- a/taskcluster/taskgraph/docker.py
+++ b/taskcluster/taskgraph/docker.py
@@ -16,26 +16,26 @@ import which
 from subprocess import Popen, PIPE
 from io import BytesIO
 
 from taskgraph.util import docker
 from taskgraph.util.taskcluster import (
     find_task_id,
     get_artifact_url,
 )
+from taskgraph.util.cached_tasks import cached_index_path
 from . import GECKO
 
-DOCKER_INDEX = docker.INDEX_PREFIX + '.{}.{}.hash.{}'
-
 
 def load_image_by_name(image_name, tag=None):
     context_path = os.path.join(GECKO, 'taskcluster', 'docker', image_name)
     context_hash = docker.generate_context_hash(GECKO, context_path, image_name)
 
-    index_path = DOCKER_INDEX.format('level-3', image_name, context_hash)
+    index_path = cached_index_path(
+        level=3, cache_type='docker-images.v1', cache_name=image_name, digest=context_hash)
     task_id = find_task_id(index_path)
 
     return load_image_by_task_id(task_id, tag)
 
 
 def load_image_by_task_id(task_id, tag=None):
     artifact_url = get_artifact_url(task_id, 'public/image.tar.zst')
     result = load_image(artifact_url, tag)
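As a usage sketch (hypothetical image name and digest), the new helper builds
the same index path that the removed DOCKER_INDEX template produced:

    from taskgraph.util.cached_tasks import cached_index_path

    # 'image_builder' and the digest are placeholder values.
    index_path = cached_index_path(
        level=3, cache_type='docker-images.v1',
        cache_name='image_builder', digest='ab12' * 16)
    # index_path == 'gecko.cache.level-3.docker-images.v1.image_builder.hash.ab12...'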
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@@ -11,81 +11,48 @@ from __future__ import absolute_import, 
 
 import os
 
 from taskgraph.transforms.base import TransformSequence
 from .. import GECKO
 from taskgraph.util.docker import (
     docker_image,
     generate_context_hash,
-    INDEX_PREFIX,
 )
+from taskgraph.util.cached_tasks import add_optimization
 
 transforms = TransformSequence()
 
-ROUTE_TEMPLATES = [
-    'index.{index_prefix}.level-{level}.{image_name}.latest',
-    'index.{index_prefix}.level-{level}.{image_name}.pushdate.{year}.{month}-{day}-{pushtime}',
-    'index.{index_prefix}.level-{level}.{image_name}.hash.{context_hash}',
-]
-
 
 @transforms.add
 def fill_template(config, tasks):
     for task in tasks:
         image_name = task.pop('name')
         job_symbol = task.pop('symbol')
 
         context_path = os.path.join('taskcluster', 'docker', image_name)
         context_hash = generate_context_hash(GECKO, context_path, image_name)
 
         description = 'Build the docker image {} for use by dependent tasks'.format(
             image_name)
 
-        routes = []
-        for tpl in ROUTE_TEMPLATES:
-            routes.append(tpl.format(
-                index_prefix=INDEX_PREFIX,
-                level=config.params['level'],
-                image_name=image_name,
-                project=config.params['project'],
-                head_rev=config.params['head_rev'],
-                pushlog_id=config.params.get('pushlog_id', 0),
-                pushtime=config.params['moz_build_date'][8:],
-                year=config.params['moz_build_date'][0:4],
-                month=config.params['moz_build_date'][4:6],
-                day=config.params['moz_build_date'][6:8],
-                context_hash=context_hash,
-            ))
-
-        # As an optimization, if the context hash exists for a high level, that image
-        # task ID will be used.  The reasoning behind this is that eventually everything ends
-        # up on level 3 at some point if most tasks use this as a common image
-        # for a given context hash, a worker within Taskcluster does not need to contain
-        # the same image per branch.
-        optimization = {'index-search': ['{}.level-{}.{}.hash.{}'.format(
-            INDEX_PREFIX, level, image_name, context_hash)
-            for level in reversed(range(int(config.params['level']), 4))]}
-
         # Adjust the zstandard compression level based on the execution level.
         # We use faster compression for level 1 because we care more about
         # end-to-end times. We use slower/better compression for other levels
         # because images are read more often and it is worth the trade-off to
         # burn more CPU once to reduce image size.
         zstd_level = '3' if int(config.params['level']) == 1 else '10'
 
         # include some information that is useful in reconstructing this task
         # from JSON
         taskdesc = {
             'label': 'build-docker-image-' + image_name,
             'description': description,
             'attributes': {'image_name': image_name},
             'expires-after': '1 year',
-            'routes': routes,
-            'optimization': optimization,
             'scopes': ['secrets:get:project/taskcluster/gecko/hgfingerprint'],
             'treeherder': {
                 'symbol': job_symbol,
                 'platform': 'taskcluster-images/opt',
                 'kind': 'other',
                 'tier': 1,
             },
             'run-on-projects': [],
@@ -125,9 +92,16 @@ def fill_template(config, tasks):
                 },
                 'chain-of-trust': True,
                 'docker-in-docker': True,
                 'taskcluster-proxy': True,
                 'max-run-time': 7200,
             },
         }
 
+        add_optimization(
+            config, taskdesc,
+            cache_type="docker-images.v1",
+            cache_name=image_name,
+            digest=context_hash,
+        )
+
         yield taskdesc
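For reference, a self-contained sketch of the index-search optimization that
add_optimization is assumed to produce, mirroring the inline code this patch
removes ('image_builder' and the hash are hypothetical values):

    # Assumed behaviour, equivalent to the removed inline optimization.
    level = 1  # e.g. a try push
    optimization = {'index-search': [
        'gecko.cache.level-{}.docker-images.v1.{}.hash.{}'.format(
            l, 'image_builder', 'ab12' * 16)
        # reversed(range(level, 4)) tries level 3 first, then 2, then 1,
        # so a low-level push can reuse a higher-level image with the
        # same context hash.
        for l in reversed(range(level, 4))
    ]}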
--- a/taskcluster/taskgraph/util/cached_tasks.py
+++ b/taskcluster/taskgraph/util/cached_tasks.py
@@ -61,8 +61,35 @@ def add_optimization(config, taskdesc, c
 
     # ... and add some extra routes for humans
     subs['build_date_long'] = time.strftime("%Y.%m.%d.%Y%m%d%H%M%S",
                                             time.gmtime(config.params['build_date']))
     taskdesc['routes'].extend([
         'index.{}'.format(route.format(**subs))
         for route in EXTRA_CACHE_INDEXES
     ])
+
+
+def cached_index_path(level, cache_type, cache_name, digest=None, digest_data=None):
+    """
+    Get the index path needed to locate the task that would be created by
+    :func:`add_optimization`.
+
+    :param int level: The SCM level of the task to look for.
+    :param str cache_type: The type of task result being cached.
+    :param str cache_name: The name of the object being cached.
+    :param digest: A unique string identifying this version of the artifacts
+        being generated. Typically this will be the hash of inputs to the task.
+    :type digest: bytes or None
+    :param digest_data: A list of bytes representing the inputs of this task.
+        They will be concatenated and hashed to create the digest for this
+        task.
+    :type digest_data: list of bytes or None
+
+    :return str: The index path.
+    """
+    if (digest is None) == (digest_data is None):
+        raise Exception("Must pass exactly one of `digest` and `digest_data`.")
+    if digest is None:
+        digest = hashlib.sha256('\n'.join(digest_data)).hexdigest()
+
+    return TARGET_CACHE_INDEX.format(
+        level=level, type=cache_type, name=cache_name, digest=digest)
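A usage sketch for cached_index_path (hypothetical cache name and values):
callers pass either a precomputed digest or the raw digest_data, never both:

    from taskgraph.util.cached_tasks import cached_index_path

    # Either pass a precomputed digest ...
    path = cached_index_path(level=3, cache_type='docker-images.v1',
                             cache_name='image_builder', digest='ab12' * 16)
    # ... or the raw inputs, which are joined and sha256-hashed internally.
    path = cached_index_path(level=3, cache_type='docker-images.v1',
                             cache_name='image_builder',
                             digest_data=['VERSION 1', 'some-context-hash'])
    # Passing both, or neither, raises an Exception.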
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@@ -14,17 +14,16 @@ import tempfile
 from mozbuild.util import memoize
 from mozpack.archive import (
     create_tar_gz_from_files,
 )
 from .. import GECKO
 
 
 IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'docker')
-INDEX_PREFIX = 'docker.images.v2'
 
 
 def docker_image(name, by_tag=False):
     '''
         Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``,
         or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`.
     '''
     try:
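For context, a usage sketch of docker_image showing the two result forms its
docstring describes; the values are hypothetical, and the actual registry and
repository come from in-tree configuration:

    from taskgraph.util.docker import docker_image

    docker_image('image_builder')               # '<registry>/<repository>@sha256:<digest>'
    docker_image('image_builder', by_tag=True)  # '<registry>/<repository>:<tag>'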