Bug 1391789 - Improve cache coherence via run-task integration; r?dustin
authorGregory Szorc <gps@mozilla.com>
Fri, 18 Aug 2017 14:07:03 -0700
changeset 650037 7868ce6e0cae9eef0d140a2043be825ff2a62250
parent 650036 5178a7f5269a76edd2cb6ac559279e86ae1357e7
child 650038 51e741d7a5429bfb0a761695c96d43c6212617a1
push id 75226
push user bmo:gps@mozilla.com
push date Mon, 21 Aug 2017 16:19:27 +0000
reviewers dustin
bugs 1391789
milestone 57.0a1
Bug 1391789 - Improve cache coherence via run-task integration; r?dustin

Today, cache names are mostly static and are brittle as a result. In
theory, when a backwards incompatible change is made to something that
touches a cache, the cache name needs to be changed to ensure tasks
running the old code don't see cached data from the new task.
(Alternatively, all code is forward compatible, but that is hard to
implement in practice.)

For many things, the process works as planned. However, not everyone
knows that cache names need to be changed. And it isn't always obvious
that some things require fresh caches. When mistakes are made, tasks
break intermittently due to cache wonkiness.

One area where we get into trouble is with UID and GID mismatch. Task A
will use a Docker image where our standard "worker" user/group is
UID/GID 1000:1000. Then Task B will use UID/GID 500:500. (This is
common when mixing Debian and Red Hat based distros.) If they use the
same cache, then Task B needs to chown/chmod all files in the cache or
there could be a permissions problem. This is exactly why run-task
recursively chowns certain paths before dropping root privileges.

Permissions setting in run-task solves permissions problems. But it
doesn't solve content incompatibility problems. For that, you need to
change cache names, not use caches, or blow away content when
incompatibilities are detected.

This commit starts the process of adding a little bit more coherence to
our caching story. There are two main features in this commit:

1) Cache names tied to run-task content
2) Cache validation in run-task

Taskgraph now detects when a task is using caches with run-task. When
caches and run-task are both being used, the cache name is adjusted to
contain a hash of run-task's content. When run-task changes, the cache
name changes. So, changing run-task ensures that all caches from that
point forward are "clean." This frees run-task and any functionality
related to run-task (such as maintaining version control checkouts)
from having to maintain backwards or forwards compatibility with any
other version of run-task. This does mean that any change to run-task
effectively wipes out caches. But changes to run-task tend to be
seldom, so this should be acceptable.

The second part of this change is code in run-task to record per-cache
properties and validate whether a populated cache is appropriate for
use. To enable this, taskgraph passes a list of cache paths via an
environment variable. For each cache path, run-task looks for a
well-defined file containing a list of "requirements." Right now, that
list is simply a version string, but other features will be worked into
it. If the cache is empty, we simply write out a new requirements file
and are done. If the file exists, we compare requirements and fail fast
if there is a mismatch. If the cache has content but not this special
file, we abort (because this should never happen).

The "requirements" validation isn't very useful now because the only
entry comes from run-task's source code, and modifying run-task will
change the hash and cause a new cache to be used. At this point, the
implementation demonstrates the concept more than it does anything
terribly useful.

MozReview-Commit-ID: HtpXIc7OD1k
taskcluster/docker/recipes/run-task
taskcluster/docs/caches.rst
taskcluster/taskgraph/transforms/task.py
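
Before the patch itself, a minimal sketch may help make the naming
scheme concrete. This is not the actual taskgraph code; it assumes
hash_path amounts to a SHA-256 digest of the file's content, and the
resulting cache name is hypothetical:

    import hashlib

    def run_task_suffix(run_task_path):
        # Any edit to run-task changes the digest, which changes the
        # suffix, which in turn forces a fresh set of caches.
        with open(run_task_path, 'rb') as fh:
            digest = hashlib.sha256(fh.read()).hexdigest()
        return digest[0:20]

    # e.g. 'level-3-checkouts' -> 'level-3-checkouts-9c56cc51b374c3ba1898'
    name = 'level-3-checkouts-%s' % run_task_suffix(
        'taskcluster/docker/recipes/run-task')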
--- a/taskcluster/docker/recipes/run-task
+++ b/taskcluster/docker/recipes/run-task
@@ -251,16 +251,88 @@ def main(args):
         # Find all groups to which this user is a member.
         gids = [g.gr_gid for g in grp.getgrall() if args.group in g.gr_mem]
 
         uid = user.pw_uid
         gid = group.gr_gid
     else:
         uid = gid = gids = None
 
+    # Validate caches.
+    #
+    # Taskgraph should pass in a list of paths that are caches via an
+    # environment variable (which we don't want to pass down to child
+    # processes). For each cache, we write out a special file denoting
+    # attributes and capabilities of run-task and the task being executed.
+    # These attributes are used by subsequent run-task invocations to
+    # validate that use of the cache is acceptable.
+    #
+    # We /could/ blow away the cache data on requirements mismatch.
+    # While this would be convenient, it could result in "competing" tasks
+    # effectively undoing each other's work. This would slow down task
+    # execution in aggregate. And without monitoring for it, people may
+    # not notice the problem, so tasks would be slower than they could be.
+    # We instead follow the principle of "fail fast" to ensure optimal
+    # task execution.
+
+    if 'TASKCLUSTER_CACHES' in os.environ:
+        caches = os.environ['TASKCLUSTER_CACHES'].split(';')
+        del os.environ['TASKCLUSTER_CACHES']
+    else:
+        caches = []
+
+    our_requirements = {
+        # Include a version string that we can bump whenever we need to
+        # force fresh caches. The actual value is not relevant and doesn't
+        # need to follow any explicit order. Since taskgraph bakes this
+        # file's hash into cache names, any change to this file/version is
+        # sufficient to force the use of a new cache.
+        b'version=1',
+    }
+
+    for cache in caches:
+        if not os.path.isdir(cache):
+            print('cache %s is not a directory; this should never happen' %
+                  cache)
+            return 1
+
+        requires_path = os.path.join(cache, '.cacherequires')
+
+        # The cache is empty. No validation necessary. Just set up our
+        # requirements file.
+        if not os.listdir(cache):
+            with open(requires_path, 'wb') as fh:
+                fh.write(b'\n'.join(sorted(our_requirements)))
+
+            # And make it read-only as a precaution against deletion.
+            os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
+
+        # The cache has content and we have a requirements file. Validate
+        # requirements alignment.
+        elif os.path.exists(requires_path):
+            with open(requires_path, 'rb') as fh:
+                wanted_requirements = set(fh.read().splitlines())
+
+            missing = wanted_requirements - our_requirements
+            if missing:
+                print('requirements for populated cache %s differ from '
+                      'this task' % cache)
+                print('cache requirements: %s' % ' '.join(sorted(
+                    wanted_requirements)))
+                print('our requirements:   %s' % ' '.join(sorted(
+                    our_requirements)))
+                return 1
+
+        # The cache has content and no requirements file. This shouldn't
+        # happen because run-task should be the first thing that touches a
+        # cache.
+        else:
+            print('cache %s is not empty and is missing a .cacherequires '
+                  'file; the cache names for this task are likely '
+                  'mis-configured' % cache)
+            return 1
+
     # Change ownership of requested paths.
     # FUTURE: parse argument values for user/group if we don't want to
     # use --user/--group.
     for path in args.chown or []:
         if not running_as_root:
             print_line(b'set_dir_permissions', b'--chown not allowed when not running as root')
             return 1
 
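
As a standalone model of the validation logic above (simplified: it
omits run-task's logging and exit codes and uses asserts instead), the
per-cache flow is roughly:

    import os
    import stat
    import tempfile

    OUR_REQUIREMENTS = {b'version=1'}

    def validate_cache(cache):
        requires_path = os.path.join(cache, '.cacherequires')
        if not os.listdir(cache):
            # Empty cache: seed the requirements file and make it
            # read-only as a precaution against deletion.
            with open(requires_path, 'wb') as fh:
                fh.write(b'\n'.join(sorted(OUR_REQUIREMENTS)))
            os.chmod(requires_path,
                     stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
            return True
        if os.path.exists(requires_path):
            with open(requires_path, 'rb') as fh:
                wanted = set(fh.read().splitlines())
            # Fail fast on mismatch rather than blowing the cache away.
            return not (wanted - OUR_REQUIREMENTS)
        # Populated cache without a requirements file: the cache names
        # for the task are likely mis-configured.
        return False

    cache = tempfile.mkdtemp()
    assert validate_cache(cache)  # first use seeds .cacherequires
    assert validate_cache(cache)  # later uses validate cleanly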
--- a/taskcluster/docs/caches.rst
+++ b/taskcluster/docs/caches.rst
@@ -1,21 +1,66 @@
 .. taskcluster_caches:
 
-=============
-Common Caches
-=============
+Caches
+======
 
 There are various caches used by the in-tree tasks. This page attempts to
 document them and their appropriate use.
 
+Caches are essentially isolated filesystems that are persisted between
+tasks. For example, if two tasks run on a worker, one after the other,
+and both tasks request the same cache, the second task will be able to
+see files in the cache that were created by the first task.
+It's also worth noting that TaskCluster workers ensure a cache can only
+be used by one task at a time. If a worker is simultaneously running
+multiple tasks requesting the same named cache, the worker will
+maintain multiple instances of that cache.
+
+Caches and ``run-task``
+=======================
+
+``run-task`` is our generic task wrapper script. It performs common
+activities like ensuring a version control checkout is present.
+
+One of the roles of ``run-task`` is to verify and sanitize caches.
+It does this by storing state in a cache on its first use. If the
+recorded *capabilities* of an existing cache don't match expectations
+for the current task, ``run-task`` bails. This ensures that caches are
+only reused by tasks with similar execution *profiles* and prevents
+accidental cache use across incompatible tasks. It also allows
+``run-task`` to make assumptions about the state of caches, which can
+help avoid costly operations.
+
+In addition, the hash of ``run-task`` is used to derive the cache name.
+So any time ``run-task`` changes, a new set of caches is used. This
+ensures that any backwards incompatible changes or bug fixes to
+``run-task`` result in fresh caches.
+
+Some caches are reserved for use with ``run-task``. That property is
+denoted below.
+
+Common Caches
+=============
+
 Version Control Caches
-======================
+----------------------
+
+``level-{{level}}-checkouts-{{hash}}``
+   This cache holds version control checkouts, each in a subdirectory named
+   after the repo (e.g., ``gecko``).
 
-``level-{{level}}-checkouts-{{version}}``
+   Checkouts should be read-only. If a task needs to create new files from
+   content of a checkout, this content should be written in a separate
+   directory/cache (like a workspace).
+
+   This cache name pattern is managed by ``run-task`` and must only be
+   used with ``run-task``.
+
+``level-{{level}}-checkouts-{{version}}`` (deprecated)
    This cache holds version control checkouts, each in a subdirectory named
    after the repo (e.g., ``gecko``).
 
    Checkouts should be read-only. If a task needs to create new files from
    content of a checkout, this content should be written in a separate
    directory/cache (like a workspace).
 
    A ``version`` parameter appears in the cache name to allow
@@ -26,26 +71,28 @@ Version Control Caches
    should set the ``HG_STORE_PATH`` environment variable to point to this
    directory. If you are using ``hg robustcheckout``, pass this directory to the
    ``--sharebase`` option.
 
 ``level-{{level}}-{{project}}-tc-vcs`` (deprecated)
     This cache is used internally by ``tc-vcs``.  This tool is deprecated and
     should be replaced with ``hg robustcheckout``.
 
-
 Workspace Caches
-================
+----------------
 
 ``level-{{level}}-*-workspace``
    These caches (of various names typically ending with ``workspace``)
    contain state to be shared between task invocations. Use cases are
    dependent on the task.
 
 Other
-=====
+-----
 
-``level-{{level}}-tooltool-cache
-    Tooltool invocations should use this cache. Tooltool will store files here
-    indexed by their hash.
+``level-{{level}}-tooltool-cache-{{hash}}``
+   Tooltool invocations should use this cache. Tooltool will store files here
+   indexed by their hash.
+
+   This cache name pattern is reserved for use with ``run-task`` and must
+   only be used by ``run-task``.
 
 ``tooltool-cache`` (deprecated)
-    Legacy location for tooltool files. Use the per-level one instead.
+   Legacy location for tooltool files. Use the per-level one instead.
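
To make the reserved patterns concrete: with a hypothetical run-task
hash of 9c56cc51b374c3ba1898, a level-3 task would end up with cache
names like:

    level-3-checkouts-9c56cc51b374c3ba1898
    level-3-tooltool-cache-9c56cc51b374c3ba1898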
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -10,27 +10,38 @@ complexities of worker implementations, 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import os
 import time
 from copy import deepcopy
 
+from mozbuild.util import memoize
 from taskgraph.util.attributes import TRUNK_PROJECTS
+from taskgraph.util.hash import hash_path
 from taskgraph.util.treeherder import split_symbol
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_release_config
 from voluptuous import Any, Required, Optional, Extra
 from taskgraph import GECKO
 
 from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR
 
 
+RUN_TASK = os.path.join(GECKO, 'taskcluster', 'docker', 'recipes', 'run-task')
+
+
+@memoize
+def _run_task_suffix():
+    """String to append to cache names under control of run-task."""
+    return hash_path(RUN_TASK)[0:20]
+
+
 # shortcut for a string where task references are allowed
 taskref_or_string = Any(
     basestring,
     {Required('task-reference'): basestring})
 
 # A task description is a general description of a TaskCluster task
 task_description_schema = Schema({
     # the label for this task
@@ -651,19 +662,42 @@ def build_docker_worker_payload(config, 
                 'path': artifact['path'],
                 'type': artifact['type'],
                 'expires': task_def['expires'],  # always expire with the task
             }
         payload['artifacts'] = artifacts
 
     if 'caches' in worker:
         caches = {}
+
+        # run-task knows how to validate caches.
+        #
+        # To help ensure new run-task features and bug fixes don't interfere
+        # with existing caches, we seed the hash of run-task into cache names.
+        # So, any time run-task changes, we should get a fresh set of caches.
+        # This means run-task can make changes to cache interaction at any
+        # time without regard for backwards or forwards compatibility.
+
+        run_task = payload.get('command', [''])[0].endswith('run-task')
+
+        if run_task:
+            suffix = '-%s' % _run_task_suffix()
+        else:
+            suffix = ''
+
         for cache in worker['caches']:
-            caches[cache['name']] = cache['mount-point']
-            task_def['scopes'].append('docker-worker:cache:' + cache['name'])
+            name = '%s%s' % (cache['name'], suffix)
+            caches[name] = cache['mount-point']
+            task_def['scopes'].append('docker-worker:cache:%s' % name)
+
+        # Assertion: only run-task is interested in this.
+        if run_task:
+            payload['env']['TASKCLUSTER_CACHES'] = ';'.join(sorted(
+                caches.values()))
+
         payload['cache'] = caches
 
     if features:
         payload['features'] = features
     if capabilities:
         payload['capabilities'] = capabilities
 
     # coalesce / superseding
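
Taken together, the transform yields a docker-worker payload fragment
along these lines for a run-task task with one cache (command, mount
point, and hash suffix here are all hypothetical):

    payload = {
        'command': ['/builds/worker/bin/run-task', '--', 'bash', '-c', '...'],
        'cache': {
            # cache['name'] plus the run-task hash suffix
            'level-3-checkouts-9c56cc51b374c3ba1898': '/builds/worker/checkouts',
        },
        'env': {
            # consumed (and deleted) by run-task before the command runs
            'TASKCLUSTER_CACHES': '/builds/worker/checkouts',
        },
    }
    # plus a matching scope on the task:
    #   docker-worker:cache:level-3-checkouts-9c56cc51b374c3ba1898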