Bug 1408352 - [taskgraph] Implement 'always_target' attribute, r?dustin draft
author Andrew Halberstadt <ahalberstadt@mozilla.com>
Mon, 30 Oct 2017 09:41:51 -0400
changeset 692216 a6a8fc4fd8e2f479b544a64e49223149349e2462
parent 692089 40a14ca1cf04499f398e4cb8ba359b39eae4e216
child 692217 6c19de382ad3dc300a7321effe138756a4329fe5
push id 87441
push user ahalberstadt@mozilla.com
push date Thu, 02 Nov 2017 18:52:31 +0000
reviewers dustin
bugs 1408352
milestone 58.0a1
Bug 1408352 - [taskgraph] Implement 'always_target' attribute, r?dustin Tasks that have the 'always_target' attribute set will always be included in the target_task_graph, regardless of target task filtering. Furthermore, if they were only added because of this attribute (i.e., the filters would have excluded the task), then the task will be a candidate for optimization even if the 'optimize_target_tasks' parameter is False. MozReview-Commit-ID: 9eoVJ5qpAMO
taskcluster/docs/attributes.rst
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/test/test_generator.py
taskcluster/taskgraph/transforms/job/__init__.py
taskcluster/taskgraph/transforms/task.py
--- a/taskcluster/docs/attributes.rst
+++ b/taskcluster/docs/attributes.rst
@@ -158,19 +158,31 @@ specific locale involved. Currently this
 ``beetmover`` and ``balrog`` kinds.
 
 signed
 ======
 Signals that the output of this task contains signed artifacts.
 
 repackage_type
 ==============
-This is the type of repackage. Can be ``repackage`` or 
+This is the type of repackage. Can be ``repackage`` or
 ``repackage_signing``.
 
 toolchain-artifact
 ==================
 For toolchain jobs, this is the path to the artifact for that toolchain.
 
 toolchain-alias
 ===============
 For toolchain jobs, this optionally gives an alias that can be used instead of the
 real toolchain job name in the toolchains list for build jobs.
+
+always_target
+=============
+
+Tasks with this attribute will be included in the ``target_task_graph`` regardless
+of any target task filtering that occurs. When a task is included in this manner
+(i.e., it otherwise would have been filtered out), it will be considered for
+optimization even if the ``optimize_target_tasks`` parameter is False.
+
+This is meant to be used for tasks which a developer would almost always want to
+run. Typically these tasks will be short-running and have a high risk of causing
+a backout, for example ``lint`` or ``python-unittest`` tasks.
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -252,17 +252,23 @@ class TaskGraphGenerator(object):
                 len(target_tasks)))
 
         yield verifications('target_task_set', target_task_set)
 
         logger.info("Generating target task graph")
         # include all docker-image build tasks here, in case they are needed for a graph morph
         docker_image_tasks = set(t.label for t in full_task_graph.tasks.itervalues()
                                  if t.attributes['kind'] == 'docker-image')
-        target_graph = full_task_graph.graph.transitive_closure(target_tasks | docker_image_tasks)
+        # include all tasks with `always_target` set
+        always_target_tasks = set(t.label for t in full_task_graph.tasks.itervalues()
+                                  if t.attributes.get('always_target'))
+        logger.info('Adding %d tasks with `always_target` attribute' % (
+                    len(always_target_tasks) - len(always_target_tasks & target_tasks)))
+        target_graph = full_task_graph.graph.transitive_closure(
+            target_tasks | docker_image_tasks | always_target_tasks)
         target_task_graph = TaskGraph(
             {l: all_tasks[l] for l in target_graph.nodes},
             target_graph)
         yield verifications('target_task_graph', target_task_graph)
 
         logger.info("Generating optimized task graph")
         existing_tasks = self.parameters.get('existing_tasks')
         do_not_optimize = set(self.parameters.get('do_not_optimize', []))
--- a/taskcluster/taskgraph/test/test_generator.py
+++ b/taskcluster/taskgraph/test/test_generator.py
@@ -1,84 +1,123 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import pytest
 import unittest
+from mozunit import main
 
 from taskgraph.generator import TaskGraphGenerator, Kind
-from taskgraph import graph, target_tasks as target_tasks_mod
-from mozunit import main
+from taskgraph.optimize import OptimizationStrategy
+from taskgraph.util.templates import merge
+from taskgraph import (
+    graph,
+    optimize as optimize_mod,
+    target_tasks as target_tasks_mod,
+)
 
 
 def fake_loader(kind, path, config, parameters, loaded_tasks):
     for i in range(3):
         dependencies = {}
         if i >= 1:
             dependencies['prev'] = '{}-t-{}'.format(kind, i-1)
-        yield {'kind': kind,
-               'label': '{}-t-{}'.format(kind, i),
-               'attributes': {'_tasknum': str(i)},
-               'task': {'i': i},
-               'dependencies': dependencies}
+
+        task = {
+            'kind': kind,
+            'label': '{}-t-{}'.format(kind, i),
+            'attributes': {'_tasknum': str(i)},
+            'task': {'i': i},
+            'dependencies': dependencies,
+        }
+        if 'job-defaults' in config:
+            task = merge(config['job-defaults'], task)
+        yield task
 
 
 class FakeKind(Kind):
 
     def _get_loader(self):
         return fake_loader
 
     def load_tasks(self, parameters, loaded_tasks):
         FakeKind.loaded_kinds.append(self.name)
         return super(FakeKind, self).load_tasks(parameters, loaded_tasks)
 
 
 class WithFakeKind(TaskGraphGenerator):
 
     def _load_kinds(self):
-        for kind_name, deps in self.parameters['_kinds']:
+        for kind_name, cfg in self.parameters['_kinds']:
             config = {
                 'transforms': [],
             }
-            if deps:
-                config['kind-dependencies'] = deps
+            if cfg:
+                config.update(cfg)
             yield FakeKind(kind_name, '/fake', config)
 
 
 class FakeParameters(dict):
     strict = True
 
 
+class FakeOptimization(OptimizationStrategy):
+    def __init__(self, mode, *args, **kwargs):
+        super(FakeOptimization, self).__init__(*args, **kwargs)
+        self.mode = mode
+
+    def should_remove_task(self, task, params, arg):
+        if self.mode == 'always':
+            return True
+        if self.mode == 'even':
+            return task.task['i'] % 2 == 0
+        if self.mode == 'odd':
+            return task.task['i'] % 2 != 0
+        return False
+
+
 class TestGenerator(unittest.TestCase):
 
-    def maketgg(self, target_tasks=None, kinds=[('_fake', [])]):
+    @pytest.fixture(autouse=True)
+    def patch(self, monkeypatch):
+        self.patch = monkeypatch
+
+    def maketgg(self, target_tasks=None, kinds=[('_fake', [])], params=None):
+        params = params or {}
         FakeKind.loaded_kinds = []
         self.target_tasks = target_tasks or []
 
         def target_tasks_method(full_task_graph, parameters):
             return self.target_tasks
 
+        def make_fake_strategies():
+            return {mode: FakeOptimization(mode)
+                    for mode in ('always', 'never', 'even', 'odd')}
+
         target_tasks_mod._target_task_methods['test_method'] = target_tasks_method
+        self.patch.setattr(optimize_mod, '_make_default_strategies', make_fake_strategies)
 
         parameters = FakeParameters({
             '_kinds': kinds,
             'target_tasks_method': 'test_method',
             'try_mode': None,
         })
+        parameters.update(params)
 
         return WithFakeKind('/root', parameters)
 
     def test_kind_ordering(self):
         "When task kinds depend on each other, they are loaded in postorder"
         self.tgg = self.maketgg(kinds=[
-            ('_fake3', ['_fake2', '_fake1']),
-            ('_fake2', ['_fake1']),
-            ('_fake1', []),
+            ('_fake3', {'kind-dependencies': ['_fake2', '_fake1']}),
+            ('_fake2', {'kind-dependencies': ['_fake1']}),
+            ('_fake1', {'kind-dependencies': []}),
         ])
         self.tgg._run_until('full_task_set')
         self.assertEqual(FakeKind.loaded_kinds, ['_fake1', '_fake2', '_fake3'])
 
     def test_full_task_set(self):
         "The full_task_set property has all tasks"
         self.tgg = self.maketgg()
         self.assertEqual(self.tgg.full_task_set.graph,
@@ -110,16 +149,40 @@ class TestGenerator(unittest.TestCase):
         "The target_task_graph property has the targeted tasks and deps"
         self.tgg = self.maketgg(['_fake-t-1'])
         self.assertEqual(self.tgg.target_task_graph.graph,
                          graph.Graph({'_fake-t-0', '_fake-t-1'},
                                      {('_fake-t-1', '_fake-t-0', 'prev')}))
         self.assertEqual(sorted(self.tgg.target_task_graph.tasks.keys()),
                          sorted(['_fake-t-0', '_fake-t-1']))
 
+    def test_always_target_tasks(self):
+        "The target_task_graph includes tasks with 'always_target'"
+        tgg_args = {
+            'target_tasks': ['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1'],
+            'kinds': [
+                ('_fake', {'job-defaults': {'optimization': {'odd': None}}}),
+                ('_ignore', {'job-defaults': {
+                    'attributes': {'always_target': True},
+                    'optimization': {'even': None},
+                }}),
+            ],
+            'params': {'optimize_target_tasks': False},
+        }
+        self.tgg = self.maketgg(**tgg_args)
+        self.assertEqual(
+            sorted(self.tgg.target_task_set.tasks.keys()),
+            sorted(['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1']))
+        self.assertEqual(
+            sorted(self.tgg.target_task_graph.tasks.keys()),
+            sorted(['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1', '_ignore-t-2']))
+        self.assertEqual(
+            sorted([t.label for t in self.tgg.optimized_task_graph.tasks.values()]),
+            sorted(['_fake-t-0', '_fake-t-1', '_ignore-t-0', '_ignore-t-1']))
+
     def test_optimized_task_graph(self):
         "The optimized task graph contains task ids"
         self.tgg = self.maketgg(['_fake-t-2'])
         tid = self.tgg.label_to_taskid
         self.assertEqual(
             self.tgg.optimized_task_graph.graph,
             graph.Graph({tid['_fake-t-0'], tid['_fake-t-1'], tid['_fake-t-2']}, {
                 (tid['_fake-t-1'], tid['_fake-t-0'], 'prev'),
--- a/taskcluster/taskgraph/transforms/job/__init__.py
+++ b/taskcluster/taskgraph/transforms/job/__init__.py
@@ -56,16 +56,17 @@ job_description_schema = Schema({
     Optional('scopes'): task_description_schema['scopes'],
     Optional('tags'): task_description_schema['tags'],
     Optional('extra'): task_description_schema['extra'],
     Optional('notifications'): task_description_schema['notifications'],
     Optional('treeherder'): task_description_schema['treeherder'],
     Optional('index'): task_description_schema['index'],
     Optional('run-on-projects'): task_description_schema['run-on-projects'],
     Optional('coalesce'): task_description_schema['coalesce'],
+    Optional('always-target'): task_description_schema['always-target'],
     Exclusive('optimization', 'optimization'): task_description_schema['optimization'],
     Optional('needs-sccache'): task_description_schema['needs-sccache'],
 
     # The "when" section contains descriptions of the circumstances under which
     # this task should be included in the task graph.  This will be converted
     # into an optimization, so it cannot be specified in a job description that
     # also gives 'optimization'.
     Exclusive('when', 'optimization'): Any({
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -186,16 +186,23 @@ task_description_schema = Schema({
         # tasks.
         'age': int,
 
         # The minimum number of backlogged tasks with the same coalescing key,
         # before the coalescing service will return tasks.
         'size': int,
     },
 
+    # The `always-target` attribute will cause the task to be included in the
+    # target_task_graph regardless of filtering. Tasks included in this manner
+    # will be candidates for optimization even when `optimize_target_tasks` is
+    # False, unless the task was also explicitly chosen by the target_tasks
+    # method.
+    Required('always-target', default=False): bool,
+
     # Optimization to perform on this task during the optimization phase.
     # Optimizations are defined in taskcluster/taskgraph/optimize.py.
     Required('optimization', default=None): Any(
         # always run this task (default)
         None,
         # search the index for the given index namespaces, and replace this task if found
         # the search occurs in order, with the first match winning
         {'index-search': [basestring]},
@@ -1317,16 +1324,17 @@ def build_task(config, tasks):
             task_def['metadata']['description'] += ' ([Treeherder push]({}))'.format(
                 th_push_link)
 
         # add the payload and adjust anything else as required (e.g., scopes)
         payload_builders[task['worker']['implementation']](config, task, task_def)
 
         attributes = task.get('attributes', {})
         attributes['run_on_projects'] = task.get('run-on-projects', ['all'])
+        attributes['always_target'] = task['always-target']
 
         # Set MOZ_AUTOMATION on all jobs.
         if task['worker']['implementation'] in (
             'generic-worker',
             'docker-engine',
             'native-engine',
             'docker-worker',
         ):