Bug 1280231: Load kinds in order by dependencies; r=jonasfj draft
authorDustin J. Mitchell <dustin@mozilla.com>
Mon, 27 Jun 2016 22:31:06 +0000
changeset 382017 36fc7e2d9c5987a4bb8b3779cf1a9308f5561828
parent 382016 4f0fcce2bcea0fb78ba70e7c052638ca2c5b8a3d
child 382018 12e30313eb1fd4062c0be9d869460644ae949019
push id21592
push userdmitchell@mozilla.com
push dateTue, 28 Jun 2016 16:13:44 +0000
reviewersjonasfj
bugs1280231
milestone50.0a1
Bug 1280231: Load kinds in order by dependencies; r=jonasfj This enables kinds that generate tasks based on those output by another kind. For example, the test kind might generate a set of test tasks for each build task. MozReview-Commit-ID: K7ha9OmJ6gd
taskcluster/docs/taskgraph.rst
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/kind/base.py
taskcluster/taskgraph/kind/docker_image.py
taskcluster/taskgraph/kind/legacy.py
taskcluster/taskgraph/test/test_generator.py
--- a/taskcluster/docs/taskgraph.rst
+++ b/taskcluster/docs/taskgraph.rst
@@ -36,16 +36,29 @@ differently.  Some kinds may generate ta
 example, symbol-upload tasks are all alike, and very simple), while other kinds
 may do little more than parse a directory of YAML files.
 
 A `kind.yml` file contains data about the kind, as well as referring to a
 Python class implementing the kind in its ``implementation`` key.  That
 implementation may rely on lots of code shared with other kinds, or contain a
 completely unique implementation of some functionality.
 
+The full list of pre-defined keys in this file is:
+
+``implementation``
+   Class implementing this kind, in the form ``<module-path>:<object-path>``.
+   This class should be a subclass of ``taskgraph.kind.base:Kind``.
+
+``kind-dependencies``
+   Kinds which should be loaded before this one.  This is useful when the kind
+   will use the list of already-created tasks to determine which tasks to
+   create, for example adding an upload-symbols task after every build task.
+
+Any other keys are subject to interpretation by the kind implementation.
+
 The result is a nice segmentation of implementation so that the more esoteric
 in-tree projects can do their crazy stuff in an isolated kind without making
 the bread-and-butter build and test configuration more complicated.
 
 Dependencies
 ------------
 
 Dependency links between tasks are always between different kinds(*).  At a
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -9,16 +9,48 @@ import yaml
 
 from .graph import Graph
 from .types import TaskGraph
 from .optimize import optimize_task_graph
 
 logger = logging.getLogger(__name__)
 
 
+class Kind(object):
+
+    def __init__(self, name, path, config):
+        self.name = name
+        self.path = path
+        self.config = config
+
+    def _get_impl_class(self):
+        # load the class defined by implementation
+        try:
+            impl = self.config['implementation']
+        except KeyError:
+            raise KeyError("{!r} does not define implementation".format(self.path))
+        if impl.count(':') != 1:
+            raise TypeError('{!r} implementation does not have the form "module:object"'
+                            .format(self.path))
+
+        impl_module, impl_object = impl.split(':')
+        impl_class = __import__(impl_module)
+        for a in impl_module.split('.')[1:]:
+            impl_class = getattr(impl_class, a)
+        for a in impl_object.split('.'):
+            impl_class = getattr(impl_class, a)
+
+        return impl_class
+
+    def load_tasks(self, parameters, loaded_tasks):
+        impl_class = self._get_impl_class()
+        return impl_class.load_tasks(self.name, self.path, self.config,
+                                     parameters, loaded_tasks)
+
+
 class TaskGraphGenerator(object):
     """
     The central controller for taskgraph.  This handles all phases of graph
     generation.  The task is generated from all of the kinds defined in
     subdirectories of the generator's root directory.
 
     Access to the results of this generation, as well as intermediate values at
     various phases of generation, is available via properties.  This encourages
@@ -117,43 +149,38 @@ class TaskGraphGenerator(object):
                 continue
             kind_name = os.path.basename(path)
             logger.debug("loading kind `{}` from `{}`".format(kind_name, path))
 
             kind_yml = os.path.join(path, 'kind.yml')
             with open(kind_yml) as f:
                 config = yaml.load(f)
 
-            # load the class defined by implementation
-            try:
-                impl = config['implementation']
-            except KeyError:
-                raise KeyError("{!r} does not define implementation".format(kind_yml))
-            if impl.count(':') != 1:
-                raise TypeError('{!r} implementation does not have the form "module:object"'
-                                .format(kind_yml))
-
-            impl_module, impl_object = impl.split(':')
-            impl_class = __import__(impl_module)
-            for a in impl_module.split('.')[1:]:
-                impl_class = getattr(impl_class, a)
-            for a in impl_object.split('.'):
-                impl_class = getattr(impl_class, a)
-
-            for task in impl_class.load_tasks(kind_name, path, config, self.parameters):
-                yield task
+            yield Kind(kind_name, path, config)
 
     def _run(self):
+        logger.info("Loading kinds")
+        # put the kinds into a graph and sort topologically so that kinds are loaded
+        # in post-order
+        kinds = {kind.name: kind for kind in self._load_kinds()}
+        edges = set()
+        for kind in kinds.itervalues():
+            for dep in kind.config.get('kind-dependencies', []):
+                edges.add((kind.name, dep, 'kind-dependency'))
+        kind_graph = Graph(set(kinds), edges)
+
         logger.info("Generating full task set")
         all_tasks = {}
-        for task in self._load_kinds():
-            if task.label in all_tasks:
-                raise Exception("duplicate tasks with label " + task.label)
-            all_tasks[task.label] = task
-
+        for kind_name in kind_graph.visit_postorder():
+            logger.debug("Loading tasks for kind {}".format(kind_name))
+            kind = kinds[kind_name]
+            for task in kind.load_tasks(self.parameters, list(all_tasks.values())):
+                if task.label in all_tasks:
+                    raise Exception("duplicate tasks with label " + task.label)
+                all_tasks[task.label] = task
         full_task_set = TaskGraph(all_tasks, Graph(set(all_tasks), set()))
         yield 'full_task_set', full_task_set
 
         logger.info("Generating full task graph")
         edges = set()
         for t in full_task_set:
             for dep, depname in t.get_dependencies(full_task_set):
                 edges.add((t.label, dep, depname))
--- a/taskcluster/taskgraph/kind/base.py
+++ b/taskcluster/taskgraph/kind/base.py
@@ -40,29 +40,34 @@ class Task(object):
         self.attributes['kind'] = kind
 
         if not (all(isinstance(x, basestring) for x in self.attributes.iterkeys()) and
                 all(isinstance(x, basestring) for x in self.attributes.itervalues())):
             raise TypeError("attribute names and values must be strings")
 
     @classmethod
     @abc.abstractmethod
-    def load_tasks(cls, kind, path, config, parameters):
+    def load_tasks(cls, kind, path, config, parameters, loaded_tasks):
         """
         Load the tasks for a given kind.
 
         The `kind` is the name of the kind; the configuration for that kind
         named this class.
 
         The `path` is the path to the configuration directory for the kind.  This
         can be used to load extra data, templates, etc.
 
         The `parameters` give details on which to base the task generation.
         See `taskcluster/docs/parameters.rst` for details.
 
+        At the time this method is called, all kinds on which this kind depends
+        (that is, specified in the `kind-dependencies` key in `self.config`
+        have already loaded their tasks, and those tasks are available in
+        the list `loaded_tasks`.
+
         The return value is a list of Task instances.
         """
 
     @abc.abstractmethod
     def get_dependencies(self, taskgraph):
         """
         Get the set of task labels this task depends on, by querying the full
         task set, given as `taskgraph`.
--- a/taskcluster/taskgraph/kind/docker_image.py
+++ b/taskcluster/taskgraph/kind/docker_image.py
@@ -30,17 +30,17 @@ INDEX_URL = 'https://index.taskcluster.n
 
 class DockerImageTask(base.Task):
 
     def __init__(self, *args, **kwargs):
         self.index_paths = kwargs.pop('index_paths')
         super(DockerImageTask, self).__init__(*args, **kwargs)
 
     @classmethod
-    def load_tasks(cls, kind, path, config, params):
+    def load_tasks(cls, kind, path, config, params, loaded_tasks):
         # TODO: make this match the pushdate (get it from a parameter rather than vcs)
         pushdate = time.strftime('%Y%m%d%H%M%S', time.gmtime())
 
         parameters = {
             'pushlog_id': params.get('pushlog_id', 0),
             'pushdate': pushdate,
             'pushtime': pushdate[8:],
             'year': pushdate[0:4],
--- a/taskcluster/taskgraph/kind/legacy.py
+++ b/taskcluster/taskgraph/kind/legacy.py
@@ -301,17 +301,17 @@ class LegacyTask(base.Task):
     "TaskLabel==".  These labels are unfortunately not stable from run to run.
     """
 
     def __init__(self, *args, **kwargs):
         self.task_dict = kwargs.pop('task_dict')
         super(LegacyTask, self).__init__(*args, **kwargs)
 
     @classmethod
-    def load_tasks(cls, kind, path, config, params):
+    def load_tasks(cls, kind, path, config, params, loaded_tasks):
         root = os.path.abspath(os.path.join(path, config['legacy_path']))
 
         project = params['project']
         # NOTE: message is ignored here; we always use DEFAULT_TRY, then filter the
         # resulting task graph later
         message = DEFAULT_TRY
 
         templates = Templates(root)
--- a/taskcluster/taskgraph/test/test_generator.py
+++ b/taskcluster/taskgraph/test/test_generator.py
@@ -1,104 +1,129 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import unittest
 
-from ..generator import TaskGraphGenerator
+from ..generator import TaskGraphGenerator, Kind
 from .. import graph
 from ..kind import base
 from mozunit import main
 
 
 class FakeTask(base.Task):
 
     def __init__(self, **kwargs):
         self.i = kwargs.pop('i')
         super(FakeTask, self).__init__(**kwargs)
 
     @classmethod
-    def load_tasks(cls, kind, path, config, parameters):
+    def load_tasks(cls, kind, path, config, parameters, loaded_tasks):
         return [cls(kind=kind,
-                    label='t-{}'.format(i),
+                    label='{}-t-{}'.format(kind, i),
                     attributes={'tasknum': str(i)},
                     task={},
                     i=i)
                 for i in range(3)]
 
     def get_dependencies(self, full_task_set):
         i = self.i
         if i > 0:
-            return [('t-{}'.format(i - 1), 'prev')]
+            return [('{}-t-{}'.format(self.kind, i - 1), 'prev')]
         else:
             return []
 
     def optimize(self):
         return False, None
 
 
-class WithFakeTask(TaskGraphGenerator):
+class FakeKind(Kind):
+
+    def _get_impl_class(self):
+        return FakeTask
+
+    def load_tasks(self, parameters, loaded_tasks):
+        FakeKind.loaded_kinds.append(self.name)
+        return super(FakeKind, self).load_tasks(parameters, loaded_tasks)
+
+
+class WithFakeKind(TaskGraphGenerator):
 
     def _load_kinds(self):
-        return FakeTask.load_tasks('fake', '/fake', {}, {})
+        for kind_name, deps in self.parameters['kinds']:
+            yield FakeKind(
+                kind_name, '/fake',
+                {'kind-dependencies': deps} if deps else {})
 
 
 class TestGenerator(unittest.TestCase):
 
-    def setUp(self):
-        self.target_tasks = []
+    def maketgg(self, target_tasks=None, kinds=[('fake', [])]):
+        FakeKind.loaded_kinds = []
+        self.target_tasks = target_tasks or []
 
         def target_tasks_method(full_task_graph, parameters):
             return self.target_tasks
-        self.tgg = WithFakeTask('/root', {}, target_tasks_method)
+        return WithFakeKind('/root', {'kinds': kinds}, target_tasks_method)
+
+    def test_kind_ordering(self):
+        "When task kinds depend on each other, they are loaded in postorder"
+        self.tgg = self.maketgg(kinds=[
+            ('fake3', ['fake2', 'fake1']),
+            ('fake2', ['fake1']),
+            ('fake1', []),
+        ])
+        self.tgg._run_until('full_task_set')
+        self.assertEqual(FakeKind.loaded_kinds, ['fake1', 'fake2', 'fake3'])
 
     def test_full_task_set(self):
         "The full_task_set property has all tasks"
+        self.tgg = self.maketgg()
         self.assertEqual(self.tgg.full_task_set.graph,
-                         graph.Graph({'t-0', 't-1', 't-2'}, set()))
-        self.assertEqual(self.tgg.full_task_set.tasks.keys(),
-                         ['t-0', 't-1', 't-2'])
+                         graph.Graph({'fake-t-0', 'fake-t-1', 'fake-t-2'}, set()))
+        self.assertEqual(sorted(self.tgg.full_task_set.tasks.keys()),
+                         sorted(['fake-t-0', 'fake-t-1', 'fake-t-2']))
 
     def test_full_task_graph(self):
         "The full_task_graph property has all tasks, and links"
+        self.tgg = self.maketgg()
         self.assertEqual(self.tgg.full_task_graph.graph,
-                         graph.Graph({'t-0', 't-1', 't-2'},
+                         graph.Graph({'fake-t-0', 'fake-t-1', 'fake-t-2'},
                                      {
-                                         ('t-1', 't-0', 'prev'),
-                                         ('t-2', 't-1', 'prev'),
+                                         ('fake-t-1', 'fake-t-0', 'prev'),
+                                         ('fake-t-2', 'fake-t-1', 'prev'),
                          }))
-        self.assertEqual(self.tgg.full_task_graph.tasks.keys(),
-                         ['t-0', 't-1', 't-2'])
+        self.assertEqual(sorted(self.tgg.full_task_graph.tasks.keys()),
+                         sorted(['fake-t-0', 'fake-t-1', 'fake-t-2']))
 
     def test_target_task_set(self):
         "The target_task_set property has the targeted tasks"
-        self.target_tasks = ['t-1']
+        self.tgg = self.maketgg(['fake-t-1'])
         self.assertEqual(self.tgg.target_task_set.graph,
-                         graph.Graph({'t-1'}, set()))
+                         graph.Graph({'fake-t-1'}, set()))
         self.assertEqual(self.tgg.target_task_set.tasks.keys(),
-                         ['t-1'])
+                         ['fake-t-1'])
 
     def test_target_task_graph(self):
         "The target_task_graph property has the targeted tasks and deps"
-        self.target_tasks = ['t-1']
+        self.tgg = self.maketgg(['fake-t-1'])
         self.assertEqual(self.tgg.target_task_graph.graph,
-                         graph.Graph({'t-0', 't-1'},
-                                     {('t-1', 't-0', 'prev')}))
+                         graph.Graph({'fake-t-0', 'fake-t-1'},
+                                     {('fake-t-1', 'fake-t-0', 'prev')}))
         self.assertEqual(sorted(self.tgg.target_task_graph.tasks.keys()),
-                         sorted(['t-0', 't-1']))
+                         sorted(['fake-t-0', 'fake-t-1']))
 
     def test_optimized_task_graph(self):
         "The optimized task graph contains task ids"
-        self.target_tasks = ['t-2']
+        self.tgg = self.maketgg(['fake-t-2'])
         tid = self.tgg.label_to_taskid
         self.assertEqual(
             self.tgg.optimized_task_graph.graph,
-            graph.Graph({tid['t-0'], tid['t-1'], tid['t-2']}, {
-                (tid['t-1'], tid['t-0'], 'prev'),
-                (tid['t-2'], tid['t-1'], 'prev'),
-            })
-            )
+            graph.Graph({tid['fake-t-0'], tid['fake-t-1'], tid['fake-t-2']}, {
+                (tid['fake-t-1'], tid['fake-t-0'], 'prev'),
+                (tid['fake-t-2'], tid['fake-t-1'], 'prev'),
+            }))
 
 if __name__ == '__main__':
     main()