Bug 1384433 - Generalize verifications done on task graphs. r?dustin draft
author: Mike Hommey <mh+mozilla@glandium.org>
Fri, 25 Aug 2017 10:02:42 +0900
changeset 654546 b31c603abc67878a835ad63c870f65365869f44a
parent 654545 780a6bc62b590b51766674cb7613ffead4dac17e
child 654547 0dfad6c3bc2830ef8da2cbe2bdac365301bfe6f0
push id: 76595
push user: bmo:mh+mozilla@glandium.org
push date: Mon, 28 Aug 2017 22:56:40 +0000
reviewers: dustin
bugs: 1384433
milestone: 57.0a1
Bug 1384433 - Generalize verifications done on task graphs. r?dustin
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/util/verify.py
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -13,18 +13,17 @@ from .graph import Graph
 from .taskgraph import TaskGraph
 from .task import Task
 from .optimize import optimize_task_graph
 from .morph import morph
 from .util.python_path import find_object
 from .transforms.base import TransformSequence, TransformConfig
 from .util.verify import (
     verify_docs,
-    verify_task_graph_symbol,
-    verify_gecko_v2_routes,
+    verifications,
 )
 
 logger = logging.getLogger(__name__)
 
 
 class Kind(object):
 
     def __init__(self, name, path, config):
@@ -223,73 +222,71 @@ class TaskGraphGenerator(object):
             for task in new_tasks:
                 if task.label in all_tasks:
                     raise Exception("duplicate tasks with label " + task.label)
                 all_tasks[task.label] = task
             logger.info("Generated {} tasks for kind {}".format(len(new_tasks), kind_name))
         full_task_set = TaskGraph(all_tasks, Graph(set(all_tasks), set()))
         self.verify_attributes(all_tasks)
         self.verify_run_using()
-        yield 'full_task_set', full_task_set
+        yield verifications('full_task_set', full_task_set)
 
         logger.info("Generating full task graph")
         edges = set()
         for t in full_task_set:
             for depname, dep in t.dependencies.iteritems():
                 edges.add((t.label, dep, depname))
 
         full_task_graph = TaskGraph(all_tasks,
                                     Graph(full_task_set.graph.nodes, edges))
-        full_task_graph.for_each_task(verify_task_graph_symbol, scratch_pad={})
-        full_task_graph.for_each_task(verify_gecko_v2_routes, scratch_pad={})
         logger.info("Full task graph contains %d tasks and %d dependencies" % (
             len(full_task_set.graph.nodes), len(edges)))
-        yield 'full_task_graph', full_task_graph
+        yield verifications('full_task_graph', full_task_graph)
 
         logger.info("Generating target task set")
         target_task_set = TaskGraph(dict(all_tasks),
                                     Graph(set(all_tasks.keys()), set()))
         for fltr in self.filters:
             old_len = len(target_task_set.graph.nodes)
             target_tasks = set(fltr(target_task_set, self.parameters))
             target_task_set = TaskGraph(
                 {l: all_tasks[l] for l in target_tasks},
                 Graph(target_tasks, set()))
             logger.info('Filter %s pruned %d tasks (%d remain)' % (
                 fltr.__name__,
                 old_len - len(target_tasks),
                 len(target_tasks)))
 
-        yield 'target_task_set', target_task_set
+        yield verifications('target_task_set', target_task_set)
 
         logger.info("Generating target task graph")
         # include all docker-image build tasks here, in case they are needed for a graph morph
         docker_image_tasks = set(t.label for t in full_task_graph.tasks.itervalues()
                                  if t.attributes['kind'] == 'docker-image')
         target_graph = full_task_graph.graph.transitive_closure(target_tasks | docker_image_tasks)
         target_task_graph = TaskGraph(
             {l: all_tasks[l] for l in target_graph.nodes},
             target_graph)
-        yield 'target_task_graph', target_task_graph
+        yield verifications('target_task_graph', target_task_graph)
 
         logger.info("Generating optimized task graph")
         do_not_optimize = set()
         if not self.parameters.get('optimize_target_tasks', True):
             do_not_optimize = target_task_set.graph.nodes
         optimized_task_graph, label_to_taskid = optimize_task_graph(target_task_graph,
                                                                     self.parameters,
                                                                     do_not_optimize)
 
-        yield 'optimized_task_graph', optimized_task_graph
+        yield verifications('optimized_task_graph', optimized_task_graph)
 
         morphed_task_graph, label_to_taskid = morph(
             optimized_task_graph, label_to_taskid, self.parameters)
 
         yield 'label_to_taskid', label_to_taskid
-        yield 'morphed_task_graph', morphed_task_graph
+        yield verifications('morphed_task_graph', morphed_task_graph)
 
     def _run_until(self, name):
         while name not in self._run_results:
             try:
                 k, v = self._run.next()
             except StopIteration:
                 raise AttributeError("No such run result {}".format(name))
             self._run_results[k] = v
--- a/taskcluster/taskgraph/util/verify.py
+++ b/taskcluster/taskgraph/util/verify.py
@@ -4,16 +4,40 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import re
 import os
 
 base_path = os.path.join(os.getcwd(), "taskcluster/docs/")
 
 
+class VerificationSequence(object):
+    """
+    Container for a sequence of verifications over a TaskGraph. Each
+    verification is represented as a callable taking (task, taskgraph,
+    scratch_pad), called for each task in the taskgraph.
+    """
+    def __init__(self):
+        self.verifications = {}
+
+    def __call__(self, graph_name, graph):
+        for verification in self.verifications.get(graph_name, []):
+            graph.for_each_task(verification, scratch_pad={})
+        return graph_name, graph
+
+    def add(self, graph_name):
+        def wrap(func):
+            self.verifications.setdefault(graph_name, []).append(func)
+            return func
+        return wrap
+
+
+verifications = VerificationSequence()
+
+
 def verify_docs(filename, identifiers, appearing_as):
 
     # We ignore identifiers starting with '_' for the sake of tests.
     # Strings starting with "_" are ignored for doc verification
     # hence they can be used for faking test values
     with open(os.path.join(base_path, filename)) as fileObject:
         doctext = "".join(fileObject.readlines())
         if appearing_as == "inline-literal":
@@ -35,16 +59,17 @@ def verify_docs(filename, identifiers, a
             match_group = re.search(expression, doctext)
             if not match_group:
                 raise Exception(
                     "{}: `{}` missing from doc file: `{}`"
                     .format(appearing_as, identifier, filename)
                 )
 
 
+@verifications.add('full_task_graph')
 def verify_task_graph_symbol(task, taskgraph, scratch_pad):
     """
         This function verifies that tuple
         (collection.keys(), machine.platform, groupSymbol, symbol) is unique
         for a target task graph.
     """
     task_dict = task.task
     if "extra" in task_dict:
@@ -62,16 +87,17 @@ def verify_task_graph_symbol(task, taskg
                 raise Exception(
                     "conflict between `{}`:`{}` for values `{}`"
                     .format(task.label, scratch_pad[key], key)
                 )
             else:
                 scratch_pad[key] = task.label
 
 
+@verifications.add('full_task_graph')
 def verify_gecko_v2_routes(task, taskgraph, scratch_pad):
     """
         This function ensures that any two
         tasks have distinct index.v2.routes
     """
     route_prefix = "index.gecko.v2"
     task_dict = task.task
     routes = task_dict.get('routes', [])