Bug 1322193 - Verify taskgraph implementations against documentation, with proper regex. Updated doc verification for fake values of kinds, parameters etc., regex optimized [draft]
author Hammad Akhtar <hammad13060@iiitd.ac.in>
Tue, 06 Dec 2016 12:03:36 +0530
changeset 447865 f05fe33a03a343c655e3cddf969b5f7b38ed5ace
parent 447449 bd9e81439725f3d4135652cc3d65f2bfba527b7b
child 539159 01849c1c426d2b7d20ee0a596b13e8e6ac6f3a1e
push id 38201
push user hammad13060@iiitd.ac.in
push date Thu, 08 Dec 2016 06:38:28 +0000
bugs 1322193
milestone 53.0a1
Bug 1322193 - Verify taskgraph implementations against documentation, with proper regex. Updated doc verification for fake values of kinds, parameters etc., regex optimized

MozReview-Commit-ID: 56ZEJECbtK5
taskcluster/taskgraph/decision.py
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/test/test_generator.py
taskcluster/taskgraph/util/verifydoc.py
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -11,17 +11,16 @@ import logging
 
 import time
 import yaml
 
 from .generator import TaskGraphGenerator
 from .create import create_tasks
 from .parameters import Parameters
 from .taskgraph import TaskGraph
-from .util.verifydoc import verify_docs
 
 from taskgraph.util.templates import Templates
 from taskgraph.util.time import (
     json_time_from_now,
     current_json_time,
 )
 
 logger = logging.getLogger(__name__)
@@ -71,17 +70,16 @@ def taskgraph_decision(options):
      * processing decision task command-line options into parameters
      * running task-graph generation exactly the same way the other `mach
        taskgraph` commands do
      * generating a set of artifacts to memorialize the graph
      * calling TaskCluster APIs to create the graph
     """
 
     parameters = get_decision_parameters(options)
-    verify_parameters(parameters)
     # create a TaskGraphGenerator instance
     tgg = TaskGraphGenerator(
         root_dir=options['root'],
         parameters=parameters)
 
     # write out the parameters used to generate this graph
     write_artifact('parameters.yml', dict(**parameters))
 
@@ -182,17 +180,8 @@ def get_action_yml(parameters):
     action_parameters = parameters.copy()
     action_parameters.update({
         "decision_task_id": "{{decision_task_id}}",
         "task_labels": "{{task_labels}}",
         "from_now": json_time_from_now,
         "now": current_json_time()
     })
     return templates.load('action.yml', action_parameters)
-
-
-def verify_parameters(parameters):
-        parameters_dict = dict(**parameters)
-        verify_docs(
-            filename="parameters.rst",
-            identifiers=parameters_dict.keys(),
-            appearing_as="inline-literal"
-         )
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -57,16 +57,18 @@ class TaskGraphGenerator(object):
         """
         @param root_dir: root directory, with subdirectories for each kind
         @param parameters: parameters for this task-graph generation
         @type parameters: dict
         """
         self.root_dir = root_dir
         self.parameters = parameters
 
+        self.verify_parameters(self.parameters)
+
         filters = parameters.get('filters', [])
 
         # Always add legacy target tasks method until we deprecate that API.
         if 'target_tasks_method' not in filters:
             filters.insert(0, 'target_tasks_method')
 
         self.filters = [filter_tasks.filter_task_functions[f] for f in filters]
 
@@ -232,16 +234,24 @@ class TaskGraphGenerator(object):
         while name not in self._run_results:
             try:
                 k, v = self._run.next()
             except StopIteration:
                 raise AttributeError("No such run result {}".format(name))
             self._run_results[k] = v
         return self._run_results[name]
 
+    def verify_parameters(self, parameters):
+        parameters_dict = dict(**parameters)
+        verify_docs(
+            filename="parameters.rst",
+            identifiers=parameters_dict.keys(),
+            appearing_as="inline-literal"
+         )
+
     def verify_kinds(self, kinds):
         verify_docs(
             filename="kinds.rst",
             identifiers=kinds.keys(),
             appearing_as="heading"
          )
 
     def verify_attributes(self, all_tasks):
--- a/taskcluster/taskgraph/test/test_generator.py
+++ b/taskcluster/taskgraph/test/test_generator.py
@@ -17,17 +17,17 @@ class FakeTask(base.Task):
     def __init__(self, **kwargs):
         self.i = kwargs.pop('i')
         super(FakeTask, self).__init__(**kwargs)
 
     @classmethod
     def load_tasks(cls, kind, path, config, parameters, loaded_tasks):
         return [cls(kind=kind,
                     label='{}-t-{}'.format(kind, i),
-                    attributes={'tasknum': str(i)},
+                    attributes={'_tasknum': str(i)},
                     task={},
                     i=i)
                 for i in range(3)]
 
     def get_dependencies(self, full_task_set):
         i = self.i
         if i > 0:
             return [('{}-t-{}'.format(self.kind, i - 1), 'prev')]
@@ -46,92 +46,92 @@ class FakeKind(Kind):
     def load_tasks(self, parameters, loaded_tasks):
         FakeKind.loaded_kinds.append(self.name)
         return super(FakeKind, self).load_tasks(parameters, loaded_tasks)
 
 
 class WithFakeKind(TaskGraphGenerator):
 
     def _load_kinds(self):
-        for kind_name, deps in self.parameters['kinds']:
+        for kind_name, deps in self.parameters['_kinds']:
             yield FakeKind(
                 kind_name, '/fake',
                 {'kind-dependencies': deps} if deps else {})
 
 
 class TestGenerator(unittest.TestCase):
 
-    def maketgg(self, target_tasks=None, kinds=[('fake', [])]):
+    def maketgg(self, target_tasks=None, kinds=[('_fake', [])]):
         FakeKind.loaded_kinds = []
         self.target_tasks = target_tasks or []
 
         def target_tasks_method(full_task_graph, parameters):
             return self.target_tasks
 
         target_tasks_mod._target_task_methods['test_method'] = target_tasks_method
 
         parameters = {
-            'kinds': kinds,
+            '_kinds': kinds,
             'target_tasks_method': 'test_method',
         }
 
         return WithFakeKind('/root', parameters)
 
     def test_kind_ordering(self):
         "When task kinds depend on each other, they are loaded in postorder"
         self.tgg = self.maketgg(kinds=[
-            ('fake3', ['fake2', 'fake1']),
-            ('fake2', ['fake1']),
-            ('fake1', []),
+            ('_fake3', ['_fake2', '_fake1']),
+            ('_fake2', ['_fake1']),
+            ('_fake1', []),
         ])
         self.tgg._run_until('full_task_set')
-        self.assertEqual(FakeKind.loaded_kinds, ['fake1', 'fake2', 'fake3'])
+        self.assertEqual(FakeKind.loaded_kinds, ['_fake1', '_fake2', '_fake3'])
 
     def test_full_task_set(self):
         "The full_task_set property has all tasks"
         self.tgg = self.maketgg()
         self.assertEqual(self.tgg.full_task_set.graph,
-                         graph.Graph({'fake-t-0', 'fake-t-1', 'fake-t-2'}, set()))
+                         graph.Graph({'_fake-t-0', '_fake-t-1', '_fake-t-2'}, set()))
         self.assertEqual(sorted(self.tgg.full_task_set.tasks.keys()),
-                         sorted(['fake-t-0', 'fake-t-1', 'fake-t-2']))
+                         sorted(['_fake-t-0', '_fake-t-1', '_fake-t-2']))
 
     def test_full_task_graph(self):
         "The full_task_graph property has all tasks, and links"
         self.tgg = self.maketgg()
         self.assertEqual(self.tgg.full_task_graph.graph,
-                         graph.Graph({'fake-t-0', 'fake-t-1', 'fake-t-2'},
+                         graph.Graph({'_fake-t-0', '_fake-t-1', '_fake-t-2'},
                                      {
-                                         ('fake-t-1', 'fake-t-0', 'prev'),
-                                         ('fake-t-2', 'fake-t-1', 'prev'),
+                                         ('_fake-t-1', '_fake-t-0', 'prev'),
+                                         ('_fake-t-2', '_fake-t-1', 'prev'),
                          }))
         self.assertEqual(sorted(self.tgg.full_task_graph.tasks.keys()),
-                         sorted(['fake-t-0', 'fake-t-1', 'fake-t-2']))
+                         sorted(['_fake-t-0', '_fake-t-1', '_fake-t-2']))
 
     def test_target_task_set(self):
         "The target_task_set property has the targeted tasks"
-        self.tgg = self.maketgg(['fake-t-1'])
+        self.tgg = self.maketgg(['_fake-t-1'])
         self.assertEqual(self.tgg.target_task_set.graph,
-                         graph.Graph({'fake-t-1'}, set()))
+                         graph.Graph({'_fake-t-1'}, set()))
         self.assertEqual(self.tgg.target_task_set.tasks.keys(),
-                         ['fake-t-1'])
+                         ['_fake-t-1'])
 
     def test_target_task_graph(self):
         "The target_task_graph property has the targeted tasks and deps"
-        self.tgg = self.maketgg(['fake-t-1'])
+        self.tgg = self.maketgg(['_fake-t-1'])
         self.assertEqual(self.tgg.target_task_graph.graph,
-                         graph.Graph({'fake-t-0', 'fake-t-1'},
-                                     {('fake-t-1', 'fake-t-0', 'prev')}))
+                         graph.Graph({'_fake-t-0', '_fake-t-1'},
+                                     {('_fake-t-1', '_fake-t-0', 'prev')}))
         self.assertEqual(sorted(self.tgg.target_task_graph.tasks.keys()),
-                         sorted(['fake-t-0', 'fake-t-1']))
+                         sorted(['_fake-t-0', '_fake-t-1']))
 
     def test_optimized_task_graph(self):
         "The optimized task graph contains task ids"
-        self.tgg = self.maketgg(['fake-t-2'])
+        self.tgg = self.maketgg(['_fake-t-2'])
         tid = self.tgg.label_to_taskid
         self.assertEqual(
             self.tgg.optimized_task_graph.graph,
-            graph.Graph({tid['fake-t-0'], tid['fake-t-1'], tid['fake-t-2']}, {
-                (tid['fake-t-1'], tid['fake-t-0'], 'prev'),
-                (tid['fake-t-2'], tid['fake-t-1'], 'prev'),
+            graph.Graph({tid['_fake-t-0'], tid['_fake-t-1'], tid['_fake-t-2']}, {
+                (tid['_fake-t-1'], tid['_fake-t-0'], 'prev'),
+                (tid['_fake-t-2'], tid['_fake-t-1'], 'prev'),
             }))
 
 if __name__ == '__main__':
     main()
--- a/taskcluster/taskgraph/util/verifydoc.py
+++ b/taskcluster/taskgraph/util/verifydoc.py
@@ -5,23 +5,36 @@
 
 import re
 import os
 
 base_path = os.path.join(os.getcwd(), "taskcluster/docs/")
 
 
 def verify_docs(filename, identifiers, appearing_as):
+
+    # We ignore identifiers starting with '_' for the sake of tests.
+    # Strings starting with "_" are ignored for doc verification
+    # hence they can be used for faking test values
     with open(os.path.join(base_path, filename)) as fileObject:
         doctext = "".join(fileObject.readlines())
         if appearing_as == "inline-literal":
-            expression_list = ["``" + identifier + "``" for identifier in identifiers]
+            expression_list = [
+                "``" + identifier + "``"
+                for identifier in identifiers
+                if not identifier.startswith("_")
+            ]
         elif appearing_as == "heading":
-            expression_list = [identifier + "\n[-+\n*]+|[.+\n*]+" for identifier in identifiers]
+            expression_list = [
+                identifier + "\n(?:(?:(?:-+\n)+)|(?:(?:.+\n)+))"
+                for identifier in identifiers
+                if not identifier.startswith("_")
+            ]
         else:
-            raise Exception("appearing_as = {} not defined".format(appearing_as))
+            raise Exception("appearing_as = `{}` not defined".format(appearing_as))
 
         for expression, identifier in zip(expression_list, identifiers):
             match_group = re.search(expression, doctext)
             if not match_group:
                 raise Exception(
-                    "{}: {} missing from doc file: {}".format(appearing_as, identifier, filename)
+                    "{}: `{}` missing from doc file: `{}`"
+                    .format(appearing_as, identifier, filename)
                 )