Bug 1277417: output task information in JSON or just labels; r=ahal draft
author: Dustin J. Mitchell <dustin@mozilla.com>
Tue, 07 Jun 2016 03:09:48 +0000
changeset: 376393 4ffb78ab7a85b32e64d10218a4a8841c22e689f8
parent: 376376 fb0507f42e74128627a5fad0c82d5f1cbe435d93
child: 523141 a19392768fc407ffc41f913d2548141e8d5440b9
push id: 20565
push user: dmitchell@mozilla.com
push date: Tue, 07 Jun 2016 21:11:34 +0000
reviewers: ahal
bugs: 1277417
milestone: 50.0a1
Bug 1277417: output task information in JSON or just labels; r=ahal

The JSON output is suitable for processing with `jq` to extract features of interest.

MozReview-Commit-ID: 5wpV7sXlOz3
taskcluster/docs/taskgraph.rst
taskcluster/mach_commands.py
taskcluster/taskgraph/decision.py
taskcluster/taskgraph/test/test_decision.py
taskcluster/taskgraph/types.py
--- a/taskcluster/docs/taskgraph.rst
+++ b/taskcluster/docs/taskgraph.rst
@@ -146,42 +146,51 @@ parameter file.  The parameter keys and 
 Finally, the ``mach taskgraph decision`` subcommand performs the entire
 task-graph generation process, then creates the tasks.  This command should
 only be used within a decision task, as it assumes it is running in that
 context.
 
 Taskgraph JSON Format
 ---------------------
 
-Each task graph artifact is represented as a JSON object.  The object's
-properties are the task labels or taskIds (see below), and the value of each
-property describes a task in an object with the following attributes:
+Each task in the graph is represented as a JSON object.  The output is suitable
+for processing with the `jq <https://stedolan.github.io/jq/>`_ utility.
+
+Each task has the following properties:
+
+``task_id``
+   The task's taskId (only for optimized task graphs)
 
 ``label``
-   The task's label (never a taskId).
+   The task's label
 
 ``attributes``
    The task's attributes
 
 ``dependencies``
-   The task's in-graph dependencies, each represented as a pair ``[name, label]``
-   giving the dependency name and the label for the required task.
+   The task's in-graph dependencies, represented as an object mapping
+   dependency name to label (or to taskId for optimized task graphs)
 
 ``task``
    The task's TaskCluster task definition.
 
 The task definition may contain "task references" of the form
 ``{"task-reference": "string containing <task-label>"}``.  These will be
 replaced during the optimization step, with the appropriate taskId substituted
 for ``<task-label>`` in the string.  Multiple labels may be substituted in a
 single string, and ``<<>`` can be used to escape a literal ``<``.
 
 The results from each command are in the same format, but with some differences
 in the content:
 
 * The ``tasks`` and ``target`` subcommands both return graphs with no edges.
   That is, just collections of tasks without any dependencies indicated.
 
-* The ``optimized`` subcommand returns a graph keyed by taskId rather than
-  label.  The dependencies array, too, contains taskIds instead of labels.
-  Dependencies on optimized tasks are omitted.  However, the
-  ``task.dependencies`` array is populated with the full list of dependency
-  taskIds.  All task references are resolved in the optimized graph.
+* The ``optimized`` subcommand returns tasks that have been assigned taskIds.
+  The ``dependencies`` object, too, maps to taskIds instead of labels, with
+  dependencies on optimized tasks omitted.  However, the ``task.dependencies``
+  array is populated with the full list of dependency taskIds.  All task
+  references are resolved in the optimized graph.
+
+The graph artifacts produced by the decision task are JSON objects, keyed by
+label (``full-task-graph.json``) or by taskId (``task-graph.json``), while
+``target-tasks.json`` is a list of labels.  The decision task also writes
+``label-to-taskid.json``, containing a mapping from label to taskId.
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -1,16 +1,17 @@
 # -*- coding: utf-8 -*-
 
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import json
 import logging
 import sys
 import traceback
 
 from mach.decorators import (
     CommandArgument,
     CommandProvider,
     Command,
@@ -27,16 +28,22 @@ class ShowTaskGraphSubCommand(SubCommand
         after = SubCommand.__call__(self, func)
         args = [
             CommandArgument('--root', '-r', default='taskcluster/ci',
                             help="root of the taskgraph definition relative to topsrcdir"),
             CommandArgument('--quiet', '-q', action="store_true",
                             help="suppress all logging output"),
             CommandArgument('--verbose', '-v', action="store_true",
                             help="include debug-level logging output"),
+            CommandArgument('--json', '-J', action="store_const",
+                            dest="format", const="json",
+                            help="Output each task in the task graph as a JSON object"),
+            CommandArgument('--labels', '-L', action="store_const",
+                            dest="format", const="labels",
+                            help="Output the label for each task in the task graph (default)"),
             CommandArgument('--parameters', '-p', required=True,
                             help="parameters file (.yml or .json; see "
                                  "`taskcluster/docs/parameters.rst`)`"),
             CommandArgument('--no-optimize', dest="optimize", action="store_false",
                             default="true",
                             help="do not remove tasks from the graph that are found in the "
                             "index (a.k.a. optimize the graph)"),
         ]
@@ -176,13 +183,23 @@ class MachCommands(MachCommandBase):
             target_tasks_method = taskgraph.target_tasks.get_method(target_tasks_method)
             tgg = taskgraph.generator.TaskGraphGenerator(
                 root_dir=options['root'],
                 parameters=parameters,
                 target_tasks_method=target_tasks_method)
 
             tg = getattr(tgg, graph_attr)
 
-            for label in tg.graph.visit_postorder():
-                print(tg.tasks[label])
+            show_method = getattr(self, 'show_taskgraph_' + (options['format'] or 'labels'))
+            show_method(tg)
         except Exception as e:
             traceback.print_exc()
             sys.exit(1)
+
+    def show_taskgraph_labels(self, taskgraph):
+        for label in taskgraph.graph.visit_postorder():
+            print(label)  # one label per line, in the graph's postorder
+
+    def show_taskgraph_json(self, taskgraph):
+        # Print one JSON object per task, one per line, rather than a single
+        # enclosing object, so only the values of the to_json() dict are used
+        for task in taskgraph.to_json().itervalues():
+            print(json.dumps(task))
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -60,27 +60,24 @@ def taskgraph_decision(options):
         root_dir=options['root'],
         parameters=parameters,
         target_tasks_method=target_tasks_method)
 
     # write out the parameters used to generate this graph
     write_artifact('parameters.yml', dict(**parameters))
 
     # write out the full graph for reference
-    write_artifact('full-task-graph.json',
-                   taskgraph_to_json(tgg.full_task_graph))
+    write_artifact('full-task-graph.json', tgg.full_task_graph.to_json())
 
     # write out the target task set to allow reproducing this as input
-    write_artifact('target-tasks.json',
-                   tgg.target_task_set.tasks.keys())
+    write_artifact('target-tasks.json', tgg.target_task_set.tasks.keys())
 
     # write out the optimized task graph to describe what will actually happen,
     # and the map of labels to taskids
-    write_artifact('task-graph.json',
-                   taskgraph_to_json(tgg.optimized_task_graph))
+    write_artifact('task-graph.json', tgg.optimized_task_graph.to_json())
     write_artifact('label-to-taskid.json', tgg.label_to_taskid)
 
     # actually create the graph
     create_tasks(tgg.optimized_task_graph, tgg.label_to_taskid)
 
 
 def get_decision_parameters(options):
     """
@@ -109,35 +106,16 @@ def get_decision_parameters(options):
         logger.warning("using default project parameters; add {} to "
               "PER_PROJECT_PARAMETERS in {} to customize behavior "
               "for this project".format(project, __file__))
         parameters.update(PER_PROJECT_PARAMETERS['default'])
 
     return Parameters(parameters)
 
 
-def taskgraph_to_json(taskgraph):
-    tasks = taskgraph.tasks
-
-    def tojson(task):
-        return {
-            'label': task.label,
-            'task': task.task,
-            'attributes': task.attributes,
-            'dependencies': []
-        }
-    rv = {label: tojson(tasks[label]) for label in taskgraph.graph.nodes}
-
-    # add dependencies with one trip through the graph edges
-    for (left, right, name) in taskgraph.graph.edges:
-        rv[left]['dependencies'].append((name, right))
-
-    return rv
-
-
 def write_artifact(filename, data):
     logger.info('writing artifact file `{}`'.format(filename))
     if not os.path.isdir(ARTIFACTS_DIR):
         os.mkdir(ARTIFACTS_DIR)
     path = os.path.join(ARTIFACTS_DIR, filename)
     if filename.endswith('.yml'):
         with open(path, 'w') as f:
             yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
--- a/taskcluster/taskgraph/test/test_decision.py
+++ b/taskcluster/taskgraph/test/test_decision.py
@@ -28,23 +28,23 @@ class TestDecision(unittest.TestCase):
 
         res = decision.taskgraph_to_json(taskgraph)
 
         self.assertEqual(res, {
             'a': {
                 'label': 'a',
                 'attributes': {'attr': 'a-task'},
                 'task': {},
-                'dependencies': [('edgelabel', 'b')],
+                'dependencies': {'edgelabel': 'b'},
             },
             'b': {
                 'label': 'b',
                 'attributes': {},
                 'task': {'task': 'def'},
-                'dependencies': [],
+                'dependencies': {},
             }
         })
 
 
     def test_write_artifact_json(self):
         data = [{'some': 'data'}]
         tmpdir = tempfile.mkdtemp()
         try:
--- a/taskcluster/taskgraph/types.py
+++ b/taskcluster/taskgraph/types.py
@@ -48,16 +48,34 @@ class TaskGraph(object):
     by label.  TaskGraph instances should be treated as immutable.
     """
 
     def __init__(self, tasks, graph):
         assert set(tasks) == graph.nodes
         self.tasks = tasks
         self.graph = graph
 
+    def to_json(self):
+        "Return a JSON-able dict of task descriptions, keyed like self.tasks"
+        named_links_dict = self.graph.named_links_dict()
+        # the keys may be labels or taskIds, so the loop variable is just 'key'
+        tasks = {}
+        for key in self.graph.visit_postorder():
+            task = self.tasks[key]
+            task_json = {
+                'label': task.label,
+                'attributes': task.attributes,
+                'dependencies': named_links_dict.get(key, {}),
+                'task': task.task
+            }
+            if task.task_id:  # 'task_id' is omitted when the task has none
+                task_json['task_id'] = task.task_id
+            tasks[key] = task_json
+        return tasks
+
     def __getitem__(self, label):
         "Get a task by label"
         return self.tasks[label]
 
     def __iter__(self):
         "Iterate over tasks in undefined order"
         return self.tasks.itervalues()