Bug 1279676 - Support --rebuild try flag to schedule tests N times. r=dustin draft
author Armen Zambrano Gasparnian <armenzg@mozilla.com>
Thu, 28 Jul 2016 13:20:44 -0400
changeset 396378 355ca631353015bf63461c194168d753efd6958e
parent 396350 331c4166a3a2df2d3a037107addef5d85cdc31b5
child 527180 f9147e600fbf17765d8e7472b35152c1986804af
push id 24977
push user armenzg@mozilla.com
push date Wed, 03 Aug 2016 18:49:23 +0000
reviewers dustin
bugs 1279676
milestone 51.0a1
Bug 1279676 - Support --rebuild try flag to schedule tests N times. r=dustin

MozReview-Commit-ID: Lrxi8t53nwy

If a developer adds '--rebuild N' to their try syntax, the selected test jobs
will be scheduled N times. This is useful for measuring the intermittency rate
of a failure. It also fixes a regression introduced by the recent refactoring
of how we schedule tasks.
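For example, a try push whose message ends in '--rebuild 20' schedules each
selected test job 20 times; the platform and suite flags below are purely
illustrative:

    try: -b do -p linux64 -u mochitest-1 --rebuild 20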
taskcluster/docs/attributes.rst
taskcluster/taskgraph/create.py
taskcluster/taskgraph/target_tasks.py
--- a/taskcluster/docs/attributes.rst
+++ b/taskcluster/docs/attributes.rst
@@ -14,16 +14,29 @@ The attributes, and acceptable values, a
 names and values are the short, lower-case form, with underscores.
 
 kind
 ====
 
 A task's ``kind`` attribute gives the name of the kind that generated it, e.g.,
 ``build`` or ``legacy``.
 
+task_duplicates
+===============
+
+This attribute indicates that multiple copies of the task should be created.
+It is used to track down intermittent job failures.
+
+If this value is set to N, the task-creation machinery will create a total of N
+copies of the task.  Only the first copy will be included in the taskgraph
+output artifacts, although all tasks will be contained in the same taskGroup.
+
+While most attributes are considered read-only, target task methods may alter
+this attribute of tasks they include in the target set.
+
 build_platform
 ==============
 
 The build platform defines the platform for which the binary was built.  It is
 set for both build and test jobs, although test jobs may have a different
 ``test_platform``.
 
 build_type
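To illustrate the note above that target task methods may alter the
task_duplicates attribute, here is a minimal sketch of such a method.  The
helper name and its parameters are hypothetical; the real change is the
target_tasks.py hunk further down.

    # Sketch only, not part of the patch: opt selected test tasks into
    # duplication by writing the task_duplicates attribute.
    def mark_test_tasks_for_rebuild(full_task_graph, labels, n):
        for label in labels:
            task = full_task_graph[label]
            # only test tasks carry a unittest_suite attribute
            if 'unittest_suite' in task.attributes:
                task.attributes['task_duplicates'] = n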
--- a/taskcluster/taskgraph/create.py
+++ b/taskcluster/taskgraph/create.py
@@ -49,16 +49,17 @@ def create_tasks(taskgraph, label_to_tas
         #
         # Using visit_postorder() here isn't the most efficient: we'll
         # block waiting for dependencies of task N to submit even though
         # dependencies for task N+1 may be finished. If we need to optimize
         # this further, we can build a graph of task dependencies and walk
         # that.
         for task_id in taskgraph.graph.visit_postorder():
             task_def = taskgraph.tasks[task_id].task
+            attributes = taskgraph.tasks[task_id].attributes
             # if this task has no dependencies, make it depend on this decision
             # task so that it does not start immediately; and so that if this loop
             # fails halfway through, none of the already-created tasks run.
             if decision_task_id and not task_def.get('dependencies'):
                 task_def['dependencies'] = [decision_task_id]
 
             task_def['taskGroupId'] = task_group_id
             task_def['schedulerId'] = '-'
@@ -67,16 +68,22 @@ def create_tasks(taskgraph, label_to_tas
             deps_fs = [fs[dep] for dep in task_def.get('dependencies', [])
                        if dep in fs]
             for f in futures.as_completed(deps_fs):
                 f.result()
 
             fs[task_id] = e.submit(_create_task, session, task_id,
                                    taskid_to_label[task_id], task_def)
 
+            # Schedule tasks as many times as task_duplicates indicates
+            for i in range(1, attributes.get('task_duplicates', 1)):
+                # We use slugid() since we want a distinct task id
+                fs[task_id] = e.submit(_create_task, session, slugid(),
+                                       taskid_to_label[task_id], task_def)
+
         # Wait for all futures to complete.
         for f in futures.as_completed(fs.values()):
             f.result()
 
 
 def _create_task(session, task_id, label, task_def):
     # create the task using 'http://taskcluster/queue', which is proxied to the queue service
     # with credentials appropriate to this job.
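A note on the duplication loop added above: range(1, N) iterates N-1 times, so
together with the original submission a task whose task_duplicates attribute is
N is created N times in total, matching the attributes.rst text.  A quick
illustration, outside the patch:

    >>> list(range(1, 3))   # two extra submissions + the original = 3 copies
    [1, 2]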
--- a/taskcluster/taskgraph/target_tasks.py
+++ b/taskcluster/taskgraph/target_tasks.py
@@ -37,18 +37,27 @@ def target_tasks_from_parameters(full_ta
     return parameters['target_tasks']
 
 
 @_target_task('try_option_syntax')
 def target_tasks_try_option_syntax(full_task_graph, parameters):
     """Generate a list of target tasks based on try syntax in
     parameters['message'] and, for context, the full task graph."""
     options = try_option_syntax.TryOptionSyntax(parameters['message'], full_task_graph)
-    return [t.label for t in full_task_graph.tasks.itervalues()
-            if options.task_matches(t.attributes)]
+    target_tasks_labels = [t.label for t in full_task_graph.tasks.itervalues()
+                           if options.task_matches(t.attributes)]
+
+    # If the developer wants test jobs to be rebuilt N times, set that value here
+    if int(options.trigger_tests) > 1:
+        for l in target_tasks_labels:
+            task = full_task_graph[l]
+            if 'unittest_suite' in task.attributes:
+                task.attributes['task_duplicates'] = options.trigger_tests
+
+    return target_tasks_labels
 
 
 @_target_task('all_builds_and_tests')
 def target_tasks_all_builds_and_tests(full_task_graph, parameters):
     """Trivially target all build and test tasks.  This is used for
     branches where we want to build "everything", but "everything"
     does not include uninteresting things like docker images"""
     def filter(task):