Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal draft
author Dustin J. Mitchell <dustin@mozilla.com>
Wed, 23 Aug 2017 16:21:06 +0000
changeset 668272 de961f3de9585022a753f3b3bb5921ad335d95da
parent 668271 5dc1f2b98d419f4027b159b311be2f455364a807
child 668273 ce00c24017f3ecdb497bd295abd20eb8c9126dea
push id 80998
push user dmitchell@mozilla.com
push date Thu, 21 Sep 2017 12:49:52 +0000
reviewers ahal
bugs 1383880
milestone 57.0a1
Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal

This adds some new optimization strategies. For tests, we use Either(SETA, SkipUnlessSchedules), thereby giving both mechanisms a chance to skip tasks. On try, SETA is omitted.

MozReview-Commit-ID: GL4tlwyeBa6
build/sparse-profiles/taskgraph
taskcluster/taskgraph/optimize.py
taskcluster/taskgraph/transforms/task.py
--- a/build/sparse-profiles/taskgraph
+++ b/build/sparse-profiles/taskgraph
@@ -17,11 +17,15 @@ path:taskcluster/
 # them all in.
 path:testing/config/tooltool-manifests/
 path:testing/mozharness/
 path:tools/lint/
 
 # for new-style try pushes
 path:try_task_config.json
 
+# Moz.build files are read in filesystem mode
+glob:**/moz.build
+glob:**/*.mozbuild
+
 # Tooltool manifests also need to be opened. Assume they
 # are all somewhere in "tooltool-manifests" directories.
 glob:**/tooltool-manifests/**
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@@ -19,20 +19,24 @@ import requests
 from collections import defaultdict
 
 from .graph import Graph
 from . import files_changed
 from .taskgraph import TaskGraph
 from .util.seta import is_low_value_task
 from .util.taskcluster import find_task_id
 from .util.parameterization import resolve_task_references
+from mozbuild.util import memoize
 from slugid import nice as slugid
+from mozbuild.frontend import reader
 
 logger = logging.getLogger(__name__)
 
+TOPSRCDIR = os.path.abspath(os.path.join(__file__, '../../../'))
+
 
 def optimize_task_graph(target_task_graph, params, do_not_optimize,
                         existing_tasks=None, strategies=None):
     """
     Perform task optimization, returning a taskgraph and a map from label to
     assigned taskId, including replacement tasks.
     """
     label_to_taskid = {}
@@ -66,16 +70,18 @@ def optimize_task_graph(target_task_grap
 
 
 def _make_default_strategies():
     return {
         'never': OptimizationStrategy(),  # "never" is the default behavior
         'index-search': IndexSearch(),
         'seta': SETA(),
         'skip-unless-changed': SkipUnlessChanged(),
+        'skip-unless-schedules': SkipUnlessSchedules(),
+        'skip-unless-schedules-or-seta': Either(SkipUnlessSchedules(), SETA()),
     }
 
 
 def _get_optimizations(target_task_graph, strategies):
     def optimizations(label):
         task = target_task_graph.tasks[label]
         if task.optimization:
             opt_by, arg = task.optimization.items()[0]
@@ -239,16 +245,47 @@ class OptimizationStrategy(object):
 
     def should_replace_task(self, task, params, arg):
         """Determine whether to optimize this task by replacing it.  Returns a
         taskId to replace this task, True to replace with nothing, or False to
         keep the task."""
         return False
 
 
+class Either(OptimizationStrategy):
+    """Given one or more optimization strategies, remove a task if any of them
+    says to, and replace it if any finds a replacement (the earliest truthy
+    result wins).  By default, each substrategy gets the same arg; split_args,
+    if given, is called with arg and returns one arg per substrategy."""
+    def __init__(self, *substrategies, **kwargs):
+        self.substrategies = substrategies
+        self.split_args = kwargs.pop('split_args', None)
+        if not self.split_args:
+            self.split_args = lambda arg: [arg] * len(substrategies)
+        if kwargs:
+            raise TypeError("unexpected keyword args")
+
+    def _for_substrategies(self, arg, fn):
+        for sub, arg in zip(self.substrategies, self.split_args(arg)):
+            rv = fn(sub, arg)
+            if rv:
+                return rv
+        return False
+
+    def should_remove_task(self, task, params, arg):
+        return self._for_substrategies(
+            arg,
+            lambda sub, arg: sub.should_remove_task(task, params, arg))
+
+    def should_replace_task(self, task, params, arg):
+        return self._for_substrategies(
+            arg,
+            lambda sub, arg: sub.should_replace_task(task, params, arg))
+
+
 class IndexSearch(OptimizationStrategy):
     def should_remove_task(self, task, params, index_paths):
         "If this task has no dependencies, don't run it.."
         return True
 
     def should_replace_task(self, task, params, index_paths):
         "Look for a task with one of the given index paths"
         for index_path in index_paths:
@@ -295,8 +332,35 @@ class SkipUnlessChanged(OptimizationStra
             return False
 
         changed = files_changed.check(params, file_patterns)
         if not changed:
             logger.debug('no files found matching a pattern in `skip-unless-changed` for ' +
                          task.label)
             return True
         return False
+
+
+class SkipUnlessSchedules(OptimizationStrategy):
+    # Remove a task unless SCHEDULES for a file changed in the push names one of its components.
+    @memoize
+    def scheduled_by_push(self, repository, revision):
+        changed_files = files_changed.get_changed_files(repository, revision)
+
+        config = reader.EmptyConfig(TOPSRCDIR)
+        rdr = reader.BuildReader(config)
+        components = set()
+        for p, m in rdr.files_info(changed_files).items():
+            components |= set(m['SCHEDULES'].components)
+
+        return components
+
+    def should_remove_task(self, task, params, conditions):
+        if params.get('pushlog_id') == -1:
+            return False
+
+        scheduled = self.scheduled_by_push(params['head_repository'], params['head_rev'])
+        conditions = set(conditions)
+        # if *any* of the condition components are scheduled, do not optimize
+        if conditions & scheduled:
+            return False
+
+        return True
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -13,16 +13,17 @@ from __future__ import absolute_import, 
 import hashlib
 import json
 import os
 import re
 import time
 from copy import deepcopy
 
 from mozbuild.util import memoize
+from mozbuild import schedules
 from taskgraph.util.attributes import TRUNK_PROJECTS
 from taskgraph.util.hash import hash_path
 from taskgraph.util.treeherder import split_symbol
 from taskgraph.transforms.base import TransformSequence
 from taskgraph.util.schema import validate_schema, Schema
 from taskgraph.util.scriptworker import get_release_config
 from voluptuous import Any, Required, Optional, Extra
 from taskgraph import GECKO
@@ -174,16 +175,20 @@ task_description_schema = Schema({
         None,
         # search the index for the given index namespaces, and replace this task if found
         # the search occurs in order, with the first match winning
         {'index-search': [basestring]},
         # consult SETA and skip this task if it is low-value
         {'seta': None},
         # skip this task if none of the given file patterns match
         {'skip-unless-changed': [basestring]},
+        # skip this task unless the changed files' SCHEDULES contain any of these components
+        {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
+        # skip if SETA or skip-unless-schedules says to
+        {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)},
     ),
 
     # the provisioner-id/worker-type for the task.  The following parameters will
     # be substituted in this string:
     #  {level} -- the scm level of this push
     'worker-type': basestring,
 
     # Whether the job should use sccache compiler caching.