Bug 1383880: add support for optimizing tasks based on SCHEDULES; r=ahal
This adds some new optimization strategies. For tests, we use Either(SETA,
SkipUnlessSchedules), thereby giving both mechanisms a chance to skip tasks. On
try, SETA is omitted.
MozReview-Commit-ID: GL4tlwyeBa6
--- a/build/sparse-profiles/taskgraph
+++ b/build/sparse-profiles/taskgraph
@@ -17,11 +17,15 @@ path:taskcluster/
# them all in.
path:testing/config/tooltool-manifests/
path:testing/mozharness/
path:tools/lint/
# for new-style try pushes
path:try_task_config.json
+# Moz.build files are read in filesystem mode
+glob:**/moz.build
+glob:**/*.mozbuild
+
# Tooltool manifests also need to be opened. Assume they
# are all somewhere in "tooltool-manifests" directories.
glob:**/tooltool-manifests/**
--- a/taskcluster/taskgraph/optimize.py
+++ b/taskcluster/taskgraph/optimize.py
@@ -19,20 +19,24 @@ import requests
from collections import defaultdict
from .graph import Graph
from . import files_changed
from .taskgraph import TaskGraph
from .util.seta import is_low_value_task
from .util.taskcluster import find_task_id
from .util.parameterization import resolve_task_references
+from mozbuild.util import memoize
from slugid import nice as slugid
+from mozbuild.frontend import reader
logger = logging.getLogger(__name__)
+TOPSRCDIR = os.path.abspath(os.path.join(__file__, '../../../'))
+
def optimize_task_graph(target_task_graph, params, do_not_optimize,
existing_tasks=None, strategies=None):
"""
Perform task optimization, returning a taskgraph and a map from label to
assigned taskId, including replacement tasks.
"""
label_to_taskid = {}
@@ -66,16 +70,18 @@ def optimize_task_graph(target_task_grap
def _make_default_strategies():
return {
'never': OptimizationStrategy(), # "never" is the default behavior
'index-search': IndexSearch(),
'seta': SETA(),
'skip-unless-changed': SkipUnlessChanged(),
+ 'skip-unless-schedules': SkipUnlessSchedules(),
+ 'skip-unless-schedules-or-seta': Either(SkipUnlessSchedules(), SETA()),
}
def _get_optimizations(target_task_graph, strategies):
def optimizations(label):
task = target_task_graph.tasks[label]
if task.optimization:
opt_by, arg = task.optimization.items()[0]
@@ -239,16 +245,47 @@ class OptimizationStrategy(object):
def should_replace_task(self, task, params, arg):
"""Determine whether to optimize this task by replacing it. Returns a
taskId to replace this task, True to replace with nothing, or False to
keep the task."""
return False
+class Either(OptimizationStrategy):
+ """Given one or more optimization strategies, remove a task if any of them
+ says to, and replace with a task if any finds a replacement (preferring the
+ earliest). By default, each substrategy gets the same arg, but split_args
+ can return a list of args for each strategy, if desired."""
+ def __init__(self, *substrategies, **kwargs):
+ self.substrategies = substrategies
+ self.split_args = kwargs.pop('split_args', None)
+ if not self.split_args:
+ self.split_args = lambda arg: [arg] * len(substrategies)
+ if kwargs:
+ raise TypeError("unexpected keyword args")
+
+ def _for_substrategies(self, arg, fn):
+ for sub, arg in zip(self.substrategies, self.split_args(arg)):
+ rv = fn(sub, arg)
+ if rv:
+ return rv
+ return False
+
+ def should_remove_task(self, task, params, arg):
+ return self._for_substrategies(
+ arg,
+ lambda sub, arg: sub.should_remove_task(task, params, arg))
+
+ def should_replace_task(self, task, params, arg):
+ return self._for_substrategies(
+ arg,
+ lambda sub, arg: sub.should_replace_task(task, params, arg))
+
+
class IndexSearch(OptimizationStrategy):
def should_remove_task(self, task, params, index_paths):
"If this task has no dependencies, don't run it.."
return True
def should_replace_task(self, task, params, index_paths):
"Look for a task with one of the given index paths"
for index_path in index_paths:
@@ -295,8 +332,35 @@ class SkipUnlessChanged(OptimizationStra
return False
changed = files_changed.check(params, file_patterns)
if not changed:
logger.debug('no files found matching a pattern in `skip-unless-changed` for ' +
task.label)
return True
return False
+
+
+class SkipUnlessSchedules(OptimizationStrategy):
+
+ @memoize
+ def scheduled_by_push(self, repository, revision):
+ changed_files = files_changed.get_changed_files(repository, revision)
+
+ config = reader.EmptyConfig(TOPSRCDIR)
+ rdr = reader.BuildReader(config)
+ components = set()
+ for p, m in rdr.files_info(changed_files).items():
+ components |= set(m['SCHEDULES'].components)
+
+ return components
+
+ def should_remove_task(self, task, params, conditions):
+ if params.get('pushlog_id') == -1:
+ return False
+
+ scheduled = self.scheduled_by_push(params['head_repository'], params['head_rev'])
+ conditions = set(conditions)
+ # if *any* of the condition components are scheduled, do not optimize
+ if conditions & scheduled:
+ return False
+
+ return True
--- a/taskcluster/taskgraph/transforms/task.py
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -13,16 +13,17 @@ from __future__ import absolute_import,
import hashlib
import json
import os
import re
import time
from copy import deepcopy
from mozbuild.util import memoize
+from mozbuild import schedules
from taskgraph.util.attributes import TRUNK_PROJECTS
from taskgraph.util.hash import hash_path
from taskgraph.util.treeherder import split_symbol
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.schema import validate_schema, Schema
from taskgraph.util.scriptworker import get_release_config
from voluptuous import Any, Required, Optional, Extra
from taskgraph import GECKO
@@ -174,16 +175,20 @@ task_description_schema = Schema({
None,
# search the index for the given index namespaces, and replace this task if found
# the search occurs in order, with the first match winning
{'index-search': [basestring]},
# consult SETA and skip this task if it is low-value
{'seta': None},
# skip this task if none of the given file patterns match
{'skip-unless-changed': [basestring]},
+ # skip this task unless the changed files' SCHEDULES contains any of these components
+ {'skip-unless-schedules': list(schedules.ALL_COMPONENTS)},
+ # skip if SETA or skip-unless-schedules says to
+ {'skip-unless-schedules-or-seta': list(schedules.ALL_COMPONENTS)},
),
# the provisioner-id/worker-type for the task. The following parameters will
# be substituted in this string:
# {level} -- the scm level of this push
'worker-type': basestring,
# Whether the job should use sccache compiler caching.