Bug 1340785 - SETA: Add support for TC BBB tasks; r?jmaher draft
author Rob Wood <rwood@mozilla.com>
Tue, 21 Feb 2017 17:07:02 -0500
changeset 487646 f4d2a78e81adef37074914fd1a0be3dcca87bf3f
parent 487645 972714aa03b210476c57991c58e30ea9f98745e2
child 546506 c2d0eab7503b329617afde104ceabbf60e80004b
push id 46279
push user rwood@mozilla.com
push date Tue, 21 Feb 2017 22:08:55 +0000
reviewers jmaher
bugs 1340785
milestone 54.0a1
Bug 1340785 - SETA: Add support for TC BBB tasks; r?jmaher MozReview-Commit-ID: 54UMq7OTrnR
taskcluster/taskgraph/task/transform.py
taskcluster/taskgraph/util/seta.py
--- a/taskcluster/taskgraph/task/transform.py
+++ b/taskcluster/taskgraph/task/transform.py
@@ -82,35 +82,43 @@ class TransformTask(base.Task):
         super(TransformTask, self).__init__(kind, task['label'],
                                             task['attributes'], task['task'],
                                             index_paths=task.get('index-paths'))
 
     def get_dependencies(self, taskgraph):
         return [(label, name) for name, label in self.dependencies.items()]
 
     def optimize(self, params):
+        bbb_task = False
+
         if self.index_paths:
             optimized, taskId = super(TransformTask, self).optimize(params)
             if optimized:
                 return optimized, taskId
 
         elif 'files-changed' in self.when:
             changed = files_changed.check(
                 params, self.when['files-changed'])
             if not changed:
                 logger.debug('no files found matching a pattern in `when.files-changed` for ' +
                              self.label)
                 return True, None
 
+        # for bbb tasks we need to send in the buildbot buildername
+        if self.task.get('provisionerId') == 'buildbot-bridge':
+            self.label = self.task.get('payload').get('buildername')
+            bbb_task = True
+
         # we would like to return 'False, None' while it's high_value_task
         # and we wouldn't optimize it. Otherwise, it will return 'True, None'
         if is_low_value_task(self.label,
                              params.get('project'),
                              params.get('pushlog_id'),
-                             params.get('pushdate')):
+                             params.get('pushdate'),
+                             bbb_task):
             # Always optimize away low-value tasks
             return True, None
         else:
             return False, None
 
     @classmethod
     def from_json(cls, task_dict):
         # when reading back from JSON, we lose the "when" information
--- a/taskcluster/taskgraph/util/seta.py
+++ b/taskcluster/taskgraph/util/seta.py
@@ -8,28 +8,29 @@ from requests import exceptions
 logger = logging.getLogger(__name__)
 
 # It's a list of project name which SETA is useful on
 SETA_PROJECTS = ['mozilla-inbound', 'autoland']
 PROJECT_SCHEDULE_ALL_EVERY_PUSHES = {'mozilla-inbound': 5, 'autoland': 5}
 PROJECT_SCHEDULE_ALL_EVERY_MINUTES = {'mozilla-inbound': 60, 'autoland': 60}
 
 SETA_ENDPOINT = "https://treeherder.mozilla.org/api/project/%s/seta/" \
-                "job-priorities/?build_system_type=taskcluster"
+                "job-priorities/?build_system_type=%s"
 PUSH_ENDPOINT = "https://hg.mozilla.org/integration/%s/json-pushes/?startID=%d&endID=%d"
 
 
 class SETA(object):
     """
     Interface to the SETA service, which defines low-value tasks that can be optimized out
     of the taskgraph.
     """
     def __init__(self):
         # cached low value tasks, by project
         self.low_value_tasks = {}
+        self.low_value_bb_tasks = {}
         # cached push dates by project
         self.push_dates = defaultdict(dict)
         # cached push_ids that failed to retrieve datetime for
         self.failed_json_push_calls = []
 
     def _get_task_string(self, task_tuple):
         # convert task tuple to single task string, so the task label sent in can match
         # remove any empty parts of the tuple
@@ -37,22 +38,28 @@ class SETA(object):
 
         if len(task_tuple) == 0:
             return ''
         if len(task_tuple) != 3:
             return ' '.join(task_tuple)
 
         return 'test-%s/%s-%s' % (task_tuple[0], task_tuple[1], task_tuple[2])
 
-    def query_low_value_tasks(self, project):
+    def query_low_value_tasks(self, project, bbb=False):
         # Request the set of low value tasks from the SETA service.  Low value tasks will be
         # optimized out of the task graph.
         low_value_tasks = []
 
-        url = SETA_ENDPOINT % project
+        if not bbb:
+            # we want to get low priority taskcluster jobs
+            url = SETA_ENDPOINT % (project, 'taskcluster')
+        else:
+            # we want low priority buildbot jobs
+            url = SETA_ENDPOINT % (project, 'buildbot&priority=5')
+
         # Try to fetch the SETA data twice, falling back to an empty list of low value tasks.
         # There are 10 seconds between each try.
         try:
             logger.debug("Retrieving low-value jobs list from SETA")
             response = retry(requests.get, attempts=2, sleeptime=10,
                              args=(url, ),
                              kwargs={'timeout': 60, 'headers': ''})
             task_list = json.loads(response.content).get('jobtypes', '')
@@ -157,17 +164,17 @@ class SETA(object):
 
         # We just print the error out as a debug message if we failed to catch the exception above
         except exceptions.RequestException as error:
             logger.warning(error)
             self.failed_json_push_calls.append(prev_push_id)
 
         return min_between_pushes
 
-    def is_low_value_task(self, label, project, pushlog_id, push_date):
+    def is_low_value_task(self, label, project, pushlog_id, push_date, bbb_task=False):
         # marking a task as low_value means it will be optimized out by tc
         if project not in SETA_PROJECTS:
             return False
 
         schedule_all_every = PROJECT_SCHEDULE_ALL_EVERY_PUSHES.get(project, 5)
         # on every Nth push, want to run all tasks
         if int(pushlog_id) % schedule_all_every == 0:
             return False
@@ -175,16 +182,23 @@ class SETA(object):
         # Nth push, so time to call seta based on number of pushes; however
         # we also want to ensure we run all tasks at least once per N minutes
         if self.minutes_between_pushes(
                 project,
                 int(pushlog_id),
                 int(push_date)) >= PROJECT_SCHEDULE_ALL_EVERY_MINUTES.get(project, 60):
             return False
 
-        # cache the low value tasks per project to avoid repeated SETA server queries
-        if project not in self.low_value_tasks:
-            self.low_value_tasks[project] = self.query_low_value_tasks(project)
-        return label in self.low_value_tasks[project]
+        if not bbb_task:
+            # cache the low value tasks per project to avoid repeated SETA server queries
+            if project not in self.low_value_tasks:
+                self.low_value_tasks[project] = self.query_low_value_tasks(project)
+            return label in self.low_value_tasks[project]
+
+        # gecko decision task is asking whether a bbb task is a low value task, so use bb jobs;
+        # in this case, the label param sent in is already the buildbot buildername
+        if project not in self.low_value_bb_tasks:
+            self.low_value_bb_tasks[project] = self.query_low_value_tasks(project, bbb=True)
+        return label in self.low_value_bb_tasks[project]
 
 # create a single instance of this class, and expose its `is_low_value_task`
 # bound method as a module-level function
 is_low_value_task = SETA().is_low_value_task