Bug 1328727: use json-e for cron decision tasks too; r?jonasfj r?aki draft
author Dustin J. Mitchell <dustin@mozilla.com>
Fri, 21 Jul 2017 18:08:06 +0000
changeset 613224 78c8782cd7bbfd8509f1b1a509cb35ad6eaacf5d
parent 613223 9db1cdfdd41c0eea2fae3487ec8cf898060b5703
child 638639 86c7638a20f6fcf51c39142bf3a0cee4e15b8c75
push id 69756
push user dmitchell@mozilla.com
push date Fri, 21 Jul 2017 19:09:34 +0000
reviewers jonasfj, aki
bugs 1328727
milestone 56.0a1
Bug 1328727: use json-e for cron decision tasks too; r?jonasfj r?aki

Changes to cron decision tasks:
 - drops some unnecessary routes
 - drops tags.createdForUser: nobody@..
 - more use of environment variables within the command line

MozReview-Commit-ID: 9zoqFvwrBRs
.taskcluster.yml
taskcluster/taskgraph/cron/decision.py
--- a/.taskcluster.yml
+++ b/.taskcluster.yml
@@ -1,60 +1,79 @@
-# This file is handled by mozilla-taskcluster; see
-# https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+# This file is rendered via JSON-e by
+# - mozilla-taskcluster - https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+# - cron tasks - taskcluster/taskgraph/cron/decision.py
 version: 1
 tasks:
   $let:
     # sometimes the push user is just `ffxbld` or the like, but we want an email-like field..
     ownerEmail: {$if: '"@" in push.owner', then: '${push.owner}', else: '${push.owner}@noreply.mozilla.org'}
+    # ensure there's no trailing `/` on the repo URL
+    repoUrl: {$if: 'repository.url[-1] == "/"', then: {$eval: 'repository.url[:-1]'}, else: {$eval: 'repository.url'}}
   in:
   - taskId: '${as_slugid("decision")}'
     taskGroupId: '${as_slugid("decision")}' # same as taskId; this is how automation identifies a decision task
     schedulerId: 'gecko-level-${repository.level}'
 
     created: {$fromNow: ''}
     deadline: {$fromNow: '1 day'}
     expires: {$fromNow: '1 year 1 second'} # 1 second so artifacts expire first, despite rounding errors
     metadata:
-      owner: "${ownerEmail}"
-      source: "${repository.url}/raw-file/${push.revision}/.taskcluster.yml"
-      name: "Gecko Decision Task"
-      description: |
-          The task that creates all of the other tasks in the task graph
+      $merge:
+        - owner: "${ownerEmail}"
+          source: "${repoUrl}/raw-file/${push.revision}/.taskcluster.yml"
+        - $if: 'tasks_for == "hg-push"'
+          then:
+            name: "Gecko Decision Task"
+            description: 'The task that creates all of the other tasks in the task graph'
+          else:
+            name: "Decision Task for cron job ${cron.job_name}"
+            description: 'Created by a [cron task](https://tools.taskcluster.net/tasks/${cron.task_id})'
 
     provisionerId: "aws-provisioner-v1"
     workerType: "gecko-decision"
 
     tags:
-      createdForUser: "${ownerEmail}"
+      $if: 'tasks_for == "hg-push"'
+      then: {createdForUser: "${ownerEmail}"}
 
     routes:
-      - "index.gecko.v2.${repository.project}.latest.firefox.decision"
-      - "index.gecko.v2.${repository.project}.pushlog-id.${push.pushlog_id}.decision"
-      - "tc-treeherder.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
-      - "tc-treeherder-stage.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
-      - "notify.email.${ownerEmail}.on-failed"
-      - "notify.email.${ownerEmail}.on-exception"
+      $if: 'tasks_for == "hg-push"'
+      then:
+        - "index.gecko.v2.${repository.project}.latest.firefox.decision"
+        - "index.gecko.v2.${repository.project}.pushlog-id.${push.pushlog_id}.decision"
+        - "tc-treeherder.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
+        - "tc-treeherder-stage.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
+        - "notify.email.${ownerEmail}.on-failed"
+        - "notify.email.${ownerEmail}.on-exception"
+      else:
+        - "index.gecko.v2.${repository.project}.latest.firefox.decision-${cron.job_name}"
+        - "tc-treeherder.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
+        - "tc-treeherder-stage.v2.${repository.project}.${push.revision}.${push.pushlog_id}"
 
     scopes:
-      - 'assume:repo:${repository.url[8:-1]}:*'
-      - 'queue:route:notify.email.${ownerEmail}.*'
+      $if: 'tasks_for == "hg-push"'
+      then:
+        - 'assume:repo:${repoUrl[8:]}:*'
+        - 'queue:route:notify.email.${ownerEmail}.*'
+      else:
+        - 'assume:repo:${repoUrl[8:]}:cron:${cron.job_name}'
 
     dependencies: []
     requires: all-completed
 
     priority: lowest
     retries: 5
 
     payload:
       env:
         # checkout-gecko uses these to check out the source; the inputs
         # to `mach taskgraph decision` are all on the command line.
         GECKO_BASE_REPOSITORY: 'https://hg.mozilla.org/mozilla-unified'
-        GECKO_HEAD_REPOSITORY: '${repository.url}'
+        GECKO_HEAD_REPOSITORY: '${repoUrl}'
         GECKO_HEAD_REF: '${push.revision}'
         GECKO_HEAD_REV: '${push.revision}'
         GECKO_COMMIT_MSG: '${push.comment}'
         HG_STORE_PATH: /home/worker/checkouts/hg-store
 
       cache:
         level-${repository.level}-checkouts: /home/worker/checkouts
 
@@ -73,32 +92,40 @@ tasks:
       # TODO use mozilla-unified for the base repository once the tc-vcs
       # tar.gz archives are created or tc-vcs isn't being used.
       command:
         - /home/worker/bin/run-task
         - '--vcs-checkout=/home/worker/checkouts/gecko'
         - '--'
         - bash
         - -cx
-        - >
+        - $let:
+            extraArgs: {$if: 'tasks_for == "hg-push"', then: '', else: '${cron.quoted_args}'}
+          in: >
             cd /home/worker/checkouts/gecko &&
             ln -s /home/worker/artifacts artifacts &&
             ./mach --log-no-times taskgraph decision
             --pushlog-id='${push.pushlog_id}'
             --pushdate='${push.pushdate}'
             --project='${repository.project}'
             --message="$GECKO_COMMIT_MSG"
             --owner='${ownerEmail}'
             --level='${repository.level}'
             --base-repository="$GECKO_BASE_REPOSITORY"
             --head-repository="$GECKO_HEAD_REPOSITORY"
             --head-ref="$GECKO_HEAD_REF"
             --head-rev="$GECKO_HEAD_REV"
+            ${extraArgs}
 
       artifacts:
         'public':
           type: 'directory'
           path: '/home/worker/artifacts'
           expires: {$fromNow: '1 year'}
 
     extra:
       treeherder:
-        symbol: D
+        $if: 'tasks_for == "hg-push"'
+        then:
+          symbol: D
+        else:
+          groupSymbol: cron
+          symbol: "${cron.job_symbol}"
--- a/taskcluster/taskgraph/cron/decision.py
+++ b/taskcluster/taskgraph/cron/decision.py
@@ -2,99 +2,82 @@
 
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import jsone
 import pipes
 import yaml
-import re
 import os
 import slugid
 
+from taskgraph.util.time import current_json_time
+
 
 def run_decision_task(job, params):
     arguments = []
     if 'target-tasks-method' in job:
         arguments.append('--target-tasks-method={}'.format(job['target-tasks-method']))
     return [
         make_decision_task(
             params,
             symbol=job['treeherder-symbol'],
             arguments=arguments),
     ]
 
 
 def make_decision_task(params, symbol, arguments=[], head_rev=None):
-    """Generate a basic decision task, based on the root
-    .taskcluster.yml"""
+    """Generate a basic decision task, based on the root .taskcluster.yml"""
     with open('.taskcluster.yml') as f:
-        taskcluster_yml = f.read()
+        taskcluster_yml = yaml.load(f)
 
     if not head_rev:
         head_rev = params['head_rev']
 
-    # do a cheap and dirty job of the template substitution that mozilla-taskcluster
-    # does when it reads .taskcluster.yml
-    comment = '"no push -- cron task \'{job_name}\'"'.format(**params),
-    replacements = {
-        '\'{{{?now}}}?\'': "{'relative-datestamp': '0 seconds'}",
-        '{{{?owner}}}?': 'nobody@mozilla.org',
-        '{{#shellquote}}{{{comment}}}{{/shellquote}}': comment,
-        '{{{?source}}}?': params['head_repository'],
-        '{{{?url}}}?': params['head_repository'],
-        '{{{?project}}}?': params['project'],
-        '{{{?level}}}?': params['level'],
-        '{{{?revision}}}?': head_rev,
-        '\'{{#from_now}}([^{]*){{/from_now}}\'': "{'relative-datestamp': '\\1'}",
-        '{{{?pushdate}}}?': '0',
-        # treeherder ignores pushlog_id, so set it to -1
-        '{{{?pushlog_id}}}?': '-1',
-        # omitted as unnecessary
-        # {{#as_slugid}}..{{/as_slugid}}
-    }
-    for pattern, replacement in replacements.iteritems():
-        taskcluster_yml = re.sub(pattern, replacement, taskcluster_yml)
+    slugids = {}
 
-    task = yaml.load(taskcluster_yml)['tasks'][0]['task']
-
-    # set some metadata
-    task['metadata']['name'] = 'Decision task for cron job ' + params['job_name']
-    cron_task_id = os.environ.get('TASK_ID', '<cron task id>')
-    descr_md = 'Created by a [cron task](https://tools.taskcluster.net/task-inspector/#{}/)'
-    task['metadata']['description'] = descr_md.format(cron_task_id)
+    def as_slugid(name):
+        # https://github.com/taskcluster/json-e/issues/164
+        name = name[0]
+        if name not in slugids:
+            slugids[name] = slugid.nice()
+        return slugids[name]
 
-    # create new indices so these aren't mixed in with regular decision tasks
-    for i, route in enumerate(task['routes']):
-        if route.startswith('index'):
-            task['routes'][i] = route + '-' + params['job_name']
-
-    th = task['extra']['treeherder']
-    th['groupSymbol'] = 'cron'
-    th['symbol'] = symbol
-
-    # add a scope based on the repository, with a cron:<job_name> suffix
-    match = re.match(r'https://(hg.mozilla.org)/(.*?)/?$', params['head_repository'])
-    if not match:
-        raise Exception('Unrecognized head_repository')
-    repo_scope = 'assume:repo:{}/{}:cron:{}'.format(
-        match.group(1), match.group(2), params['job_name'])
-    task.setdefault('scopes', []).append(repo_scope)
+    # provide a similar JSON-e context to what mozilla-taskcluster provides:
+    # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+    # but with a different tasks_for and an extra `cron` section
+    context = {
+        'tasks_for': 'cron',
+        'repository': {
+            'url': params['head_repository'],
+            'project': params['project'],
+            'level': params['level'],
+        },
+        'push': {
+            'revision': params['head_rev'],
+            # remainder are fake values, but the decision task expects them anyway
+            'pushlog_id': -1,
+            'pushdate': 0,
+            'owner': 'nobody',
+            'comment': '',
+        },
+        'cron': {
+            'task_id': os.environ.get('TASK_ID', '<cron task id>'),
+            'job_name': params['job_name'],
+            'job_symbol': symbol,
+            # args are shell-quoted since they are given to `bash -c`
+            'quoted_args': ' '.join(pipes.quote(a) for a in arguments),
+        },
+        'now': current_json_time(),
+        'as_slugid': as_slugid,
+    }
 
-    # append arguments, quoted, to the decision task command
-    shellcmd = task['payload']['command']
-    shellcmd[-1] = shellcmd[-1].rstrip('\n')  # strip yaml artifact
-    for arg in arguments:
-        shellcmd[-1] += ' ' + pipes.quote(arg)
-
-    task_id = slugid.nice()
+    rendered = jsone.render(taskcluster_yml, context)
+    if len(rendered['tasks']) != 1:
+        raise Exception("Expected .taskcluster.yml to only produce one cron task")
+    task = rendered['tasks'][0]
 
-    # set taskGroupid = taskId, as expected of decision tasks by other systems.
-    # This creates a new taskGroup for this graph.
-    task['taskGroupId'] = task_id
-
-    # set the schedulerId based on the level
-    task['schedulerId'] = 'gecko-level-{}'.format(params['level'])
-
+    task_id = task.pop('taskId')
     return (task_id, task)