Bug 1324767 - Simplify decision task definition; r?dustin draft
author: Jonas Finnemann Jensen <jopsen@gmail.com>
Wed, 21 Dec 2016 10:19:20 +0100
changeset 452283 6df04102015995be5d6f5ba8e60cd6200ab10c34
parent 452146 c36fbe84042debef0a5d58b7fc88185b401762ce
child 540198 3792a03c543549c0fac77726f7c8a7ad174eb7d5
push id: 39374
push user: bmo:jopsen@gmail.com
push date: Wed, 21 Dec 2016 14:13:31 +0000
reviewers: dustin
bugs: 1324767
milestone: 53.0a1
Bug 1324767 - Simplify decision task definition; r?dustin. Setting more environment variables, ENTRYPOINT and CMD in the Dockerfile for the decision image, and taking inputs from environment variables in ./mach taskgraph decision, allows us to greatly simplify the decision task definition in .taskcluster.yml. This is useful as a general cleanup, making it easier to see what is input and what is template. It also simplifies validation of task definitions, which is required when validating the Chain-Of-Trust artifacts before signing. MozReview-Commit-ID: 3vJK5sr097G
.taskcluster.yml
taskcluster/mach_commands.py
taskcluster/taskgraph/decision.py
taskcluster/taskgraph/test/test_decision.py
testing/docker/decision/Dockerfile
testing/docker/decision/VERSION
testing/docker/decision/system-setup.sh
--- a/.taskcluster.yml
+++ b/.taskcluster.yml
@@ -64,61 +64,44 @@ tasks:
 
       routes:
         - "index.gecko.v2.{{project}}.latest.firefox.decision"
         - "tc-treeherder.v2.{{project}}.{{revision}}.{{pushlog_id}}"
         - "tc-treeherder-stage.v2.{{project}}.{{revision}}.{{pushlog_id}}"
 
       payload:
         env:
-          # checkout-gecko uses these to check out the source; the inputs
-          # to `mach taskgraph decision` are all on the command line.
-          GECKO_BASE_REPOSITORY: 'https://hg.mozilla.org/mozilla-unified'
-          GECKO_HEAD_REPOSITORY: '{{{url}}}'
-          GECKO_HEAD_REF: '{{revision}}'
-          GECKO_HEAD_REV: '{{revision}}'
-          HG_STORE_PATH: /home/worker/checkouts/hg-store
+          # Variables used both by run-task (to check out the source) and by
+          # the './mach taskgraph decision' command.
+          GECKO_BASE_REPOSITORY:  'https://hg.mozilla.org/mozilla-unified'
+          GECKO_HEAD_REPOSITORY:  '{{{url}}}'
+          GECKO_HEAD_REF:         '{{revision}}'
+          GECKO_HEAD_REV:         '{{revision}}'
+          # Variables only used by './mach taskgraph decision'
+          COMMIT_MESSAGE:         '{{{comment}}}'
+          REVISION_HASH:          '{{revision}}'
+          GECKO_PROJECT:          '{{project}}'
+          PUSHLOG_ID:             '{{pushlog_id}}'
+          PUSH_DATE:              '{{pushdate}}'
+          OWNER_EMAIL:            '{{owner}}'
+          SCM_LEVEL:              '{{level}}'
 
         cache:
           level-{{level}}-checkouts: /home/worker/checkouts
 
         features:
           taskclusterProxy: true
           chainOfTrust: true
 
         # Note: This task is built server side without the context or tooling that
         # exist in tree so we must hard code the hash
-        image: 'taskcluster/decision@sha256:0f59f922d86c471e208b7ea08ab077fc68c3920ed5e6895d69a23e8f3457dc24'
+        image: 'taskcluster/decision@sha256:e22982bfa3b1d44d26b3a1465b6d2225e8117a26f48f4a4d6975ed65aef2ec42'
 
         maxRunTime: 1800
 
-        # TODO use mozilla-unified for the base repository once the tc-vcs
-        # tar.gz archives are created or tc-vcs isn't being used.
-        command:
-          - /home/worker/bin/run-task
-          - '--vcs-checkout=/home/worker/checkouts/gecko'
-          - '--'
-          - bash
-          - -cx
-          - >
-              cd /home/worker/checkouts/gecko &&
-              ln -s /home/worker/artifacts artifacts &&
-              ./mach --log-no-times taskgraph decision
-              --pushlog-id='{{pushlog_id}}'
-              --pushdate='{{pushdate}}'
-              --project='{{project}}'
-              --message={{#shellquote}}{{{comment}}}{{/shellquote}}
-              --owner='{{owner}}'
-              --level='{{level}}'
-              --base-repository='https://hg.mozilla.org/mozilla-central'
-              --head-repository='{{{url}}}'
-              --head-ref='{{revision}}'
-              --head-rev='{{revision}}'
-              --revision-hash='{{revision_hash}}'
-
         artifacts:
           'public':
             type: 'directory'
             path: '/home/worker/artifacts'
             expires: '{{#from_now}}364 days{{/from_now}}'
 
       extra:
         treeherder:
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -7,29 +7,50 @@
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import json
 import logging
 import sys
 import traceback
 import re
+import os
+import argparse
 
 from mach.decorators import (
     CommandArgument,
     CommandProvider,
     Command,
     SubCommand,
 )
 
 from mozbuild.base import MachCommandBase
 
 ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
 
 
+class EnvDefault(argparse.Action):
+    """
+        Action for use with argparse that loads an option's value from an
+        environment variable when it is not given on the command line.
+
+        Credits: http://stackoverflow.com/a/10551190/68333
+    """
+
+    def __init__(self, envvar, required=True, default=None, **kwargs):
+        default = os.environ.get(envvar, default)
+        if required and default:
+            required = False
+        super(EnvDefault, self).__init__(default=default, required=required,
+                                         **kwargs)
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, values)
+
+
 class ShowTaskGraphSubCommand(SubCommand):
     """A SubCommand with TaskGraph-specific arguments"""
 
     def __call__(self, func):
         after = SubCommand.__call__(self, func)
         args = [
             CommandArgument('--root', '-r', default='taskcluster/ci',
                             help="root of the taskgraph definition relative to topsrcdir"),
@@ -109,55 +130,58 @@ class MachCommands(MachCommandBase):
         return self.show_taskgraph('optimized_task_graph', options)
 
     @SubCommand('taskgraph', 'decision',
                 description="Run the decision task")
     @CommandArgument('--root', '-r',
                      default='taskcluster/ci',
                      help="root of the taskgraph definition relative to topsrcdir")
     @CommandArgument('--base-repository',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='GECKO_BASE_REPOSITORY',
                      help='URL for "base" repository to clone')
     @CommandArgument('--head-repository',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='GECKO_HEAD_REPOSITORY',
                      help='URL for "head" repository to fetch revision from')
     @CommandArgument('--head-ref',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='GECKO_HEAD_REF',
                      help='Reference (this is same as rev usually for hg)')
     @CommandArgument('--head-rev',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='GECKO_HEAD_REV',
                      help='Commit revision to use from head repository')
     @CommandArgument('--message',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='COMMIT_MESSAGE',
                      help='Commit message to be parsed. Example: "try: -b do -p all -u all"')
     @CommandArgument('--revision-hash',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='REVISION_HASH',
                      help='Treeherder revision hash (long revision id) to attach results to')
     @CommandArgument('--project',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='GECKO_PROJECT',
                      help='Project to use for creating task graph. Example: --project=try')
     @CommandArgument('--pushlog-id',
                      dest='pushlog_id',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='PUSHLOG_ID',
                      default=0)
     @CommandArgument('--pushdate',
                      dest='pushdate',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='PUSH_DATE',
                      type=int,
                      default=0)
     @CommandArgument('--owner',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='OWNER_EMAIL',
                      help='email address of who owns this graph')
     @CommandArgument('--level',
-                     required=True,
+                     required=True, action=EnvDefault, envvar='SCM_LEVEL',
                      help='SCM level of this repository')
     @CommandArgument('--triggered-by',
                      choices=['nightly', 'push'],
                      default='push',
                      help='Source of execution of the decision graph')
+    @CommandArgument('--artifacts',
+                     required=True, action=EnvDefault, envvar='ARTIFACTS_FOLDER',
+                     help='Folder that artifacts should be written to')
     @CommandArgument('--target-tasks-method',
                      help='method for selecting the target tasks to generate')
     def taskgraph_decision(self, **options):
         """Run the decision task: generate a task graph and submit to
         TaskCluster.  This is only meant to be called within decision tasks,
         and requires a great many arguments.  Commands like `mach taskgraph
         optimized` are better suited to use on the command line, and can take
         the parameters file generated by a decision task.  """
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -20,17 +20,16 @@ from .taskgraph import TaskGraph
 from taskgraph.util.templates import Templates
 from taskgraph.util.time import (
     json_time_from_now,
     current_json_time,
 )
 
 logger = logging.getLogger(__name__)
 
-ARTIFACTS_DIR = 'artifacts'
 GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
 
 # For each project, this gives a set of parameters specific to the project.
 # See `taskcluster/docs/parameters.rst` for information on parameters.
 PER_PROJECT_PARAMETERS = {
     'try': {
         'target_tasks_method': 'try_option_syntax',
         # Always perform optimization.  This makes it difficult to use try
@@ -76,35 +75,35 @@ def taskgraph_decision(options):
 
     parameters = get_decision_parameters(options)
     # create a TaskGraphGenerator instance
     tgg = TaskGraphGenerator(
         root_dir=options['root'],
         parameters=parameters)
 
     # write out the parameters used to generate this graph
-    write_artifact('parameters.yml', dict(**parameters))
+    write_artifact(options['artifacts'], 'parameters.yml', dict(**parameters))
 
     # write out the yml file for action tasks
-    write_artifact('action.yml', get_action_yml(parameters))
+    write_artifact(options['artifacts'], 'action.yml', get_action_yml(parameters))
 
     # write out the full graph for reference
     full_task_json = tgg.full_task_graph.to_json()
-    write_artifact('full-task-graph.json', full_task_json)
+    write_artifact(options['artifacts'], 'full-task-graph.json', full_task_json)
 
     # this is just a test to check whether the from_json() function is working
     _, _ = TaskGraph.from_json(full_task_json)
 
     # write out the target task set to allow reproducing this as input
-    write_artifact('target-tasks.json', tgg.target_task_set.tasks.keys())
+    write_artifact(options['artifacts'], 'target-tasks.json', tgg.target_task_set.tasks.keys())
 
     # write out the optimized task graph to describe what will actually happen,
     # and the map of labels to taskids
-    write_artifact('task-graph.json', tgg.optimized_task_graph.to_json())
-    write_artifact('label-to-taskid.json', tgg.label_to_taskid)
+    write_artifact(options['artifacts'], 'task-graph.json', tgg.optimized_task_graph.to_json())
+    write_artifact(options['artifacts'], 'label-to-taskid.json', tgg.label_to_taskid)
 
     # actually create the graph
     create_tasks(tgg.optimized_task_graph, tgg.label_to_taskid, parameters)
 
 
 def get_decision_parameters(options):
     """
     Load parameters from the command-line options for 'taskgraph decision'.
@@ -155,21 +154,21 @@ def get_decision_parameters(options):
 
     # `target_tasks_method` has higher precedence than `project` parameters
     if options.get('target_tasks_method'):
         parameters['target_tasks_method'] = options['target_tasks_method']
 
     return Parameters(parameters)
 
 
-def write_artifact(filename, data):
+def write_artifact(folder, filename, data):
     logger.info('writing artifact file `{}`'.format(filename))
-    if not os.path.isdir(ARTIFACTS_DIR):
-        os.mkdir(ARTIFACTS_DIR)
-    path = os.path.join(ARTIFACTS_DIR, filename)
+    if not os.path.isdir(folder):
+        os.mkdir(folder)
+    path = os.path.join(folder, filename)
     if filename.endswith('.yml'):
         with open(path, 'w') as f:
             yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
     elif filename.endswith('.json'):
         with open(path, 'w') as f:
             json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))
     else:
         raise TypeError("Don't know how to write to {}".format(filename))
--- a/taskcluster/taskgraph/test/test_decision.py
+++ b/taskcluster/taskgraph/test/test_decision.py
@@ -46,33 +46,29 @@ class TestDecision(unittest.TestCase):
                 'kind_implementation': 'taskgraph.test.util:TestTask',
             }
         })
 
     def test_write_artifact_json(self):
         data = [{'some': 'data'}]
         tmpdir = tempfile.mkdtemp()
         try:
-            decision.ARTIFACTS_DIR = os.path.join(tmpdir, "artifacts")
-            decision.write_artifact("artifact.json", data)
-            with open(os.path.join(decision.ARTIFACTS_DIR, "artifact.json")) as f:
+            decision.write_artifact(tmpdir, "artifact.json", data)
+            with open(os.path.join(tmpdir, "artifact.json")) as f:
                 self.assertEqual(json.load(f), data)
         finally:
             if os.path.exists(tmpdir):
                 shutil.rmtree(tmpdir)
-            decision.ARTIFACTS_DIR = 'artifacts'
 
     def test_write_artifact_yml(self):
         data = [{'some': 'data'}]
         tmpdir = tempfile.mkdtemp()
         try:
-            decision.ARTIFACTS_DIR = os.path.join(tmpdir, "artifacts")
-            decision.write_artifact("artifact.yml", data)
-            with open(os.path.join(decision.ARTIFACTS_DIR, "artifact.yml")) as f:
+            decision.write_artifact(tmpdir, "artifact.yml", data)
+            with open(os.path.join(tmpdir, "artifact.yml")) as f:
                 self.assertEqual(yaml.safe_load(f), data)
         finally:
             if os.path.exists(tmpdir):
                 shutil.rmtree(tmpdir)
-            decision.ARTIFACTS_DIR = 'artifacts'
 
 
 if __name__ == '__main__':
     main()
--- a/testing/docker/decision/Dockerfile
+++ b/testing/docker/decision/Dockerfile
@@ -1,28 +1,41 @@
 FROM          ubuntu:16.04
 MAINTAINER    Greg Arndt <garndt@mozilla.com>
 
-# Add worker user
-RUN useradd -d /home/worker -s /bin/bash -m worker
-RUN mkdir /home/worker/artifacts && chown worker:worker /home/worker/artifacts
-
 # %include testing/docker/recipes/tooltool.py
 ADD topsrcdir/testing/docker/recipes/tooltool.py /tmp/tooltool.py
 
 # %include testing/mozharness/external_tools/robustcheckout.py
 ADD topsrcdir/testing/mozharness/external_tools/robustcheckout.py /usr/local/mercurial/robustcheckout.py
 
 # %include testing/docker/recipes/install-mercurial.sh
 ADD topsrcdir/testing/docker/recipes/install-mercurial.sh /tmp/install-mercurial.sh
 
 ADD system-setup.sh /tmp/system-setup.sh
 RUN bash /tmp/system-setup.sh
 
 # %include testing/docker/recipes/run-task
-ADD topsrcdir/testing/docker/recipes/run-task /home/worker/bin/run-task
+ADD topsrcdir/testing/docker/recipes/run-task /usr/local/bin/run-task
+
+# Add worker user and create artifacts folder
+RUN useradd -d /home/worker -s /bin/bash -m worker && \
+    mkdir /home/worker/artifacts && \
+    chown worker:worker /home/worker/artifacts
+ENV           ARTIFACTS_FOLDER /home/worker/artifacts
 
-ENV PATH /home/worker/bin:$PATH
-ENV SHELL /bin/bash
-ENV HOME /home/worker
+# Set variables normally configured at login by the shell's parent process;
+# these are taken from the GNU su manual
+ENV           HOME          /home/worker
+ENV           SHELL         /bin/bash
+ENV           USER          worker
+ENV           LOGNAME       worker
+ENV           HOSTNAME      taskcluster-worker
+ENV           LC_ALL        C
 
-# Set a default command useful for debugging
-CMD ["/bin/bash", "--login"]
+# Always use a volume for checkouts; this should be declared as a cache in tasks
+VOLUME        /home/worker/checkouts
+ENV           HG_STORE_PATH /home/worker/checkouts/hg-store
+
+# Set some sane defaults
+WORKDIR       /home/worker/
+ENTRYPOINT    ["run-task", "--vcs-checkout=/home/worker/checkouts/gecko", "--"]
+CMD           ["bash", "-cx", "cd /home/worker/checkouts/gecko && ./mach --log-no-times taskgraph decision"]
--- a/testing/docker/decision/VERSION
+++ b/testing/docker/decision/VERSION
@@ -1,1 +1,1 @@
-0.1.7
+0.2.0
--- a/testing/docker/decision/system-setup.sh
+++ b/testing/docker/decision/system-setup.sh
@@ -1,14 +1,16 @@
 #!/usr/bin/env bash
 
 set -v -e
 
 test `whoami` == 'root'
 
+export DEBIAN_FRONTEND=noninteractive
+
 apt-get update
 apt-get install -y --force-yes --no-install-recommends \
     ca-certificates \
     python \
     sudo
 
 BUILD=/root/build
 mkdir $BUILD
@@ -21,9 +23,10 @@ tooltool_fetch() {
 
 cd $BUILD
 . /tmp/install-mercurial.sh
 
 cd /
 rm -rf $BUILD
 apt-get clean
 apt-get autoclean
+rm -rf /var/lib/apt/lists/
 rm $0