Bug 1324767 - Simplify decision task definition; r?dustin
Setting more environment variables, ENTRYPOINT and CMD in the
Dockerfile for the decision image, and taking inputs from
environment variables in ./mach taskgraph decision, allows us
to greatly simplify the decision task definition in
.taskcluster.yml.
This is useful as a general cleanup, making it easier to
see what is input and what is template. It also simplifies
validation of task definitions, which is required when validating
the Chain-Of-Trust artifacts before signing.
MozReview-Commit-ID: 3vJK5sr097G
--- a/.taskcluster.yml
+++ b/.taskcluster.yml
@@ -64,61 +64,44 @@ tasks:
routes:
- "index.gecko.v2.{{project}}.latest.firefox.decision"
- "tc-treeherder.v2.{{project}}.{{revision}}.{{pushlog_id}}"
- "tc-treeherder-stage.v2.{{project}}.{{revision}}.{{pushlog_id}}"
payload:
env:
- # checkout-gecko uses these to check out the source; the inputs
- # to `mach taskgraph decision` are all on the command line.
- GECKO_BASE_REPOSITORY: 'https://hg.mozilla.org/mozilla-unified'
- GECKO_HEAD_REPOSITORY: '{{{url}}}'
- GECKO_HEAD_REF: '{{revision}}'
- GECKO_HEAD_REV: '{{revision}}'
- HG_STORE_PATH: /home/worker/checkouts/hg-store
+ # Variables used both by run-task to check out the source and by the
+ # './mach taskgraph decision' command.
+ GECKO_BASE_REPOSITORY: 'https://hg.mozilla.org/mozilla-unified'
+ GECKO_HEAD_REPOSITORY: '{{{url}}}'
+ GECKO_HEAD_REF: '{{revision}}'
+ GECKO_HEAD_REV: '{{revision}}'
+ # Variables only used by './mach taskgraph decision'
+ COMMIT_MESSAGE: '{{{comment}}}'
+ REVISION_HASH: '{{revision}}'
+ GECKO_PROJECT: '{{project}}'
+ PUSHLOG_ID: '{{pushlog_id}}'
+ PUSH_DATE: '{{pushdate}}'
+ OWNER_EMAIL: '{{owner}}'
+ SCM_LEVEL: '{{level}}'
cache:
level-{{level}}-checkouts: /home/worker/checkouts
features:
taskclusterProxy: true
chainOfTrust: true
# Note: This task is built server side without the context or tooling that
# exist in tree so we must hard code the hash
- image: 'taskcluster/decision@sha256:0f59f922d86c471e208b7ea08ab077fc68c3920ed5e6895d69a23e8f3457dc24'
+ image: 'taskcluster/decision@sha256:e22982bfa3b1d44d26b3a1465b6d2225e8117a26f48f4a4d6975ed65aef2ec42'
maxRunTime: 1800
- # TODO use mozilla-unified for the base repository once the tc-vcs
- # tar.gz archives are created or tc-vcs isn't being used.
- command:
- - /home/worker/bin/run-task
- - '--vcs-checkout=/home/worker/checkouts/gecko'
- - '--'
- - bash
- - -cx
- - >
- cd /home/worker/checkouts/gecko &&
- ln -s /home/worker/artifacts artifacts &&
- ./mach --log-no-times taskgraph decision
- --pushlog-id='{{pushlog_id}}'
- --pushdate='{{pushdate}}'
- --project='{{project}}'
- --message={{#shellquote}}{{{comment}}}{{/shellquote}}
- --owner='{{owner}}'
- --level='{{level}}'
- --base-repository='https://hg.mozilla.org/mozilla-central'
- --head-repository='{{{url}}}'
- --head-ref='{{revision}}'
- --head-rev='{{revision}}'
- --revision-hash='{{revision_hash}}'
-
artifacts:
'public':
type: 'directory'
path: '/home/worker/artifacts'
expires: '{{#from_now}}364 days{{/from_now}}'
extra:
treeherder:
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -7,29 +7,50 @@
from __future__ import absolute_import, print_function, unicode_literals
import json
import logging
import sys
import traceback
import re
+import os
+import argparse
from mach.decorators import (
CommandArgument,
CommandProvider,
Command,
SubCommand,
)
from mozbuild.base import MachCommandBase
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+class EnvDefault(argparse.Action):
+ """
+ Action for use with argparse that supports loading an option from an
+ environment variable if it is not specified on the command line.
+
+ Credits: http://stackoverflow.com/a/10551190/68333
+ """
+
+ def __init__(self, envvar, required=True, default=None, **kwargs):
+ default = os.environ.get(envvar, default)
+ if required and default:
+ required = False
+ super(EnvDefault, self).__init__(default=default, required=required,
+ **kwargs)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, values)
+
+
class ShowTaskGraphSubCommand(SubCommand):
"""A SubCommand with TaskGraph-specific arguments"""
def __call__(self, func):
after = SubCommand.__call__(self, func)
args = [
CommandArgument('--root', '-r', default='taskcluster/ci',
help="root of the taskgraph definition relative to topsrcdir"),
@@ -109,55 +130,58 @@ class MachCommands(MachCommandBase):
return self.show_taskgraph('optimized_task_graph', options)
@SubCommand('taskgraph', 'decision',
description="Run the decision task")
@CommandArgument('--root', '-r',
default='taskcluster/ci',
help="root of the taskgraph definition relative to topsrcdir")
@CommandArgument('--base-repository',
- required=True,
+ required=True, action=EnvDefault, envvar='GECKO_BASE_REPOSITORY',
help='URL for "base" repository to clone')
@CommandArgument('--head-repository',
- required=True,
+ required=True, action=EnvDefault, envvar='GECKO_HEAD_REPOSITORY',
help='URL for "head" repository to fetch revision from')
@CommandArgument('--head-ref',
- required=True,
+ required=True, action=EnvDefault, envvar='GECKO_HEAD_REF',
help='Reference (this is same as rev usually for hg)')
@CommandArgument('--head-rev',
- required=True,
+ required=True, action=EnvDefault, envvar='GECKO_HEAD_REV',
help='Commit revision to use from head repository')
@CommandArgument('--message',
- required=True,
+ required=True, action=EnvDefault, envvar='COMMIT_MESSAGE',
help='Commit message to be parsed. Example: "try: -b do -p all -u all"')
@CommandArgument('--revision-hash',
- required=True,
+ required=True, action=EnvDefault, envvar='REVISION_HASH',
help='Treeherder revision hash (long revision id) to attach results to')
@CommandArgument('--project',
- required=True,
+ required=True, action=EnvDefault, envvar='GECKO_PROJECT',
help='Project to use for creating task graph. Example: --project=try')
@CommandArgument('--pushlog-id',
dest='pushlog_id',
- required=True,
+ required=True, action=EnvDefault, envvar='PUSHLOG_ID',
default=0)
@CommandArgument('--pushdate',
dest='pushdate',
- required=True,
+ required=True, action=EnvDefault, envvar='PUSH_DATE',
type=int,
default=0)
@CommandArgument('--owner',
- required=True,
+ required=True, action=EnvDefault, envvar='OWNER_EMAIL',
help='email address of who owns this graph')
@CommandArgument('--level',
- required=True,
+ required=True, action=EnvDefault, envvar='SCM_LEVEL',
help='SCM level of this repository')
@CommandArgument('--triggered-by',
choices=['nightly', 'push'],
default='push',
help='Source of execution of the decision graph')
+ @CommandArgument('--artifacts',
+ required=True, action=EnvDefault, envvar='ARTIFACTS_FOLDER',
+ help='Folder that artifacts should be written to')
@CommandArgument('--target-tasks-method',
help='method for selecting the target tasks to generate')
def taskgraph_decision(self, **options):
"""Run the decision task: generate a task graph and submit to
TaskCluster. This is only meant to be called within decision tasks,
and requires a great many arguments. Commands like `mach taskgraph
optimized` are better suited to use on the command line, and can take
the parameters file generated by a decision task. """
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -20,17 +20,16 @@ from .taskgraph import TaskGraph
from taskgraph.util.templates import Templates
from taskgraph.util.time import (
json_time_from_now,
current_json_time,
)
logger = logging.getLogger(__name__)
-ARTIFACTS_DIR = 'artifacts'
GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..'))
# For each project, this gives a set of parameters specific to the project.
# See `taskcluster/docs/parameters.rst` for information on parameters.
PER_PROJECT_PARAMETERS = {
'try': {
'target_tasks_method': 'try_option_syntax',
# Always perform optimization. This makes it difficult to use try
@@ -76,35 +75,35 @@ def taskgraph_decision(options):
parameters = get_decision_parameters(options)
# create a TaskGraphGenerator instance
tgg = TaskGraphGenerator(
root_dir=options['root'],
parameters=parameters)
# write out the parameters used to generate this graph
- write_artifact('parameters.yml', dict(**parameters))
+ write_artifact(options['artifacts'], 'parameters.yml', dict(**parameters))
# write out the yml file for action tasks
- write_artifact('action.yml', get_action_yml(parameters))
+ write_artifact(options['artifacts'], 'action.yml', get_action_yml(parameters))
# write out the full graph for reference
full_task_json = tgg.full_task_graph.to_json()
- write_artifact('full-task-graph.json', full_task_json)
+ write_artifact(options['artifacts'], 'full-task-graph.json', full_task_json)
# this is just a test to check whether the from_json() function is working
_, _ = TaskGraph.from_json(full_task_json)
# write out the target task set to allow reproducing this as input
- write_artifact('target-tasks.json', tgg.target_task_set.tasks.keys())
+ write_artifact(options['artifacts'], 'target-tasks.json', tgg.target_task_set.tasks.keys())
# write out the optimized task graph to describe what will actually happen,
# and the map of labels to taskids
- write_artifact('task-graph.json', tgg.optimized_task_graph.to_json())
- write_artifact('label-to-taskid.json', tgg.label_to_taskid)
+ write_artifact(options['artifacts'], 'task-graph.json', tgg.optimized_task_graph.to_json())
+ write_artifact(options['artifacts'], 'label-to-taskid.json', tgg.label_to_taskid)
# actually create the graph
create_tasks(tgg.optimized_task_graph, tgg.label_to_taskid, parameters)
def get_decision_parameters(options):
"""
Load parameters from the command-line options for 'taskgraph decision'.
@@ -155,21 +154,21 @@ def get_decision_parameters(options):
# `target_tasks_method` has higher precedence than `project` parameters
if options.get('target_tasks_method'):
parameters['target_tasks_method'] = options['target_tasks_method']
return Parameters(parameters)
-def write_artifact(filename, data):
+def write_artifact(folder, filename, data):
logger.info('writing artifact file `{}`'.format(filename))
- if not os.path.isdir(ARTIFACTS_DIR):
- os.mkdir(ARTIFACTS_DIR)
- path = os.path.join(ARTIFACTS_DIR, filename)
+ if not os.path.isdir(folder):
+ os.mkdir(folder)
+ path = os.path.join(folder, filename)
if filename.endswith('.yml'):
with open(path, 'w') as f:
yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
elif filename.endswith('.json'):
with open(path, 'w') as f:
json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))
else:
raise TypeError("Don't know how to write to {}".format(filename))
--- a/taskcluster/taskgraph/test/test_decision.py
+++ b/taskcluster/taskgraph/test/test_decision.py
@@ -46,33 +46,29 @@ class TestDecision(unittest.TestCase):
'kind_implementation': 'taskgraph.test.util:TestTask',
}
})
def test_write_artifact_json(self):
data = [{'some': 'data'}]
tmpdir = tempfile.mkdtemp()
try:
- decision.ARTIFACTS_DIR = os.path.join(tmpdir, "artifacts")
- decision.write_artifact("artifact.json", data)
- with open(os.path.join(decision.ARTIFACTS_DIR, "artifact.json")) as f:
+ decision.write_artifact(tmpdir, "artifact.json", data)
+ with open(os.path.join(tmpdir, "artifact.json")) as f:
self.assertEqual(json.load(f), data)
finally:
if os.path.exists(tmpdir):
shutil.rmtree(tmpdir)
- decision.ARTIFACTS_DIR = 'artifacts'
def test_write_artifact_yml(self):
data = [{'some': 'data'}]
tmpdir = tempfile.mkdtemp()
try:
- decision.ARTIFACTS_DIR = os.path.join(tmpdir, "artifacts")
- decision.write_artifact("artifact.yml", data)
- with open(os.path.join(decision.ARTIFACTS_DIR, "artifact.yml")) as f:
+ decision.write_artifact(tmpdir, "artifact.yml", data)
+ with open(os.path.join(tmpdir, "artifact.yml")) as f:
self.assertEqual(yaml.safe_load(f), data)
finally:
if os.path.exists(tmpdir):
shutil.rmtree(tmpdir)
- decision.ARTIFACTS_DIR = 'artifacts'
if __name__ == '__main__':
main()
--- a/testing/docker/decision/Dockerfile
+++ b/testing/docker/decision/Dockerfile
@@ -1,28 +1,41 @@
FROM ubuntu:16.04
MAINTAINER Greg Arndt <garndt@mozilla.com>
-# Add worker user
-RUN useradd -d /home/worker -s /bin/bash -m worker
-RUN mkdir /home/worker/artifacts && chown worker:worker /home/worker/artifacts
-
# %include testing/docker/recipes/tooltool.py
ADD topsrcdir/testing/docker/recipes/tooltool.py /tmp/tooltool.py
# %include testing/mozharness/external_tools/robustcheckout.py
ADD topsrcdir/testing/mozharness/external_tools/robustcheckout.py /usr/local/mercurial/robustcheckout.py
# %include testing/docker/recipes/install-mercurial.sh
ADD topsrcdir/testing/docker/recipes/install-mercurial.sh /tmp/install-mercurial.sh
ADD system-setup.sh /tmp/system-setup.sh
RUN bash /tmp/system-setup.sh
# %include testing/docker/recipes/run-task
-ADD topsrcdir/testing/docker/recipes/run-task /home/worker/bin/run-task
+ADD topsrcdir/testing/docker/recipes/run-task /usr/local/bin/run-task
+
+# Add worker user and create artifacts folder
+RUN useradd -d /home/worker -s /bin/bash -m worker && \
+ mkdir /home/worker/artifacts && \
+ chown worker:worker /home/worker/artifacts
+ENV ARTIFACTS_FOLDER /home/worker/artifacts
-ENV PATH /home/worker/bin:$PATH
-ENV SHELL /bin/bash
-ENV HOME /home/worker
+# Set variables normally configured at login by the shell's parent process;
+# these are taken from the GNU su manual
+ENV HOME /home/worker
+ENV SHELL /bin/bash
+ENV USER worker
+ENV LOGNAME worker
+ENV HOSTNAME taskcluster-worker
+ENV LC_ALL C
-# Set a default command useful for debugging
-CMD ["/bin/bash", "--login"]
+# Always use a volume for checkouts; this should be declared as a cache in tasks
+VOLUME /home/worker/checkouts
+ENV HG_STORE_PATH /home/worker/checkouts/hg-store
+
+# Set some sane defaults
+WORKDIR /home/worker/
+ENTRYPOINT ["run-task", "--vcs-checkout=/home/worker/checkouts/gecko", "--"]
+CMD ["bash", "-cx", "cd /home/worker/checkouts/gecko && ./mach --log-no-times taskgraph decision"]
--- a/testing/docker/decision/VERSION
+++ b/testing/docker/decision/VERSION
@@ -1,1 +1,1 @@
-0.1.7
+0.2.0
--- a/testing/docker/decision/system-setup.sh
+++ b/testing/docker/decision/system-setup.sh
@@ -1,14 +1,16 @@
#!/usr/bin/env bash
set -v -e
test `whoami` == 'root'
+export DEBIAN_FRONTEND=noninteractive
+
apt-get update
apt-get install -y --force-yes --no-install-recommends \
ca-certificates \
python \
sudo
BUILD=/root/build
mkdir $BUILD
@@ -21,9 +23,10 @@ tooltool_fetch() {
cd $BUILD
. /tmp/install-mercurial.sh
cd /
rm -rf $BUILD
apt-get clean
apt-get autoclean
+rm -rf /var/lib/apt/lists/
rm $0