Bug 1356529 - Add a `mach artifact toolchain` option to get toolchains for use for a specific build job. r?gps draft
author: Mike Hommey <mh+mozilla@glandium.org>
Fri, 14 Apr 2017 15:35:18 +0900
changeset 563540 482c5432ddc4893387c071322ac269900e612992
parent 563497 806004f50a679bf0833366d16b801bad9ecfea8c
child 563542 1b7a9ee7088b079723627369592b151502a7b1a5
push id: 54336
push user: bmo:mh+mozilla@glandium.org
push date: Mon, 17 Apr 2017 06:48:25 +0000
reviewers: gps
bugs: 1356529, 1356952
milestone: 55.0a1
Bug 1356529 - Add a `mach artifact toolchain` option to get toolchains for use for a specific build job. r?gps

While technically the options the `mach artifact toolchain` command supports are enough to get going to pull toolchains, it is desirable to have a more convenient interface, not only for normal usage, but also for non-taskcluster jobs and possibly local-build-oriented commands such as mach bootstrap.

So instead of having all use cases figure out tooltool manifests and dependent toolchain jobs on their own to pass the right options to `mach artifact toolchain`, we augment `mach artifact toolchain` to figure them out on its own, provided a taskcluster job name (conveniently, buildbot-based OSX builds will soon have associated taskcluster jobs using the bridge). This also has the advantage that automation uses the same code as what local developers will end up using, ensuring it doesn't break.

The new flag is --for-job. The given job name is automatically normalized to begin with 'build-' and end with '/opt' if it doesn't match an existing job. So, for example, the following command:

  mach artifact toolchain --for-job linux64

would be equivalent to:

  mach artifact toolchain --tooltool-manifest \
    browser/config/tooltool-manifests/linux64/releng.manifest \
    --from-build linux64-gcc

This currently relies on mozharness to get the path to the tooltool manifest, which means it cannot find the tooltool manifest for non-mozharness-based jobs (e.g. spidermonkey, etc.), but until bug 1356952 is fixed, we'll have to live with that.

This also relies on taskcluster job dependencies, which, however we end up defining them in the job definitions, are always going to be there, since we're looking at the byproduct of task transformations.
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -1538,48 +1538,59 @@ class PackageFrontend(MachCommandBase):
     @SubCommand('artifact', 'toolchain')
     @CommandArgument('--verbose', '-v', action='store_true',
         help='Print verbose output.')
     @CommandArgument('--cache-dir', metavar='DIR',
         help='Directory where to store the artifacts cache')
     @CommandArgument('--skip-cache', action='store_true',
         help='Skip all local caches to force re-fetching remote artifacts.',
         default=False)
+    @CommandArgument('--for-job', metavar='JOB',
+        help='Get toolchains required for the given job')
     @CommandArgument('--from-build', metavar='BUILD', nargs='+',
         help='Get toolchains resulting from the given build(s)')
     @CommandArgument('--tooltool-manifest', metavar='MANIFEST',
         help='Explicit tooltool manifest to process')
     @CommandArgument('--authentication-file', metavar='FILE',
         help='Use the RelengAPI token found in the given file to authenticate')
     @CommandArgument('--tooltool-url', metavar='URL',
         help='Use the given url as tooltool server')
     @CommandArgument('--no-unpack', action='store_true',
         help='Do not unpack any downloaded file')
     @CommandArgument('--retry', type=int, default=0,
         help='Number of times to retry failed downloads')
     @CommandArgument('files', nargs='*',
         help='Only download the given file names (you may use file name stems)')
     def artifact_toolchain(self, verbose=False, cache_dir=None,
-                          skip_cache=False, from_build=(),
+                          skip_cache=False, for_job=None, from_build=(),
                           tooltool_manifest=None, authentication_file=None,
                           tooltool_url=None, no_unpack=False, retry=None,
                           files=()):
         '''Download, cache and install pre-built toolchains.
         '''
         from mozbuild.artifacts import ArtifactCache
         from mozbuild.action.tooltool import (
             FileRecord,
             open_manifest,
             unpack_file,
         )
         from requests.adapters import HTTPAdapter
         import redo
         import requests
         import shutil
 
+        # Normally, we'd have this in virtualenv_packages.txt, but that causes
+        # problems because of the embedded (and different) copies of things
+        # that are elsewhere (e.g. mozinfo). Well, ideally, we wouldn't even
+        # use mozharness, but until the tooltool manifests can be found in
+        # taskcluster configs instead of mozharness config (bug 1356952),
+        # we'll have to use mozharness. Ugly, but it does the job for now.
+        sys.path.append(os.path.join(self.topsrcdir, 'testing', 'mozharness'))
+
+        from mozharness.base.config import parse_config_file
         from taskgraph.generator import Kind
         from taskgraph.optimize import optimize_task
         from taskgraph.util.taskcluster import (
             get_artifact_url,
             list_artifacts,
         )
         import yaml
 
@@ -1632,28 +1643,21 @@ class PackageFrontend(MachCommandBase):
             def validate(self):
                 if self.size is None and self.digest is None:
                     return True
                 return super(DownloadRecord, self).validate()
 
         records = OrderedDict()
         downloaded = []
 
-        if tooltool_manifest:
-            manifest = open_manifest(tooltool_manifest)
-            for record in manifest.file_records:
-                url = '{}/{}/{}'.format(tooltool_url, record.algorithm,
-                                        record.digest)
-                records[record.filename] = DownloadRecord(
-                    url, record.filename, record.size, record.digest,
-                    record.algorithm, unpack=record.unpack,
-                    version=record.version, visibility=record.visibility,
-                    setup=record.setup)
+        from_build = from_build or []
+        files_from_build = {}
+        tooltool_manifests = [tooltool_manifest] if tooltool_manifest else []
 
-        if from_build:
+        if from_build or for_job:
             params = {
                 'message': '',
                 'project': '',
                 'level': os.environ.get('MOZ_SCM_LEVEL', '3'),
                 'base_repository': '',
                 'head_repository': '',
                 'head_rev': '',
                 'moz_build_date': '',
@@ -1662,66 +1666,119 @@ class PackageFrontend(MachCommandBase):
                 'owner': '',
             }
 
             # TODO: move to the taskcluster package
             def tasks(kind):
                 kind_path = mozpath.join(self.topsrcdir, 'taskcluster', 'ci', kind)
                 with open(mozpath.join(kind_path, 'kind.yml')) as f:
                     config = yaml.load(f)
-                    tasks = Kind(kind, kind_path, config).load_tasks(params, {})
+                    loaded_tasks = []
+                    for dep in config.get('kind-dependencies', ()):
+                        loaded_tasks.extend(tasks(dep).itervalues())
                     return {
                         task.task['metadata']['name']: task
-                        for task in tasks
+                        for task in Kind(kind, kind_path, config).load_tasks(
+                            params, loaded_tasks)
                     }
 
+            def normalize_task_name(name, kind):
+                if '/' not in name:
+                    name = '{}/opt'.format(name)
+
+                if not name.startswith('{}-'.format(kind)):
+                    name = '{}-{}'.format(kind, name)
+
+                return name
+
             toolchains = tasks('toolchain')
 
-            for b in from_build:
-                user_value = b
+            if for_job:
+                task = None
 
-                if '/' not in b:
-                    b = '{}/opt'.format(b)
+                for kind in os.listdir(os.path.join(
+                        self.topsrcdir, 'taskcluster', 'ci')):
+                    if for_job.startswith('{}-'.format(kind)):
+                        jobs = tasks(kind)
+                        j = normalize_task_name(for_job, kind)
+                        task = jobs.get(j)
+                        break
+
+                if not task:
+                    j = normalize_task_name(for_job, 'build')
+                    builds = tasks('build')
+                    task = builds.get(j)
 
-                if not b.startswith('toolchain-'):
-                    b = 'toolchain-{}'.format(b)
+                if not task:
+                    self.log(logging.ERROR, 'artifact', {'job': for_job},
+                             'Could not find a job named `{job}`')
+                    return 1
+                env = task.task.get('payload', {}).get('env', {})
+                mozharness_configs = env.get('MOZHARNESS_CONFIG', '')
+                for c in mozharness_configs.split():
+                    config = parse_config_file(os.path.join(
+                        self.topsrcdir, 'testing', 'mozharness', 'configs', c))
+                    m = config.get('tooltool_manifest_src')
+                    if m:
+                        tooltool_manifests.append(os.path.join(
+                            self.topsrcdir, m))
 
-                task = toolchains.get(b)
-                if not task:
-                    self.log(logging.ERROR, 'artifact', {'build': user_value},
-                             'Could not find a toolchain build named `{build}`')
-                    return 1
+                for d in task.dependencies.itervalues():
+                    if d in toolchains:
+                        from_build.append(d)
 
-                optimized, task_id = optimize_task(task, {})
-                if not optimized:
-                    self.log(logging.ERROR, 'artifact', {'build': user_value},
-                             'Could not find artifacts for a toolchain build '
-                             'named `{build}`')
-                    return 1
+        for tooltool_manifest in tooltool_manifests:
+            manifest = open_manifest(tooltool_manifest)
+            for record in manifest.file_records:
+                url = '{}/{}/{}'.format(tooltool_url, record.algorithm,
+                                        record.digest)
+                records[record.filename] = DownloadRecord(
+                    url, record.filename, record.size, record.digest,
+                    record.algorithm, unpack=record.unpack,
+                    version=record.version, visibility=record.visibility,
+                    setup=record.setup)
+
+        for b in from_build:
+            user_value = b
+            b = normalize_task_name(b, 'toolchain')
 
-                for artifact in list_artifacts(task_id):
-                    name = artifact['name']
-                    if not name.startswith('public/'):
-                        continue
-                    name = name[len('public/'):]
-                    if name.startswith('logs/'):
-                        continue
-                    name = os.path.basename(name)
-                    records[name] = DownloadRecord(
-                        get_artifact_url(task_id, artifact['name']),
-                        name, None, None, None, unpack=True)
+            task = toolchains.get(b)
+            if not task:
+                self.log(logging.ERROR, 'artifact', {'build': user_value},
+                         'Could not find a toolchain build named `{build}`')
+                return 1
+
+            optimized, task_id = optimize_task(task, {})
+            if not optimized:
+                self.log(logging.ERROR, 'artifact', {'build': user_value},
+                         'Could not find artifacts for a toolchain build '
+                         'named `{build}`')
+                return 1
+
+            for artifact in list_artifacts(task_id):
+                name = artifact['name']
+                if not name.startswith('public/'):
+                    continue
+                name = name[len('public/'):]
+                if name.startswith('logs/'):
+                    continue
+                name = os.path.basename(name)
+                records[name] = DownloadRecord(
+                    get_artifact_url(task_id, artifact['name']),
+                    name, None, None, None, unpack=True)
 
         for record in records.itervalues():
             if files and not any(record.basename == f or
                                       record.basename.startswith('%s.' % f)
                                       for f in files):
                 continue
 
-            self.log(logging.INFO, 'artifact', {'name': record.basename},
-                     'Downloading {name}')
+            self.log(logging.INFO, 'artifact', {'name': record.basename,
+                                                'url': record.url},
+                     'Downloading {name} from {url}')
             valid = False
             # sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
             for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
                                                      sleeptime=60)):
                 try:
                     record.fetch_with(cache)
                 except requests.exceptions.HTTPError as e:
                     status = e.response.status_code