Bug 1397847: use hashing and the index to download toolchains; r?glandium draft
author: Dustin J. Mitchell <dustin@mozilla.com>
Thu, 14 Sep 2017 23:30:58 +0000
changeset 665129 19b510af2b7a6fe8b71f55fbb3d51d0a7696b54c
parent 665128 9e69a72a36ed17f05070419129a582ce2809eb91
child 665457 5da5cf9bf3583621dd5cfaa4d64e41b9a039fc07
push id: 79939
push user: dmitchell@mozilla.com
push date: Thu, 14 Sep 2017 23:47:12 +0000
reviewers: glandium
bugs: 1397847
milestone: 57.0a1
Bug 1397847: use hashing and the index to download toolchains; r?glandium MozReview-Commit-ID: TqON8joEd6
python/mozbuild/mozbuild/mach_commands.py
taskcluster/taskgraph/util/taskcluster.py
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -1776,23 +1776,24 @@ class PackageFrontend(MachCommandBase):
             open_manifest,
             unpack_file,
         )
         from requests.adapters import HTTPAdapter
         import redo
         import requests
         import shutil
 
-        from taskgraph.generator import Kind
-        from taskgraph.optimize import optimize_task
         from taskgraph.util.taskcluster import (
             get_artifact_url,
-            list_artifacts,
+            find_task_id,
         )
-        import yaml
+        from mozbuild.toolchains import (
+            load_toolchain_definitions,
+            hash_toolchain,
+        )
 
         self._set_log_level(verbose)
         # Normally, we'd use self.log_manager.enable_unstructured(),
         # but that enables all logging, while we only really want tooltool's
         # and it also makes structured log output twice.
         # So we manually do what it does, and limit that to the tooltool
         # logger.
         if self.log_manager.terminal_handler:
@@ -1876,69 +1877,45 @@ class PackageFrontend(MachCommandBase):
                                         record.digest)
                 records[record.filename] = DownloadRecord(
                     url, record.filename, record.size, record.digest,
                     record.algorithm, unpack=record.unpack,
                     version=record.version, visibility=record.visibility,
                     setup=record.setup)
 
         if from_build:
-            params = {
-                'message': '',
-                'project': '',
-                'level': os.environ.get('MOZ_SCM_LEVEL', '3'),
-                'base_repository': '',
-                'head_repository': '',
-                'head_rev': '',
-                'moz_build_date': '',
-                'build_date': 0,
-                'pushlog_id': 0,
-                'owner': '',
-            }
-
-            # TODO: move to the taskcluster package
-            def tasks(kind):
-                kind_path = mozpath.join(self.topsrcdir, 'taskcluster', 'ci', kind)
-                with open(mozpath.join(kind_path, 'kind.yml')) as f:
-                    config = yaml.load(f)
-                    tasks = Kind(kind, kind_path, config).load_tasks(params, {})
-                    return {
-                        task.task['metadata']['name']: task
-                        for task in tasks
-                    }
-
-            toolchains = tasks('toolchain')
-
-            aliases = {}
-            for t in toolchains.values():
-                alias = t.attributes.get('toolchain-alias')
-                if alias:
-                    aliases['toolchain-{}'.format(alias)] = \
-                        t.task['metadata']['name']
-
+            toolchains = load_toolchain_definitions()
             for b in from_build:
                 user_value = b
 
-                if not b.startswith('toolchain-'):
-                    b = 'toolchain-{}'.format(b)
+                if b.startswith('toolchain-'):
+                    b = b.replace('toolchain-', '')
 
-                task = toolchains.get(aliases.get(b, b))
-                if not task:
+                if b not in toolchains:
                     self.log(logging.ERROR, 'artifact', {'build': user_value},
                              'Could not find a toolchain build named `{build}`')
                     return 1
+                toolchain = toolchains[b]
+                digest = hash_toolchain(toolchains, b)
 
-                task_id = optimize_task(task, {})
-                artifact_name = task.attributes.get('toolchain-artifact')
-                if task_id in (True, False) or not artifact_name:
-                    self.log(logging.ERROR, 'artifact', {'build': user_value},
-                             'Could not find artifacts for a toolchain build '
-                             'named `{build}`')
+                index_path = 'gecko.cache.level-{level}.toolchains.v1.{name}.{digest}'
+                index_path = index_path.format(
+                    level=os.environ.get('MOZ_SCM_LEVEL', '3'),
+                    name=b,
+                    digest=digest)
+                try:
+                    task_id = find_task_id(index_path)
+                except KeyError:
+                    self.log(logging.ERROR, 'artifact',
+                             {'build': user_value, 'index_path': index_path},
+                             'Could not find task for a toolchain build '
+                             'named `{build}` (not found at index path {index_path})')
                     return 1
 
+                artifact_name = toolchain['toolchain-artifact']
                 record = ArtifactRecord(task_id, artifact_name)
                 records[record.filename] = record
 
         # Handle the list of files of the form path@task-id on the command
         # line. Each of those give a path to an artifact to download.
         for f in files:
             if '@' not in f:
                 self.log(logging.ERROR, 'artifact', {},
--- a/taskcluster/taskgraph/util/taskcluster.py
+++ b/taskcluster/taskgraph/util/taskcluster.py
@@ -75,17 +75,22 @@ def get_index_url(index_path, use_proxy=
     if use_proxy:
         INDEX_URL = 'http://taskcluster/index/v1/task/{}'
     else:
         INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
     return INDEX_URL.format(index_path)
 
 
 def find_task_id(index_path, use_proxy=False):
-    response = _do_request(get_index_url(index_path, use_proxy))
+    try:
+        response = _do_request(get_index_url(index_path, use_proxy))
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 404:
+            raise KeyError
+        raise
     return response.json()['taskId']
 
 
 def get_artifact_from_index(index_path, artifact_path, use_proxy=False):
     full_path = index_path + '/artifacts/' + artifact_path
     response = _do_request(get_index_url(full_path, use_proxy))
     return _handle_artifact(full_path, response)