Bug 1397847: build common code for finding and hashing toolchains; r?glandium draft
authorDustin J. Mitchell <dustin@mozilla.com>
Thu, 14 Sep 2017 22:51:17 +0000
changeset 665128 9e69a72a36ed17f05070419129a582ce2809eb91
parent 665127 de1be11a3660a800bdef6aa5e8a4b42052e92e04
child 665129 19b510af2b7a6fe8b71f55fbb3d51d0a7696b54c
push id79939
push userdmitchell@mozilla.com
push dateThu, 14 Sep 2017 23:47:12 +0000
reviewersglandium
bugs1397847
milestone57.0a1
Bug 1397847: build common code for finding and hashing toolchains; r?glandium And use this code to hash for optimization. This indexes tasks under both the primary name and aliases for ease of later discovery. MozReview-Commit-ID: 3VL3c31rg71
python/mozbuild/mozbuild/test/test_toolchains.py
python/mozbuild/mozbuild/toolchains.py
taskcluster/taskgraph/transforms/job/toolchain.py
new file mode 100644
--- /dev/null
+++ b/python/mozbuild/mozbuild/test/test_toolchains.py
@@ -0,0 +1,33 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import mozunit
+import unittest
+
+from mozbuild.toolchains import (
+    load_toolchain_definitions,
+    hash_toolchain,
+)
+
class TestArtifactCache(unittest.TestCase):
    """Sanity checks for the toolchain-definition loading and hashing helpers."""

    def test_load_toolchain_definitions(self):
        toolchains = load_toolchain_definitions()
        # If we stop using clang, update this to some other stable toolchain.
        # assertIn replaces failUnless, a long-deprecated alias that modern
        # unittest no longer provides.
        self.assertIn('linux64-clang', toolchains)

    def test_hash_toolchain(self):
        toolchains = load_toolchain_definitions()
        # Just check that the hash is stable when called twice; anything more
        # would make this test depend on the toolchain definitions' contents.
        self.assertEqual(
            hash_toolchain(toolchains, 'linux64-clang'),
            hash_toolchain(toolchains, 'linux64-clang'))


if __name__ == '__main__':
    mozunit.main()
+
new file mode 100644
--- /dev/null
+++ b/python/mozbuild/mozbuild/toolchains.py
@@ -0,0 +1,75 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''
+Support for toolchain definitions and finding associated tasks.
+'''
+
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import hashlib
+import json
+import os
+import yaml
+
+import mozpack.path as mozpath
+from mozbuild.util import memoize
+from taskgraph.util.hash import hash_paths
+from taskgraph import GECKO
+
+# files that might affect all toolchains
+COMMON_FILES = (
+    'taskcluster/taskgraph/transforms/toolchain.py',
+    'taskcluster/taskgraph/transforms/job/toolchain.py',
+)
+
@memoize
def load_toolchain_definitions():
    '''
    Load all toolchain definitions from build/toolchains/*.yml, storing each
    definition under its "primary" name as well as its alias, if given.

    Returns a dict mapping toolchain name (or alias) to its definition dict.
    The result is memoized and shared across callers, so it must not be
    mutated.
    '''
    toolchains_dir = mozpath.join(GECKO, 'build', 'toolchains')
    toolchains = {}
    for filename in os.listdir(toolchains_dir):
        if not filename.endswith('.yml'):
            continue
        with open(mozpath.join(toolchains_dir, filename)) as f:
            # safe_load refuses arbitrary YAML tags (which yaml.load would
            # happily instantiate as Python objects); these files are plain
            # data, so the restricted loader is both safer and sufficient.
            toolchains.update(yaml.safe_load(f))

    # Resolve aliases: each alias maps to the same definition object as the
    # primary name, so a lookup by either key finds the same toolchain.
    aliases = {tc_def['toolchain-alias']: tc_def
               for tc_def in toolchains.values()
               if 'toolchain-alias' in tc_def}
    toolchains.update(aliases)

    return toolchains
+
+
def hash_toolchain(definitions, toolchain_name):
    '''
    Return the hash value (a hex digest) for the named toolchain. This takes
    into account the toolchain's resources, other associated files, the
    hashes of any toolchains on which it depends, and the definition itself.
    '''
    toolchain = definitions[toolchain_name]
    # Named `digest` rather than `hash` to avoid shadowing the builtin.
    digest = hashlib.sha256()

    # Hash the input files. Copy the resources list: extending the list
    # returned by get() in place would mutate the (memoized) definition, so
    # repeated calls would see an ever-growing file list and produce
    # different hashes for the same toolchain.
    files = list(toolchain.get('resources', []))
    files.extend(COMMON_FILES)
    files.append('taskcluster/scripts/misc/{}'.format(toolchain['run']['script']))
    if 'tooltool-manifest' in toolchain:
        files.append(toolchain['tooltool-manifest'])
    digest.update(hash_paths(GECKO, files))

    # Hash all required toolchains, so that a change to a dependency also
    # changes this toolchain's hash.
    for dep in toolchain.get('toolchains', []):
        digest.update(hash_toolchain(definitions, dep))

    # Finally, hash the toolchain definition itself, so that configuration
    # changes invalidate cached results even when no file changed.
    digest.update(json.dumps(toolchain, sort_keys=True))

    return digest.hexdigest()
--- a/taskcluster/taskgraph/transforms/job/toolchain.py
+++ b/taskcluster/taskgraph/transforms/job/toolchain.py
@@ -2,31 +2,31 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """
 Support for running toolchain-building jobs via dedicated scripts
 """
 
 from __future__ import absolute_import, print_function, unicode_literals
 
-import hashlib
-
 from taskgraph.util.schema import Schema
 from voluptuous import Required, Any
 
 from taskgraph.transforms.job import run_job_using
 from taskgraph.transforms.job.common import (
     docker_worker_add_tc_vcs_cache,
     docker_worker_add_gecko_vcs_env_vars,
     docker_worker_add_public_artifacts,
     docker_worker_add_tooltool,
     support_vcs_checkout,
 )
-from taskgraph.util.hash import hash_paths
-from taskgraph import GECKO
+from mozbuild.toolchains import (
+    load_toolchain_definitions,
+    hash_toolchain,
+)
 
 
 TOOLCHAIN_INDEX = 'gecko.cache.level-{level}.toolchains.v1.{name}.{digest}'
 
 toolchain_run_schema = Schema({
     Required('using'): 'toolchain-script',
 
     # the script (in taskcluster/scripts/misc) to run
@@ -38,54 +38,36 @@ toolchain_run_schema = Schema({
         False,
         'public',
         'internal',
     ),
 })
 
 
 def add_optimizations(config, run, taskdesc):
-    files = []  # fixed in later commits..
-    # This file
-    files.append('taskcluster/taskgraph/transforms/job/toolchain.py')
-    # The script
-    files.append('taskcluster/scripts/misc/{}'.format(run['script']))
-    # Tooltool manifest if any is defined:
-    tooltool_manifest = taskdesc['worker']['env'].get('TOOLTOOL_MANIFEST')
-    if tooltool_manifest:
-        files.append(tooltool_manifest)
-
-    digest = hash_paths(GECKO, files)
-
-    # If the task has dependencies, we need those dependencies to influence
-    # the index path. So take the digest from the files above, add the list
-    # of its dependencies, and hash the aggregate.
-    # If the task has no dependencies, just use the digest from above.
-    deps = taskdesc['dependencies']
-    if deps:
-        data = [digest] + sorted(deps.values())
-        digest = hashlib.sha256('\n'.join(data)).hexdigest()
-
-    label = taskdesc['label']
-    subs = {
-        'name': label.replace('%s-' % config.kind, ''),
-        'digest': digest,
-    }
+    toolchains = load_toolchain_definitions()
+    name = taskdesc['label'].replace('%s-' % config.kind, '')
+    toolchain = toolchains[name]
+    digest = hash_toolchain(toolchains, name)
+    subs = {'name': name, 'digest': digest}
 
     optimizations = taskdesc.setdefault('optimizations', [])
 
     # We'll try to find a cached version of the toolchain at levels above
     # and including the current level, starting at the highest level.
     for level in reversed(range(int(config.params['level']), 4)):
         subs['level'] = level
         optimizations.append(['index-search', TOOLCHAIN_INDEX.format(**subs)])
 
-    # ... and cache at the lowest level.
-    taskdesc.setdefault('routes', []).append(
-        'index.{}'.format(TOOLCHAIN_INDEX.format(**subs)))
+    # ... and cache at the lowest level, including any aliases
+    routes = taskdesc.setdefault('routes', [])
+    routes.append('index.{}'.format(TOOLCHAIN_INDEX.format(**subs)))
+    if 'toolchain-alias' in toolchain:
+        subs['name'] = toolchain['toolchain-alias']
+        routes.append('index.{}'.format(TOOLCHAIN_INDEX.format(**subs)))
 
 
 @run_job_using("docker-worker", "toolchain-script", schema=toolchain_run_schema)
 def docker_worker_toolchain(config, job, taskdesc):
     run = job['run']
     taskdesc['run-on-projects'] = ['trunk', 'try']
 
     worker = taskdesc['worker']