Bug 1240667 - Detect a tree to use for artifact builds based on recent changesets. r=nalexander draft
author Chris Manchester <cmanchester@mozilla.com>
Fri, 29 Jan 2016 12:19:51 -0800
changeset 327151 5040369fb49bafb7017c5806448c67c6a89670a5
parent 327063 9b3c9c05e11c7406969b9f4246f83a0422e084ed
child 513661 8255dd81cd8f26989e11f402ab6b0e860413043c
push id 10199
push user cmanchester@mozilla.com
push date Fri, 29 Jan 2016 20:22:58 +0000
reviewers nalexander
bugs 1240667
milestone 47.0a1
Bug 1240667 - Detect a tree to use for artifact builds based on recent changesets. r=nalexander

Currently, --enable-artifact builds take artifacts from fx-team regardless of the state of the current working directory, which can lead to broken builds when someone updates to a tree other than fx-team. This commit changes the default behavior from tracking fx-team to finding all recent pushheads and selecting the one closest to the working parent that has artifacts available. It also fixes a mismatch between tree names according to mozext and branch names in the taskcluster index that prevented artifact downloads from common integration branches.
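In outline, the new lookup works as follows (a minimal, self-contained sketch of the flow this patch implements; hg_log_output and urls_for are hypothetical stand-ins for the real subprocess call and the TreeCache/TaskCache queries):

    import collections

    def candidate_pushheads(hg_log_output):
        # Parse lines of the form "<node>,<tree>[,<tree>...]" produced by
        # the '{node},{join(trees, ",")}\n' hg log template, preserving
        # hg's output order.
        rev_trees = collections.OrderedDict()
        for line in hg_log_output.splitlines():
            fields = line.split(',')
            if len(fields) < 2:
                continue  # skip blank lines and pushheads with no known trees
            rev_trees[fields[0]] = tuple(fields[1:])
        return rev_trees

    def first_pushhead_with_artifacts(rev_trees, urls_for):
        # urls_for(rev, tree) returns a list of artifact URLs or None.
        # Pushheads are tried in the order hg emitted them; for a pushhead
        # on multiple trees, the most recently added tree is tried first.
        for rev, trees in rev_trees.items():
            for tree in reversed(trees):
                urls = urls_for(rev, tree)
                if urls:
                    return rev, tree, urls
        return None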
python/mozbuild/mozbuild/artifacts.py
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/artifacts.py
+++ b/python/mozbuild/mozbuild/artifacts.py
@@ -35,16 +35,17 @@ A future need, perhaps.
 This module requires certain modules be importable from the ambient Python
 environment.  |mach artifact| ensures these modules are available, but other
 consumers will need to arrange this themselves.
 '''
 
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import collections
 import functools
 import hashlib
 import logging
 import operator
 import os
 import pickle
 import re
 import shutil
@@ -71,17 +72,16 @@ from mozpack.mozjar import (
 import mozpack.path as mozpath
 from mozregression.download_manager import (
     DownloadManager,
 )
 from mozregression.persist_limit import (
     PersistLimit,
 )
 
-MAX_CACHED_PARENTS = 100  # Number of parent changesets to cache candidate pushheads for.
-NUM_PUSHHEADS_TO_QUERY_PER_PARENT = 50  # Number of candidate pushheads to cache per parent changeset.
+NUM_PUSHHEADS_TO_QUERY_PER_PARENT = 50  # Number of candidate pushheads to query per parent changeset.
 
 MAX_CACHED_TASKS = 400  # Number of pushheads to cache Task Cluster task data for.
 
 # Number of downloaded artifacts to cache.  Each artifact can be very large,
-# so don't make this to large!  TODO: make this a size (like 500 megs) rather than an artifact count.
+# so don't make this too large!  TODO: make this a size (like 500 megs) rather than an artifact count.
 MAX_CACHED_ARTIFACTS = 6
 
@@ -486,62 +486,42 @@ class CacheManager(object):
 
     def __enter__(self):
         self.load_cache()
         return self
 
     def __exit__(self, type, value, traceback):
         self.dump_cache()
 
-
-class PushHeadCache(CacheManager):
-    '''Map parent hg revisions to candidate pushheads.'''
+
+class TreeCache(CacheManager):
+    '''Map pushhead revisions to trees with tasks/artifacts known to taskcluster.'''
 
-    def __init__(self, hg, cache_dir, log=None):
-        # It's not unusual to pull hundreds of changesets at once, and perhaps
-        # |hg up| back and forth a few times.
-        CacheManager.__init__(self, cache_dir, 'pushheads', MAX_CACHED_PARENTS, log=log)
-        self._hg = hg
+    def __init__(self, cache_dir, log=None):
+        CacheManager.__init__(self, cache_dir, 'artifact_tree', MAX_CACHED_TASKS, log=log)
+
+        self._index = taskcluster.Index()
 
     @cachedmethod(operator.attrgetter('_cache'))
-    def pushheads(self, tree, parent):
+    def artifact_trees(self, rev, trees):
+        # The "trees" argument is intentionally ignored by the function
+        # body, but it forms part of the cache key. If its value changes
+        # over time, a changeset we care about has become a pushhead on
+        # another tree, and the cached result may no longer be valid.
+        rev_ns = 'buildbot.revisions.{rev}'.format(rev=rev)
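+        # The index records tasks for a pushhead under namespaces of the
+        # form buildbot.revisions.<rev>.<tree> (an assumption about the
+        # index layout), so listing the children of rev_ns yields the
+        # trees with tasks for this revision.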
         try:
-            pushheads = subprocess.check_output([self._hg, 'log',
-                '--template', '{node}\n',
-                '-r', 'last(pushhead("{tree}") and ::"{parent}", {num})'.format(
-                    tree=tree, parent=parent, num=NUM_PUSHHEADS_TO_QUERY_PER_PARENT)])
-            # Filter blank lines.
-            pushheads = [ pushhead for pushhead in pushheads.strip().split('\n') if pushhead ]
-            if pushheads:
-                return pushheads
-        except subprocess.CalledProcessError as e:
-            # We probably don't have the mozext extension installed.
-            ret = subprocess.call([self._hg, 'showconfig', 'extensions.mozext'])
-            if ret:
-                raise Exception('Could not find candidate pushheads.\n\n'
-                                'You need to enable the "mozext" hg extension: '
-                                'see https://developer.mozilla.org/en-US/docs/Artifact_builds')
-            raise e
+            result = self._index.listNamespaces(rev_ns, {"limit": 10})
+        except Exception:
+            return []
+        return [ns['name'] for ns in result['namespaces']]
 
-        # We probably don't have the pushlog database present locally.  Check.
-        tree_pushheads = subprocess.check_output([self._hg, 'log',
-            '--template', '{node}\n',
-            '-r', 'last(pushhead("{tree}"))'.format(tree=tree)])
-        # Filter blank lines.
-        tree_pushheads = [ pushhead for pushhead in tree_pushheads.strip().split('\n') if pushhead ]
-        if tree_pushheads:
-            # Okay, we have some pushheads but no candidates.  This can happen
-            # for legitimate reasons: old revisions with no upstream builds
-            # remaining; or new revisions that don't have upstream builds yet.
-            return []
-
-        raise Exception('Could not find any pushheads for tree "{tree}".\n\n'
-                        'Try running |hg pushlogsync|; '
-                        'see https://developer.mozilla.org/en-US/docs/Artifact_builds'.format(tree=tree))
-
+    def print_last_item(self, args, sorted_kwargs, result):
+        rev, trees = args
+        self.log(logging.INFO, 'artifact',
+            {'rev': rev},
+            'Last fetched trees for pushhead revision {rev}')
+
 
 class TaskCache(CacheManager):
     '''Map candidate pushheads to Task Cluster task IDs and artifact URLs.'''
 
     def __init__(self, cache_dir, log=None):
         CacheManager.__init__(self, cache_dir, 'artifact_url', MAX_CACHED_TASKS, log=log)
         self._index = taskcluster.Index()
         self._queue = taskcluster.Queue()
@@ -666,24 +646,94 @@ class Artifacts(object):
         try:
             self._artifact_job = get_job_details(self._job, log=self._log)
         except KeyError:
             self.log(logging.INFO, 'artifact',
                 {'job': self._job},
                 'Unknown job {job}')
             raise KeyError("Unknown job")
 
-        self._pushhead_cache = PushHeadCache(self._hg, self._cache_dir, log=self._log)
         self._task_cache = TaskCache(self._cache_dir, log=self._log)
         self._artifact_cache = ArtifactCache(self._cache_dir, log=self._log)
+        self._tree_cache = TreeCache(self._cache_dir, log=self._log)
+        # A "tree" according to mozext doesn't always match the name of the
+        # corresponding integration branch. For example, pushhead("central")
+        # refers to pushheads with artifacts under the taskcluster namespace
+        # "mozilla-central".
+        self._tree_replacements = {
+            'inbound': 'mozilla-inbound',
+            'central': 'mozilla-central',
+        }
 
     def log(self, *args, **kwargs):
         if self._log:
             self._log(*args, **kwargs)
 
+    def _find_pushheads(self, parent):
+        # Return an ordered dict associating revisions that are pushheads with
+        # trees they are known to be in (starting with the first tree they're
+        # known to be in).
+
+        try:
+            output = subprocess.check_output([
+                self._hg, 'log',
+                '--template', '{node},{join(trees, ",")}\n',
+                '-r', 'last(pushhead({tree}) and ::{parent}, {num})'.format(
+                    tree=self._tree or '', parent=parent, num=NUM_PUSHHEADS_TO_QUERY_PER_PARENT)
+            ])
+        except subprocess.CalledProcessError:
+            # We probably don't have the mozext extension installed.
+            ret = subprocess.call([self._hg, 'showconfig', 'extensions.mozext'])
+            if ret:
+                raise Exception('Could not find pushheads for recent revisions.\n\n'
+                                'You need to enable the "mozext" hg extension: '
+                                'see https://developer.mozilla.org/en-US/docs/Artifact_builds')
+            raise
+
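+        # Each output line has the form "<node>,<tree>[,<tree>...]",
+        # per the '{node},{join(trees, ",")}' template above.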
+        rev_trees = collections.OrderedDict()
+        for line in output.splitlines():
+            if not line:
+                continue
+            rev_info = line.split(',')
+            if len(rev_info) == 1:
+                # If pushhead() is true, it would seem "trees" should be
+                # non-empty, but this is defensive.
+                continue
+            rev_trees[rev_info[0]] = tuple(rev_info[1:])
+
+        if not rev_trees:
+            raise Exception('Could not find any candidate pushheads in the last {num} revisions.\n\n'
+                            'Try running |hg pushlogsync|;\n'
+                            'see https://developer.mozilla.org/en-US/docs/Artifact_builds'.format(
+                                num=NUM_PUSHHEADS_TO_QUERY_PER_PARENT))
+
+        return rev_trees
+
+    def find_pushhead_artifacts(self, task_cache, tree_cache, job, pushhead, trees):
+        known_trees = set(tree_cache.artifact_trees(pushhead, trees))
+        if not known_trees:
+            return None
+        # If we ever find a rev that's a pushhead on multiple trees, we want
+        # the most recent one.
+        for tree in reversed(trees):
+            tree = self._tree_replacements.get(tree) or tree
+            if tree not in known_trees:
+                continue
+            try:
+                urls = task_cache.artifact_urls(tree, job, pushhead)
+            except ValueError:
+                continue
+            if urls:
+                self.log(logging.INFO, 'artifact',
+                         {'pushhead': pushhead,
+                          'tree': tree},
+                         'Installing from remote pushhead {pushhead} on {tree}')
+                return urls
+        return None
+
     def install_from_file(self, filename, distdir, install_callback=None):
         self.log(logging.INFO, 'artifact',
             {'filename': filename},
             'Installing from {filename}')
 
         # Do we need to post-process?
         processed_filename = filename + PROCESSED_SUFFIX
         if not os.path.exists(processed_filename):
@@ -729,45 +779,32 @@ class Artifacts(object):
             'Installing from {url}')
         with self._artifact_cache as artifact_cache:  # The with block handles persistence.
             filename = artifact_cache.fetch(url)
         return self.install_from_file(filename, distdir, install_callback=install_callback)
 
     def install_from_hg(self, revset, distdir, install_callback=None):
         if not revset:
             revset = '.'
-        if len(revset) != 40:
-            revset = subprocess.check_output([self._hg, 'log', '--template', '{node}\n', '-r', revset]).strip()
-            if len(revset.split('\n')) != 1:
-                raise ValueError('hg revision specification must resolve to exactly one commit')
-
-        self.log(logging.INFO, 'artifact',
-            {'revset': revset},
-            'Installing from local revision {revset}')
-
+        rev_pushheads = self._find_pushheads(revset)
         urls = None
-        with self._task_cache as task_cache, self._pushhead_cache as pushhead_cache:
-            # with blocks handle handle persistence.
-            for pushhead in pushhead_cache.pushheads(self._tree, revset):
+        # The with blocks handle persistence.
+        with self._task_cache as task_cache, self._tree_cache as tree_cache:
+            while rev_pushheads:
+                rev, trees = rev_pushheads.popitem(last=False)
                 self.log(logging.DEBUG, 'artifact',
-                    {'pushhead': pushhead},
-                    'Trying to find artifacts for pushhead {pushhead}.')
-                try:
-                    urls = task_cache.artifact_urls(self._tree, self._job, pushhead)
-                    self.log(logging.INFO, 'artifact',
-                        {'pushhead': pushhead},
-                        'Installing from remote pushhead {pushhead}')
-                    break
-                except ValueError:
-                    pass
-        if urls:
-            for url in urls:
-                if self.install_from_url(url, distdir, install_callback=install_callback):
-                    return 1
-            return 0
+                    {'rev': rev},
+                    'Trying to find artifacts for pushhead {rev}.')
+                urls = self.find_pushhead_artifacts(task_cache, tree_cache,
+                                                    self._job, rev, trees)
+                if urls:
+                    for url in urls:
+                        if self.install_from_url(url, distdir, install_callback=install_callback):
+                            return 1
+                    return 0
         self.log(logging.ERROR, 'artifact',
                  {'revset': revset},
                  'No built artifacts for {revset} found.')
         return 1
 
     def install_from(self, source, distdir, install_callback=None):
         """Install artifacts from a ``source`` into the given ``distdir``.
 
@@ -799,11 +836,11 @@ class Artifacts(object):
-        self._pushhead_cache.clear_cache()
+        self._tree_cache.clear_cache()
         self._task_cache.clear_cache()
         self._artifact_cache.clear_cache()
 
     def print_cache(self):
         self.log(logging.INFO, 'artifact',
             {},
             'Printing cached artifacts and caches.')
-        self._pushhead_cache.print_cache()
+        self._tree_cache.print_cache()
         self._task_cache.print_cache()
         self._artifact_cache.print_cache()
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -1469,45 +1469,43 @@ class PackageFrontend(MachCommandBase):
         else:
           hg = which.which('hg')
 
         # Absolutely must come after the virtualenv is populated!
         from mozbuild.artifacts import Artifacts
         artifacts = Artifacts(tree, job, log=self.log, cache_dir=cache_dir, hg=hg)
         return artifacts
 
-    def _compute_defaults(self, tree=None, job=None):
-        # Firefox front-end developers mostly use fx-team.  Post auto-land, make this central.
-        tree = tree or 'fx-team'
+    def _compute_platform(self, job=None):
         if job:
-            return (tree, job)
+            return job
         if self.substs.get('MOZ_BUILD_APP', '') == 'mobile/android':
             if self.substs['ANDROID_CPU_ARCH'] == 'x86':
-                return tree, 'android-x86'
-            return tree, 'android-api-11'
+                return 'android-x86'
+            return 'android-api-11'
         # TODO: check for 32/64 bit builds.  We'd like to use HAVE_64BIT_BUILD
         # but that relies on the compile environment.
         if self.defines.get('XP_LINUX', False):
-            return tree, 'linux64'
+            return 'linux64'
         if self.defines.get('XP_MACOSX', False):
-            return tree, 'macosx64'
+            return 'macosx64'
         if self.defines.get('XP_WIN', False):
-            return tree, 'win32'
+            return 'win32'
-        raise Exception('Cannot determine default tree and job for |mach artifact|!')
+        raise Exception('Cannot determine default job for |mach artifact|!')
 
     @ArtifactSubCommand('artifact', 'install',
         'Install a good pre-built artifact.')
     @CommandArgument('source', metavar='SRC', nargs='?', type=str,
         help='Where to fetch and install artifacts from.  Can be omitted, in '
             'which case the current hg repository is inspected; an hg revision; '
             'a remote URL; or a local file.',
         default=None)
     def artifact_install(self, source=None, tree=None, job=None, verbose=False):
         self._set_log_level(verbose)
-        tree, job = self._compute_defaults(tree, job)
+        job = self._compute_platform(job)
         artifacts = self._make_artifacts(tree=tree, job=job)
 
         manifest_path = mozpath.join(self.topobjdir, '_build_manifests', 'install', 'dist_bin')
         manifest = InstallManifest(manifest_path)
 
         def install_callback(path, file_existed, file_updated):
             # Our paths are either under dist/bin or dist/plugins (for test
-            # plugins). dist/plugins. does not have an install manifest.
+            # plugins). dist/plugins does not have an install manifest.
@@ -1523,30 +1521,30 @@ class PackageFrontend(MachCommandBase):
             manifest.write(manifest_path)
 
         return retcode
 
     @ArtifactSubCommand('artifact', 'last',
         'Print the last pre-built artifact installed.')
     def artifact_print_last(self, tree=None, job=None, verbose=False):
         self._set_log_level(verbose)
-        tree, job = self._compute_defaults(tree, job)
+        job = self._compute_platform(job)
         artifacts = self._make_artifacts(tree=tree, job=job)
         artifacts.print_last()
         return 0
 
     @ArtifactSubCommand('artifact', 'print-cache',
         'Print local artifact cache for debugging.')
     def artifact_print_cache(self, tree=None, job=None, verbose=False):
         self._set_log_level(verbose)
-        tree, job = self._compute_defaults(tree, job)
+        job = self._compute_platform(job)
         artifacts = self._make_artifacts(tree=tree, job=job)
         artifacts.print_cache()
         return 0
 
     @ArtifactSubCommand('artifact', 'clear-cache',
         'Delete local artifacts and reset local artifact cache.')
     def artifact_clear_cache(self, tree=None, job=None, verbose=False):
         self._set_log_level(verbose)
-        tree, job = self._compute_defaults(tree, job)
+        job = self._compute_platform(job)
         artifacts = self._make_artifacts(tree=tree, job=job)
         artifacts.clear_cache()
         return 0