Bug 1421734 - Download and unpack toolchain artifacts in parallel; r=nalexander draft
author Chris AtLee <catlee@mozilla.com>
Mon, 12 Feb 2018 15:07:36 -0500
changeset 785382 7e3b9e91f7cf064942d12646119be0d9cb44b5d3
parent 785381 31b74b91294d65a4cdf0a2355d564540f67f644c
push id 107209
push user bmo:gps@mozilla.com
push date Fri, 20 Apr 2018 00:09:32 +0000
reviewers nalexander
bugs 1421734
milestone 61.0a1
Bug 1421734 - Download and unpack toolchain artifacts in parallel; r=nalexander MozReview-Commit-ID: BMe6zqIqNHP
python/mozbuild/mozbuild/artifacts.py
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/artifacts.py
+++ b/python/mozbuild/mozbuild/artifacts.py
@@ -770,29 +770,29 @@ class ArtifactCache(object):
     def __init__(self, cache_dir, log=None, skip_cache=False):
         mkdir(cache_dir, not_indexed=True)
         self._cache_dir = cache_dir
         self._log = log
         self._skip_cache = skip_cache
         self._persist_limit = ArtifactPersistLimit(log)
         self._download_manager = DownloadManager(
             self._cache_dir, persist_limit=self._persist_limit)
-        self._last_dl_update = -1
 
     def log(self, *args, **kwargs):
         if self._log:
             self._log(*args, **kwargs)
 
     def fetch(self, url, force=False):
         fname = os.path.basename(url)
         try:
             # Use the file name from the url if it looks like a hash digest.
             if len(fname) not in (32, 40, 56, 64, 96, 128):
                 raise TypeError()
             binascii.unhexlify(fname)
+            basename = fname
         except TypeError:
             # We download to a temporary name like HASH[:16]-basename to
             # differentiate among URLs with the same basenames.  We used to then
             # extract the build ID from the downloaded artifact and use it to make a
             # human readable unique name, but extracting build IDs is time consuming
             # (especially on Mac OS X, where we must mount a large DMG file).
             hash = hashlib.sha256(url).hexdigest()[:16]
             # Strip query string and fragments.
@@ -804,46 +804,44 @@ class ArtifactCache(object):
             self.log(logging.DEBUG, 'artifact',
                 {'path': path},
                 'Skipping cache: removing cached downloaded artifact {path}')
             os.remove(path)
 
         self.log(logging.INFO, 'artifact',
             {'path': path},
             'Downloading to temporary location {path}')
-        try:
-            dl = self._download_manager.download(url, fname)
+        dl = self._download_manager.download(url, fname)
 
-            def download_progress(dl, bytes_so_far, total_size):
-                if not total_size:
-                    return
-                percent = (float(bytes_so_far) / total_size) * 100
-                now = int(percent / 5)
-                if now == self._last_dl_update:
-                    return
-                self._last_dl_update = now
-                self.log(logging.INFO, 'artifact',
-                         {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
-                         'Downloading... {percent:02.1f} %')
+        _last_dl_update = [-1]
 
-            if dl:
-                dl.set_progress(download_progress)
-                dl.wait()
-            else:
-                # Avoid the file being removed if it was in the cache already.
-                path = os.path.join(self._cache_dir, fname)
-                self._persist_limit.register_file(path)
+        def download_progress(dl, bytes_so_far, total_size):
+            if not total_size:
+                return
+            percent = (float(bytes_so_far) / total_size) * 100
+            now = int(percent / 10)
+            if now == _last_dl_update[0]:
+                return
+            _last_dl_update[0] = now
+            self.log(logging.INFO, 'artifact',
+                     {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent, 'basename': basename},
+                     'Downloading {basename}... {percent:02.1f} %')
 
-            self.log(logging.INFO, 'artifact',
-                {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
-                'Downloaded artifact to {path}')
-            return os.path.abspath(mozpath.join(self._cache_dir, fname))
-        finally:
-            # Cancel any background downloads in progress.
-            self._download_manager.cancel()
+        if dl:
+            dl.set_progress(download_progress)
+            dl.wait()
+        else:
+            # Avoid the file being removed if it was in the cache already.
+            path = os.path.join(self._cache_dir, fname)
+            self._persist_limit.register_file(path)
+
+        self.log(logging.INFO, 'artifact',
+            {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
+            'Downloaded artifact to {path}')
+        return os.path.abspath(mozpath.join(self._cache_dir, fname))
 
     def clear_cache(self):
         if self._skip_cache:
             self.log(logging.DEBUG, 'artifact',
                 {},
                 'Skipping cache: ignoring clear_cache!')
             return
 
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -37,16 +37,18 @@ from mozbuild.base import (
     MozbuildObject,
 )
 from mozbuild.util import ensureParentDir
 
 from mozbuild.backend import (
     backends,
 )
 
+from concurrent.futures import ThreadPoolExecutor
+
 
 BUILD_WHAT_HELP = '''
 What to build. Can be a top-level make target or a relative directory. If
 multiple options are provided, they will be built serially. Takes dependency
 information from `topsrcdir/build/dumbmake-dependencies` to build additional
 targets as needed. BUILDING ONLY PARTS OF THE TREE CAN RESULT IN BAD TREE
 STATE. USE AT YOUR OWN RISK.
 '''.strip()
@@ -1228,24 +1230,26 @@ class PackageFrontend(MachCommandBase):
     @CommandArgument('--tooltool-url', metavar='URL',
         help='Use the given url as tooltool server')
     @CommandArgument('--no-unpack', action='store_true',
         help='Do not unpack any downloaded file')
     @CommandArgument('--retry', type=int, default=0,
         help='Number of times to retry failed downloads')
     @CommandArgument('--artifact-manifest', metavar='FILE',
         help='Store a manifest about the downloaded taskcluster artifacts')
+    @CommandArgument('--jobs', '-j', default='4', metavar='jobs', type=int,
+        help='Number of artifacts to fetch concurrently. Default is 4.')
     @CommandArgument('files', nargs='*',
         help='A list of files to download, in the form path@task-id, in '
              'addition to the files listed in the tooltool manifest.')
     def artifact_toolchain(self, verbose=False, cache_dir=None,
                           skip_cache=False, from_build=(),
                           tooltool_manifest=None, authentication_file=None,
                           tooltool_url=None, no_unpack=False, retry=None,
-                          artifact_manifest=None, files=()):
+                          artifact_manifest=None, jobs=4, files=()):
         '''Download, cache and install pre-built toolchains.
         '''
         from mozbuild.artifacts import ArtifactCache
         from mozbuild.action.tooltool import (
             FileRecord,
             open_manifest,
             unpack_file,
         )
@@ -1418,17 +1422,19 @@ class PackageFrontend(MachCommandBase):
             if '@' not in f:
                 self.log(logging.ERROR, 'artifact', {},
                          'Expected a list of files of the form path@task-id')
                 return 1
             name, task_id = f.rsplit('@', 1)
             record = ArtifactRecord(task_id, name)
             records[record.filename] = record
 
-        for record in records.itervalues():
+        artifacts = {} if artifact_manifest else None
+
+        def _fetch_and_unpack_record(record):
             self.log(logging.INFO, 'artifact', {'name': record.basename},
                      'Downloading {name}')
             valid = False
             # sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
             for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
                                                      sleeptime=60)):
                 try:
                     record.fetch_with(cache)
@@ -1463,27 +1469,23 @@ class PackageFrontend(MachCommandBase):
                     pass
                 if not valid:
                     os.unlink(record.filename)
                     if attempt < retry:
                         self.log(logging.INFO, 'artifact', {},
                                  'Will retry in a moment...')
                     continue
 
-                downloaded.append(record)
                 break
 
             if not valid:
                 self.log(logging.ERROR, 'artifact', {'name': record.basename},
                          'Failed to download {name}')
-                return 1
+                raise Exception('Failed to download {name}'.format(name=record.basename))
 
-        artifacts = {} if artifact_manifest else None
-
-        for record in downloaded:
             local = os.path.join(os.getcwd(), record.basename)
             if os.path.exists(local):
                 os.unlink(local)
             # unpack_file needs the file with its final name to work
             # (https://github.com/mozilla/build-tooltool/issues/38), so we
             # need to copy it, even though we remove it later. Use hard links
             # when possible.
             try:
@@ -1502,28 +1504,34 @@ class PackageFrontend(MachCommandBase):
                         h.update(data)
                 artifacts[record.url] = {
                     'sha256': h.hexdigest(),
                 }
             if record.unpack and not no_unpack:
                 unpack_file(local, record.setup)
                 os.unlink(local)
 
+            return record
+
+        with ThreadPoolExecutor(jobs) as pool:
+            downloaded = pool.map(_fetch_and_unpack_record, records.values())
+
         if not downloaded:
             self.log(logging.ERROR, 'artifact', {}, 'Nothing to download')
             if files:
                 return 1
 
         if artifacts:
             ensureParentDir(artifact_manifest)
             with open(artifact_manifest, 'w') as fh:
                 json.dump(artifacts, fh, indent=4, sort_keys=True)
 
         return 0
 
+
 class StaticAnalysisSubCommand(SubCommand):
     def __call__(self, func):
         after = SubCommand.__call__(self, func)
         args = [
             CommandArgument('--verbose', '-v', action='store_true',
                             help='Print verbose output.'),
         ]
         for arg in args: