Bug 1421734 - Download and unpack toolchain artifacts in parallel; r=nalexander
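
Make `mach artifact toolchain` download and unpack the requested artifacts on
a small thread pool instead of one at a time. The per-download progress state
moves from the ArtifactCache instance into fetch()'s closure so concurrent
downloads do not clobber each other's throttling, fetch() no longer cancels
pending background downloads when it returns, and a new --jobs/-j option
(default 4) controls how many artifacts are fetched concurrently. An
illustrative invocation (the manifest path is only an example):

  ./mach artifact toolchain -j 8 --tooltool-manifest browser/config/tooltool-manifests/linux64/releng.manifest
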
MozReview-Commit-ID: BMe6zqIqNHP
--- a/python/mozbuild/mozbuild/artifacts.py
+++ b/python/mozbuild/mozbuild/artifacts.py
@@ -770,29 +770,29 @@ class ArtifactCache(object):
def __init__(self, cache_dir, log=None, skip_cache=False):
mkdir(cache_dir, not_indexed=True)
self._cache_dir = cache_dir
self._log = log
self._skip_cache = skip_cache
self._persist_limit = ArtifactPersistLimit(log)
self._download_manager = DownloadManager(
self._cache_dir, persist_limit=self._persist_limit)
- self._last_dl_update = -1
def log(self, *args, **kwargs):
if self._log:
self._log(*args, **kwargs)
def fetch(self, url, force=False):
fname = os.path.basename(url)
try:
# Use the file name from the url if it looks like a hash digest.
if len(fname) not in (32, 40, 56, 64, 96, 128):
raise TypeError()
binascii.unhexlify(fname)
+ basename = fname
except TypeError:
# We download to a temporary name like HASH[:16]-basename to
# differentiate among URLs with the same basenames. We used to then
# extract the build ID from the downloaded artifact and use it to make a
# human readable unique name, but extracting build IDs is time consuming
# (especially on Mac OS X, where we must mount a large DMG file).
hash = hashlib.sha256(url).hexdigest()[:16]
# Strip query string and fragments.
@@ -804,46 +804,44 @@ class ArtifactCache(object):
self.log(logging.DEBUG, 'artifact',
{'path': path},
'Skipping cache: removing cached downloaded artifact {path}')
os.remove(path)
self.log(logging.INFO, 'artifact',
{'path': path},
'Downloading to temporary location {path}')
- try:
- dl = self._download_manager.download(url, fname)
+ dl = self._download_manager.download(url, fname)
- def download_progress(dl, bytes_so_far, total_size):
- if not total_size:
- return
- percent = (float(bytes_so_far) / total_size) * 100
- now = int(percent / 5)
- if now == self._last_dl_update:
- return
- self._last_dl_update = now
- self.log(logging.INFO, 'artifact',
- {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
- 'Downloading... {percent:02.1f} %')
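+ # Keep the throttling state in a one-element list so the nested callback
+ # below can update it (no nonlocal in Python 2); each concurrent fetch()
+ # call then rate-limits its own progress logging independently.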
+ _last_dl_update = [-1]
- if dl:
- dl.set_progress(download_progress)
- dl.wait()
- else:
- # Avoid the file being removed if it was in the cache already.
- path = os.path.join(self._cache_dir, fname)
- self._persist_limit.register_file(path)
+ def download_progress(dl, bytes_so_far, total_size):
+ if not total_size:
+ return
+ percent = (float(bytes_so_far) / total_size) * 100
+ now = int(percent / 10)
+ if now == _last_dl_update[0]:
+ return
+ _last_dl_update[0] = now
+ self.log(logging.INFO, 'artifact',
+ {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent, 'basename': basename},
+ 'Downloading {basename}... {percent:02.1f} %')
- self.log(logging.INFO, 'artifact',
- {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
- 'Downloaded artifact to {path}')
- return os.path.abspath(mozpath.join(self._cache_dir, fname))
- finally:
- # Cancel any background downloads in progress.
- self._download_manager.cancel()
+ if dl:
+ dl.set_progress(download_progress)
+ dl.wait()
+ else:
+ # Avoid the file being removed if it was in the cache already.
+ path = os.path.join(self._cache_dir, fname)
+ self._persist_limit.register_file(path)
+
+ self.log(logging.INFO, 'artifact',
+ {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
+ 'Downloaded artifact to {path}')
+ return os.path.abspath(mozpath.join(self._cache_dir, fname))
def clear_cache(self):
if self._skip_cache:
self.log(logging.DEBUG, 'artifact',
{},
'Skipping cache: ignoring clear_cache!')
return
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -37,16 +37,18 @@ from mozbuild.base import (
MozbuildObject,
)
from mozbuild.util import ensureParentDir
from mozbuild.backend import (
backends,
)
+from concurrent.futures import ThreadPoolExecutor
+
BUILD_WHAT_HELP = '''
What to build. Can be a top-level make target or a relative directory. If
multiple options are provided, they will be built serially. Takes dependency
information from `topsrcdir/build/dumbmake-dependencies` to build additional
targets as needed. BUILDING ONLY PARTS OF THE TREE CAN RESULT IN BAD TREE
STATE. USE AT YOUR OWN RISK.
'''.strip()
@@ -1228,24 +1230,26 @@ class PackageFrontend(MachCommandBase):
@CommandArgument('--tooltool-url', metavar='URL',
help='Use the given url as tooltool server')
@CommandArgument('--no-unpack', action='store_true',
help='Do not unpack any downloaded file')
@CommandArgument('--retry', type=int, default=0,
help='Number of times to retry failed downloads')
@CommandArgument('--artifact-manifest', metavar='FILE',
help='Store a manifest about the downloaded taskcluster artifacts')
+ @CommandArgument('--jobs', '-j', default=4, metavar='jobs', type=int,
+ help='Number of artifacts to fetch concurrently. Default is 4.')
@CommandArgument('files', nargs='*',
help='A list of files to download, in the form path@task-id, in '
'addition to the files listed in the tooltool manifest.')
def artifact_toolchain(self, verbose=False, cache_dir=None,
skip_cache=False, from_build=(),
tooltool_manifest=None, authentication_file=None,
tooltool_url=None, no_unpack=False, retry=None,
- artifact_manifest=None, files=()):
+ artifact_manifest=None, jobs=4, files=()):
'''Download, cache and install pre-built toolchains.
'''
from mozbuild.artifacts import ArtifactCache
from mozbuild.action.tooltool import (
FileRecord,
open_manifest,
unpack_file,
)
@@ -1418,17 +1422,19 @@ class PackageFrontend(MachCommandBase):
if '@' not in f:
self.log(logging.ERROR, 'artifact', {},
'Expected a list of files of the form path@task-id')
return 1
name, task_id = f.rsplit('@', 1)
record = ArtifactRecord(task_id, name)
records[record.filename] = record
- for record in records.itervalues():
+ artifacts = {} if artifact_manifest else None
+
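+ # Runs on a worker thread; failures raise instead of returning an exit
+ # code so that consuming the pool.map() results surfaces them.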
+ def _fetch_and_unpack_record(record):
self.log(logging.INFO, 'artifact', {'name': record.basename},
'Downloading {name}')
valid = False
# sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
sleeptime=60)):
try:
record.fetch_with(cache)
@@ -1463,27 +1469,23 @@ class PackageFrontend(MachCommandBase):
pass
if not valid:
os.unlink(record.filename)
if attempt < retry:
self.log(logging.INFO, 'artifact', {},
'Will retry in a moment...')
continue
- downloaded.append(record)
break
if not valid:
self.log(logging.ERROR, 'artifact', {'name': record.basename},
'Failed to download {name}')
- return 1
+ raise Exception('Failed to download {name}'.format(name=record.basename))
- artifacts = {} if artifact_manifest else None
-
- for record in downloaded:
local = os.path.join(os.getcwd(), record.basename)
if os.path.exists(local):
os.unlink(local)
# unpack_file needs the file with its final name to work
# (https://github.com/mozilla/build-tooltool/issues/38), so we
# need to copy it, even though we remove it later. Use hard links
# when possible.
try:
@@ -1502,28 +1504,34 @@ class PackageFrontend(MachCommandBase):
h.update(data)
artifacts[record.url] = {
'sha256': h.hexdigest(),
}
if record.unpack and not no_unpack:
unpack_file(local, record.setup)
os.unlink(local)
+ return record
+
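+ # Fetch and unpack up to `jobs` records at a time; list() drains the map
+ # iterator so an empty result is detectable and any download failure
+ # raised by a worker propagates here.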
+ with ThreadPoolExecutor(jobs) as pool:
+ downloaded = list(pool.map(_fetch_and_unpack_record, records.values()))
+
if not downloaded:
self.log(logging.ERROR, 'artifact', {}, 'Nothing to download')
if files:
return 1
if artifacts:
ensureParentDir(artifact_manifest)
with open(artifact_manifest, 'w') as fh:
json.dump(artifacts, fh, indent=4, sort_keys=True)
return 0
+
class StaticAnalysisSubCommand(SubCommand):
def __call__(self, func):
after = SubCommand.__call__(self, func)
args = [
CommandArgument('--verbose', '-v', action='store_true',
help='Print verbose output.'),
]
for arg in args: