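Bug 1421734: Download and unpack toolchain artifacts in parallel

ArtifactCache gains a cancel_when_done argument (default True) so that
fetch() can leave the cache's DownloadManager running when it returns.
PackageFrontend in mach_commands.py passes cancel_when_done=False, fetches
and unpacks each record on a ThreadPoolExecutor with 8 workers, and cancels
the download manager itself once all records have been processed.
Download progress is now logged with the file's basename in 10% steps, and
the last-reported step is tracked per fetch() call instead of on the cache.
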
MozReview-Commit-ID: BMe6zqIqNHP
--- a/python/mozbuild/mozbuild/artifacts.py
+++ b/python/mozbuild/mozbuild/artifacts.py
@@ -765,37 +765,38 @@ class ArtifactPersistLimit(PersistLimit)
fs.remove(f.path)
self._files_size = 0
self.files = []
class ArtifactCache(object):
'''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''
- def __init__(self, cache_dir, log=None, skip_cache=False):
+ def __init__(self, cache_dir, log=None, skip_cache=False, cancel_when_done=True):
mkdir(cache_dir, not_indexed=True)
self._cache_dir = cache_dir
self._log = log
self._skip_cache = skip_cache
self._persist_limit = ArtifactPersistLimit(log)
self._download_manager = DownloadManager(
self._cache_dir, persist_limit=self._persist_limit)
- self._last_dl_update = -1
+ self._cancel_when_done = cancel_when_done
def log(self, *args, **kwargs):
if self._log:
self._log(*args, **kwargs)
def fetch(self, url, force=False):
fname = os.path.basename(url)
try:
# Use the file name from the url if it looks like a hash digest.
if len(fname) not in (32, 40, 56, 64, 96, 128):
raise TypeError()
binascii.unhexlify(fname)
+ basename = fname
except TypeError:
# We download to a temporary name like HASH[:16]-basename to
# differentiate among URLs with the same basenames. We used to then
# extract the build ID from the downloaded artifact and use it to make a
# human readable unique name, but extracting build IDs is time consuming
# (especially on Mac OS X, where we must mount a large DMG file).
hash = hashlib.sha256(url).hexdigest()[:16]
# Strip query string and fragments.
@@ -810,54 +811,59 @@ class ArtifactCache(object):
os.remove(path)
self.log(logging.INFO, 'artifact',
{'path': path},
'Downloading to temporary location {path}')
try:
dl = self._download_manager.download(url, fname)
+ _last_dl_update = [-1]
+
def download_progress(dl, bytes_so_far, total_size):
if not total_size:
return
percent = (float(bytes_so_far) / total_size) * 100
- now = int(percent / 5)
- if now == self._last_dl_update:
+ now = int(percent / 10)
+ if now == _last_dl_update[0]:
return
- self._last_dl_update = now
+ _last_dl_update[0] = now
self.log(logging.INFO, 'artifact',
- {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
- 'Downloading... {percent:02.1f} %')
+ {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent, 'basename': basename},
+ 'Downloading {basename}... {percent:02.1f} %')
if dl:
dl.set_progress(download_progress)
dl.wait()
else:
# Avoid the file being removed if it was in the cache already.
path = os.path.join(self._cache_dir, fname)
self._persist_limit.register_file(path)
self.log(logging.INFO, 'artifact',
{'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
'Downloaded artifact to {path}')
return os.path.abspath(mozpath.join(self._cache_dir, fname))
finally:
# Cancel any background downloads in progress.
- self._download_manager.cancel()
+ if self._cancel_when_done:
+ self._download_manager.cancel()
def clear_cache(self):
if self._skip_cache:
self.log(logging.DEBUG, 'artifact',
{},
'Skipping cache: ignoring clear_cache!')
return
self._persist_limit.remove_all()
+
+
class Artifacts(object):
'''Maintain state to efficiently fetch build artifacts from a Firefox tree.'''
def __init__(self, tree, substs, defines, job=None, log=None,
cache_dir='.', hg=None, git=None, skip_cache=False,
topsrcdir=None):
if (hg and git) or (not hg and not git):
raise ValueError("Must provide path to exactly one of hg and git")
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -37,16 +37,18 @@ from mozbuild.base import (
MozbuildObject,
)
from mozbuild.util import ensureParentDir
from mozbuild.backend import (
backends,
)
+from concurrent.futures import ThreadPoolExecutor
+
BUILD_WHAT_HELP = '''
What to build. Can be a top-level make target or a relative directory. If
multiple options are provided, they will be built serially. Takes dependency
information from `topsrcdir/build/dumbmake-dependencies` to build additional
targets as needed. BUILDING ONLY PARTS OF THE TREE CAN RESULT IN BAD TREE
STATE. USE AT YOUR OWN RISK.
'''.strip()
@@ -1254,17 +1256,17 @@ class PackageFrontend(MachCommandBase):
self.log_manager.structured_filter)
if not cache_dir:
cache_dir = os.path.join(self._mach_context.state_dir, 'toolchains')
tooltool_url = (tooltool_url or
'https://tooltool.mozilla-releng.net').rstrip('/')
cache = ArtifactCache(cache_dir=cache_dir, log=self.log,
- skip_cache=skip_cache)
+ skip_cache=skip_cache, cancel_when_done=False)
if authentication_file:
with open(authentication_file, 'rb') as f:
token = f.read().strip()
class TooltoolAuthenticator(HTTPAdapter):
def send(self, request, *args, **kwargs):
request.headers['Authorization'] = \
@@ -1396,17 +1398,19 @@ class PackageFrontend(MachCommandBase):
if '@' not in f:
self.log(logging.ERROR, 'artifact', {},
'Expected a list of files of the form path@task-id')
return 1
name, task_id = f.rsplit('@', 1)
record = ArtifactRecord(task_id, name)
records[record.filename] = record
- for record in records.itervalues():
+ artifacts = {} if artifact_manifest else None
+
+ def _fetch_and_unpack_record(record):
self.log(logging.INFO, 'artifact', {'name': record.basename},
'Downloading {name}')
valid = False
# sleeptime is 60 per retry.py, used by tooltool_wrapper.sh
for attempt, _ in enumerate(redo.retrier(attempts=retry+1,
sleeptime=60)):
try:
record.fetch_with(cache)
@@ -1441,38 +1445,35 @@ class PackageFrontend(MachCommandBase):
pass
if not valid:
os.unlink(record.filename)
if attempt < retry:
self.log(logging.INFO, 'artifact', {},
'Will retry in a moment...')
continue
- downloaded.append(record)
break
if not valid:
self.log(logging.ERROR, 'artifact', {'name': record.basename},
'Failed to download {name}')
- return 1
+ raise Exception('Failed to download {name}'.format(name=record.basename))
- artifacts = {} if artifact_manifest else None
-
- for record in downloaded:
local = os.path.join(os.getcwd(), record.basename)
if os.path.exists(local):
os.unlink(local)
# unpack_file needs the file with its final name to work
# (https://github.com/mozilla/build-tooltool/issues/38), so we
# need to copy it, even though we remove it later. Use hard links
# when possible.
try:
os.link(record.filename, local)
except Exception:
shutil.copy(record.filename, local)
+
# Keep a sha256 of each downloaded file, for the chain-of-trust
# validation.
if artifact_manifest is not None:
with open(local) as fh:
h = hashlib.sha256()
while True:
data = fh.read(1024 * 1024)
if not data:
@@ -1480,28 +1481,37 @@ class PackageFrontend(MachCommandBase):
h.update(data)
artifacts[record.url] = {
'sha256': h.hexdigest(),
}
if record.unpack and not no_unpack:
unpack_file(local, record.setup)
os.unlink(local)
+ return record
+
+ with ThreadPoolExecutor(8) as pool:
+ for record in pool.map(_fetch_and_unpack_record, records.values()):
+ downloaded.append(record)
+
+ cache._download_manager.cancel()
+
if not downloaded:
self.log(logging.ERROR, 'artifact', {}, 'Nothing to download')
if files:
return 1
if artifacts:
ensureParentDir(artifact_manifest)
with open(artifact_manifest, 'w') as fh:
json.dump(artifacts, fh, indent=4, sort_keys=True)
return 0
+
class StaticAnalysisSubCommand(SubCommand):
def __call__(self, func):
after = SubCommand.__call__(self, func)
args = [
CommandArgument('--verbose', '-v', action='store_true',
help='Print verbose output.'),
]
for arg in args: