Bug 1234913 - Pre: Allow --skip-cache when running |mach artifact install|. r=chmanchester draft
author Nick Alexander <nalexander@mozilla.com>
Wed, 24 Feb 2016 23:25:25 -0800
changeset 335864 3c0b4c656ac51d038248a388e82a1ef7d3680ab1
parent 335863 3d5a435bcb168cad644c17c001990a31d1b818d9
child 335865 c5cef889adb213aa7dac192846ac476fdcce1a5d
push id 11896
push user cmanchester@mozilla.com
push date Tue, 01 Mar 2016 19:15:58 +0000
reviewers chmanchester
bugs 1234913
milestone 47.0a1
Bug 1234913 - Pre: Allow --skip-cache when running |mach artifact install|. r=chmanchester
MozReview-Commit-ID: M2FYAPEqXy
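With this change, |mach artifact install --skip-cache| forces a fresh fetch: the pickled task/tree caches are neither loaded nor saved, and any previously downloaded or processed artifact file is deleted before being re-fetched and re-processed.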
python/mozbuild/mozbuild/artifacts.py
python/mozbuild/mozbuild/mach_commands.py
--- a/python/mozbuild/mozbuild/artifacts.py
+++ b/python/mozbuild/mozbuild/artifacts.py
@@ -426,44 +426,63 @@ def cachedmethod(cachefunc):
 class CacheManager(object):
     '''Maintain an LRU cache.  Provide simple persistence, including support for
     loading and saving the state using a "with" block.  Allow clearing the cache
     and printing the cache for debugging.
 
     Provide simple logging.
     '''
 
-    def __init__(self, cache_dir, cache_name, cache_size, cache_callback=None, log=None):
+    def __init__(self, cache_dir, cache_name, cache_size, cache_callback=None, log=None, skip_cache=False):
+        self._skip_cache = skip_cache
         self._cache = pylru.lrucache(cache_size, callback=cache_callback)
         self._cache_filename = mozpath.join(cache_dir, cache_name + '-cache.pickle')
         self._log = log
 
     def log(self, *args, **kwargs):
         if self._log:
             self._log(*args, **kwargs)
 
     def load_cache(self):
+        if self._skip_cache:
+            self.log(logging.DEBUG, 'artifact',
+                {},
+                'Skipping cache: ignoring load_cache!')
+            return
+
         try:
             items = pickle.load(open(self._cache_filename, 'rb'))
             for key, value in items:
                 self._cache[key] = value
         except Exception as e:
             # Corrupt cache, perhaps?  Sadly, pickle raises many different
             # exceptions, so it's not worth trying to be fine grained here.
             # We ignore any exception, so the cache is effectively dropped.
             self.log(logging.INFO, 'artifact',
                 {'filename': self._cache_filename, 'exception': repr(e)},
                 'Ignoring exception unpickling cache file {filename}: {exception}')
             pass
 
     def dump_cache(self):
+        if self._skip_cache:
+            self.log(logging.DEBUG, 'artifact',
+                {},
+                'Skipping cache: ignoring dump_cache!')
+            return
+
         ensureParentDir(self._cache_filename)
         pickle.dump(list(reversed(list(self._cache.items()))), open(self._cache_filename, 'wb'), -1)
 
     def clear_cache(self):
+        if self._skip_cache:
+            self.log(logging.DEBUG, 'artifact',
+                {},
+                'Skipping cache: ignoring clear_cache!')
+            return
+
         with self:
             self._cache.clear()
 
     def print_cache(self):
         with self:
             for item in self._cache.items():
                 self.log(logging.INFO, 'artifact',
                     {'item': item},
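The hunk above guards every persistence entry point with the new skip_cache flag, so a skipping cache behaves like an in-memory-only cache. A minimal, self-contained sketch of that short-circuit pattern (a toy class with a plain dict standing in for pylru.lrucache; the names here are illustrative, not part of the patch):

    import os
    import pickle
    import tempfile

    class SketchCache(object):
        '''Toy stand-in for CacheManager: skip_cache turns persistence into no-ops.'''

        def __init__(self, filename, skip_cache=False):
            self._filename = filename
            self._skip_cache = skip_cache
            self._cache = {}  # the real class uses pylru.lrucache

        def load_cache(self):
            if self._skip_cache:
                return  # skipping cache: never read stale state from disk
            try:
                with open(self._filename, 'rb') as f:
                    self._cache.update(pickle.load(f))
            except Exception:
                pass  # corrupt or missing cache is silently dropped

        def dump_cache(self):
            if self._skip_cache:
                return  # skipping cache: never persist state to disk
            with open(self._filename, 'wb') as f:
                pickle.dump(self._cache, f, -1)

    fname = os.path.join(tempfile.gettempdir(), 'sketch-cache.pickle')
    if os.path.exists(fname):
        os.remove(fname)  # start clean so the assertion below is meaningful
    c = SketchCache(fname, skip_cache=True)
    c._cache['rev'] = ['url1', 'url2']
    c.dump_cache()   # no-op: nothing reaches disk
    c.load_cache()   # no-op: nothing is read back
    assert not os.path.exists(fname)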
@@ -491,18 +510,18 @@ class CacheManager(object):
         return self
 
     def __exit__(self, type, value, traceback):
         self.dump_cache()
 
 class TreeCache(CacheManager):
     '''Map pushhead revisions to trees with tasks/artifacts known to taskcluster.'''
 
-    def __init__(self, cache_dir, log=None):
-        CacheManager.__init__(self, cache_dir, 'artifact_tree', MAX_CACHED_TASKS, log=log)
+    def __init__(self, cache_dir, log=None, skip_cache=False):
+        CacheManager.__init__(self, cache_dir, 'artifact_tree', MAX_CACHED_TASKS, log=log, skip_cache=skip_cache)
 
         self._index = taskcluster.Index()
 
     @cachedmethod(operator.attrgetter('_cache'))
     def artifact_trees(self, rev, trees):
         # The "trees" argument is intentionally ignored. If this value
         # changes over time it means a changeset we care about has become
         # a pushhead on another tree, and our cache may no longer be
@@ -518,18 +537,18 @@ class TreeCache(CacheManager):
         rev, trees = args
         self.log(logging.INFO, 'artifact',
             {'rev': rev},
             'Last fetched trees for pushhead revision {rev}')
 
 class TaskCache(CacheManager):
     '''Map candidate pushheads to Task Cluster task IDs and artifact URLs.'''
 
-    def __init__(self, cache_dir, log=None):
-        CacheManager.__init__(self, cache_dir, 'artifact_url', MAX_CACHED_TASKS, log=log)
+    def __init__(self, cache_dir, log=None, skip_cache=False):
+        CacheManager.__init__(self, cache_dir, 'artifact_url', MAX_CACHED_TASKS, log=log, skip_cache=skip_cache)
         self._index = taskcluster.Index()
         self._queue = taskcluster.Queue()
 
     @cachedmethod(operator.attrgetter('_cache'))
     def artifact_urls(self, tree, job, rev):
         try:
             artifact_job = get_job_details(job, log=self._log)
         except KeyError:
@@ -567,19 +586,19 @@ class TaskCache(CacheManager):
         self.log(logging.INFO, 'artifact',
             {'rev': rev},
             'Last installed binaries from hg parent revision {rev}')
 
 
 class ArtifactCache(CacheManager):
     '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''
 
-    def __init__(self, cache_dir, log=None):
+    def __init__(self, cache_dir, log=None, skip_cache=False):
         # TODO: instead of storing N artifact packages, store M megabytes.
-        CacheManager.__init__(self, cache_dir, 'fetch', MAX_CACHED_ARTIFACTS, cache_callback=self.delete_file, log=log)
+        CacheManager.__init__(self, cache_dir, 'fetch', MAX_CACHED_ARTIFACTS, cache_callback=self.delete_file, log=log, skip_cache=skip_cache)
         self._cache_dir = cache_dir
         size_limit = 1024 * 1024 * 1024 # 1Gb in bytes.
         file_limit = 4 # But always keep at least 4 old artifacts around.
         persist_limit = PersistLimit(size_limit, file_limit)
         self._download_manager = DownloadManager(self._cache_dir, persist_limit=persist_limit)
 
     def delete_file(self, key, value):
         try:
@@ -602,18 +621,26 @@ class ArtifactCache(CacheManager):
     def fetch(self, url, force=False):
         # We download to a temporary name like HASH[:16]-basename to
         # differentiate among URLs with the same basenames.  We used to then
         # extract the build ID from the downloaded artifact and use it to make a
         # human readable unique name, but extracting build IDs is time consuming
         # (especially on Mac OS X, where we must mount a large DMG file).
         hash = hashlib.sha256(url).hexdigest()[:16]
         fname = hash + '-' + os.path.basename(url)
+
+        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
+        if self._skip_cache and os.path.exists(path):
+            self.log(logging.DEBUG, 'artifact',
+                {'path': path},
+                'Skipping cache: removing cached downloaded artifact {path}')
+            os.remove(path)
+
         self.log(logging.INFO, 'artifact',
-            {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
+            {'path': path},
             'Downloading to temporary location {path}')
         try:
             dl = self._download_manager.download(url, fname)
             if dl:
                 dl.wait()
             self.log(logging.INFO, 'artifact',
                 {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
                 'Downloaded artifact to {path}')
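Because the temporary filename is derived deterministically from the URL (HASH[:16]-basename), forcing a re-download only requires deleting that one path before handing the URL back to the DownloadManager. A rough sketch of the naming-and-eviction step, assuming Python 3 (hence the .encode(); the original runs under Python 2, where hashlib accepts the str directly) and plain os.path in place of mozpath:

    import hashlib
    import os

    def cached_download_path(cache_dir, url):
        # HASH[:16]-basename differentiates URLs that share a basename.
        digest = hashlib.sha256(url.encode('utf-8')).hexdigest()[:16]
        return os.path.abspath(
            os.path.join(cache_dir, digest + '-' + os.path.basename(url)))

    def evict_if_skipping_cache(cache_dir, url, skip_cache):
        path = cached_download_path(cache_dir, url)
        if skip_cache and os.path.exists(path):
            os.remove(path)  # force the download manager to fetch afresh
        return path

    path = evict_if_skipping_cache(
        '.', 'https://example.com/build1/target.zip', skip_cache=True)
    print(path)  # ends in '<16 hex chars>-target.zip'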
@@ -633,34 +660,35 @@ class ArtifactCache(CacheManager):
         self.log(logging.INFO, 'artifact',
             {'filename': result + PROCESSED_SUFFIX},
             'Last installed binaries from local processed file {filename}')
 
 
 class Artifacts(object):
     '''Maintain state to efficiently fetch build artifacts from a Firefox tree.'''
 
-    def __init__(self, tree, job=None, log=None, cache_dir='.', hg='hg'):
+    def __init__(self, tree, job=None, log=None, cache_dir='.', hg='hg', skip_cache=False):
         self._tree = tree
         self._job = job or self._guess_artifact_job()
         self._log = log
         self._hg = hg
         self._cache_dir = cache_dir
+        self._skip_cache = skip_cache
 
         try:
             self._artifact_job = get_job_details(self._job, log=self._log)
         except KeyError:
             self.log(logging.INFO, 'artifact',
                 {'job': self._job},
                 'Unknown job {job}')
             raise KeyError("Unknown job")
 
-        self._task_cache = TaskCache(self._cache_dir, log=self._log)
-        self._artifact_cache = ArtifactCache(self._cache_dir, log=self._log)
-        self._tree_cache = TreeCache(self._cache_dir, log=self._log)
+        self._task_cache = TaskCache(self._cache_dir, log=self._log, skip_cache=self._skip_cache)
+        self._artifact_cache = ArtifactCache(self._cache_dir, log=self._log, skip_cache=self._skip_cache)
+        self._tree_cache = TreeCache(self._cache_dir, log=self._log, skip_cache=self._skip_cache)
         # A "tree" according to mozext and an integration branch isn't always
         # an exact match. For example, pushhead("central") refers to pushheads
         # with artifacts under the taskcluster namespace "mozilla-central".
         self._tree_replacements = {
             'inbound': 'mozilla-inbound',
             'central': 'mozilla-central',
         }
 
@@ -758,16 +786,23 @@ class Artifacts(object):
 
     def install_from_file(self, filename, distdir):
         self.log(logging.INFO, 'artifact',
             {'filename': filename},
             'Installing from {filename}')
 
         # Do we need to post-process?
         processed_filename = filename + PROCESSED_SUFFIX
+
+        if self._skip_cache and os.path.exists(processed_filename):
+            self.log(logging.DEBUG, 'artifact',
+                {'path': processed_filename},
+                'Skipping cache: removing cached processed artifact {path}')
+            os.remove(processed_filename)
+
         if not os.path.exists(processed_filename):
             self.log(logging.INFO, 'artifact',
                 {'filename': filename},
                 'Processing contents of {filename}')
             self.log(logging.INFO, 'artifact',
                 {'processed_filename': processed_filename},
                 'Writing processed {processed_filename}')
             self._artifact_job.process_artifact(filename, processed_filename)
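The same eviction trick covers post-processing: removing filename + PROCESSED_SUFFIX is enough to make the existing "if not os.path.exists(...)" branch re-run process_artifact. A small sketch of that invariant (the suffix value and the processing callback below are placeholders, not the real ones):

    import os
    import tempfile

    PROCESSED_SUFFIX = '.processed.jar'  # placeholder; the real suffix lives in artifacts.py

    def ensure_processed(filename, skip_cache, process_artifact):
        processed = filename + PROCESSED_SUFFIX
        if skip_cache and os.path.exists(processed):
            os.remove(processed)  # evict, so the branch below re-processes
        if not os.path.exists(processed):
            process_artifact(filename, processed)
        return processed

    src = os.path.join(tempfile.gettempdir(), 'target.zip')
    open(src, 'wb').close()
    out = ensure_processed(src, skip_cache=True,
                           process_artifact=lambda s, d: open(d, 'wb').close())
    assert os.path.exists(out)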
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -1475,17 +1475,17 @@ class PackageFrontend(MachCommandBase):
         Never build libxul again!
 
         '''
         pass
 
     def _set_log_level(self, verbose):
         self.log_manager.terminal_handler.setLevel(logging.INFO if not verbose else logging.DEBUG)
 
-    def _make_artifacts(self, tree=None, job=None):
+    def _make_artifacts(self, tree=None, job=None, skip_cache=False):
         self._activate_virtualenv()
         self.virtualenv_manager.install_pip_package('pylru==1.0.9')
         self.virtualenv_manager.install_pip_package('taskcluster==0.0.32')
         self.virtualenv_manager.install_pip_package('mozregression==1.0.2')
 
         state_dir = self._mach_context.state_dir
         cache_dir = os.path.join(state_dir, 'package-frontend')
 
@@ -1498,29 +1498,32 @@ class PackageFrontend(MachCommandBase):
         import which
         if self._is_windows():
           hg = which.which('hg.exe')
         else:
           hg = which.which('hg')
 
         # Absolutely must come after the virtualenv is populated!
         from mozbuild.artifacts import Artifacts
-        artifacts = Artifacts(tree, job, log=self.log, cache_dir=cache_dir, hg=hg)
+        artifacts = Artifacts(tree, job, log=self.log, cache_dir=cache_dir, hg=hg, skip_cache=skip_cache)
         return artifacts
 
     @ArtifactSubCommand('artifact', 'install',
         'Install a good pre-built artifact.')
     @CommandArgument('source', metavar='SRC', nargs='?', type=str,
         help='Where to fetch and install artifacts from.  Can be omitted, in '
             'which case the current hg repository is inspected; an hg revision; '
             'a remote URL; or a local file.',
         default=None)
-    def artifact_install(self, source=None, tree=None, job=None, verbose=False):
+    @CommandArgument('--skip-cache', action='store_true',
+        help='Skip all local caches to force re-fetching remote artifacts.',
+        default=False)
+    def artifact_install(self, source=None, skip_cache=False, tree=None, job=None, verbose=False):
         self._set_log_level(verbose)
-        artifacts = self._make_artifacts(tree=tree, job=job)
+        artifacts = self._make_artifacts(tree=tree, job=job, skip_cache=skip_cache)
 
         return artifacts.install_from(source, self.distdir)
 
     @ArtifactSubCommand('artifact', 'last',
         'Print the last pre-built artifact installed.')
     def artifact_print_last(self, tree=None, job=None, verbose=False):
         self._set_log_level(verbose)
         artifacts = self._make_artifacts(tree=tree, job=job)
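For context, mach's @CommandArgument decorator forwards its arguments to argparse's add_argument, so the new flag behaves like this stand-alone argparse sketch (an analogy only, not mach itself):

    import argparse

    parser = argparse.ArgumentParser(prog='mach artifact install')
    parser.add_argument('source', metavar='SRC', nargs='?', default=None,
                        help='Where to fetch and install artifacts from.')
    parser.add_argument('--skip-cache', action='store_true', default=False,
                        help='Skip all local caches to force re-fetching remote artifacts.')

    args = parser.parse_args(['--skip-cache'])
    assert args.skip_cache and args.source is None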