Bug 1388894 - Cache mach commands dispatch table; r?ahal, glandium draft
author Gregory Szorc <gps@mozilla.com>
Wed, 09 Aug 2017 15:16:42 -0700
changeset 643685 6c98f45f717e38ffb3f715d9a083f4377ac79ab1
parent 643666 ecafe806281ba5e24f0f9a8f920691cd7d8772b3
child 725374 3255cdb36cf064b05fae6d2f183d912ff5074769
push id 73179
push user bmo:gps@mozilla.com
push date Thu, 10 Aug 2017 01:34:11 +0000
reviewers ahal, glandium
bugs 1388894
milestone 57.0a1
This is mostly in an RFC state right now. Please grant r-.

This shaves ~45ms off `mach uuid` on my i7-6700K (~125ms -> ~80ms). About 2/3 of the remaining time is in import. Looking at what modules are imported, I suspect there is room to optimize the default imports list substantially. E.g., we're pulling in multiprocessing as part of mozbuild :/

TODO

* settings providers need to always be loaded so all settings are registered when settings files are loaded at dispatch time
* need to cache categories?
* need to test a lot of things
* look at mtimes for any other files?

MozReview-Commit-ID: 2jjydyq6PUX
build/mach_bootstrap.py
python/mach/mach/dispatcher.py
python/mach/mach/main.py
python/mach/mach/registrar.py
--- a/build/mach_bootstrap.py
+++ b/build/mach_bootstrap.py
@@ -245,16 +245,43 @@ def bootstrap(topsrcdir, mozilla_dir=Non
 
         with open(os.devnull, 'wb') as devnull:
             subprocess.Popen([sys.executable,
                               os.path.join(topsrcdir, 'build',
                                            'submit_telemetry_data.py'),
                               get_state_dir()[0]],
                               stdout=devnull, stderr=devnull)
 
+    def topsrcdir_state_dir():
+        """Resolve the directory for per-topsrcdir state.
+
+        This assigns a deterministic path inside the global state directory
+        for state belonging to this topsrcdir.
+        """
+        import hashlib
+
+        state_dir = get_state_dir()[0]
+        ident = hashlib.md5(topsrcdir).hexdigest()
+
+        p = os.path.join(state_dir, 'state.%s' % ident)
+
+        # Create directory automatically if the global state directory
+        # exists.
+        exists = False
+        if os.path.exists(state_dir):
+            try:
+                os.mkdir(p, 0o770)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise
+
+            exists = True
+
+        return p, exists
+
     def populate_context(context, key=None):
         if key is None:
             return
         if key == 'state_dir':
             state_dir, is_environ = get_state_dir()
             if is_environ:
                 if not os.path.exists(state_dir):
                     print('Creating global state directory from environment variable: %s'
@@ -272,16 +299,19 @@ def bootstrap(topsrcdir, mozilla_dir=Non
                     print('\nCreating default state directory: %s' % state_dir)
                     os.makedirs(state_dir, mode=0o770)
 
             return state_dir
 
         if key == 'topdir':
             return topsrcdir
 
+        if key == 'topsrcdir_state_dir':
+            return topsrcdir_state_dir()
+
         if key == 'telemetry_handler':
             return telemetry_handler
 
         if key == 'post_dispatch_handler':
             return post_dispatch_handler
 
         raise AttributeError(key)
 
@@ -293,18 +323,25 @@ def bootstrap(topsrcdir, mozilla_dir=Non
         mach.settings_paths.append(get_state_dir()[0])
     # always load local repository configuration
     mach.settings_paths.append(mozilla_dir)
 
     for category, meta in CATEGORIES.items():
         mach.define_category(category, meta['short'], meta['long'],
             meta['priority'])
 
-    for path in MACH_MODULES:
-        mach.load_commands_from_file(os.path.join(mozilla_dir, path))
+    state_dir, state_dir_exists = topsrcdir_state_dir()
+    if state_dir_exists:
+        dispatch_cache = os.path.join(state_dir, 'mach_dispatch.json')
+    else:
+        dispatch_cache = None
+
+    mach.load_commands(
+        cache_path=dispatch_cache,
+        command_files=[os.path.join(mozilla_dir, p) for p in MACH_MODULES])
 
     return mach
 
 
 # Hook import such that .pyc/.pyo files without a corresponding .py file in
 # the source directory are essentially ignored. See further below for details
 # and caveats.
 # Objdirs outside the source directory are ignored because in most cases, if
--- a/python/mach/mach/dispatcher.py
+++ b/python/mach/mach/dispatcher.py
@@ -231,16 +231,17 @@ class CommandAction(argparse.Action):
             raise UnrecognizedArgumentError(command, extra)
 
     def _handle_main_help(self, parser, verbose):
         # Since we don't need full sub-parser support for the main help output,
         # we create groups in the ArgumentParser and populate each group with
         # arguments corresponding to command names. This has the side-effect
         # that argparse renders it nicely.
         r = self._mach_registrar
+        r.force_load()
         disabled_commands = []
 
         cats = [(k, v[2]) for k, v in r.categories.items()]
         sorted_cats = sorted(cats, key=itemgetter(1), reverse=True)
         for category, priority in sorted_cats:
             group = None
 
             for command in sorted(r.commands_by_category[category]):
--- a/python/mach/mach/main.py
+++ b/python/mach/mach/main.py
@@ -276,16 +276,36 @@ To see more help for a specific command,
             for path in paths:
                 if os.path.isfile(path):
                     self.load_commands_from_file(path)
                 elif os.path.isdir(path):
                     self.load_commands_from_directory(path)
                 else:
                     print("command provider '%s' does not exist" % path)
 
+    def load_commands(self, cache_path=None, command_files=None):
+        """Loads commands, possibly by using a cache.
+
+        This is the preferred API for registering commands with the driver.
+
+        ``cache_path`` is the path to a file that will be used to cache loaded
+        command info. It is the caller's responsibility to ensure the file can
+        be read and created. i.e. the directory should exist. If not specified,
+        no cache will be used.
+
+        ``command_files`` is an iterable that will essentially result in the
+        same behavior as calling ``load_commands_from_file`` on each entry.
+
+        If a cache file is used, behavior is undefined if any ``load_commands*``
+        method is called after this one. That includes calling this method
+        multiple times.
+        """
+        command_files = command_files or []
+        Registrar.load_commands(command_files, cache_path=cache_path)
+
     def define_category(self, name, title, description, priority=50):
         """Provide a description for a named command category."""
 
         Registrar.register_category(name, title, description, priority)
 
     @property
     def require_conditions(self):
         return Registrar.require_conditions
--- a/python/mach/mach/registrar.py
+++ b/python/mach/mach/registrar.py
@@ -1,67 +1,223 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, unicode_literals
 
+import collections
+import errno
 import imp
+import os
 import sys
 import uuid
 
 from .base import MachError
 
 INVALID_COMMAND_CONTEXT = r'''
 It looks like you tried to run a mach command from an invalid context. The %s
 command failed to meet the following conditions: %s
 
 Run |mach help| to show a list of all commands available to the current context.
 '''.lstrip()
 
 
+def _load_cache(path):
+    import json
+
+    if not path:
+        return None
+
+    try:
+        with open(path, 'rb') as fh:
+            state = json.load(fh)
+            if not isinstance(state, dict):
+                raise ValueError('state is not a dict')
+    except IOError as e:
+        if e.errno == errno.ENOENT:
+            return None
+
+        raise
+    # JSON parse error.
+    except ValueError:
+        return None
+
+    if state.get('version') != 1:
+        return None
+
+    # Normalize values.
+    res = {}
+
+    for k, v in state.items():
+        if k in ('version', 'commands'):
+            res[k] = v
+        elif k == 'source_files':
+            res['source_files'] = {}
+            res['settings_files'] = set()
+            for path, (mtime, is_setting) in v.items():
+                res['source_files'][path] = mtime
+                if is_setting:
+                    res['settings_files'].add(path)
+        else:
+            # Unexpected key. Treat as invalid cache.
+            return None
+
+    return res
+
+def _save_cache(registrar, cache_path):
+    import inspect
+    import json
+
+    settings_files = set()
+    for settings in registrar.settings_providers:
+        try:
+            settings_files.add(inspect.getsourcefile(settings))
+        # Unable to resolve source file. Can't cache.
+        except TypeError:
+            return
+
+    source_files = {}
+    for path in registrar.source_files:
+        source_files[path] = [
+            os.path.getmtime(path),
+            path in settings_files,
+        ]
+
+    command_map = {}
+
+    for command in Registrar.command_handlers.values():
+        name = command.name
+        cls = command.cls
+
+        try:
+            source_file = inspect.getsourcefile(cls)
+        except TypeError:
+            # If we can't find the source file, a cache will do us no good.
+            # No-op.
+            return
+
+        # We resolved the command to a file that isn't loaded. This should
+        # never happen.
+        if source_file not in registrar.source_files:
+            return
+
+        command_map[name] = source_file
+
+    state = {
+        'version': 1,
+        'source_files': source_files,
+        'commands': command_map,
+    }
+
+    with open(cache_path, 'wb') as fh:
+        json.dump(state, fh, sort_keys=True, indent=4)
+
+
+class LazyCommandsDict(collections.Mapping):
+    """A command handler that loads modules lazily.
+
+    Behaves like a dict. Keys are command names. Values are
+    @Command handler types.
+    """
+    def __init__(self, registrar, commands):
+        self._registrar = registrar
+        self._commands = commands
+        self._resolved = {}
+
+    def __len__(self):
+        return len(self._commands)
+
+    def __iter__(self):
+        return iter(self._commands)
+
+    def __contains__(self, key):
+        return key in self._commands
+
+    def __getitem__(self, key):
+        # This is a no-op if the file has already been loaded.
+        # Since an instance of this class is used as
+        # ``MachRegistrar._command_handlers``, a side-effect of calling
+        # this function is that self._resolved should be populated.
+        path = self._commands[key]
+        self._registrar.register_commands_file(path)
+        return self._resolved[key]
+
+    def _populate(self):
+        for key in self:
+            self[key]
+
+    def keys(self):
+        self._populate()
+        return self._resolved.keys()
+
+    def items(self):
+        self._populate()
+        return self._resolved.items()
+
+    def values(self):
+        self._populate()
+        return self._resolved.values()
+
+
 class MachRegistrar(object):
     """Container for mach command and config providers."""
 
     def __init__(self):
         self.command_handlers = {}
         self.commands_by_category = {}
         self.settings_providers = set()
         self.categories = {}
         self.require_conditions = False
+        self.source_files = set()
+        self._loaded_files = set()
 
     def register_commands_file(self, path, module_name=None):
         """Registers a file containing mach commands.
 
         The file will eventually be imported. If not specified, the
         module name will be ``mach.commands.<random>``.
         """
+        # Only load each source file once. This assumes we never unload
+        # modules.
+        if path in self._loaded_files:
+            return
+
+        self._loaded_files.add(path)
+
         if module_name is None:
             # Ensure parent module is present otherwise we'll (likely) get
             # an error due to unknown parent.
             if b'mach.commands' not in sys.modules:
                 mod = imp.new_module(b'mach.commands')
                 sys.modules[b'mach.commands'] = mod
 
             module_name = 'mach.commands.%s' % uuid.uuid1().get_hex()
 
         imp.load_source(module_name, path)
+        self.source_files.add(path)
 
     def register_command_handler(self, handler):
         name = handler.name
 
         if not handler.category:
             raise MachError('Cannot register a mach command without a '
                 'category: %s' % name)
 
         if handler.category not in self.categories:
             raise MachError('Cannot register a command to an undefined '
                 'category: %s -> %s' % (name, handler.category))
 
-        self.command_handlers[name] = handler
+        # This is a layering violation. But it avoids issues with
+        # infinite recursion.
+        if isinstance(self.command_handlers, LazyCommandsDict):
+            self.command_handlers._resolved[name] = handler
+        else:
+            self.command_handlers[name] = handler
+
         self.commands_by_category[handler.category].add(name)
 
     def register_settings_provider(self, cls):
         self.settings_providers.add(cls)
 
     def register_category(self, name, title, description, priority=50):
         self.categories[name] = (title, description, priority)
         self.commands_by_category[name] = set()
@@ -115,16 +271,55 @@ class MachRegistrar(object):
 
         if context:
             postrun = getattr(context, 'post_dispatch_handler', None)
             if postrun:
                 postrun(context, handler, args=kwargs)
 
         return result
 
+    def load_commands(self, source_files, cache_path=None):
+        """Loads commands from commands files, possibly using a cache."""
+        if self.source_files:
+            raise MachError('commands already loaded; cannot call '
+                            'load_commands() multiple times')
+
+        self.source_files = set(source_files)
+
+        cache = _load_cache(cache_path)
+        cache_used = False
+
+        # For the cache to be used:
+        #
+        # 1. The set of commands files must align exactly.
+        # 2. All commands files must be unchanged from cached data.
+        if cache and set(cache['source_files']) == self.source_files:
+            mtimes = cache['source_files']
+            if all(os.path.getmtime(p) <= mtimes[p] for p in self.source_files):
+                self.command_handlers = LazyCommandsDict(self, cache['commands'])
+                cache_used = True
+
+        # Cache wasn't used. Load everything normally.
+        if not cache_used:
+            for path in source_files:
+                self.register_commands_file(path)
+
+        # Write out the cache if it is out of date.
+        if cache_path and not cache_used:
+            _save_cache(self, cache_path)
+
+    def force_load(self):
+        """Force loading of all modules.
+
+        Call this if an operation wants to be sure all registered modules are
+        actually loaded.
+        """
+        for k, v in self.command_handlers.items():
+            pass
+
     def dispatch(self, name, context=None, argv=None, subcommand=None, **kwargs):
         """Dispatch/run a command.
 
         Commands can use this to call other commands.
         """
         handler = self.command_handlers[name]
 
         if subcommand: