Bug 1388894 - Cache mach commands dispatch table; r?ahal, glandium
This is mostly in an RFC state right now. Please grant r-.
This shaves ~45ms off `mach uuid` on my i7-6700K (~125ms -> ~80ms).
About 2/3 of the remaining time is spent importing modules. Looking at
which modules are imported, I suspect there is room to trim the default
import list substantially, e.g. we're pulling in multiprocessing as part
of mozbuild :/
TODO
* settings providers need to always be loaded so all settings
are registered when settings files are loaded at dispatch time
* need to cache categories?
* need to test a lot of things
* look at mtimes for any other files?
MozReview-Commit-ID: 2jjydyq6PUX
--- a/build/mach_bootstrap.py
+++ b/build/mach_bootstrap.py
@@ -245,16 +245,43 @@ def bootstrap(topsrcdir, mozilla_dir=Non
with open(os.devnull, 'wb') as devnull:
subprocess.Popen([sys.executable,
os.path.join(topsrcdir, 'build',
'submit_telemetry_data.py'),
get_state_dir()[0]],
stdout=devnull, stderr=devnull)
+    def topsrcdir_state_dir():
+        """Resolve the directory for per-topsrcdir state.
+
+        This assigns a deterministic path inside the global state directory
+        for state belonging to this topsrcdir: ``state.<md5 of topsrcdir>``.
+
+        Returns a ``(path, exists)`` tuple. The directory is created on
+        demand, but only when the global state directory already exists;
+        otherwise nothing is created and ``exists`` is False.
+
+        Raises OSError if the directory cannot be created for any reason
+        other than it already being present.
+        """
+        import hashlib
+
+        state_dir = get_state_dir()[0]
+
+        # hashlib works on bytes. Encode text paths explicitly so a source
+        # directory with non-ASCII characters does not fail in an implicit
+        # ASCII conversion. ASCII paths produce the same digest as before.
+        raw_topsrcdir = topsrcdir
+        if not isinstance(raw_topsrcdir, bytes):
+            raw_topsrcdir = raw_topsrcdir.encode('utf-8')
+        ident = hashlib.md5(raw_topsrcdir).hexdigest()
+
+        p = os.path.join(state_dir, 'state.%s' % ident)
+
+        # Create directory automatically if the global state directory
+        # exists.
+        exists = False
+        if os.path.exists(state_dir):
+            try:
+                os.mkdir(p, 0o770)
+            except OSError as e:
+                # Already present is the common case and not an error.
+                if e.errno != errno.EEXIST:
+                    raise
+
+            exists = True
+
+        return p, exists
+
def populate_context(context, key=None):
if key is None:
return
if key == 'state_dir':
state_dir, is_environ = get_state_dir()
if is_environ:
if not os.path.exists(state_dir):
print('Creating global state directory from environment variable: %s'
@@ -272,16 +299,19 @@ def bootstrap(topsrcdir, mozilla_dir=Non
print('\nCreating default state directory: %s' % state_dir)
os.makedirs(state_dir, mode=0o770)
return state_dir
if key == 'topdir':
return topsrcdir
+ if key == 'topsrcdir_state_dir':
+ return topsrcdir_state_dir()
+
if key == 'telemetry_handler':
return telemetry_handler
if key == 'post_dispatch_handler':
return post_dispatch_handler
raise AttributeError(key)
@@ -293,18 +323,25 @@ def bootstrap(topsrcdir, mozilla_dir=Non
mach.settings_paths.append(get_state_dir()[0])
# always load local repository configuration
mach.settings_paths.append(mozilla_dir)
for category, meta in CATEGORIES.items():
mach.define_category(category, meta['short'], meta['long'],
meta['priority'])
- for path in MACH_MODULES:
- mach.load_commands_from_file(os.path.join(mozilla_dir, path))
+ state_dir, state_dir_exists = topsrcdir_state_dir()
+ if state_dir_exists:
+ dispatch_cache = os.path.join(state_dir, 'mach_dispatch.json')
+ else:
+ dispatch_cache = None
+
+ mach.load_commands(
+ cache_path=dispatch_cache,
+ command_files=[os.path.join(mozilla_dir, p) for p in MACH_MODULES])
return mach
# Hook import such that .pyc/.pyo files without a corresponding .py file in
# the source directory are essentially ignored. See further below for details
# and caveats.
# Objdirs outside the source directory are ignored because in most cases, if
--- a/python/mach/mach/dispatcher.py
+++ b/python/mach/mach/dispatcher.py
@@ -231,16 +231,17 @@ class CommandAction(argparse.Action):
raise UnrecognizedArgumentError(command, extra)
def _handle_main_help(self, parser, verbose):
# Since we don't need full sub-parser support for the main help output,
# we create groups in the ArgumentParser and populate each group with
# arguments corresponding to command names. This has the side-effect
# that argparse renders it nicely.
r = self._mach_registrar
+ r.force_load()
disabled_commands = []
cats = [(k, v[2]) for k, v in r.categories.items()]
sorted_cats = sorted(cats, key=itemgetter(1), reverse=True)
for category, priority in sorted_cats:
group = None
for command in sorted(r.commands_by_category[category]):
--- a/python/mach/mach/main.py
+++ b/python/mach/mach/main.py
@@ -276,16 +276,36 @@ To see more help for a specific command,
for path in paths:
if os.path.isfile(path):
self.load_commands_from_file(path)
elif os.path.isdir(path):
self.load_commands_from_directory(path)
else:
print("command provider '%s' does not exist" % path)
+    def load_commands(self, cache_path=None, command_files=None):
+        """Loads commands, possibly by using a cache.
+
+        This is the preferred API for registering commands with the driver.
+
+        ``cache_path`` is the path to a file that will be used to cache loaded
+        command info. It is the caller's responsibility to ensure the file can
+        be read and created, i.e. the directory should exist. If not
+        specified, no cache will be used.
+
+        ``command_files`` is an iterable of paths that will essentially result
+        in the same behavior as calling ``load_commands_from_file`` on each
+        entry. ``None`` is treated as an empty collection.
+
+        If a cache file is used, behavior is undefined if any ``load_commands*``
+        method is called after this one. That includes calling this method
+        multiple times.
+        """
+        # Snapshot the iterable before handing it off: the registrar walks
+        # the collection more than once, which would silently drop every
+        # entry of a one-shot iterator (e.g. a generator).
+        command_files = list(command_files or [])
+        Registrar.load_commands(command_files, cache_path=cache_path)
+
def define_category(self, name, title, description, priority=50):
"""Provide a description for a named command category."""
Registrar.register_category(name, title, description, priority)
@property
def require_conditions(self):
return Registrar.require_conditions
--- a/python/mach/mach/registrar.py
+++ b/python/mach/mach/registrar.py
@@ -1,67 +1,223 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, unicode_literals
+import collections
+import errno
import imp
+import os
import sys
import uuid
from .base import MachError
INVALID_COMMAND_CONTEXT = r'''
It looks like you tried to run a mach command from an invalid context. The %s
command failed to meet the following conditions: %s
Run |mach help| to show a list of all commands available to the current context.
'''.lstrip()
+def _load_cache(path):
+    """Read and validate the command dispatch cache stored at ``path``.
+
+    Returns ``None`` when no usable cache is available: ``path`` is empty,
+    the file does not exist, it is not valid JSON, it has an unexpected
+    version, or its contents do not have the expected shape.
+
+    Otherwise returns a dict with these keys:
+
+    ``version``
+        The cache format version (always 1).
+    ``commands``
+        The cached command table, as stored.
+    ``source_files``
+        Dict mapping source file path to its recorded mtime.
+    ``settings_files``
+        Set of the source file paths flagged as providing settings.
+
+    Raises IOError for read failures other than the file being absent.
+    """
+    import json
+
+    if not path:
+        return None
+
+    try:
+        with open(path, 'rb') as fh:
+            state = json.load(fh)
+    except IOError as e:
+        if e.errno == errno.ENOENT:
+            return None
+
+        raise
+    # JSON parse error.
+    except ValueError:
+        return None
+
+    if not isinstance(state, dict) or state.get('version') != 1:
+        return None
+
+    # Normalize values. Anything that does not look like what we wrote is
+    # treated as an invalid cache rather than allowed to raise later on.
+    res = {}
+
+    for k, v in state.items():
+        if k in ('version', 'commands'):
+            res[k] = v
+        elif k == 'source_files':
+            if not isinstance(v, dict):
+                return None
+
+            res['source_files'] = {}
+            res['settings_files'] = set()
+            for source_path, entry in v.items():
+                try:
+                    mtime, is_setting = entry
+                except (TypeError, ValueError):
+                    return None
+
+                if not isinstance(mtime, (int, float)):
+                    return None
+
+                res['source_files'][source_path] = mtime
+                if is_setting:
+                    res['settings_files'].add(source_path)
+        else:
+            # Unexpected key. Treat as invalid cache.
+            return None
+
+    # Consumers index both tables unconditionally, so a cache lacking
+    # either one is unusable.
+    if not isinstance(res.get('commands'), dict) or 'source_files' not in res:
+        return None
+
+    return res
+
+def _save_cache(registrar, cache_path):
+    """Write the command dispatch cache for ``registrar`` to ``cache_path``.
+
+    The cache records, for every loaded commands file, its current mtime
+    and whether it defines a settings provider, plus a table mapping each
+    command name to the file that defines it.
+
+    Nothing is written if the source file of any settings provider or
+    command class cannot be determined, or if a command resolves to a file
+    the registrar did not load: such a cache could not be used to find the
+    command again.
+    """
+    import inspect
+    import json
+
+    settings_files = set()
+    for settings in registrar.settings_providers:
+        try:
+            settings_file = inspect.getsourcefile(settings)
+        # Unable to resolve source file. Can't cache.
+        except TypeError:
+            return
+
+        # getsourcefile() may also report an unknown source by returning
+        # None instead of raising.
+        if settings_file is None:
+            return
+
+        settings_files.add(settings_file)
+
+    source_files = {}
+    for path in registrar.source_files:
+        source_files[path] = [
+            os.path.getmtime(path),
+            path in settings_files,
+        ]
+
+    command_map = {}
+
+    # Use the registrar we were handed rather than the module-level
+    # singleton so the handlers and source files always come from the same
+    # object.
+    for command in registrar.command_handlers.values():
+        name = command.name
+        cls = command.cls
+
+        try:
+            source_file = inspect.getsourcefile(cls)
+        except TypeError:
+            # If we can't find the source file, a cache will do us no good.
+            # No-op.
+            return
+
+        # We resolved the command to a file that isn't loaded. This should
+        # never happen. (This also covers an unresolved ``None`` source.)
+        if source_file not in registrar.source_files:
+            return
+
+        command_map[name] = source_file
+
+    state = {
+        'version': 1,
+        'source_files': source_files,
+        'commands': command_map,
+    }
+
+    with open(cache_path, 'wb') as fh:
+        json.dump(state, fh, sort_keys=True, indent=4)
+
+
+class LazyCommandsDict(collections.Mapping):
+    """Read-only mapping of command name to handler that imports on demand.
+
+    It is built from a table of ``{command name: commands file path}``.
+    Looking up a command asks the registrar to load the file defining it;
+    the handler is then served from ``_resolved``, which is filled in as
+    handlers are registered.
+    """
+    def __init__(self, registrar, commands):
+        self._commands = commands
+        self._resolved = {}
+        self._registrar = registrar
+
+    def __contains__(self, key):
+        return key in self._commands
+
+    def __iter__(self):
+        return iter(self._commands)
+
+    def __len__(self):
+        return len(self._commands)
+
+    def __getitem__(self, key):
+        source_path = self._commands[key]
+        # Loading is skipped by the registrar for files it has already
+        # loaded. Because this object serves as the registrar's
+        # ``command_handlers``, loading the file is expected to leave the
+        # handler for ``key`` in self._resolved.
+        self._registrar.register_commands_file(source_path)
+        return self._resolved[key]
+
+    def _populate(self):
+        """Look up every known command so ``_resolved`` is complete."""
+        for name in self._commands:
+            self[name]
+
+    def keys(self):
+        self._populate()
+        resolved = self._resolved
+        return resolved.keys()
+
+    def values(self):
+        self._populate()
+        resolved = self._resolved
+        return resolved.values()
+
+    def items(self):
+        self._populate()
+        resolved = self._resolved
+        return resolved.items()
+
+
class MachRegistrar(object):
"""Container for mach command and config providers."""
def __init__(self):
self.command_handlers = {}
self.commands_by_category = {}
self.settings_providers = set()
self.categories = {}
self.require_conditions = False
+ self.source_files = set()
+ self._loaded_files = set()
def register_commands_file(self, path, module_name=None):
"""Registers a file containing mach commands.
The file will eventually be imported. If not specified, the
module name will be ``mach.commands.<random>``.
"""
+ # Only load each source file once. This assumes we never unload
+ # modules.
+ if path in self._loaded_files:
+ return
+
+ self._loaded_files.add(path)
+
if module_name is None:
# Ensure parent module is present otherwise we'll (likely) get
# an error due to unknown parent.
if b'mach.commands' not in sys.modules:
mod = imp.new_module(b'mach.commands')
sys.modules[b'mach.commands'] = mod
module_name = 'mach.commands.%s' % uuid.uuid1().get_hex()
imp.load_source(module_name, path)
+ self.source_files.add(path)
def register_command_handler(self, handler):
name = handler.name
if not handler.category:
raise MachError('Cannot register a mach command without a '
'category: %s' % name)
if handler.category not in self.categories:
raise MachError('Cannot register a command to an undefined '
'category: %s -> %s' % (name, handler.category))
- self.command_handlers[name] = handler
+ # This is a layering violation. But it avoids issues with
+ # infinite recursion.
+ if isinstance(self.command_handlers, LazyCommandsDict):
+ self.command_handlers._resolved[name] = handler
+ else:
+ self.command_handlers[name] = handler
+
self.commands_by_category[handler.category].add(name)
def register_settings_provider(self, cls):
self.settings_providers.add(cls)
def register_category(self, name, title, description, priority=50):
self.categories[name] = (title, description, priority)
self.commands_by_category[name] = set()
@@ -115,16 +271,55 @@ class MachRegistrar(object):
if context:
postrun = getattr(context, 'post_dispatch_handler', None)
if postrun:
postrun(context, handler, args=kwargs)
return result
+    def load_commands(self, source_files, cache_path=None):
+        """Loads commands from commands files, possibly using a cache.
+
+        ``source_files`` is an iterable of paths to commands files.
+        ``cache_path``, if given, is the path of the dispatch cache file.
+
+        When a cache exists that covers exactly ``source_files`` and none of
+        those files has changed since it was written, no commands file is
+        imported up front: ``command_handlers`` becomes a lazy mapping that
+        imports a file when one of its commands is looked up. Otherwise all
+        files are loaded eagerly and, if ``cache_path`` was given, the cache
+        is rewritten.
+
+        Raises MachError if commands files have already been loaded.
+        """
+        if self.source_files:
+            raise MachError('commands already loaded; cannot call '
+                            'load_commands() multiple times')
+
+        # The paths are walked more than once below, so take a snapshot in
+        # case the caller passed a one-shot iterator.
+        source_files = list(source_files)
+        self.source_files = set(source_files)
+
+        cache = _load_cache(cache_path)
+        cache_used = False
+
+        # For the cache to be used:
+        #
+        # 1. The set of commands files must align exactly.
+        # 2. All commands files must be unchanged from cached data.
+        if cache and set(cache['source_files']) == self.source_files:
+            mtimes = cache['source_files']
+            # Require an exact match: a file swapped for a copy with an
+            # *older* mtime has changed just as much as a newer one.
+            unchanged = all(os.path.getmtime(p) == mtimes[p]
+                            for p in self.source_files)
+            if unchanged:
+                self.command_handlers = LazyCommandsDict(self, cache['commands'])
+                cache_used = True
+
+        # Cache wasn't used. Load everything normally.
+        if not cache_used:
+            for path in source_files:
+                self.register_commands_file(path)
+
+        # Write out the cache if it is out of date.
+        if cache_path and not cache_used:
+            _save_cache(self, cache_path)
+
+    def force_load(self):
+        """Make sure every registered commands module is really loaded.
+
+        Use this before an operation that needs the complete set of
+        handlers rather than whatever has been looked up so far.
+        """
+        # Only the act of walking the items matters here; the handlers
+        # themselves are not used.
+        for _ in self.command_handlers.items():
+            pass
+
def dispatch(self, name, context=None, argv=None, subcommand=None, **kwargs):
"""Dispatch/run a command.
Commands can use this to call other commands.
"""
handler = self.command_handlers[name]
if subcommand: