Bug 1273673: use Python logging for taskgraph generation; r?gps
author     Dustin J. Mitchell <dustin@mozilla.com>
date       Tue, 17 May 2016 23:23:03 +0000
changeset  368345 dd215d63ccec6f945638dcc661d0849660def3b8
parent     367988 8d4bac56c8edc3bc24ed28ccf36a7458abf0c161
child      368346 2662ced404b4e6826d3d63fb8a53ec1cb2addd62
push id    18491
push user  dmitchell@mozilla.com
push date  Wed, 18 May 2016 15:54:48 +0000
reviewers  gps
bugs       1273673
milestone  49.0a1
Bug 1273673: use Python logging for taskgraph generation; r?gps

MozReview-Commit-ID: EYI6hGFUWP8
taskcluster/mach_commands.py
taskcluster/taskgraph/decision.py
taskcluster/taskgraph/generator.py
taskcluster/taskgraph/kind/base.py
taskcluster/taskgraph/kind/legacy.py
taskcluster/taskgraph/test/test_decision.py
taskcluster/taskgraph/test/test_generator.py
taskcluster/taskgraph/test/test_kind_legacy.py
testing/taskcluster/taskcluster_graph/mach_util.py
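
The recurring change across these files: the mach structured-logging callable
(a `log` argument threaded through every constructor) is replaced by a
module-level stdlib logger. In sketch form, using the write_artifact change
from decision.py below:

    # before: an explicit mach log callable
    log(logging.INFO, 'writing-artifact', {'filename': filename},
        'writing artifact file `{filename}`')

    # after: a module-level Python logger
    logger = logging.getLogger(__name__)
    logger.info('writing artifact file `{}`'.format(filename))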
--- a/taskcluster/mach_commands.py
+++ b/taskcluster/mach_commands.py
@@ -1,50 +1,76 @@
 # -*- coding: utf-8 -*-
 
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
+import logging
 import sys
+import time
 import traceback
 
 from mach.decorators import (
     CommandArgument,
     CommandProvider,
     Command,
     SubCommand,
 )
 
+from mach.logging import format_seconds
+
 from mozbuild.base import MachCommandBase
 
 
 class ShowTaskGraphSubCommand(SubCommand):
     """A SubCommand with TaskGraph-specific arguments"""
 
     def __call__(self, func):
         after = SubCommand.__call__(self, func)
         args = [
             CommandArgument('--root', '-r', default='taskcluster/ci',
                             help="root of the taskgraph definition relative to topsrcdir"),
+            CommandArgument('--quiet', '-q', action="store_true",
+                            help="suppress all logging output"),
+            CommandArgument('--verbose', '-v', action="store_true",
+                            help="include debug-level logging output"),
             CommandArgument('--parameters', '-p', required=True,
                             help="parameters file (.yml or .json; see "
                                  "`taskcluster/docs/parameters.rst`)`"),
             CommandArgument('--no-optimize', dest="optimize", action="store_false",
                             default="true",
                             help="do not remove tasks from the graph that are found in the "
                             "index (a.k.a. optimize the graph)"),
         ]
         for arg in args:
             after = arg(after)
         return after
 
 
+class ColorizedFormatter(logging.Formatter):
+    """
+    Format log messages similarly to mach logging, including times offset from
+    start and colorized output on TTYs.
+    """
+    def __init__(self, fmt, terminal):
+        super(ColorizedFormatter, self).__init__(fmt=fmt)
+        self.start_time = time.time()
+        self.terminal = terminal
+
+    def format(self, record):
+        f = super(ColorizedFormatter, self).format(record)
+        t = format_seconds(record.created - self.start_time)
+        if self.terminal:
+            t = self.terminal.blue(t)
+        return '%s %s' % (t, f)
+
+
 @CommandProvider
 class MachCommands(MachCommandBase):
 
     @Command('taskgraph', category="ci",
              description="Manipulate TaskCluster task graphs defined in-tree")
     def taskgraph(self):
         """The taskgraph subcommands all relate to the generation of task graphs
         for Gecko continuous integration.  A task graph is a set of tasks linked
@@ -132,34 +158,54 @@ class MachCommands(MachCommandBase):
         """Run the decision task: generate a task graph and submit to
         TaskCluster.  This is only meant to be called within decision tasks,
         and requires a great many arguments.  Commands like `mach taskgraph
         optimized` are better suited to use on the command line, and can take
         the parameters file generated by a decision task.  """
 
         import taskgraph.decision
         try:
-            return taskgraph.decision.taskgraph_decision(self.log, options)
+            self.setup_logging()
+            return taskgraph.decision.taskgraph_decision(options)
         except Exception as e:
             traceback.print_exc()
             sys.exit(1)
 
+    def setup_logging(self, quiet=False, verbose=True):
+        """
+        Set up Python logging for all loggers, sending results to stderr (so
+        that command output can be redirected easily) and adding the typical
+        mach timestamp.
+        """
+        # remove the old terminal handler
+        self.log_manager.replace_terminal_handler(None)
+
+        # re-add it, with level and fh set appropriately
+        if not quiet:
+            level = logging.DEBUG if verbose else logging.INFO
+            self.log_manager.add_terminal_logging(fh=sys.stderr, level=level)
+
+        # all of the taskgraph logging is unstructured logging
+        self.log_manager.enable_unstructured()
+
     def show_taskgraph(self, graph_attr, options):
         import taskgraph.parameters
         import taskgraph.target_tasks
         import taskgraph.generator
 
         try:
+            self.setup_logging(quiet=options['quiet'], verbose=options['verbose'])
             parameters = taskgraph.parameters.load_parameters_file(options)
 
             target_tasks_method = parameters.get('target_tasks_method', 'all_tasks')
             target_tasks_method = taskgraph.target_tasks.get_method(target_tasks_method)
             tgg = taskgraph.generator.TaskGraphGenerator(
                 root_dir=options['root'],
-                log=self.log,
                 parameters=parameters,
                 target_tasks_method=target_tasks_method)
 
             tg = getattr(tgg, graph_attr)
 
             for label in tg.graph.visit_postorder():
                 print(tg.tasks[label])
         except Exception as e:
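
ColorizedFormatter is not exercised in the hunks above; here is a minimal
standalone sketch of how it attaches to a handler (the StreamHandler wiring is
illustrative only; in this patch the handler comes from mach's log_manager):

    import logging
    import sys

    handler = logging.StreamHandler(sys.stderr)
    # terminal=None skips colorization, e.g. when stderr is not a TTY
    handler.setFormatter(ColorizedFormatter(fmt='%(message)s', terminal=None))
    root = logging.getLogger()
    root.addHandler(handler)
    root.setLevel(logging.DEBUG)
    # messages come out prefixed with the mach-style elapsed-time stamp
    logging.getLogger('taskgraph.generator').debug('loading kind `legacy`')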
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -11,20 +11,21 @@ import json
 import logging
 import yaml
 
 from .generator import TaskGraphGenerator
 from .create import create_tasks
 from .parameters import get_decision_parameters
 from .target_tasks import get_method
 
+logger = logging.getLogger(__name__)
 ARTIFACTS_DIR = 'artifacts'
 
 
-def taskgraph_decision(log, options):
+def taskgraph_decision(options):
     """
     Run the decision task.  This function implements `mach taskgraph decision`,
     and is responsible for
 
      * processing decision task command-line options into parameters
      * running task-graph generation exactly the same way the other `mach
        taskgraph` commands do
      * generating a set of artifacts to memorialize the graph
@@ -33,37 +34,33 @@ def taskgraph_decision(log, options):
 
     parameters = get_decision_parameters(options)
 
     # create a TaskGraphGenerator instance
     target_tasks_method = parameters.get('target_tasks_method', 'all_tasks')
     target_tasks_method = get_method(target_tasks_method)
     tgg = TaskGraphGenerator(
         root_dir=options['root'],
-        log=log,
         parameters=parameters,
         target_tasks_method=target_tasks_method)
 
     # write out the parameters used to generate this graph
-    write_artifact('parameters.yml', dict(**parameters), log)
+    write_artifact('parameters.yml', dict(**parameters))
 
     # write out the full graph for reference
     write_artifact('full-task-graph.json',
-                   taskgraph_to_json(tgg.full_task_graph),
-                   log)
+                   taskgraph_to_json(tgg.full_task_graph))
 
     # write out the target task set to allow reproducing this as input
     write_artifact('target_tasks.json',
-                   tgg.target_task_set.tasks.keys(),
-                   log)
+                   tgg.target_task_set.tasks.keys())
 
     # write out the optimized task graph to describe what will happen
     write_artifact('task-graph.json',
-                   taskgraph_to_json(tgg.optimized_task_graph),
-                   log)
+                   taskgraph_to_json(tgg.optimized_task_graph))
 
     # actually create the graph
     create_tasks(tgg.optimized_task_graph)
 
 
 def taskgraph_to_json(taskgraph):
     tasks = taskgraph.tasks
 
@@ -77,20 +74,18 @@ def taskgraph_to_json(taskgraph):
 
     # add dependencies with one trip through the graph edges
     for (left, right, name) in taskgraph.graph.edges:
         rv[left]['dependencies'].append((name, right))
 
     return rv
 
 
-def write_artifact(filename, data, log):
-    log(logging.INFO, 'writing-artifact', {
-        'filename': filename,
-    }, 'writing artifact file `{filename}`')
+def write_artifact(filename, data):
+    logger.info('writing artifact file `{}`'.format(filename))
     if not os.path.isdir(ARTIFACTS_DIR):
         os.mkdir(ARTIFACTS_DIR)
     path = os.path.join(ARTIFACTS_DIR, filename)
     if filename.endswith('.yml'):
         with open(path, 'w') as f:
             yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
     elif filename.endswith('.json'):
         with open(path, 'w') as f:
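
A usage sketch for the simplified write_artifact signature (the data values
here are hypothetical; files land under ARTIFACTS_DIR relative to the current
directory, and the extension selects YAML or JSON serialization):

    write_artifact('parameters.yml', {'project': 'try'})
    # logs: writing artifact file `parameters.yml`
    # writes: artifacts/parameters.yml

    write_artifact('target_tasks.json', ['build-linux64/opt'])
    # writes: artifacts/target_tasks.json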
--- a/taskcluster/taskgraph/generator.py
+++ b/taskcluster/taskgraph/generator.py
@@ -5,45 +5,45 @@
 from __future__ import absolute_import, print_function, unicode_literals
 import logging
 import os
 import yaml
 
 from .graph import Graph
 from .types import TaskGraph
 
+logger = logging.getLogger(__name__)
+
 class TaskGraphGenerator(object):
     """
     The central controller for taskgraph.  This handles all phases of graph
     generation.  The task is generated from all of the kinds defined in
     subdirectories of the generator's root directory.
 
     Access to the results of this generation, as well as intermediate values at
     various phases of generation, is available via properties.  This encourages
     the provision of all generation inputs at instance construction time.
     """
 
     # Task-graph generation is implemented as a Python generator that yields
     # each "phase" of generation.  This allows some mach subcommands to short-
     # circuit generation of the entire graph by never completing the generator.
 
-    def __init__(self, root_dir, log, parameters,
+    def __init__(self, root_dir, parameters,
                  target_tasks_method):
         """
         @param root_dir: root directory, with subdirectories for each kind
-        @param log: Mach log function
         @param parameters: parameters for this task-graph generation
         @type parameters: dict
         @param target_tasks_method: function to determine the target_task_set;
                 see `./target_tasks.py`.
         @type target_tasks_method: function
         """
 
         self.root_dir = root_dir
-        self.log = log
         self.parameters = parameters
         self.target_tasks_method = target_tasks_method
 
         # this can be set up until the time the target task set is generated;
         # it defaults to parameters['target_tasks']
         self._target_tasks = parameters.get('target_tasks')
 
         # start the generator
@@ -100,20 +100,17 @@ class TaskGraphGenerator(object):
         return self._run_until('optimized_task_graph')
 
     def _load_kinds(self):
         for path in os.listdir(self.root_dir):
             path = os.path.join(self.root_dir, path)
             if not os.path.isdir(path):
                 continue
             name = os.path.basename(path)
-            self.log(logging.DEBUG, 'loading-kind', {
-                'name': name,
-                'path': path,
-            }, "loading kind `{name}` from {path}")
+            logger.debug("loading kind `{}` from `{}`".format(name, path))
 
             kind_yml = os.path.join(path, 'kind.yml')
             with open(kind_yml) as f:
                 config = yaml.load(f)
 
             # load the class defined by implementation
             try:
                 impl = config['implementation']
@@ -125,17 +122,17 @@ class TaskGraphGenerator(object):
 
             impl_module, impl_object = impl.split(':')
             impl_class = __import__(impl_module)
             for a in impl_module.split('.')[1:]:
                 impl_class = getattr(impl_class, a)
             for a in impl_object.split('.'):
                 impl_class = getattr(impl_class, a)
 
-            yield impl_class(path, config, self.log)
+            yield impl_class(path, config)
 
     def _run(self):
         all_tasks = {}
         for kind in self._load_kinds():
             for task in kind.load_tasks(self.parameters):
                 if task.label in all_tasks:
                     raise Exception("duplicate tasks with label " + task.label)
                 all_tasks[task.label] = task
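
For reference, the `implementation` key in each kind.yml names a class as
`module.path:object.path`, and the __import__ loop above walks down to it one
attribute at a time. A sketch with a representative value:

    impl = 'taskgraph.kind.legacy:LegacyKind'
    impl_module, impl_object = impl.split(':')
    # __import__('taskgraph.kind.legacy') returns the top-level `taskgraph`
    # package, so the remaining dotted components are resolved with getattr
    obj = __import__(impl_module)
    for a in impl_module.split('.')[1:]:
        obj = getattr(obj, a)
    for a in impl_object.split('.'):
        obj = getattr(obj, a)
    # obj is now the kind class; instances are built as obj(path, config)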
--- a/taskcluster/taskgraph/kind/base.py
+++ b/taskcluster/taskgraph/kind/base.py
@@ -11,21 +11,20 @@ class Kind(object):
     """
     A kind represents a collection of tasks that share common characteristics.
     For example, all build jobs.  Each instance of a kind is initialized with a
     path from which it draws its task configuration.  The instance is free to
     store as much local state as it needs.
     """
     __metaclass__ = abc.ABCMeta
 
-    def __init__(self, path, config, log):
+    def __init__(self, path, config):
         self.name = os.path.basename(path)
         self.path = path
         self.config = config
-        self.log = log
 
     @abc.abstractmethod
     def load_tasks(self, parameters):
         """
         Get the set of tasks of this kind.
 
         The `parameters` give details on which to base the task generation.
         See `taskcluster/docs/parameters.rst` for details.
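
A minimal hypothetical subclass, illustrating the two-argument constructor
that kinds now inherit:

    class NullKind(Kind):
        """A kind defining no tasks; for illustration only."""
        def load_tasks(self, parameters):
            return []

    kind = NullKind('taskcluster/ci/null', config={})
    # kind.name == 'null'; kind.load_tasks({}) == []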
--- a/taskcluster/taskgraph/kind/legacy.py
+++ b/taskcluster/taskgraph/kind/legacy.py
@@ -49,16 +49,18 @@ DEFINE_TASK = 'queue:define-task:aws-pro
 DEFAULT_TRY = 'try: -b do -p all -u all -t all'
 DEFAULT_JOB_PATH = os.path.join(
     'tasks', 'branches', 'base_jobs.yml'
 )
 
 # time after which a try build's results will expire
 TRY_EXPIRATION = "14 days"
 
+logger = logging.getLogger(__name__)
+
 def mklabel():
     return TASKID_PLACEHOLDER.format(slugid())
 
 def set_expiration(task, timestamp):
     task_def = task['task']
     task_def['expires'] = timestamp
     if task_def.get('deadline', timestamp) > timestamp:
         task_def['deadline'] = timestamp
@@ -103,23 +105,21 @@ class LegacyKind(base.Kind):
 
         # Default to current time if querying the head rev fails
         pushdate = time.strftime('%Y%m%d%H%M%S', time.gmtime())
         vcs_info = query_vcs_info(params['head_repository'], params['head_rev'])
         changed_files = set()
         if vcs_info:
             pushdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(vcs_info.pushdate))
 
-            self.log(logging.DEBUG, 'vcs-info', {},
-                     '%d commits influencing task scheduling:\n' % len(vcs_info.changesets))
+            logger.debug('{} commits influencing task scheduling:'.format(
+                len(vcs_info.changesets)))
             for c in vcs_info.changesets:
-                self.log(logging.DEBUG, 'vcs-relevant-commit', {
-                    'cset': c['node'][0:12],
-                    'desc': c['desc'].splitlines()[0].encode('ascii', 'ignore'),
-                }, "{cset} {desc}")
+                logger.debug("{cset} {desc}".format(
+                    cset=c['node'][0:12],
+                    desc=c['desc'].splitlines()[0].encode('ascii', 'ignore')))
                 changed_files |= set(c['files'])
 
         # Template parameters used when expanding the graph
         seen_images = {}
         parameters = dict(gaia_info().items() + {
             'index': 'index',
             'project': project,
             'pushlog_id': params.get('pushlog_id', 0),
@@ -185,42 +185,39 @@ class LegacyKind(base.Kind):
             # If the task defines file patterns and we have a set of changed
             # files to compare against, only run if a file pattern matches one
             # of the changed files.
             file_patterns = when.get('file_patterns', None)
             if file_patterns and changed_files:
                 for pattern in file_patterns:
                     for path in changed_files:
                         if mozpackmatch(path, pattern):
-                            self.log(logging.DEBUG, 'schedule-task', {
-                                'schedule': True,
-                                'task': task['task'],
-                                'pattern': pattern,
-                                'path': path,
-                            }, 'scheduling {task} because pattern {pattern} '
-                                'matches {path}')
+                            logger.debug('scheduling {task} because pattern {pattern} '
+                                         'matches {path}'.format(
+                                             task=task['task'],
+                                             pattern=pattern,
+                                             path=path))
                             return True
 
                 # No file patterns matched. Discard task.
-                self.log(logging.DEBUG, 'schedule-task', {
-                    'schedule': False,
-                    'task': task['task'],
-                }, 'discarding {task} because no relevant files changed')
+                logger.debug('discarding {task} because no relevant files changed'.format(
+                    task=task['task']))
                 return False
 
             return True
 
         job_graph = filter(should_run, job_graph)
 
         all_routes = {}
 
         for build in job_graph:
-            self.log(logging.DEBUG, 'load-task', {
-                'task': build['task'],
-            }, 'loading task {task}')
+            logger.debug("loading build task {}".format(build['task']))
             interactive = cmdline_interactive or build["interactive"]
             build_parameters = merge_dicts(parameters, build['additional-parameters'])
             build_parameters['build_slugid'] = mklabel()
             build_parameters['source'] = '{repo}file/{rev}/testing/taskcluster/{file}'.format(repo=params['head_repository'], rev=params['head_rev'], file=build['task'])
             build_task = templates.load(build['task'], build_parameters)
 
             # Copy build_* attributes to expose them to post-build tasks
             # as well as json routes and tests
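
The should_run filter above gates legacy tasks on the set of changed files. A
sketch of the core check (mozpackmatch is mozpack.path.match, already imported
by legacy.py; the pattern and paths here are hypothetical):

    from mozpack.path import match as mozpackmatch

    pattern = 'testing/taskcluster/**'
    changed_files = {'testing/taskcluster/mach_commands.py', 'README.md'}
    # a task is scheduled as soon as any changed file matches one of its
    # file_patterns; otherwise it is discarded
    scheduled = any(mozpackmatch(path, pattern) for path in changed_files)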
--- a/taskcluster/taskgraph/test/test_decision.py
+++ b/taskcluster/taskgraph/test/test_decision.py
@@ -42,31 +42,31 @@ class TestDecision(unittest.TestCase):
         })
 
 
     def test_write_artifact_json(self):
         data = [{'some': 'data'}]
         tmpdir = tempfile.mkdtemp()
         try:
             decision.ARTIFACTS_DIR = os.path.join(tmpdir, "artifacts")
-            decision.write_artifact("artifact.json", data, lambda *args: None)
+            decision.write_artifact("artifact.json", data)
             with open(os.path.join(decision.ARTIFACTS_DIR, "artifact.json")) as f:
                 self.assertEqual(json.load(f), data)
         finally:
             if os.path.exists(tmpdir):
                 shutil.rmtree(tmpdir)
             decision.ARTIFACTS_DIR = 'artifacts'
 
 
     def test_write_artifact_yml(self):
         data = [{'some': 'data'}]
         tmpdir = tempfile.mkdtemp()
         try:
             decision.ARTIFACTS_DIR = os.path.join(tmpdir, "artifacts")
-            decision.write_artifact("artifact.yml", data, lambda *args: None)
+            decision.write_artifact("artifact.yml", data)
             with open(os.path.join(decision.ARTIFACTS_DIR, "artifact.yml")) as f:
                 self.assertEqual(yaml.safe_load(f), data)
         finally:
             if os.path.exists(tmpdir):
                 shutil.rmtree(tmpdir)
             decision.ARTIFACTS_DIR = 'artifacts'
 
 
--- a/taskcluster/taskgraph/test/test_generator.py
+++ b/taskcluster/taskgraph/test/test_generator.py
@@ -38,23 +38,21 @@ class WithFakeKind(TaskGraphGenerator):
 
     def _load_kinds(self):
         yield FakeKind()
 
 
 class TestGenerator(unittest.TestCase):
 
     def setUp(self):
-        def log(level, name, data, message):
-            pass
         self.target_tasks = []
 
         def target_tasks_method(full_task_graph, parameters):
             return self.target_tasks
-        self.tgg = WithFakeKind('/root', log, {}, target_tasks_method)
+        self.tgg = WithFakeKind('/root', {}, target_tasks_method)
 
     def test_full_task_set(self):
         "The full_task_set property has all tasks"
         self.assertEqual(self.tgg.full_task_set.graph,
                          graph.Graph({'t-0', 't-1', 't-2'}, set()))
         self.assertEqual(self.tgg.full_task_set.tasks.keys(),
                          ['t-0', 't-1', 't-2'])
 
--- a/taskcluster/taskgraph/test/test_kind_legacy.py
+++ b/taskcluster/taskgraph/test/test_kind_legacy.py
@@ -12,19 +12,17 @@ from mozunit import main
 
 
 class TestLegacyKind(unittest.TestCase):
     # NOTE: much of LegacyKind is copy-pasted from the old legacy code, which
     # is emphatically *not* designed for testing, so this test class does not
     # attempt to test the entire class.
 
     def setUp(self):
-        def log(level, name, data, message):
-            pass
-        self.kind = LegacyKind('/root', {}, log)
+        self.kind = LegacyKind('/root', {})
 
     def test_get_task_definition_artifact_sub(self):
         "get_task_definition correctly substiatutes artifact URLs"
         task_def = {
             'input_file': TASKID_PLACEHOLDER.format("G5BoWlCBTqOIhn3K3HyvWg"),
             'embedded': 'TASK={} FETCH=lazy'.format(
                 TASKID_PLACEHOLDER.format('G5BoWlCBTqOIhn3K3HyvWg')),
         }
--- a/testing/taskcluster/taskcluster_graph/mach_util.py
+++ b/testing/taskcluster/taskcluster_graph/mach_util.py
@@ -5,21 +5,24 @@ from __future__ import absolute_import
 
 from collections import defaultdict
 import os
 import json
 import copy
 import re
 import sys
 import time
+import logging
 from collections import namedtuple
 
 ROOT = os.path.dirname(os.path.realpath(__file__))
 GECKO = os.path.realpath(os.path.join(ROOT, '..', '..', '..'))
 
+logger = logging.getLogger(__name__)
+
 def merge_dicts(*dicts):
     merged_dict = {}
     for dictionary in dicts:
         merged_dict.update(dictionary)
     return merged_dict
 
 def gaia_info():
     '''
@@ -125,38 +128,38 @@ def remove_caches_from_task(task):
         pass
 
 def query_vcs_info(repository, revision):
     """Query the pushdate and pushid of a repository/revision.
     This is intended to be used on hg.mozilla.org/mozilla-central and
     similar. It may or may not work for other hg repositories.
     """
     if not repository or not revision:
-        sys.stderr.write('cannot query vcs info because vcs info not provided\n')
+        logger.warning('cannot query vcs info because vcs info not provided')
         return None
 
     VCSInfo = namedtuple('VCSInfo', ['pushid', 'pushdate', 'changesets'])
 
     try:
         import requests
         url = '%s/json-automationrelevance/%s' % (repository.rstrip('/'),
                                                   revision)
-        sys.stderr.write("Querying version control for metadata: %s\n" % url)
+        logger.debug("Querying version control for metadata: %s" % url)
         contents = requests.get(url).json()
 
         changesets = []
         for c in contents['changesets']:
             changesets.append({k: c[k] for k in ('desc', 'files', 'node')})
 
         pushid = contents['changesets'][-1]['pushid']
         pushdate = contents['changesets'][-1]['pushdate'][0]
 
         return VCSInfo(pushid, pushdate, changesets)
 
     except Exception:
-        sys.stderr.write(
-            "Error querying VCS info for '%s' revision '%s'\n" % (
-                repository, revision,
-            )
-        )
+        logger.exception(
+            "Error querying VCS info for '%s' revision '%s'",
+            repository, revision)
         return None
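
Note that logger.exception, unlike the sys.stderr.write it replaces, must be
called from an exception handler: it logs at ERROR level and appends the
active traceback automatically. A small sketch with hypothetical arguments:

    import logging
    logger = logging.getLogger(__name__)

    try:
        raise ValueError('connection refused')
    except Exception:
        # ERROR-level message plus the current traceback
        logger.exception("Error querying VCS info for '%s' revision '%s'",
                         'https://hg.mozilla.org/try', 'deadbeef0123')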