Bug 1305740: delete leftover legacy.py; r?gps
author: Dustin J. Mitchell <dustin@mozilla.com>
date: Tue, 27 Sep 2016 15:55:36 +0000
changeset: 418054 c27a928d3e7f4712eca23feae05c1708cfec5b0c
parent: 418052 f82827622145bcd160f63ec48e9606498beefc39
child: 532239 fa8f6178dd3c9c0172f8d6327f016d43bda1f355
push id: 30567
push user: dmitchell@mozilla.com
push date: Tue, 27 Sep 2016 16:00:57 +0000
reviewers: gps
bugs: 1305740
milestone: 52.0a1
Bug 1305740: delete leftover legacy.py; r?gps

MozReview-Commit-ID: 8GHv1E9CBRH
taskcluster/taskgraph/task/legacy.py
deleted file mode 100644
--- a/taskcluster/taskgraph/task/legacy.py
+++ /dev/null
@@ -1,632 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-from __future__ import absolute_import, print_function, unicode_literals
-
-import json
-import logging
-import os
-import re
-import time
-from collections import namedtuple
-
-from . import base
-from mozpack.path import match as mozpackmatch
-from slugid import nice as slugid
-from taskgraph.util.legacy_commit_parser import parse_commit
-from taskgraph.util.time import (
-    json_time_from_now,
-    current_json_time,
-)
-from taskgraph.util.templates import Templates
-from taskgraph.util.docker import docker_image
-
-
-ROOT = os.path.dirname(os.path.realpath(__file__))
-GECKO = os.path.realpath(os.path.join(ROOT, '..', '..', '..'))
-# TASKID_PLACEHOLDER is the "internal" form of a taskid; it is substituted with
-# actual taskIds at the very last minute, in get_task_definition
-TASKID_PLACEHOLDER = 'TaskLabel=={}'
-
-DEFINE_TASK = 'queue:define-task:aws-provisioner-v1/{}'
-DEFAULT_TRY = 'try: -b do -p all -u all -t all'
-DEFAULT_JOB_PATH = os.path.join(
-    'tasks', 'branches', 'base_jobs.yml'
-)
-
-TREEHERDER_ROUTES = {
-    'staging': 'tc-treeherder-stage',
-    'production': 'tc-treeherder'
-}
-
-# time after which a try build's results will expire
-TRY_EXPIRATION = "14 days"
-
-logger = logging.getLogger(__name__)
-
-
-def mklabel():
-    return TASKID_PLACEHOLDER.format(slugid())
-
-
-def merge_dicts(*dicts):
-    merged_dict = {}
-    for dictionary in dicts:
-        merged_dict.update(dictionary)
-    return merged_dict
-
-
-def gaia_info():
-    '''Fetch details from the in-tree gaia.json (which links this version of
-    gecko->gaia) and construct the usual base/head/ref/rev pairing...'''
-    gaia = json.load(open(os.path.join(GECKO, 'b2g', 'config', 'gaia.json')))
-
-    if gaia['git'] is None or \
-       gaia['git']['remote'] == '' or \
-       gaia['git']['git_revision'] == '' or \
-       gaia['git']['branch'] == '':
-
-        # Just use the hg params...
-        return {
-            'gaia_base_repository': 'https://hg.mozilla.org/{}'.format(gaia['repo_path']),
-            'gaia_head_repository': 'https://hg.mozilla.org/{}'.format(gaia['repo_path']),
-            'gaia_ref': gaia['revision'],
-            'gaia_rev': gaia['revision']
-        }
-
-    else:
-        # Use git
-        return {
-            'gaia_base_repository': gaia['git']['remote'],
-            'gaia_head_repository': gaia['git']['remote'],
-            'gaia_rev': gaia['git']['git_revision'],
-            'gaia_ref': gaia['git']['branch'],
-        }
-
-
-def configure_dependent_task(task_path, parameters, taskid, templates, build_treeherder_config):
-    """Configure a build dependent task. This is shared between post-build and test tasks.
-
-    :param task_path: location to the task yaml
-    :param parameters: parameters to load the template
-    :param taskid: taskid of the dependent task
-    :param templates: reference to the template builder
-    :param build_treeherder_config: parent treeherder config
-    :return: the configured task
-    """
-    task = templates.load(task_path, parameters)
-    task['taskId'] = taskid
-
-    if 'requires' not in task:
-        task['requires'] = []
-
-    task['requires'].append(parameters['build_slugid'])
-
-    if 'extra' not in task['task']:
-        task['task']['extra'] = {}
-
-    # only set up treeherder information if the task contained any to begin with
-    if 'treeherder' in task['task']['extra']:
-        # Copy over any treeherder configuration from the build so
-        # tests show up under the same platform...
-        treeherder_config = task['task']['extra']['treeherder']
-
-        treeherder_config['collection'] = \
-            build_treeherder_config.get('collection', {})
-
-        treeherder_config['build'] = \
-            build_treeherder_config.get('build', {})
-
-        if 'machine' not in treeherder_config:
-            treeherder_config['machine'] = \
-                build_treeherder_config.get('machine', {})
-
-    if 'routes' not in task['task']:
-        task['task']['routes'] = []
-
-    if 'scopes' not in task['task']:
-        task['task']['scopes'] = []
-
-    return task
-
-
-def set_interactive_task(task, interactive):
-    r"""Make the task interactive.
-
-    :param task: task definition.
-    :param interactive: True if the task should be interactive.
-    """
-    if not interactive:
-        return
-
-    payload = task["task"]["payload"]
-    if "features" not in payload:
-        payload["features"] = {}
-    payload["features"]["interactive"] = True
-
-
-def remove_caches_from_task(task):
-    r"""Remove all caches but vcs from the task.
-
-    :param task: task definition.
-    """
-    whitelist = [
-        re.compile("^level-[123]-.*-tc-vcs(-public-sources)?$"),
-        re.compile("^level-[123]-hg-shared$"),
-        # The assumption here is that `hg robustcheckout --purge` is used and
-        # the checkout will start from a clean slate on job execution. This
-        # means there should be no contamination from previous tasks.
-        re.compile("^level-[123]-checkouts$"),
-        re.compile("^tooltool-cache$"),
-    ]
-    try:
-        caches = task["task"]["payload"]["cache"]
-        scopes = task["task"]["scopes"]
-        for cache in caches.keys():
-            if not any(pat.match(cache) for pat in whitelist):
-                caches.pop(cache)
-                scope = 'docker-worker:cache:' + cache
-                try:
-                    scopes.remove(scope)
-                except ValueError:
-                    raise ValueError("scope '{}' not in {}".format(scope, scopes))
-    except KeyError:
-        pass
-
-
-def remove_coalescing_from_task(task):
-    r"""Remove coalescing route and supersederUrl from job task
-
-    :param task: task definition.
-    """
-
-    try:
-        payload = task["task"]["payload"]
-        routes = task["task"]["routes"]
-        removable_routes = [route for route in list(routes)
-                            if route.startswith('coalesce.')]
-        if removable_routes:
-            # we remove supersederUrl only when we have also routes to remove
-            payload.pop("supersederUrl")
-
-        for route in removable_routes:
-            routes.remove(route)
-    except KeyError:
-        pass
-
-
-def query_vcs_info(repository, revision):
-    """Query the pushdate and pushid of a repository/revision.
-
-    This is intended to be used on hg.mozilla.org/mozilla-central and
-    similar. It may or may not work for other hg repositories.
-    """
-    if not repository or not revision:
-        logger.warning('cannot query vcs info because vcs info not provided')
-        return None
-
-    VCSInfo = namedtuple('VCSInfo', ['pushid', 'pushdate', 'changesets'])
-
-    try:
-        import requests
-        url = '%s/json-automationrelevance/%s' % (repository.rstrip('/'),
-                                                  revision)
-        logger.debug("Querying version control for metadata: %s", url)
-        contents = requests.get(url).json()
-
-        changesets = []
-        for c in contents['changesets']:
-            changesets.append({k: c[k] for k in ('desc', 'files', 'node')})
-
-        pushid = contents['changesets'][-1]['pushid']
-        pushdate = contents['changesets'][-1]['pushdate'][0]
-
-        return VCSInfo(pushid, pushdate, changesets)
-
-    except Exception:
-        logger.exception("Error querying VCS info for '%s' revision '%s'",
-                         repository, revision)
-        return None
-
-
-def set_expiration(task, relative_datestamp):
-    task_def = task['task']
-    task_def['expires'] = {'relative-datestamp': relative_datestamp}
-    if 'deadline' in task_def:
-        now = current_json_time(datetime_format=True)
-        timestamp = json_time_from_now(input_str=TRY_EXPIRATION,
-                                       now=now,
-                                       datetime_format=True)
-        deadline = json_time_from_now(input_str=task_def['deadline']['relative-datestamp'],
-                                      now=now,
-                                      datetime_format=True)
-        if deadline > timestamp:
-            task_def['deadline']['relative-datestamp'] = relative_datestamp
-
-    try:
-        artifacts = task_def['payload']['artifacts']
-    except KeyError:
-        return
-
-    # for docker-worker, artifacts is a dictionary
-    # for generic-worker, artifacts is a list
-    # for taskcluster-worker, it will depend on what we do in artifacts plugin
-    for artifact in artifacts.values() if hasattr(artifacts, "values") else artifacts:
-        artifact['expires']['relative-datestamp'] = relative_datestamp
-
-
-def format_treeherder_route(destination, project, revision, pushlog_id):
-    return "{}.v2.{}.{}.{}".format(destination,
-                                   project,
-                                   revision,
-                                   pushlog_id)
-
-
-def decorate_task_treeherder_routes(task, project, revision, pushlog_id):
-    """Decorate the given task with treeherder routes.
-
-    Uses task.extra.treeherderEnv if available; otherwise defaults to
-    staging only.
-
-    :param dict task: task definition.
-    :param str project: The project the tasks are running for.
-    :param str revision: The revision for the push
-    :param str pushlog_id: The ID of the push
-    """
-
-    if 'extra' not in task:
-        return
-
-    if 'routes' not in task:
-        task['routes'] = []
-
-    treeherder_env = task['extra'].get('treeherderEnv', ['staging'])
-
-    for env in treeherder_env:
-        route = format_treeherder_route(TREEHERDER_ROUTES[env],
-                                        project,
-                                        revision,
-                                        pushlog_id)
-        task['routes'].append(route)
-
-
-def decorate_task_json_routes(task, json_routes, parameters):
-    """Decorate the given task with routes.json routes.
-
-    :param dict task: task definition.
-    :param json_routes: the list of routes to use from routes.json
-    :param parameters: dictionary of parameters to use in route templates
-    """
-    routes = task.get('routes', [])
-    for route in json_routes:
-        routes.append(route.format(**parameters))
-
-    task['routes'] = routes
-
-
-class BuildTaskValidationException(Exception):
-    pass
-
-
-def validate_build_task(task):
-    '''Build tasks have some required fields in extra; this function ensures
-    they are present.'''
-    if 'task' not in task:
-        raise BuildTaskValidationException('must have task field')
-
-    task_def = task['task']
-
-    if 'extra' not in task_def:
-        raise BuildTaskValidationException('build task must have task.extra props')
-
-    if 'locations' in task_def['extra']:
-
-        locations = task_def['extra']['locations']
-
-        if 'build' not in locations:
-            raise BuildTaskValidationException('task.extra.locations.build missing')
-
-        if 'tests' not in locations and 'test_packages' not in locations:
-            raise BuildTaskValidationException('task.extra.locations.tests or '
-                                               'task.extra.locations.test_packages missing')
-
-
-class LegacyTask(base.Task):
-    """
-    This kind generates a full task graph from the old YAML files in
-    `testing/taskcluster/tasks`.  The tasks already have dependency links.
-
-    The existing task-graph generation creates slugids for tasks as it runs,
-    so this kind labels tasks using those slugids, with a prefix of
-    "TaskLabel==".  These labels are unfortunately not stable from run to run.
-    """
-
-    def __init__(self, *args, **kwargs):
-        self.task_dict = kwargs.pop('task_dict')
-        super(LegacyTask, self).__init__(*args, **kwargs)
-
-    def __eq__(self, other):
-        return super(LegacyTask, self).__eq__(other) and \
-               self.task_dict == other.task_dict
-
-    @classmethod
-    def load_tasks(cls, kind, path, config, params, loaded_tasks):
-        root = os.path.abspath(os.path.join(path, config['legacy_path']))
-
-        project = params['project']
-        # NOTE: message is ignored here; we always use DEFAULT_TRY, then filter the
-        # resulting task graph later
-        message = DEFAULT_TRY
-
-        templates = Templates(root)
-
-        job_path = os.path.join(root, 'tasks', 'branches', project, 'job_flags.yml')
-        job_path = job_path if os.path.exists(job_path) else \
-            os.path.join(root, DEFAULT_JOB_PATH)
-
-        jobs = templates.load(job_path, {})
-
-        job_graph, trigger_tests = parse_commit(message, jobs)
-
-        cmdline_interactive = params.get('interactive', False)
-
-        # Default to current time if querying the head rev fails
-        vcs_info = query_vcs_info(params['head_repository'], params['head_rev'])
-        changed_files = set()
-        if vcs_info:
-
-            logger.debug(
-                '{} commits influencing task scheduling:'.format(len(vcs_info.changesets)))
-            for c in vcs_info.changesets:
-                logger.debug("{cset} {desc}".format(
-                    cset=c['node'][0:12],
-                    desc=c['desc'].splitlines()[0].encode('ascii', 'ignore')))
-                changed_files |= set(c['files'])
-
-        pushdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(params['pushdate']))
-
-        # Template parameters used when expanding the graph
-        parameters = dict(gaia_info().items() + {
-            'index': 'index',
-            'project': project,
-            'pushlog_id': params.get('pushlog_id', 0),
-            'docker_image': docker_image,
-            'base_repository': params['base_repository'] or
-            params['head_repository'],
-            'head_repository': params['head_repository'],
-            'head_ref': params['head_ref'] or params['head_rev'],
-            'head_rev': params['head_rev'],
-            'pushdate': pushdate,
-            'pushtime': pushdate[8:],
-            'year': pushdate[0:4],
-            'month': pushdate[4:6],
-            'day': pushdate[6:8],
-            'rank': params['pushdate'],
-            'owner': params['owner'],
-            'level': params['level'],
-        }.items())
-
-        routes_file = os.path.join(root, 'routes.json')
-        with open(routes_file) as f:
-            contents = json.load(f)
-            json_routes = contents['routes']
-            # TODO: Nightly and/or l10n routes
-
-        # Task graph we are generating for taskcluster...
-        graph = {
-            'tasks': [],
-            'scopes': set(),
-        }
-
-        for env in TREEHERDER_ROUTES:
-            route = format_treeherder_route(TREEHERDER_ROUTES[env],
-                                            parameters['project'],
-                                            parameters['head_rev'],
-                                            parameters['pushlog_id'])
-            graph['scopes'].add("queue:route:{}".format(route))
-
-        graph['metadata'] = {
-            'source': '{repo}file/{rev}/testing/taskcluster/mach_commands.py'.format(
-                repo=params['head_repository'], rev=params['head_rev']),
-            'owner': params['owner'],
-            # TODO: Add full mach commands to this example?
-            'description': 'Task graph generated via ./mach taskcluster-graph',
-            'name': 'task graph local'
-        }
-
-        # Filter the job graph according to conditions met by this invocation run.
-        def should_run(task):
-            # Old style build or test task that doesn't define conditions. Always runs.
-            if 'when' not in task:
-                return True
-
-            when = task['when']
-
-            # If the task defines file patterns and we have a set of changed
-            # files to compare against, only run if a file pattern matches one
-            # of the changed files.
-            file_patterns = when.get('file_patterns', None)
-            if file_patterns and changed_files:
-                # Always consider changes to the task definition itself
-                file_patterns.append('testing/taskcluster/{task}'.format(task=task['task']))
-                for pattern in file_patterns:
-                    for path in changed_files:
-                        if mozpackmatch(path, pattern):
-                            logger.debug('scheduling {task} because pattern {pattern} '
-                                         'matches {path}'.format(
-                                             task=task['task'],
-                                             pattern=pattern,
-                                             path=path,
-                                         ))
-                            return True
-
-                # No file patterns matched. Discard task.
-                logger.debug('discarding {task} because no relevant files changed'.format(
-                    task=task['task'],
-                    pattern=pattern,
-                    path=path))
-                return False
-
-            return True
-
-        job_graph = filter(should_run, job_graph)
-
-        all_routes = {}
-
-        for build in job_graph:
-            logging.debug("loading build task {}".format(build['task']))
-            interactive = cmdline_interactive or build["interactive"]
-            build_parameters = merge_dicts(parameters, build['additional-parameters'])
-            build_parameters['build_slugid'] = mklabel()
-            build_parameters['source'] = '{repo}file/{rev}/testing/taskcluster/{file}'.format(
-                repo=params['head_repository'], rev=params['head_rev'], file=build['task'])
-            build_task = templates.load(build['task'], build_parameters)
-
-            # Copy build_* attributes to expose them to post-build tasks
-            # as well as json routes and tests
-            task_extra = build_task['task']['extra']
-            build_parameters['build_name'] = task_extra['build_name']
-            build_parameters['build_type'] = task_extra['build_type']
-            build_parameters['build_product'] = task_extra['build_product']
-
-            if 'treeherder' in task_extra:
-                tier = task_extra['treeherder'].get('tier', 1)
-                if tier != 1:
-                    # Only tier 1 jobs use the build time as rank. Everything
-                    # else gets rank 0 until it is promoted to tier 1.
-                    task_extra['index']['rank'] = 0
-
-            set_interactive_task(build_task, interactive)
-
-            # try builds don't use cache nor coalescing
-            if project == "try":
-                remove_caches_from_task(build_task)
-                remove_coalescing_from_task(build_task)
-                set_expiration(build_task, TRY_EXPIRATION)
-
-            decorate_task_treeherder_routes(build_task['task'],
-                                            build_parameters['project'],
-                                            build_parameters['head_rev'],
-                                            build_parameters['pushlog_id'])
-            decorate_task_json_routes(build_task['task'],
-                                      json_routes,
-                                      build_parameters)
-
-            # Ensure each build graph is valid after construction.
-            validate_build_task(build_task)
-            attributes = build_task['attributes'] = {
-                'kind': 'legacy',
-                'legacy_kind': 'build',
-                'run_on_projects': ['all'],
-            }
-            if 'build_name' in build:
-                attributes['build_platform'] = build['build_name']
-            if 'build_type' in task_extra:
-                attributes['build_type'] = {'dbg': 'debug'}.get(task_extra['build_type'],
-                                                                task_extra['build_type'])
-            if build.get('is_job'):
-                attributes['job'] = build['build_name']
-                attributes['legacy_kind'] = 'job'
-            graph['tasks'].append(build_task)
-
-            for location in build_task['task']['extra'].get('locations', {}):
-                build_parameters['{}_location'.format(location)] = \
-                    build_task['task']['extra']['locations'][location]
-
-            for url in build_task['task']['extra'].get('url', {}):
-                build_parameters['{}_url'.format(url)] = \
-                    build_task['task']['extra']['url'][url]
-
-            define_task = DEFINE_TASK.format(build_task['task']['workerType'])
-
-            for route in build_task['task'].get('routes', []):
-                if route.startswith('index.gecko.v2') and route in all_routes:
-                    raise Exception(
-                        "Error: route '%s' is in use by multiple tasks: '%s' and '%s'" % (
-                            route,
-                            build_task['task']['metadata']['name'],
-                            all_routes[route],
-                        ))
-                all_routes[route] = build_task['task']['metadata']['name']
-
-            graph['scopes'].add(define_task)
-            graph['scopes'] |= set(build_task['task'].get('scopes', []))
-            route_scopes = map(
-                lambda route: 'queue:route:' + route, build_task['task'].get('routes', [])
-            )
-            graph['scopes'] |= set(route_scopes)
-
-            # Treeherder symbol configuration for the graph required for each
-            # build so tests know which platform they belong to.
-            build_treeherder_config = build_task['task']['extra']['treeherder']
-
-            if 'machine' not in build_treeherder_config:
-                message = '({}), extra.treeherder.machine required for all builds'
-                raise ValueError(message.format(build['task']))
-
-            if 'build' not in build_treeherder_config:
-                build_treeherder_config['build'] = \
-                    build_treeherder_config['machine']
-
-            if 'collection' not in build_treeherder_config:
-                build_treeherder_config['collection'] = {'opt': True}
-
-            if len(build_treeherder_config['collection'].keys()) != 1:
-                message = '({}), extra.treeherder.collection must contain one type'
-                raise ValueError(message.format(build['task']))
-
-            for post_build in build['post-build']:
-                # copy over the old parameters to update the template
-                # TODO additional-parameters is currently not an option, only
-                # enabled for build tasks
-                post_parameters = merge_dicts(build_parameters,
-                                              post_build.get('additional-parameters', {}))
-                post_task = configure_dependent_task(post_build['task'],
-                                                     post_parameters,
-                                                     mklabel(),
-                                                     templates,
-                                                     build_treeherder_config)
-                set_interactive_task(post_task, interactive)
-
-                if project == "try":
-                    set_expiration(post_task, TRY_EXPIRATION)
-
-                post_task['attributes'] = attributes.copy()
-                post_task['attributes']['legacy_kind'] = 'post_build'
-                post_task['attributes']['post_build'] = post_build['job_flag']
-                graph['tasks'].append(post_task)
-
-        graph['scopes'] = sorted(graph['scopes'])
-
-        # Convert to a dictionary of tasks.  The process above has invented a
-        # taskId for each task, and we use those as the *labels* for the tasks;
-        # taskgraph will later assign them new taskIds.
-        return [
-            cls(kind, t['taskId'], task=t['task'], attributes=t['attributes'], task_dict=t)
-            for t in graph['tasks']
-        ]
-
-    def get_dependencies(self, taskgraph):
-        # fetch dependency information from the cached graph
-        deps = [(label, label) for label in self.task_dict.get('requires', [])]
-
-        # add a dependency on an image task, if needed
-        if 'docker-image' in self.task_dict:
-            deps.append(('build-docker-image-{docker-image}'.format(**self.task_dict),
-                         'docker-image'))
-
-        return deps
-
-    def optimize(self, params):
-        # no optimization for the moment
-        return False, None
-
-    @classmethod
-    def from_json(cls, task_dict):
-        legacy_task = cls(kind='legacy',
-                          label=task_dict['label'],
-                          attributes=task_dict['attributes'],
-                          task=task_dict['task'],
-                          task_dict=task_dict)
-        return legacy_task