WIP add core colanding functionality for PR generation (bug 1288282) draft
authorbyron jones <glob@mozilla.com>
Wed, 27 Sep 2017 15:09:24 +0800
changeset 11721 11714c4a857758dc5afb0cb59b11fba0e594f3b7
parent 11720 e9a10bb0d99a9606670cc935a3cbd9df095d946d
push id1805
push userbjones@mozilla.com
push dateWed, 27 Sep 2017 07:33:57 +0000
bugs1288282
WIP add core colanding functionality for PR generation (bug 1288282) When sending a 'servo-coland' message over SQS, extract revisions from the referenced mercurial bundle and create github pull requests from commits that touch servo/. Branches and PRs use the bug ID as the identifier, however each colanding request will result in a new PR. If a PR for the same bug was replaced, it will be referenced in the new PR. The PR's url will be left in a comment on the bug. MozReview-Commit-ID: 3SYi6BytYsg
ansible/roles/vcs-sync/templates/servo-sync.ini.j2
vcssync/mozvcssync/coland.py
vcssync/mozvcssync/servo.py
vcssync/mozvcssync/util.py
vcssync/prod-requirements.txt
--- a/ansible/roles/vcs-sync/templates/servo-sync.ini.j2
+++ b/ansible/roles/vcs-sync/templates/servo-sync.ini.j2
@@ -19,8 +19,17 @@ hg_converted = {{ servo_linear_hg_url }}
 
 backout_integration_url = {{ backout_integration_url | mandatory }}
 backout_github_name = {{ backout_github_name | mandatory }}
 backout_author = {{ backout_author | mandatory }}
 
 sqs_coland_region = {{ sqs_coland_region | mandatory }}
 sqs_coland_queue = {{ sqs_coland_queue | mandatory }}
 sqs_coland_error_queue = {{ sqs_coland_error_queue | mandatory }}
+
+coland_bucket = coland-bundle
+coland_bucket_aws_key = {{ coland_bucket_aws_key | mandatory }}
+coland_bucket_aws_secret = {{ coland_bucket_aws_secret | mandatory }}
+
+coland_github_name = servo/servo
+coland_github_token = {{ coland_github_token | mandatory }}
+
+coland_bugzilla_url = https://bugzilla.mozilla.org
new file mode 100644
--- /dev/null
+++ b/vcssync/mozvcssync/coland.py
@@ -0,0 +1,427 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import logging
+import os
+import tempfile
+import urlparse
+from subprocess import CalledProcessError
+from email.utils import (formataddr, parseaddr)
+
+import boto3
+import bugsy
+import hglib
+from botocore.exceptions import ClientError
+
+from .github_pr import GitHubPR
+from .sqs import SqsFatalException
+from .util import run_hg, clean_hg_repo, maybe_revision
+
+"""
+Processes a request to Co-Land a Servo+Gecko Change.
+
+Autoland-transplant uses a FIFO SQS queue to deliver the request to the
+servo-sqs-listen daemon, which passes the message to this package for
+processing.
+
+Requests are JSON with the following fields:
+
+"action"
+    Must be "servo-coland"
+
+"autoland-id"
+    Autoland's internal request ID.
+    Allows Autoland to identify responses.
+
+"author"
+    Patch author, used for auditing.
+
+"bug-id"
+    BMO bug ID.
+    Coland will comment on the bug with the Pull Request URL.
+
+"bundle-url"
+    s3:// URL of Mercurial bundle which contains both Servo and Gecko changes.
+
+"destination"
+    ssh:// URL of destination Mercurial repo.
+
+"""
+
+
+logger = logging.getLogger('coland')
+
+
def validate_config(config):
    """Ensure the co-land configuration is valid; raise Exception if not.

    Checks required config keys, queue naming, and that the local repo
    clones exist.  Normalises ``coland_bugzilla_url`` in-place by removing
    any trailing slash.
    """
    # Fix: the original set listed 'sqs_coland_error_queue' twice and
    # omitted 'sqs_coland_region', which the SQS listener requires.
    required_fields = {'sqs_coland_region', 'sqs_coland_queue',
                       'sqs_coland_error_queue', 'coland_bucket',
                       'coland_bucket_aws_key', 'coland_bucket_aws_secret',
                       'coland_github_name', 'coland_github_token',
                       'coland_bugzilla_url', 'coland_bugzilla_apikey'}
    request_fields = set(config.keys())
    missing_fields = required_fields - request_fields
    if missing_fields:
        raise Exception(
            'invalid config: missing required field%s: %s'
            % ('' if len(missing_fields) == 1
               else 's', ', '.join(sorted(missing_fields))))

    # Sanity check queue names.  Co-landing requires strict ordering, so
    # both queues must be FIFO queues.
    if not config['sqs_coland_queue'].endswith('.fifo'):
        raise Exception("'%s' is not a FIFO queue" % config['sqs_coland_queue'])
    if not config['sqs_coland_error_queue'].endswith('.fifo'):
        raise Exception("'%s' is not a FIFO queue"
                        % config['sqs_coland_error_queue'])

    # Ensure repo paths exist and are sane; we won't create them.
    if not os.path.exists(config['integration_path']):
        raise Exception('failed to find integration_path "%s"'
                        % config['integration_path'])
    if not os.path.exists('%s/.hg' % config['integration_path']):
        raise Exception('invalid integration_path "%s"'
                        % config['integration_path'])
    if not os.path.exists(config['github_path']):
        raise Exception('failed to find github_path "%s"'
                        % config['github_path'])
    if not os.path.exists('%s/.git' % config['github_path']):
        raise Exception('invalid github_path "%s"'
                        % config['github_path'])

    if len(config['coland_github_name'].split('/')) != 2:
        raise Exception('invalid coland_github_name: "%s" is not in the form '
                        '"user/repo"' % config['coland_github_name'])

    # Fix bugzilla url if required.
    config['coland_bugzilla_url'] = config['coland_bugzilla_url'].rstrip('/')
+
+
def validate_request(config, request):
    """Validate an incoming 'servo-coland' SQS message.

    Raises SqsFatalException (non-retryable) for malformed requests.
    Mutates ``request`` in-place: 'bug-id' is coerced to an int.
    """
    # Ensure mandatory fields are provided.
    required_fields = {'action', 'author', 'autoland-id', 'bug-id',
                       'bundle-url', 'destination', }

    request_fields = set(request.keys())
    missing_fields = required_fields - request_fields
    if missing_fields:
        raise SqsFatalException(
            'missing required field%s: %s'
            % ('' if len(missing_fields) == 1
               else 's', ', '.join(sorted(missing_fields))))

    # Validate author, bug-id.
    # parseaddr returns (name, email); require at least an email address.
    if '@' not in parseaddr(request['author'])[1]:
        raise SqsFatalException('invalid author "%s"' % request['author'])
    try:
        request['bug-id'] = int(request['bug-id'])
    except ValueError:
        raise SqsFatalException('invalid bug-id "%s"' % request['bug-id'])

    # Validate the S3 bucket.
    try:
        url = urlparse.urlparse(request['bundle-url'])
    except ValueError:
        raise SqsFatalException('invalid bundle-url "%s": malformed url'
                                % request['bundle-url'])

    if url.scheme != 's3':
        raise SqsFatalException('invalid bundle-url "%s": not a s3:// url'
                                % request['bundle-url'])

    # Only ever download bundles from our own bucket.
    if url.hostname != config['coland_bucket']:
        raise SqsFatalException('invalid bundle-url "%s": illegal bucket'
                                % request['bundle-url'])

    # And that the destination is a URL, not just a repo name.
    try:
        url = urlparse.urlparse(request['destination'])
    except ValueError:
        raise SqsFatalException('invalid destination "%s": malformed url'
                                % request['destination'])

    if url.scheme != 'ssh':
        raise SqsFatalException('invalid destination "%s": not a ssh:// url'
                                % request['destination'])

    # Only ever push to hg.mozilla.org.
    if url.hostname != 'hg.mozilla.org':
        raise SqsFatalException('invalid destination "%s": illegal hostname'
                                % request['destination'])
+
+
def prepare_hg_repo(hg_repo, remote_tip):
    """Sync the integration clone with its remote and strip stale commits."""
    # Checkout latest.
    run_hg(logger, hg_repo, ['pull', '-r', remote_tip])
    run_hg(logger, hg_repo, ['update', remote_tip])

    # Remove old rebased commits.
    # Outgoing descendants of remote_tip are local-only leftovers from a
    # previous run; stripping the first removes it and all its descendants.
    rev_selector = 'first(outgoing() and descendants(%s::))' % remote_tip
    rev = run_hg(logger, hg_repo,
                 ['log', '-T', '{node}', '-r', rev_selector])
    if rev:
        run_hg(logger, hg_repo, ['strip', '-r', rev])
+
+
def get_bug(bugzilla_url, bug_id):
    """Fetch a bug anonymously; return None when it is not publicly visible.

    Raises SqsFatalException when the bug does not exist at all.
    """
    try:
        logger.info('checking visibility of bug %s' % bug_id)
        # Deliberately unauthenticated: an anonymous fetch tells us whether
        # the bug is public before we link to it from a public PR.
        return bugsy.Bugsy(bugzilla_url=bugzilla_url).get(bug_id)
    except bugsy.BugsyException as e:
        if e.code == 101:  # bug_id_does_not_exist
            raise SqsFatalException('Invalid bug-id %s' % bug_id)
        if e.code == 102:  # bug_access_denied
            return None
        raise
+
+
def download_from_s3(s3_url, aws_key, aws_secret, output_file):
    """Download the object at ``s3_url`` into ``output_file``.

    Raises Exception with a friendly message for missing objects (404) and
    permission failures (403); other boto client errors propagate as-is.
    """
    url = urlparse.urlparse(s3_url)
    bucket = url.hostname
    key = url.path[1:]  # drop the leading '/'

    try:
        s3 = boto3.client('s3',
                          aws_access_key_id=aws_key,
                          aws_secret_access_key=aws_secret)
        logger.info('downloading %s to %s' % (s3_url, output_file))
        s3.download_file(bucket, key, output_file)
    except ClientError as e:
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            raise Exception('unable to download %s: file not found' % s3_url)
        if error_code == 403:
            raise Exception('unable to download %s: permission denied' % s3_url)
        raise
+
+
def extract_bundle_revs(hg_repo, filename):
    """Return a list of the full SHAs from a hg bundle file"""

    # We cannot use `hg incoming` as a previous failure may have resulted
    # in the revisions already being in-tree.
    # `debugbundle` output mixes revision hashes with other lines; keep
    # only the lines that look like a sha.
    return [r.strip() for r in
            run_hg(logger, hg_repo, ['debugbundle', filename]).splitlines()
            if maybe_revision(r.strip())]
+
+
def current_heads_set(hg_repo):
    """Return the set of full node hashes of the repository's heads."""
    output = run_hg(logger, hg_repo, ['heads', '-T', '{node}\n'])
    return set(output.splitlines())
+
+
def get_touched_servo_files(hg_repo, revset):
    """Return the servo/ file paths touched by the revisions in revset."""
    touched = run_hg(logger, hg_repo,
                     ['log', '-T', '{join(files,"\n")}\n',
                      '-r', revset]).splitlines()
    return [path for path in touched if path.startswith('servo/')]
+
+
def expanded_author(author):
    """Return author in git's required 'Name <email>' form.

    When the display name is missing, derive one from the local part of
    the email address rather than failing.
    """
    name, email = parseaddr(author)
    if name:
        return author

    fallback = formataddr((email[:email.index('@')], email))
    logger.warning('"%s" is missing author name, setting to "%s"' %
                   (email, fallback))
    return fallback
+
+
def create_pr_from_rev(hg_repo, github_pr, rev, pr_args):
    """Create a GitHub pull request from the servo/ part of hg rev.

    Returns the pull-request object from github_pr.create_pr_from_patch.
    Raises SqsFatalException when the patch does not apply cleanly.
    """
    run_hg(logger, hg_repo, ['update', rev])

    # TODO mess around with encoding
    repo_path = run_hg(logger, hg_repo, ['root']).rstrip()
    # Export just this commit as a git-style diff re-rooted at servo/, so
    # the paths apply directly inside the servo repository.
    patch = run_hg(logger, hg_repo, ['diff', '--git', '--cwd', repo_path,
                                     '--change', '.', '--root', 'servo/'],
                   log_output=False)
    with tempfile.NamedTemporaryFile() as temp_file:
        temp_file.write(patch)
        temp_file.flush()

        pr_args['reset_branch'] = False
        pr_args['patch_file'] = temp_file.name
        try:
            return github_pr.create_pr_from_patch(**pr_args)
        except CalledProcessError as e:
            # A merge conflict needs developer action; don't retry.
            if 'patch does not apply' in e.output:
                raise SqsFatalException('Merge conflict while applying '
                                        'co-landing changes to github/servo; '
                                        'please rebase and try again.')
            raise
+
+
def abort_rebase(hg_repo):
    """Abort any in-progress rebase; a no-op when there isn't one."""
    try:
        run_hg(logger, hg_repo, ['rebase', '--abort'])
    except hglib.error.CommandError as e:
        # hg errors out when no rebase is in progress; that's fine here.
        if 'abort: no rebase in progress' not in e.out:
            raise
+
+
def rebase_bundle(hg_repo, bundle_revs, remote_tip):
    """Rebase the unbundled revisions onto remote_tip when they created a
       new head.  Returns the (possibly rewritten) list of revisions."""
    # If the bundle added a new head, we need to rebase it.
    heads = current_heads_set(hg_repo)

    if bundle_revs[-1] in heads and remote_tip in heads:
        try:
            run_hg(logger, hg_repo,
                   ['rebase', '--tool', 'internal:merge',
                    '-s', bundle_revs[0], '-d', remote_tip])
        except hglib.error.CommandError as e:
            abort_rebase(hg_repo)
            # A merge conflict needs developer action; don't retry.
            if 'unresolved conflicts (see hg resolve' in e.out:
                raise SqsFatalException('Merge conflict while applying '
                                        'co-landing changes to hg/autoland; '
                                        'please rebase and try again.')
            raise

        # Rebasing rewrote the node hashes; re-read everything that now
        # descends from remote_tip (excluding remote_tip itself).
        bundle_revs = run_hg(logger, hg_repo,
                             ['log', '-T', '{node}\n',
                              '-r', '%s::' % remote_tip]).splitlines()[1:]

    return bundle_revs
+
+
def delete_github_branch(git, branch_name):
    """Delete a local (and, best effort, remote) git branch."""
    # Delete the branch to remove any partial commits/krud.
    # NOTE(review): `git.get` presumably returns command output (empty when
    # the branch does not exist) — confirm against the git helper class.
    if git.get('branch', '--list', branch_name):
        logger.info('deleting git branch %s' % branch_name)
        git.cmd('checkout', 'master')
        git.cmd('branch', '--delete', '--force', branch_name)
        # Best effort: the branch may never have been pushed to origin.
        # noinspection PyBroadException
        try:
            git.cmd('push', 'origin', '--delete', branch_name)
        except Exception:
            pass
+
+
def clean_master(git):
    """Return the working copy to a pristine checkout of master."""
    git.cmd('checkout', 'master', '--force')
    git.cmd('reset', '--hard')
+
+
def create_pr_comment(pr, comment):
    """Add a comment to a github3 pull-request object."""
    # Unfortunately github3.py v0.9.6 doesn't implement create_comment.
    # It's implemented in github3.py v1 however that isn't stable yet.
    pr._post(pr.comments_url, {'body': comment})
+
+
def create_bug_comment(bugzilla_url, api_key, bug_id, comment):
    """Comment on a bug; failures (other than a bad bug id) are logged."""
    try:
        bugzilla = bugsy.Bugsy(bugzilla_url=bugzilla_url, api_key=api_key)
        bug = bugzilla.get(bug_id)
        bug.add_comment(comment)
    except bugsy.BugsyException as e:
        if e.code == 101:  # bug_id_does_not_exist
            raise SqsFatalException('Invalid bug-id %s' % bug_id)
        # The PR already exists at this point, so a failed comment should
        # not fail the whole request.
        logger.error('Failed to comment on bug %s: %s' % (bug_id, str(e)))
+
+
def create_from_queue_message(config, message):
    """Process a 'servo-coland' SQS message end to end.

    Downloads the hg bundle referenced by the message, applies it to the
    local integration clone, creates/updates a GitHub pull request for each
    commit that touches servo/, and leaves the PR url in a bug comment.
    """
    validate_request(config, message)

    # NOTE(review): '/rest' is appended here, so coland_bugzilla_url must be
    # the Bugzilla base URL without '/rest' (pr_body below also uses it to
    # build show_bug.cgi links) — confirm the deployed config matches.
    bmo_rest_url = '%s/rest' % config['coland_bugzilla_url'].rstrip('/')
    # Refuse to mirror private bugs into a public pull request.
    bug = get_bug(bmo_rest_url, message['bug-id'])
    if not bug:
        raise SqsFatalException('Bug %s is not public' % message['bug-id'])

    repo_cfg = [b'extensions.strip=', b'ui.interactive=False']
    with hglib.open(config['integration_path'], 'utf-8', repo_cfg) as hg_repo:
        # Recover from any previously interrupted run before starting.
        abort_rebase(hg_repo)
        clean_hg_repo(logger, config['integration_path'])

        remote_url = run_hg(logger, hg_repo, ['paths', 'default']).rstrip()
        remote_tip = run_hg(logger, hg_repo,
                            ['identify', remote_url, '-r', 'tip']).rstrip()

        prepare_hg_repo(hg_repo, remote_tip)

        # Expand remote_tip to full hash.
        remote_tip = run_hg(logger, hg_repo,
                            ['log', '-T', '{node}', '-r', remote_tip])

        bundle_revs = []
        # delete=False: the file outlives this handle (hg reads it); it is
        # unlinked in the finally block below.
        bundle_file = tempfile.NamedTemporaryFile(delete=False, suffix='.hg')
        try:
            bundle_file.close()

            # Download and unbundle.
            download_from_s3(message['bundle-url'],
                             config['coland_bucket_aws_key'],
                             config['coland_bucket_aws_secret'],
                             bundle_file.name)

            bundle_revs = extract_bundle_revs(hg_repo, bundle_file.name)
            run_hg(logger, hg_repo, ['unbundle', bundle_file.name])
            bundle_revs = rebase_bundle(hg_repo, bundle_revs, remote_tip)

            # Ensure servo files have been touched by this commit.
            if not get_touched_servo_files(hg_repo, '%s::' % bundle_revs[0]):
                raise SqsFatalException('servo/ not modified')

            # Initialise github.
            github_pr = GitHubPR(config['coland_github_token'],
                                 config['coland_github_name'],
                                 config['github_path'])

            # Name each PR branch after the bug ID.
            branch_name = 'bug-%s' % message['bug-id']
            existing_pr = github_pr.pr_from_branch(branch_name, state='open')

            # Always work with a clean state.
            delete_github_branch(github_pr.git, branch_name)
            clean_master(github_pr.git)

            pr_args = {
                'branch_name': branch_name,
                'author': expanded_author(message['author']),
                'pr_title': 'Gecko Bug %s' % message['bug-id'],
                'pr_body': '%s/show_bug.cgi?id=%s\n%s\n\n#gecko-coland %s' %
                           (config['coland_bugzilla_url'], message['bug-id'],
                            bug.summary, message['bundle-url'])
            }
            pr_url = None

            for rev in bundle_revs:
                # Only commits touching servo/ become pull requests.
                if not get_touched_servo_files(hg_repo, rev):
                    continue

                logger.info('%s pull-request from %s' %
                            ('updating' if existing_pr else 'creating', rev))

                pr_args['description'] = (
                    run_hg(logger, hg_repo,
                           ['log', '-T', '{desc}', '-r', rev]).rstrip())

                pr = create_pr_from_rev(hg_repo, github_pr, rev, pr_args)
                pr_url = pr.html_url

                # Bump the priority on fix-up commits.
                if existing_pr:
                    create_pr_comment(pr, 'replaces %s\n@bors-servo p=1\n' %
                                      existing_pr.html_url)

            # Update bugzilla with the PR url.
            create_bug_comment(bmo_rest_url, config['coland_bugzilla_apikey'],
                               message['bug-id'],
                               'Servo changes submitted as %s' % pr_url)

        finally:
            try:
                os.unlink(bundle_file.name)
            except OSError:
                pass
            # NOTE(review): the unbundled revisions are stripped even on
            # success — presumably landing to hg is handled elsewhere
            # (autoland pushes to 'destination'); confirm.
            try:
                if bundle_revs:
                    run_hg(logger, hg_repo,
                           ['strip', '--no-backup'] + bundle_revs)
            except hglib.error.CommandError:
                logger.warning('ignoring strip failure')
--- a/vcssync/mozvcssync/servo.py
+++ b/vcssync/mozvcssync/servo.py
@@ -167,20 +167,22 @@ def pulse_daemon():
     args = parser.parse_args()
 
     config = load_config(args.config)
     run_pulse_listener(config)
 
 
 def sqs_daemon():
     import argparse
+    from . import coland
     from . import sqs
 
     configure_stdout()
     logging.getLogger('sqs').setLevel(logging.INFO)
+    logging.getLogger('coland').setLevel(logging.INFO)
     logging.getLogger('git').setLevel(logging.INFO)
 
     parser = argparse.ArgumentParser()
 
     parser.add_argument('config', help='Path to config file to load')
     parser.add_argument('integration_path',
                         help='Path to integration repo clone')
     parser.add_argument('github_path',
@@ -188,19 +190,26 @@ def sqs_daemon():
     parser.add_argument('--interval', default=30, type=int,
                         help='Interval to poll for messages (seconds)')
     args = parser.parse_args()
 
     config = load_config(args.config)
     config['integration_path'] = args.integration_path
     config['github_path'] = args.github_path
 
+    # Validate the config at startup time, rather than when the first message
+    # is received.
+    coland.validate_config(config)
+
     class SqsHandler(sqs.SqsListener):
         def handle_message(self, message):
-            raise sqs.SqsFatalException('not implemented')
+            if message.get('action', '') == 'servo-coland':
+                coland.create_from_queue_message(config, message)
+            else:
+                raise sqs.SqsFatalException('invalid or missing "action"')
 
     listener = SqsHandler(region=config['sqs_coland_region'],
                           queue=config['sqs_coland_queue'],
                           error_queue=config['sqs_coland_error_queue'],
                           interval=args.interval)
     listener.listen()
 
 
--- a/vcssync/mozvcssync/util.py
+++ b/vcssync/mozvcssync/util.py
@@ -40,16 +40,26 @@ def run_hg(logger, client, args, log_out
def clean_hg_repo(logger, path):
    """Clean a Mercurial working directory."""
    logger.warn('reverting all local changes and purging %s' % path)
    # `purge` requires the extension; enable it just for this connection.
    with hglib.open(path, 'utf-8', [b'extensions.purge=']) as repo:
        run_hg(logger, repo, [b'--quiet', b'revert', b'--no-backup', b'--all'])
        run_hg(logger, repo, [b'purge', b'--all'])
 
 
def maybe_revision(hex_str):
    """Returns bool indicating if the provided string appears to be a revision
       sha (12-char abbreviated or 40-char full hexadecimal hash)."""
    # int(hex_str, 16) was previously used, but it also accepts strings that
    # are not bare hex digits (sign characters, surrounding whitespace, '0x'
    # prefixes, '_' separators), so check the characters explicitly.
    if len(hex_str) not in (12, 40):
        return False
    return all(c in '0123456789abcdefABCDEF' for c in hex_str)
+
+
 def get_github_client(token):
     """Obtain a github3 client using an API token for authentication.
 
     If the ``BETAMAX_LIBRARY_DIR`` and ``BETAMAX_CASSETTE`` environment
     variables are defined, the ``requests.Session`` used by the client
     will be hooked up to betamax and pre-recorded HTTP requests will be used
     instead of incurring actual requests. When betamax is active, the auth
     token is not relevant.
--- a/vcssync/prod-requirements.txt
+++ b/vcssync/prod-requirements.txt
@@ -1,16 +1,19 @@
 --find-links https://s3-us-west-2.amazonaws.com/moz-packages/pypi/index.html
 
 amqp==1.4.9 \
     --hash=sha256:e0ed0ce6b8ffe5690a2e856c7908dc557e0e605283d6885dd1361d79f2928908
 
 anyjson==0.3.3 \
     --hash=sha256:37812d863c9ad3e35c0734c42e0bf0320ce8c3bed82cd20ad54cb34d158157ba
 
+bugsy==0.10.1 \
+    --hash=sha256:e42ef4a1f6f7e633bdb0f699b5f41ae94754101ca91682b376bd058478ef9456
+
 dulwich==0.16.1 \
     --hash=sha256:470d0feec9d4e7aba091c02f62db7f9cc6549ffe3f623a8039f96f584159da05
 
 github3.py==0.9.6 \
     --hash=sha256:650d31dbc3f3290ea56b18cfd0e72e00bbbd6777436578865a7e45b496f09e4c
 
 kombu==3.0.37 \
     --hash=sha256:7ceab743e3e974f3e5736082e8cc514c009e254e646d6167342e0e192aee81a6