github-webhooks: AWS Lambda support for consuming GitHub web hooks (bug 1170600); r?dividehex draft
author Gregory Szorc <gps@mozilla.com>
Tue, 18 Oct 2016 17:26:12 -0700
changeset 9829 91ce002642918cc17fd105c80fceab9e3d1f85be
parent 9828 3413208e76ea58a88c43f7fb4d74e550356de9b7
push id 1339
push user bmo:gps@mozilla.com
push date Tue, 01 Nov 2016 21:46:34 +0000
reviewers dividehex
bugs 1170600
github-webhooks: AWS Lambda support for consuming GitHub web hooks (bug 1170600); r?dividehex This commit contains AWS Lambda functions for ingesting GitHub Web Hooks into Amazon Firehose+SNS, and to republish events from SNS to Pulse. It also contains Python code for deploying the code to Lambda. The AWS infrastructure configuration lives in Terraform in a separate repository. We do use some AWS APIs as part of the deploy that could be implemented in Terraform. However, their use is minimal. The code in this repository is narrowly tailored to the application, run-time level, not the infrastructure level. So I think it is acceptable. MozReview-Commit-ID: GiI4Tj3tcQL
github-webhooks/lambda-requirements.txt
github-webhooks/lambda_pulse.py
github-webhooks/lambda_receive.py
testing/vcttesting/deploy.py
testing/vcttesting/deploy_mach_commands.py
new file mode 100644
--- /dev/null
+++ b/github-webhooks/lambda-requirements.txt
@@ -0,0 +1,32 @@
+anyjson==0.3.3 \
+    --hash=sha256:37812d863c9ad3e35c0734c42e0bf0320ce8c3bed82cd20ad54cb34d158157ba
+
+amqp==1.4.8 \
+    --hash=sha256:94d2a68227e7984bd40f5f71c6839730622aff8a4633f29ede1ffdd0d240795e
+
+botocore==1.4.63 \
+    --hash=sha256:34c3d5ae027fa5b4f8f732d1ef9a63b1e03d02b87ae2e77d9efcc725dfc53ef6
+
+boto3==1.4.1 \
+    --hash=sha256:0d5e1d546e3974437f3468ea96abac95ba99753830138c3fb14718c7fdf6121e
+
+docutils==0.12 \
+    --hash=sha256:c7db717810ab6965f66c8cf0398a98c9d8df982da39b4cd7f162911eb89596fa
+
+futures==3.0.5 \
+    --hash=sha256:f7f16b6bf9653a918a03f1f2c2d62aac0cd64b1bc088e93ea279517f6b61120b
+
+jmespath==0.9.0 \
+    --hash=sha256:ade5261b0d7d34b6f53accc91e6881b579b40161ed575e6ac465de5edad32815
+
+kombu==3.0.30 \
+    --hash=sha256:fecb34edf4852064f385dbf53969047b22353d3608a576497dd05f71b943fae6
+
+python-dateutil==2.5.3 \
+    --hash=sha256:598499a75be2e5e18a66f12c00dd47a069de24794effeda4228bfc760f44f527
+
+s3transfer==0.1.8 \
+    --hash=sha256:2607489b0babc041e26a62b431b90fda7eb818af8aa99dbbbbba9cf087f9572b
+
+six==1.10.0 \
+    --hash=sha256:0ff78c403d9bccf5a425a6d31a12aa6b47f1c21ca4dc2573a7e2f32a97335eb1
new file mode 100644
--- /dev/null
+++ b/github-webhooks/lambda_pulse.py
@@ -0,0 +1,71 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This Lambda function re-publishes GitHub web hooks on Pulse, an
+# AMQP server. The function is meant to be triggered by a new message
+# on an SNS topic. (See ``lambda_receive.py``.)
+
+import datetime
+import json
+import kombu
+
+import mozilla_credentials
+
+
+def get_connection():
+    """Build the TLS ``kombu.Connection`` used to publish to Pulse."""
+    settings = dict(
+        hostname='pulse.mozilla.org', port=5671,
+        userid='github-webhooks',
+        password=mozilla_credentials.pulse_password,
+        virtual_host='/',
+        ssl=True, connect_timeout=5,
+    )
+    return kombu.Connection(**settings)
+
+
+def handler(event, context):
+    """Republish each GitHub web hook in the SNS ``event`` to Pulse.
+
+    Raises ``Exception`` when ``event`` has no ``Records`` key.
+    """
+    if 'Records' not in event:
+        raise Exception('event payload does not match expected; are you using SNS?')
+
+    for record in event['Records']:
+        m = json.loads(record['Sns']['Message'])
+        p = m['body']
+
+        # Never republish private repository data. Skip only this record:
+        # returning here would silently drop the rest of the batch.
+        if p['repository']['private']:
+            print('repository is private; ignoring')
+            continue
+
+        # The routing key (used for filtering/subscriptions) is composed
+        # of the full repo name plus the event name.
+        routing_key = '%s/%s' % (p['repository']['full_name'], m['event'])
+        # TODO use /v1 once someone unhacks the exchange on the server.
+        exchange = 'exchange/github-webhooks/v2'
+
+        print('connecting to pulse...')
+        c = get_connection()
+        c.connect()
+        with c:
+            ex = kombu.Exchange(exchange, type='topic')
+            producer = c.Producer(exchange=ex, routing_key=routing_key,
+                                  serializer='json')
+            data = {
+                'event': m['event'],
+                'request_id': m['request_id'],
+                'payload': p,
+                '_meta': {
+                    'exchange': exchange,
+                    'routing_key': routing_key,
+                    'serializer': 'json',
+                    'sent': datetime.datetime.utcnow().isoformat(),
+                }
+            }
+            producer.publish(data)
+            print('published!')
new file mode 100644
--- /dev/null
+++ b/github-webhooks/lambda_receive.py
@@ -0,0 +1,94 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This Lambda function takes GitHub web hook events and sends them to
+# Firehose and SNS. Firehose takes care of long-term retention in S3.
+# SNS is used to trigger other Lambda functions.
+#
+# Ideally, these would be separate Lambda functions. However, since
+# GitHub web hooks are ingested by an API Gateway and since you can
+# only have 1 Lambda per HTTP endpoint+method, our hands are tied.
+
+import boto3
+import json
+
+
+# Set of GitHub event names (``X-GitHub-Event`` values) we consider public.
+#
+# We don't publish all events because some events contain information that
+# is private/confidential and/or is related to the operational configuration
+# of repositories or organizations.
+#
+# We explicitly list events that will be published to prevent unwanted
+# disclosure of data from new event types.
+PUBLIC_EVENTS = {
+    'commit_comment',
+    'create', # branch or tag created
+    'delete', # branch or tag deleted
+    'deployment',
+    'deployment_status',
+    'fork',
+    'gollum', # wiki page update
+    'issue_comment',
+    'issues',
+    # We don't publish membership changes because those are semi-private.
+    # 'member', # user added as collaborator
+    # 'membership', # team membership changed
+    'page_build',
+    # We don't republish this to lessen the chances that accidental repo
+    # publication will result in consumers grabbing its content.
+    # 'public', # repo changes from private to public
+    'pull_request_review_comment',
+    'pull_request_review',
+    'pull_request',
+    'push',
+    # There are privacy and security implications with publishing repo changes.
+    # 'repository',
+    'release',
+    'status',
+    # Team membership isn't relevant to the public either.
+    # 'team_add',
+    'watch',
+}
+
+
+# AWS clients, created once at module import rather than per invocation.
+firehose = boto3.client('firehose')
+sns = boto3.client('sns')
+
+
+def handler(event, context):
+    """Send a GitHub web hook to Firehose and fan it out over SNS.
+
+    Every event goes to the ``github-webhooks-all`` topic; only allowed
+    events for non-private repositories also go to the public topic.
+    """
+    if 'params' not in event:
+        raise Exception('event payload does not match expected; are you using the API gateway?')
+
+    headers = event['params']['header']
+    event_name = headers['X-GitHub-Event']
+    request_id = headers['X-GitHub-Delivery']
+    body = event['body-json']
+    data = json.dumps({'event': event_name, 'request_id': request_id,
+                       'body': body}, sort_keys=True)
+
+    print('sending to firehose')
+    firehose.put_record(DeliveryStreamName='github-webhooks',
+                        Record={'Data': data})
+
+    print('sending to github-webhooks-all SNS')
+    sns.publish(Message=data,
+                TopicArn='arn:aws:sns:us-west-2:699292812394:github-webhooks-all')
+
+    if event_name not in PUBLIC_EVENTS:
+        print('not publishing to public channel because event %s is not allowed' % event_name)
+        return
+    if body['repository']['private']:
+        print('not publishing to public channel because repo is private')
+        return
+
+    print('sending to github-webhooks-public SNS')
+    sns.publish(Message=data,
+                TopicArn='arn:aws:sns:us-west-2:699292812394:github-webhooks-public')
--- a/testing/vcttesting/deploy.py
+++ b/testing/vcttesting/deploy.py
@@ -1,19 +1,25 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, unicode_literals
 
+import io
 import json
 import logging
 import os
 from pipes import quote
+import shutil
 import subprocess
+import tempfile
+import zipfile
+
+import boto3
 
 from .vctutil import (
     get_and_write_vct_node,
 )
 
 HERE = os.path.abspath(os.path.dirname(__file__))
 ROOT = os.path.normpath(os.path.join(HERE, '..', '..'))
 ANSIBLE = os.path.join(ROOT, 'ansible')
@@ -80,8 +86,86 @@ def hgmo_strip(repo, rev, verbosity=0):
                         verbosity=verbosity)
 
 
 def hgmo_reclone_repos(repos, verbosity=0):
     extra = {'repos': repos}
 
     return run_playbook('hgmo-reclone-repos', extra_vars=extra,
                         verbosity=verbosity)
+
+def github_lambda_deploy_package(pulse_password):
+    """Build the Lambda deployment package and return it as zip bytes.
+
+    ``pulse_password`` is embedded in a generated ``mozilla_credentials``
+    module. Raises ``subprocess.CalledProcessError`` if pip fails.
+    """
+    d = tempfile.mkdtemp()
+    src = os.path.join(ROOT, 'github-webhooks')
+    PIP = os.path.join(ROOT, 'venv', 'bin', 'pip')
+    try:
+        # Install Python packages.
+        subprocess.check_call([
+            PIP, 'install', '-t', d,
+            '-r', os.path.join(src, 'lambda-requirements.txt'),
+            '--require-hashes',
+        ])
+
+        # Copy relevant files from the source directory.
+        for p in os.listdir(src):
+            if p.endswith('.py'):
+                shutil.copyfile(os.path.join(src, p), os.path.join(d, p))
+
+        # Make a module containing credentials. %r emits a valid Python
+        # literal, so quotes or backslashes in the password cannot break
+        # (or inject code into) the generated module.
+        line = 'pulse_password = %r\n' % (pulse_password,)
+        with open(os.path.join(d, 'mozilla_credentials.py'), 'wb') as fh:
+            fh.write(line.encode('utf-8'))
+
+        # Now make a zip file.
+        zf = io.BytesIO()
+        with zipfile.ZipFile(zf, 'w') as z:
+            for root, dirs, files in os.walk(d):
+                for f in sorted(files):
+                    full = os.path.join(root, f)
+                    z.write(full, os.path.relpath(full, d))
+
+        return zf.getvalue()
+    finally:
+        shutil.rmtree(d)
+
+
+def github_webhook_lambda(pulse_password):
+    """Upload the GitHub web hook package to S3 and update both Lambdas.
+
+    ``pulse_password`` is passed to ``github_lambda_deploy_package``.
+    After each function is updated, every version other than the one
+    just published and ``$LATEST`` is deleted.
+    """
+    package = github_lambda_deploy_package(pulse_password)
+    bucket = 'moz-github-webhooks'
+    key = 'github_lambda.zip'
+
+    # Both functions share one code package, so it is uploaded to S3 once
+    # and referenced from there.
+    boto3.client('s3').put_object(
+        Bucket=bucket,
+        Key=key,
+        Body=package,
+        ContentType='application/zip',
+    )
+
+    lambda_client = boto3.client('lambda', region_name='us-west-2')
+
+    for name in ('github-webhooks-receive', 'github-webhooks-pulse'):
+        published = lambda_client.update_function_code(
+            FunctionName=name, S3Bucket=bucket, S3Key=key, Publish=True)
+
+        # Lambda versions published code, so prune old versions as part
+        # of the upload to stop them piling up.
+        listing = lambda_client.list_versions_by_function(FunctionName=name)
+        for version in listing['Versions']:
+            if version['Version'] in (published['Version'], '$LATEST'):
+                continue
+            lambda_client.delete_function(
+                FunctionName=version['FunctionArn'],
+                Qualifier=version['Version'])
--- a/testing/vcttesting/deploy_mach_commands.py
+++ b/testing/vcttesting/deploy_mach_commands.py
@@ -38,16 +38,25 @@ class DeployCommands(object):
     @Command('mozreview-create-repo', category='deploy',
              description='Create a new review repository')
     @CommandArgument('--verbosity', type=int,
                      help='How verbose to be with output')
     def mozreview_create_repo(self, verbosity=None):
         from vcttesting.deploy import mozreview_create_repo as deploy
         return deploy(verbosity=verbosity)
 
+    @Command('github-webhooks', category='deploy',
+             description='GitHub Web Hooks Lambda functions')
+    def github_webhooks(self):
+        """Prompt for the Pulse password (unechoed) and deploy the Lambdas."""
+        from getpass import getpass
+        from vcttesting.deploy import github_webhook_lambda
+
+        github_webhook_lambda(getpass('Please enter password for Pulse user: '))
+
     @Command('hgmo', category='deploy',
              description='Deploy hg.mozilla.org')
     @CommandArgument('--verbosity', type=int,
                      help='How verbose to be with output')
     def hgmo(self, verbosity=None):
         from vcttesting.deploy import deploy_hgmo as deploy
         return deploy(verbosity=verbosity)