vcssync: synchronize upstream changes to stylo repository (bug 1317525) draft
authorGregory Szorc <gps@mozilla.com>
Tue, 15 Nov 2016 12:49:58 -0800
changeset 9913 9605f84052f16b5515efc02f13593b86754d18ad
parent 9907 3dc46e8380a5c14114d74ae8f47dbc526c058c89
push id1375
push userbmo:gps@mozilla.com
push dateThu, 17 Nov 2016 00:05:37 +0000
bugs1317525
vcssync: synchronize upstream changes to stylo repository (bug 1317525) THIS COMMIT IS INCOMPLETE. Missing pieces: * MOAR tests * Ansible/Terraform provisioning * Docs ------ Over the past few years, several groups have also wanted some form or another of version control "syncing." Example use cases include: * Mirroring a Mercurial repository to Git[Hub] * Mirroring a Git[Hub] repository to Mercurial * Exporting a subdirectory of mozilla-central to Git[Hub] to facilitate development on Git[Hub] * Importing content from a Git repository into mozilla-central * Converting GitHub Pull Requests into MozReview review requests VCS Sync facilitates #1. And we have haphazard implementations of some of the other bullet points. Other items are pipe dreams. As part of vendoring Servo and more tightly integrating the Servo and Firefox CI systems, we'll be rolling out a somewhat complicated change to how Servo and Firefox commits are synchronized and landed. This requires writing some VCS syncing code. This is a known consumer of a somewhat more complicated VCS syncing solution that is a few weeks to months off. In the short term, the Stylo team wants a stop-gap mechanism to "synchronize" changes from Servo and Firefox upstreams to their "stylo" incubator repository. This commit implements said mechanism. This commit provides a daemon specialized for the Stylo team's immediate needs. The daemon connects to Pulse and listens for messages saying there has been a push to the GitHub Servo upstream or the hgmo Firefox upstream. When a push occurs, the changes are imported/merged/integrated into the "stylo" repository. Because we have future needs for more advanced VCS syncing functionality and because testing and code reuse is important, I've factored the new code as a standalone "vcs sync" tool. There is some generic code for importing a Git commit into a subdirectory of a Mercurial repository. The bulk of the remaining code is specific to the immediate needs of the Stylo team. 
No sense being an architect astronaut until future needs dictate the abstractions. MozReview-Commit-ID: 8LJmAIzDExh
create-test-environment
testing/vcttesting/testing.py
vcssync/mozvcssync/__init__.py
vcssync/mozvcssync/stylo.py
vcssync/mozvcssync/sync.py
vcssync/setup.py
vcssync/tests/helpers.sh
vcssync/tests/hghave
vcssync/tests/test-stylo-git-import.t
--- a/create-test-environment
+++ b/create-test-environment
@@ -152,16 +152,20 @@ cd ../..
 cd pylib/mozreviewbots
 python setup.py develop
 cd ../..
 
 cd pylib/vcsreplicator
 python setup.py develop
 cd ../..
 
+# vcssync sits directly under the repository root, so go up one level only.
+cd vcssync
+python setup.py develop
+cd ..
+
 cd hghooks
 python setup.py develop
 cd ..
 
 cd testing
 python setup.py develop
 cd ..
 
--- a/testing/vcttesting/testing.py
+++ b/testing/vcttesting/testing.py
@@ -17,24 +17,26 @@ from coverage import coverage
 HERE = os.path.dirname(os.path.abspath(__file__))
 ROOT = os.path.normpath(os.path.join(HERE, '..', '..'))
 
 
 PYTHON_COVERAGE_DIRS = (
     'hgext',
     'pylib',
     'hghooks',
+    'vcssync',
 )
 
 # Directories containing Python unit tests.
 UNIT_TEST_DIRS = [
     'autoland/tests',
     'git/tests',
     'hgserver/tests',
     'pylib',
+    'vcssync/tests',
 ]
 
 # Directories whose Python unit tests we should ignore.
 UNIT_TEST_IGNORES = (
     'pylib/Bugsy',
     'pylib/flake8',
     'pylib/mccabe',
     'pylib/pep8',
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/vcssync/mozvcssync/stylo.py
@@ -0,0 +1,240 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import logging
+import os
+import socket
+import sys
+import time
+
+from ConfigParser import (
+    RawConfigParser,
+)
+
+import hglib
+from hglib.util import (
+    cmdbuilder,
+)
+import kombu
+
+from . import (
+    sync,
+)
+
+
+# Passed as the ``user`` of the Mercurial commits created by this module.
+COMMIT_AUTHOR = b'VCS Sync <vcs-sync@mozilla.com>'
+
+# Module-level logger; records are emitted under 'mozvcssync.stylo'.
+logger = logging.getLogger('mozvcssync.stylo')
+
+
+def import_git_commit(c, commit):
+    """Vendor a servo/servo Git commit into the Firefox Mercurial repo.
+
+    ``c`` is the config dict; its ``hg_path`` and ``servo_git_path`` entries
+    locate the Mercurial and Git repositories. A snapshot of ``commit`` is
+    committed under the ``servo`` sub-directory on top of the
+    ``integration`` revision, authored by ``COMMIT_AUTHOR``.
+    """
+    hg_path = c['hg_path']
+    git_path = c['servo_git_path']
+    commit_message = b'vendor servo commit %s' % commit
+
+    # fnmatch patterns for paths that are left out of the import.
+    skipped = (
+        b'tests/wpt/css-tests/*',
+        b'tests/wpt/web-platform-tests/*',
+    )
+
+    sync.import_git_to_mercurial_subtree(
+        hg_path,
+        git_path,
+        commit,
+        b'integration',
+        b'servo',
+        commit_message,
+        COMMIT_AUTHOR,
+        ignores=skipped,
+    )
+
+
+def fetch_and_vendor(c, ref, commit):
+    """Fetch ``ref`` from the Servo Git upstream, then vendor ``commit``.
+
+    The fetch goes through the ``github`` remote of the local Git repo at
+    ``c['servo_git_path']``, pointed at ``c['servo_git_fetch_url']``.
+    """
+    sync.fetch_git_ref(
+        git=c['git'],
+        path=c['servo_git_path'],
+        remote=b'github',
+        url=c['servo_git_fetch_url'],
+        ref=ref,
+    )
+    import_git_commit(c, commit)
+
+
+def merge_hg(c, url, revision):
+    """Merge a Mercurial revision into our target revision.
+
+    ``revision`` is pulled from ``url`` when the repository at
+    ``c['hg_path']`` does not have it yet. The working directory is then
+    reset to ``integration`` and purged, ``revision`` is merged in, and the
+    merge is committed as ``COMMIT_AUTHOR``.
+    """
+    logger.warning('merging %s from %s into %s', revision, url, c['hg_path'])
+
+    hg_config = [
+        b'extensions.purge=',
+    ]
+
+    with hglib.open(c['hg_path'], encoding='utf-8', configs=hg_config) as repo:
+        # Ensure the requested revision is present in the store.
+        if revision not in repo:
+            logger.warning('pulling %s from %s', revision, url)
+            repo.pull(url, revision)
+
+        # Now update to our target revision so we can do a merge.
+        logger.warning('updating and purging hg checkout')
+        repo.update(rev=b'integration', clean=True)
+        repo.rawcommand(cmdbuilder(b'purge', all=True))
+
+        # The destination head should be vanilla and a union of the
+        # canonical upstream + servo, which is in a directory tree that
+        # doesn't exist in the upstream. Disabling file merges via the
+        # "fail" merge tool asserts this precondition.
+        logger.warning('merging...')
+        repo.merge(rev=revision, tool=b':fail')
+        logger.warning('committing...')
+        # commit() returns (rev, node); only the node is reported.
+        node = repo.commit(b'merge %s' % revision, user=COMMIT_AUTHOR)[1]
+        logger.warning('committed %s', node)
+
+
+def push(c):
+    """Push the ``integration`` bookmark to ``c['hg_push_url']``.
+
+    The push is made from the Mercurial repository at ``c['hg_path']``.
+    """
+    push_url = c['hg_push_url']
+    logger.warning('pushing to %s', push_url)
+
+    with hglib.open(c['hg_path'], encoding='utf-8') as repo:
+        repo.push(push_url, bookmark=[b'integration'])
+
+
+def run_pulse_listener(c):
+    """Consume Pulse messages and synchronize the stylo repository.
+
+    Connects to the broker described by the ``pulse_*`` entries of the
+    config dict ``c`` and consumes from two queues, one bound to the GitHub
+    exchange and one bound to the hgmo exchange:
+
+    * A GitHub ``push`` event for ``c['servo_github_name']`` on ref
+      ``c['servo_fetch_ref']`` is fetched, vendored and pushed.
+    * An hgmo ``changegroup.1`` message for ``c['hg_upstream']`` is merged
+      and pushed.
+
+    Any other message is acknowledged and dropped. An exception raised
+    while handling a message propagates to the caller and the message is
+    left unacknowledged. The loop runs until ``KeyboardInterrupt``.
+    """
+    conn = kombu.Connection(hostname=c['pulse_host'],
+                            port=c['pulse_port'],
+                            userid=c['pulse_userid'],
+                            password=c['pulse_password'],
+                            ssl=c['pulse_ssl'])
+    conn.connect()
+
+    # passive=True only verifies that the exchanges already exist.
+    gh_exchange = kombu.Exchange(c['pulse_github_exchange'], type='topic',
+                                 channel=conn)
+    gh_exchange.declare(passive=True)
+    hg_exchange = kombu.Exchange(c['pulse_hgmo_exchange'], type='topic',
+                                 channel=conn)
+    hg_exchange.declare(passive=True)
+
+    gh_queue = kombu.Queue(name=c['pulse_github_queue'], exchange=gh_exchange,
+                           durable=True,
+                           routing_key=c['pulse_github_routing_key'],
+                           exclusive=False, auto_delete=False, channel=conn)
+    hg_queue = kombu.Queue(name=c['pulse_hgmo_queue'], exchange=hg_exchange,
+                           routing_key=c['pulse_hgmo_routing_key'],
+                           durable=True,
+                           exclusive=False, auto_delete=False, channel=conn)
+
+    def on_message(body, message):
+        exchange = message.delivery_info['exchange']
+        if exchange == c['pulse_github_exchange']:
+            # We only care about push events.
+            if body['event'] != 'push':
+                message.ack()
+                return
+
+            # We only care about activity to the configured repository.
+            repo_name = body['payload']['repository']['full_name']
+            if repo_name != c['servo_github_name']:
+                message.ack()
+                return
+
+            ref = body['payload']['ref']
+            logger.warning('observed push to %s of %s', ref, repo_name)
+
+            if ref != c['servo_fetch_ref']:
+                message.ack()
+                return
+
+            head_commit = body['payload']['after']
+            fetch_and_vendor(c, ref, head_commit)
+            push(c)
+            message.ack()
+
+        elif exchange == c['pulse_hgmo_exchange']:
+            if body['payload']['type'] != 'changegroup.1':
+                message.ack()
+                return
+
+            repo_url = body['payload']['data']['repo_url']
+            if repo_url != c['hg_upstream']:
+                message.ack()
+                return
+
+            heads = body['payload']['data']['heads']
+            if len(heads) != 1:
+                raise Exception('unexpected heads count in upstream')
+
+            revision = heads[0].encode('ascii')
+            merge_hg(c, repo_url, revision)
+            push(c)
+            message.ack()
+
+    consumer = conn.Consumer([gh_queue, hg_queue], callbacks=[on_message],
+                             auto_declare=False)
+    # queue.declare() declares the exchange, which isn't allowed by the server.
+    # So call the low-level APIs to only declare the queue itself.
+    for queue in consumer.queues:
+        queue.queue_declare()
+        queue.queue_bind()
+
+    try:
+        # Enter the consumer once around the whole loop instead of once per
+        # poll, so it is not torn down and set up again every second.
+        with consumer:
+            while True:
+                try:
+                    conn.drain_events(timeout=1.0)
+                except socket.timeout:
+                    # No message within the poll interval; keep waiting.
+                    pass
+    except KeyboardInterrupt:
+        pass
+
+
+def load_config(path):
+    """Load the ``[stylo]`` section of the ini file at ``path`` into a dict.
+
+    All options are returned as strings, except ``pulse_port`` (int) and
+    ``pulse_ssl`` (bool), which are converted when present. A ``git`` entry
+    naming the Git executable is always added.
+
+    Raises ``ConfigParser.NoSectionError`` when there is no ``[stylo]``
+    section, which is also what happens when ``path`` cannot be read.
+    """
+    c = RawConfigParser()
+    c.read(path)
+
+    d = dict(c.items('stylo'))
+    d['git'] = b'git'
+
+    # Only the Pulse listener needs these, so commands that never talk to
+    # Pulse must not fail just because the options are absent.
+    if c.has_option('stylo', 'pulse_port'):
+        d['pulse_port'] = c.getint('stylo', 'pulse_port')
+    if c.has_option('stylo', 'pulse_ssl'):
+        d['pulse_ssl'] = c.getboolean('stylo', 'pulse_ssl')
+
+    return d
+
+
+def cli():
+    """Command-line entry point for the ``stylo-sync`` console script.
+
+    Parses the subcommand, loads the config file named by the
+    ``STYLO_SYNC_CONFIG`` environment variable, routes log records to
+    stdout and runs the selected subcommand. Exits with status 1 when the
+    environment variable is missing or the command is unknown.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser()
+
+    subparsers = parser.add_subparsers(title='subcommands', dest='command')
+
+    subparsers.add_parser('pulse-listen')
+
+    sp = subparsers.add_parser('fetch-git-ref',
+                               help='Fetch a ref from a Git repo')
+    sp.add_argument('ref', help='ref to fetch')
+
+    sp = subparsers.add_parser('import-git-commit',
+                               help='Import a git commit into the hg repo')
+    sp.add_argument('commit', help='commit to import')
+
+    args = parser.parse_args()
+
+    if b'STYLO_SYNC_CONFIG' not in os.environ:
+        print('STYLO_SYNC_CONFIG must be defined')
+        sys.exit(1)
+
+    config = load_config(os.environ['STYLO_SYNC_CONFIG'])
+
+    command = args.command
+
+    root = logging.getLogger()
+    handler = logging.StreamHandler(sys.stdout)
+    formatter = logging.Formatter('%(name)s %(message)s')
+    formatter.converter = time.gmtime
+    handler.setFormatter(formatter)
+    root.addHandler(handler)
+
+    if command == 'pulse-listen':
+        run_pulse_listener(config)
+        sys.exit(0)
+    elif command == 'fetch-git-ref':
+        # fetch_git_ref() takes (git, path, remote, url, ref). Use the same
+        # remote name that fetch_and_vendor() uses.
+        sync.fetch_git_ref(config['git'],
+                           config['servo_git_path'],
+                           b'github',
+                           config['servo_git_fetch_url'],
+                           args.ref)
+        return 0
+    elif command == 'import-git-commit':
+        import_git_commit(config, args.commit)
+    else:
+        print('error: unknown command: %s' % command)
+        sys.exit(1)
new file mode 100644
--- /dev/null
+++ b/vcssync/mozvcssync/sync.py
@@ -0,0 +1,103 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, unicode_literals
+
+import errno
+import fnmatch
+import logging
+import os
+import shutil
+import subprocess
+
+import dulwich.index
+import dulwich.repo
+import hglib
+from hglib.util import (
+    cmdbuilder,
+)
+
+logger = logging.getLogger('mozvcssync.sync')
+
+
+def fetch_git_ref(git, path, remote, url, ref):
+    """Fetch a remote Git repo+ref into a local Git repo.
+
+    ``git`` is the Git executable to run and ``path`` the local repository
+    the commands run in. The remote named ``remote`` is pointed at ``url``
+    (and created first if the repository does not have it yet), then
+    ``ref`` is fetched from it.
+
+    Raises ``subprocess.CalledProcessError`` if a Git command fails.
+    """
+    logger.warning('fetching %s from %s into %s', ref, url, path)
+
+    # ``git remote set-url`` fails for a remote that doesn't exist, which is
+    # the case the first time a fresh mirror is used. Add it in that case.
+    known = subprocess.check_output([git, b'remote'], cwd=path).splitlines()
+    action = b'set-url' if remote in known else b'add'
+    subprocess.check_call([git, b'remote', action, remote, url], cwd=path)
+    subprocess.check_call([git, b'fetch', remote, ref], cwd=path)
+
+
+def import_git_to_mercurial_subtree(hg_repo, git_repo, git_commit,
+                                    hg_base_revision, hg_subpath,
+                                    message, user, ignores=None):
+    """Import a Git commit to a sub-directory of a Mercurial repo.
+
+    A snapshot of the Git repository at revision ``git_commit`` is
+    imported into the specified Mercurial repository at ``hg_base_revision``
+    under sub-directory ``hg_subpath`` and committed with ``message`` as
+    ``user``.
+
+    Paths matching the ``fnmatch`` patterns defined in the ``ignores``
+    iterable are not imported.
+
+    History of the Git repository is not preserved.
+
+    This is a "dumb" import in that no real attempt is made to preserve
+    history, SHA-1 mappings, etc. The destination directory is completely
+    overwritten with the contents from the source. Only use this when the
+    source repository is canonical and the destination directory doesn't
+    receive any changes that aren't from the source.
+    """
+    ignores = ignores or []
+
+    logger.warning('importing %s from %s into %s on top of %s',
+                   git_commit, git_repo, hg_repo, hg_base_revision)
+
+    # TODO obtain lock on hg repo
+
+    hg_config = [
+        b'extensions.automv=',
+        b'extensions.purge=',
+    ]
+
+    grepo = dulwich.repo.Repo(git_repo)
+    try:
+        gcommit = grepo.get_object(git_commit)
+
+        with hglib.open(hg_repo, encoding='utf-8',
+                        configs=hg_config) as hrepo:
+            # First thing we do is update the hg working directory to
+            # requested destination revision and ensure it is clean.
+            hrepo.update(rev=hg_base_revision, clean=True)
+            hrepo.rawcommand(cmdbuilder(b'purge', all=True))
+
+            # Then clear out the subdirectory we're importing into.
+            dest_dir = os.path.join(hg_repo, hg_subpath)
+            if os.path.exists(dest_dir):
+                shutil.rmtree(dest_dir)
+            os.mkdir(dest_dir)
+
+            # Now copy files from the git commit into the hg repo. Rather
+            # than worry about maintaining a Git working copy, we use
+            # dulwich to access the Git repo data directly.
+            seen_dirs = set()
+            for entry in grepo.object_store.iter_tree_contents(gcommit.tree):
+                path = entry.path
+
+                if any(fnmatch.fnmatch(path, ignore) for ignore in ignores):
+                    continue
+
+                dir_path = os.path.dirname(path)
+                if dir_path and dir_path not in seen_dirs:
+                    try:
+                        os.makedirs(os.path.join(dest_dir, dir_path))
+                    except OSError as e:
+                        if e.errno != errno.EEXIST:
+                            raise
+                    # Remember the directory even if it already existed
+                    # (e.g. created as the parent of a deeper path), so
+                    # makedirs isn't retried for every file inside it.
+                    seen_dirs.add(dir_path)
+
+                dest_path = os.path.join(dest_dir, path)
+                blob = grepo.get_object(entry.sha)
+                dulwich.index.build_file_from_blob(blob, entry.mode,
+                                                   dest_path)
+
+            # commit() returns (rev, node); only the node is reported.
+            node = hrepo.commit(message=message, addremove=True, user=user)[1]
+            logger.warning('commit created revision %s', node)
+    finally:
+        # Release the Git repo's open files; this function is called
+        # repeatedly by a long-running daemon.
+        grepo.close()
new file mode 100644
--- /dev/null
+++ b/vcssync/setup.py
@@ -0,0 +1,23 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='mozvcssync',
+    version='0.1',
+    description='Synchronize changes across VCS repositories',
+    url='https://mozilla-version-control-tools.readthedocs.io/',
+    author='Mozilla',
+    author_email='dev-version-control@lists.mozilla.org',
+    license='MPL 2.0',
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'Programming Language :: Python :: 2.7',
+    ],
+    packages=find_packages(),
+    entry_points={
+        'console_scripts': [
+            'stylo-sync=mozvcssync.stylo:cli',
+        ],
+    },
+    # Every third-party package the mozvcssync modules import:
+    # dulwich (sync.py), kombu (stylo.py) and hglib (both).
+    install_requires=[
+        'dulwich',
+        'kombu',
+        'Mercurial',
+        'python-hglib',
+    ],
+)
new file mode 100644
--- /dev/null
+++ b/vcssync/tests/helpers.sh
@@ -0,0 +1,13 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Quoted so a test directory path containing spaces still resolves.
+. "$TESTDIR/hgserver/tests/helpers.sh"
+
+# make git commits deterministic and environment agnostic
+export GIT_AUTHOR_NAME=test
+export GIT_AUTHOR_EMAIL=test@example.com
+export GIT_AUTHOR_DATE='Thu Jan 1 00:00:00 1970 +0000'
+export GIT_COMMITTER_NAME=test
+export GIT_COMMITTER_EMAIL=test@example.com
+export GIT_COMMITTER_DATE='Thu Jan 1 00:00:00 1970 +0000'
new file mode 100755
--- /dev/null
+++ b/vcssync/tests/hghave
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+# Thin wrapper that runs the repository's shared ``testing/hghave.py``
+# script from this tests directory.
+
+import os
+
+# This file's directory; the repository root is two levels above it.
+HERE = os.path.abspath(os.path.dirname(__file__))
+REPO_ROOT = os.path.join(HERE, '..', '..')
+# Python 2 ``execfile``: runs the shared script in this module's namespace.
+execfile(os.path.join(REPO_ROOT, 'testing', 'hghave.py'))
+
new file mode 100644
--- /dev/null
+++ b/vcssync/tests/test-stylo-git-import.t
@@ -0,0 +1,40 @@
+#require hgmodocker
+
+  $ . $TESTDIR/vcssync/tests/helpers.sh
+
+$ hgmoenv
+
+  $ cat > stylo.ini << EOF
+  > [stylo]
+  > servo_git_fetch_url = file://$TESTTMP/servo_upstream
+  > servo_git_path = $TESTTMP/git_mirror
+  > EOF
+
+  $ export STYLO_SYNC_CONFIG=$TESTTMP/stylo.ini
+
+  $ git init servo_upstream
+  Initialized empty Git repository in $TESTTMP/servo_upstream/.git/
+  $ cd servo_upstream
+
+  $ echo initial > foo
+  $ git add foo
+  $ git commit -m initial
+  [master (root-commit) 012a4dc] initial
+   1 file changed, 1 insertion(+)
+   create mode 100644 foo
+  $ echo commit-1 > foo
+  $ git commit -a -m 'commit 1'
+  [master 2492779] commit 1
+   1 file changed, 1 insertion(+), 1 deletion(-)
+  $ git checkout -b branch-1
+  $ echo branch-1 > foo
+  $ git commit -a -m 'branch 1 commit 1'
+  $ cd ..
+
+  $ git init --bare git_mirror
+
+  $ stylo-sync fetch-git-ref master
+
+Cleanup
+
+$ hgmo clean