INCOMPLETE vcssync: support integrating Git refs into a Mercurial repo draft
author Gregory Szorc <gps@mozilla.com>
Fri, 21 Apr 2017 14:29:19 -0700
changeset 10859 9aeb58f3dae697dd58b3ea9a5efbc693751b8eba
parent 10858 6fa6b7f01a10d492927edbb65e041ea368fae4b0
push id 1638
push user bmo:gps@mozilla.com
push date Sat, 22 Apr 2017 00:35:48 +0000
INCOMPLETE vcssync: support integrating Git refs into a Mercurial repo MozReview-Commit-ID: 9XJzKzY7phV
vcssync/mozvcssync/git2hg.py
--- a/vcssync/mozvcssync/git2hg.py
+++ b/vcssync/mozvcssync/git2hg.py
@@ -10,16 +10,21 @@ import os
 import subprocess
 import tempfile
 
 import dulwich.repo
 import hglib
 
 from .gitrewrite import (
     commit_metadata_rewriter,
+    find_source_commit,
+    RewriteError,
+)
+from .gitrewrite.integrate import (
+    prepare_ref_for_integration,
 )
 from .gitrewrite.linearize import (
     linearize_git_repo,
 )
 from .util import (
     monitor_hg_repo,
 )
 
@@ -284,8 +289,136 @@ def linearize_git_repo_to_hg(git_source_
 
     # TODO so hacky. Relies on credentials in the environment.
     if shamap_s3_upload_url and shamap_changed:
         subprocess.check_call([
             b'aws', b's3', b'cp', rev_map, shamap_s3_upload_url
         ])
 
     return result
+
+
+def integrate_ephemeral_git_ref(
+        git_base_url, git_base_ref,
+        git_head_url, git_head_ref,
+        git_repo_path,
+        hg_repo_path,
+        git_base_revision=None,
+        git_head_revision=None,
+        exclude_dirs=None,
+        rebase_to_ref=None,
+        map_via_source_revision_key=None,
+        move_to_subdir=None,
+        find_copies_harder=False,
+        skip_submodules=False,
+        similarity=50):
+    """Import a Git ref into a Mercurial repo in an ephemeral manner.
+
+    This function implements the high-level logic of importing a Git ref into
+    a Mercurial repository. It is commonly used to operate on "pull requests."
+    The import is done "ephemerally," which means the conversion isn't expected
+    to be permanent and certain state (like the Git to Mercurial hash map) isn't
+    persisted to disk.
+
+    The base and head refs are first normalized by
+    ``prepare_ref_for_integration()`` (always with the ``'squash'`` shape
+    strategy for now). The ``integrate_commit`` it reports is then converted
+    with ``run_hg_convert()`` against a temporary copy of the Mercurial
+    repo's ``.hg/shamap``.
+
+    ``rebase_to_ref`` and ``map_via_source_revision_key`` must be passed
+    together. When they are, each parent found in
+    ``merge_info['incoming_parents']`` is replaced by the id of the commit
+    that ``find_source_commit()`` returns for it; other parents are kept.
+
+    Raises ``ValueError`` when only one of ``rebase_to_ref`` and
+    ``map_via_source_revision_key`` is set, and ``RewriteError`` when a
+    parent has no rewritten counterpart.
+
+    TODO nothing is returned yet and the ``shamap`` changes reported by
+    ``monitor_hg_repo()`` are not consumed.
+    """
+    if rebase_to_ref and not map_via_source_revision_key:
+        raise ValueError('map_via_source_revision_key must be set when using '
+                         'rebase_to_ref')
+
+    if map_via_source_revision_key and not rebase_to_ref:
+        raise ValueError('rebase_to_ref must be set when using '
+                         'map_via_source_revision_key')
+
+    git_repo_path = os.path.abspath(git_repo_path)
+
+    git_repo = dulwich.repo.Repo(git_repo_path)
+
+    local_base_ref = b'integrate/base/%s' % git_base_ref
+    local_head_ref = b'integrate/head/%s' % git_head_ref
+
+    # TODO will want to make this controllable someday.
+    def shape_strategy(merge_info):
+        return 'squash'
+
+    def rewriter(merge_info, strategy, commit_map, source_commit, dest_commit):
+        # TODO this reparenting logic could be extracted to a standalone
+        # function.
+        if not map_via_source_revision_key:
+            return
+
+        # NOTE(review): the original code read both 'incoming_parent' and
+        # 'incoming_parents'. The plural key is assumed to be the real one;
+        # confirm against prepare_ref_for_integration().
+        incoming_parents = merge_info['incoming_parents']
+
+        if not any(p in incoming_parents for p in dest_commit.parents):
+            return
+
+        # Parents listed in incoming_parents were rewritten through another
+        # mechanism, so map them to the rewritten commits. Every other parent
+        # is kept as-is.
+        new_parents = []
+        for p in dest_commit.parents:
+            if p not in incoming_parents:
+                new_parents.append(p)
+                # Skip the lookup below; falling through would record this
+                # parent a second time (or fail on a missing mapping).
+                continue
+
+            rewritten = find_source_commit(git_repo, p, rebase_to_ref,
+                                           map_via_source_revision_key)
+
+            if not rewritten:
+                # Report the parent that could not be mapped.
+                raise RewriteError('unable to find rewritten version of '
+                                   'commit %s' % p)
+
+            new_parents.append(rewritten.id)
+
+        dest_commit.parents = new_parents
+
+    # Normalize the incoming commits and possibly reparent.
+
+    integrate_result = prepare_ref_for_integration(
+        git_base_url, git_base_ref, local_base_ref,
+        git_head_url, git_head_ref, local_head_ref,
+        base_revision=git_base_revision,
+        head_revision=git_head_revision,
+        exclude_dirs=exclude_dirs,
+        shape_strategy_fn=shape_strategy,
+        commit_rewriter=rewriter,
+    )
+
+    # Now convert to Mercurial.
+
+    # We make a copy of the revision map because these commits are ephemeral.
+    original_revmap = os.path.join(hg_repo_path, b'.hg', b'shamap')
+    with tempfile.NamedTemporaryFile() as tf:
+        with open(original_revmap, 'rb') as fh:
+            tf.write(fh.read())
+
+        # Flush buffered writes before tf.name is handed to run_hg_convert().
+        tf.flush()
+
+        with monitor_hg_repo(hg_repo_path, [b'shamap']) as changes:
+            run_hg_convert(git_repo_path, hg_repo_path, tf.name,
+                           rev=integrate_result['integrate_commit'],
+                           similarity=similarity,
+                           find_copies_harder=find_copies_harder,
+                           skip_submodules=skip_submodules,
+                           move_to_subdir=move_to_subdir)