vcssync: extract code for running `hg convert` to reusable function (bug 1357597); r?glob draft
author Gregory Szorc <gps@mozilla.com>
Tue, 18 Apr 2017 16:59:56 -0700
changeset 10851 2b7cff910a8220c78a3a7e174470828809315d18
parent 10850 1821bb08e6e2bc0e6dd7e62732b4fa4f623dfabd
child 10852 a40ca8daccdb7b2433148fc587f31e1d0bdf899c
push id 1638
push user bmo:gps@mozilla.com
push date Sat, 22 Apr 2017 00:35:48 +0000
reviewers glob
bugs 1357597
vcssync: extract code for running `hg convert` to reusable function (bug 1357597); r?glob

A future commit will introduce a new consumer that wants to call `hg convert`. Since the code is a bit involved, let's factor the functionality to a standalone function.

This also has the benefit of making linearize_git_repo_to_hg() significantly more readable.

MozReview-Commit-ID: FP9Rl3uTbHM
vcssync/mozvcssync/git2hg.py
--- a/vcssync/mozvcssync/git2hg.py
+++ b/vcssync/mozvcssync/git2hg.py
@@ -52,16 +52,72 @@ def source_commits_in_map_file(path, com
                         break
     except IOError as e:
         if e.errno != errno.ENOENT:
             raise
 
     return len(remaining) == 0, commit_map
 
 
+def run_hg_convert(git_repo_path,
+                   hg_repo_path,
+                   rev_map,
+                   rev=None,
+                   similarity=50,
+                   find_copies_harder=False,
+                   skip_submodules=False,
+                   move_to_subdir=None):
+    """Run ``hg convert`` to convert Git commits to Mercurial."""
+    hg_config = [
+        b'extensions.convert=',
+        # Make the rename detection limit essentially infinite.
+        b'convert.git.renamelimit=1000000000',
+        # The ``convert_revision`` that would be stored reflects the rewritten
+        # Git commit. This is valuable as a persistent SHA map, but that's it.
+        # We (hopefully) insert the original Git commit via
+        # ``source_revision_key``, so this is of marginal value.
+        b'convert.git.saverev=false',
+        b'convert.git.similarity=%d' % similarity,
+    ]
+
+    if find_copies_harder:
+        hg_config.append(b'convert.git.findcopiesharder=true')
+    if skip_submodules:
+        hg_config.append(b'convert.git.skipsubmodules=true')
+
+    args = [hglib.HGPATH]
+    for c in hg_config:
+        args.extend([b'--config', c])
+
+    args.extend([b'convert'])
+
+    if rev:
+        args.extend([b'--rev', rev])
+
+    # `hg convert` needs a filemap to prune empty changesets. So use an
+    # empty file even if we don't have any filemap rules.
+    with tempfile.NamedTemporaryFile('wb') as tf:
+        if move_to_subdir:
+            tf.write(b'rename . %s\n' % move_to_subdir)
+
+        tf.flush()
+
+        args.extend([b'--filemap', tf.name])
+
+        args.extend([git_repo_path, hg_repo_path, rev_map])
+
+        # hglib doesn't appear to stream output very well. So just invoke
+        # `hg` directly.
+        env = dict(os.environ)
+        env[b'HGPLAIN'] = b'1'
+        env[b'HGENCODING'] = b'utf-8'
+
+        subprocess.check_call(args, cwd='/', env=env)
+
+
 def linearize_git_repo_to_hg(git_source_url, ref, git_repo_path, hg_repo_path,
                              git_push_url=None,
                              hg_push_url=None,
                              move_to_subdir=None,
                              find_copies_harder=False,
                              skip_submodules=False,
                              similarity=50,
                              shamap_s3_upload_url=None,
@@ -184,33 +240,16 @@ def linearize_git_repo_to_hg(git_source_
         if found:
             logger.warn('all Git commits have already been '
                         'converted; not doing anything')
             maybe_push_hg()
             return result
 
     logger.warn('converting %d Git commits' % len(git_state['commit_map']))
 
-    hg_config = [
-        b'extensions.convert=',
-        # Make the rename detection limit essentially infinite.
-        b'convert.git.renamelimit=1000000000',
-        # The ``convert_revision`` that would be stored reflects the rewritten
-        # Git commit. This is valuable as a persistent SHA map, but that's it.
-        # We (hopefully) insert the original Git commit via
-        # ``source_revision_key``, so this is of marginal value.
-        b'convert.git.saverev=false',
-        b'convert.git.similarity=%d' % similarity,
-    ]
-
-    if find_copies_harder:
-        hg_config.append(b'convert.git.findcopiesharder=true')
-    if skip_submodules:
-        hg_config.append(b'convert.git.skipsubmodules=true')
-
     if not os.path.exists(hg_repo_path):
         hglib.init(hg_repo_path)
 
     with hglib.open(hg_repo_path) as hrepo:
         tip = hrepo[b'tip']
         before_hg_tip_rev = tip.rev()
         before_hg_tip_node = tip.node()
 
@@ -219,42 +258,22 @@ def linearize_git_repo_to_hg(git_source_
         if not os.path.exists(shamap_path):
             return None
 
         with open(shamap_path, 'rb') as fh:
             return hashlib.sha256(fh.read()).digest()
 
     old_shamap_hash = get_shamap_hash()
 
-    args = [hglib.HGPATH]
-    for c in hg_config:
-        args.extend([b'--config', c])
-
-    args.extend([b'convert'])
-    args.extend([b'--rev', b'refs/convert/dest/heads/%s' % ref])
-
-    # `hg convert` needs a filemap to prune empty changesets. So use an
-    # empty file even if we don't have any filemap rules.
-    with tempfile.NamedTemporaryFile('wb') as tf:
-        if move_to_subdir:
-            tf.write(b'rename . %s\n' % move_to_subdir)
-
-        tf.flush()
-
-        args.extend([b'--filemap', tf.name])
-
-        args.extend([git_repo_path, hg_repo_path, rev_map])
-
-        # hglib doesn't appear to stream output very well. So just invoke
-        # `hg` directly.
-        env = dict(os.environ)
-        env[b'HGPLAIN'] = b'1'
-        env[b'HGENCODING'] = b'utf-8'
-
-        subprocess.check_call(args, cwd='/', env=env)
+    run_hg_convert(git_repo_path, hg_repo_path, rev_map,
+                   rev=b'refs/convert/dest/heads/%s' % ref,
+                   similarity=similarity,
+                   find_copies_harder=find_copies_harder,
+                   skip_submodules=skip_submodules,
+                   move_to_subdir=move_to_subdir)
 
     with hglib.open(hg_repo_path) as hrepo:
         tip = hrepo[b'tip']
         after_hg_tip_rev = tip.rev()
         after_hg_tip_node = tip.node()
 
     if before_hg_tip_rev == -1:
         convert_count = after_hg_tip_rev + 1