vcssync: extract code for testing for conversion presence (bug 1357597); r?glob
The near future will introduce another consumer that will want to test
whether the `hg convert` revision map has knowledge of specific source
commits. So we extract this functionality to a standalone function.
As part of the extraction, behavior changed slightly. We now accept
an iterable of commits whose presence to test for, not just a single
one. We also maintain a mapping of source to destination commit.
File reading is still lazy and stops as soon as all matches are
found. The multi-commit lookup and the commit mapping are currently
unused but will be used in an upcoming commit.
MozReview-Commit-ID: LbHJu8k3Jya
--- a/vcssync/mozvcssync/git2hg.py
+++ b/vcssync/mozvcssync/git2hg.py
@@ -1,14 +1,15 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, unicode_literals
+import errno
import hashlib
import logging
import os
import subprocess
import tempfile
import dulwich.repo
import hglib
@@ -19,16 +20,48 @@ from .gitrewrite import (
from .gitrewrite.linearize import (
linearize_git_repo,
)
logger = logging.getLogger(__name__)
+def source_commits_in_map_file(path, commits):
+    """Determine whether all source commits are present in a map file.
+
+    Accepts the ``path`` to an ``hg convert`` revision mapping file and an
+    iterable of source revisions to test for presence.
+
+    Each line of the map file is split on whitespace. The first field is
+    treated as the source revision and the second as the converted
+    revision. Blank lines and lines with fewer than two fields are
+    ignored rather than raising ``IndexError``.
+
+    The file is opened in binary mode, so entries in ``commits`` only
+    match if they compare equal to the values read from the file.
+
+    Returns a 2-tuple of (bool, dict) indicating whether all commits are
+    present in the map file and a mapping of the original commit to the
+    converted commit for all found commits.
+
+    A missing map file (``ENOENT``) is treated like an empty one. Any
+    other ``IOError`` is re-raised to the caller.
+    """
+    commit_map = {}
+    remaining = set(commits)
+
+    try:
+        with open(path, 'rb') as fh:
+            for line in fh:
+                # ``split()`` with no arguments discards surrounding
+                # whitespace, so a blank line yields an empty list.
+                shas = line.split()
+                if len(shas) < 2:
+                    # Blank or malformed (e.g. truncated) entry: there is
+                    # no destination revision to record.
+                    continue
+
+                source, dest = shas[0], shas[1]
+                if source in remaining:
+                    remaining.remove(source)
+                    commit_map[source] = dest
+
+                # Keep the read lazy: stop as soon as nothing is left to
+                # look for.
+                if not remaining:
+                    break
+    except IOError as e:
+        # Only a non-existent map file is tolerated here.
+        if e.errno != errno.ENOENT:
+            raise
+
+    return not remaining, commit_map
+
+
def linearize_git_repo_to_hg(git_source_url, ref, git_repo_path, hg_repo_path,
git_push_url=None,
hg_push_url=None,
move_to_subdir=None,
find_copies_harder=False,
skip_submodules=False,
similarity=50,
shamap_s3_upload_url=None,
@@ -141,32 +174,23 @@ def linearize_git_repo_to_hg(git_source_
result = {
'git_result': git_state,
'rev_map_path': rev_map,
}
# If nothing was converted, no-op if the head is already converted
# according to the `hg convert` revision map.
if not git_state['commit_map']:
- try:
- with open(rev_map, 'rb') as fh:
- for line in fh:
- line = line.strip()
- if not line:
- continue
- shas = line.split()
- if shas[0] == git_state['dest_commit']:
- logger.warn('all Git commits have already been '
- 'converted; not doing anything')
- maybe_push_hg()
- return result
- except IOError:
- # Fall through to doing the conversion. If it's a file permissions
- # error, `hg convert` will abort.
- pass
+ found = source_commits_in_map_file(rev_map,
+ [git_state['dest_commit']])[0]
+ if found:
+ logger.warn('all Git commits have already been '
+ 'converted; not doing anything')
+ maybe_push_hg()
+ return result
logger.warn('converting %d Git commits' % len(git_state['commit_map']))
hg_config = [
b'extensions.convert=',
# Make the rename detection limit essentially infinite.
b'convert.git.renamelimit=1000000000',
# The ``convert_revision`` that would be stored reflects the rewritten