vcssync: implement a context manager for monitoring an hg repo
(bug 1357597); r?glob
Previously, linearize_git_repo_to_hg() had a bunch of low-level code
to grab state from a Mercurial repo before and after an operation.
This type of probing is generic and can be extracted to its own
function. Furthermore, this pattern of performing actions before
and after an event is something that context managers can do for
us.
So this commit extracts the code to a utility function that is
used as a context manager.
At this point, linearize_git_repo_to_hg() is much easier to read
and most of its generic logic is in standalone functions,
facilitating reuse.
MozReview-Commit-ID: qTjRXeJlIW
--- a/vcssync/mozvcssync/git2hg.py
+++ b/vcssync/mozvcssync/git2hg.py
@@ -1,30 +1,32 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, unicode_literals
import errno
-import hashlib
import logging
import os
import subprocess
import tempfile
import dulwich.repo
import hglib
from .gitrewrite import (
commit_metadata_rewriter,
)
from .gitrewrite.linearize import (
linearize_git_repo,
)
+from .util import (
+ monitor_hg_repo,
+)
logger = logging.getLogger(__name__)
def source_commits_in_map_file(path, commits):
"""Determine whether all source commits are present in a map file.
@@ -243,59 +245,47 @@ def linearize_git_repo_to_hg(git_source_
maybe_push_hg()
return result
logger.warn('converting %d Git commits' % len(git_state['commit_map']))
if not os.path.exists(hg_repo_path):
hglib.init(hg_repo_path)
- with hglib.open(hg_repo_path) as hrepo:
- tip = hrepo[b'tip']
- before_hg_tip_rev = tip.rev()
- before_hg_tip_node = tip.node()
-
- def get_shamap_hash():
- if not os.path.exists(rev_map):
- return None
-
- with open(rev_map, 'rb') as fh:
- return hashlib.sha256(fh.read()).digest()
-
- old_shamap_hash = get_shamap_hash()
+ with monitor_hg_repo(hg_repo_path, [b'shamap']) as changes:
+ run_hg_convert(git_repo_path, hg_repo_path, rev_map,
+ rev=b'refs/convert/dest/heads/%s' % ref,
+ similarity=similarity,
+ find_copies_harder=find_copies_harder,
+ skip_submodules=skip_submodules,
+ move_to_subdir=move_to_subdir)
- run_hg_convert(git_repo_path, hg_repo_path, rev_map,
- rev=b'refs/convert/dest/heads/%s' % ref,
- similarity=similarity,
- find_copies_harder=find_copies_harder,
- skip_submodules=skip_submodules,
- move_to_subdir=move_to_subdir)
+ # Aliasing makes this slightly easier to read.
+ before = changes['before']
+ after = changes['after']
- with hglib.open(hg_repo_path) as hrepo:
- tip = hrepo[b'tip']
- after_hg_tip_rev = tip.rev()
- after_hg_tip_node = tip.node()
+ if before['tip_rev'] == -1:
+ convert_count = after['tip_rev'] + 1
+ else:
+ convert_count = after['tip_rev'] - before['tip_rev']
- if before_hg_tip_rev == -1:
- convert_count = after_hg_tip_rev + 1
- else:
- convert_count = after_hg_tip_rev - before_hg_tip_rev
-
- result['hg_before_tip_rev'] = before_hg_tip_rev
- result['hg_after_tip_rev'] = after_hg_tip_rev
- result['hg_before_tip_node'] = before_hg_tip_node
- result['hg_after_tip_node'] = after_hg_tip_node
+ result['hg_before_tip_rev'] = before['tip_rev']
+ result['hg_after_tip_rev'] = after['tip_rev']
+ result['hg_before_tip_node'] = before['tip_node']
+ result['hg_after_tip_node'] = after['tip_node']
result['hg_convert_count'] = convert_count
logger.warn('%d Git commits converted to Mercurial; '
'previous tip: %d:%s; current tip: %d:%s' % (
- convert_count, before_hg_tip_rev, before_hg_tip_node,
- after_hg_tip_rev, after_hg_tip_node))
+ convert_count, before['tip_rev'], before['tip_node'],
+ after['tip_rev'], after['tip_node']))
maybe_push_hg()
+ shamap_changed = before['hashes']['shamap'] != after['hashes']['shamap']
+
# TODO so hacky. Relies on credentials in the environment.
- if shamap_s3_upload_url and old_shamap_hash != get_shamap_hash():
+ if shamap_s3_upload_url and shamap_changed:
subprocess.check_call([
b'aws', b's3', b'cp', rev_map, shamap_s3_upload_url
])
return result
--- a/vcssync/mozvcssync/util.py
+++ b/vcssync/mozvcssync/util.py
@@ -1,14 +1,17 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, unicode_literals
+import contextlib
+import errno
+import hashlib
import os
import pipes
import github3
import hglib
def run_hg(logger, client, args):
@@ -56,8 +59,62 @@ def get_github_client(token):
recorder = betamax.Betamax(gh._session)
recorder.use_cassette(betamax_cassette)
recorder.start()
gh.login(token=token)
return gh
+
+
+def hash_path(path):
+    """Return the raw SHA-256 digest of the content of the file at ``path``.
+
+    ``None`` is returned if the file does not exist. Any other I/O error
+    is propagated to the caller.
+    """
+    try:
+        with open(path, 'rb') as fh:
+            content = fh.read()
+    except IOError as err:
+        # A missing file is an expected condition; anything else is not.
+        if err.errno == errno.ENOENT:
+            return None
+        raise
+
+    return hashlib.sha256(content).digest()
+
+
+@contextlib.contextmanager
+def monitor_hg_repo(repo_path, hg_paths=None):
+    """Context manager to monitor a Mercurial repo for changes.
+
+    Before the body of the ``with`` block runs, the Mercurial repo at
+    ``repo_path`` is opened and state is collected. When the block exits,
+    a similar sampling is performed. The second sample is taken from a
+    ``finally`` clause, so it is attempted even if the body raises.
+
+    ``hg_paths`` is an optional iterable of paths, relative to the repo's
+    ``.hg`` directory, whose content is hashed as part of each sample.
+
+    The context manager yields a dict describing the state of the repo. It
+    has keys ``before`` and ``after`` which hold state from before and
+    after the body of the context manager executes. ``after`` is only
+    populated once the block has exited. Each of them is a dict with keys:
+
+    tip_rev
+       Revision number of the ``tip`` changeset.
+    tip_node
+       Node of the ``tip`` changeset.
+    hashes
+       Dict mapping each entry of ``hg_paths`` to the value returned by
+       ``hash_path()`` for that file (``None`` if the file is missing).
+
+    It is currently up to the caller to perform diffing.
+
+    Note: currently only the tip rev and node (plus any requested file
+    hashes) are recorded for detecting differences. This is insufficient
+    to detect changes for all use cases. For example, it may not accurately
+    detect certain strip operations.
+    """
+    hg_paths = hg_paths or []
+
+    def get_state():
+        with hglib.open(repo_path) as repo:
+            tip = repo[b'tip']
+            tip_rev = tip.rev()
+            tip_node = tip.node()
+
+        # Paths must be anchored at the monitored repo. A bare ``.hg/...``
+        # path would be resolved against the process's current working
+        # directory, which is not necessarily ``repo_path``.
+        hashes = {path: hash_path(os.path.join(repo_path, b'.hg', path))
+                  for path in hg_paths}
+
+        return {
+            'tip_rev': tip_rev,
+            'tip_node': tip_node,
+            'hashes': hashes,
+        }
+
+    state = {'before': get_state()}
+
+    try:
+        yield state
+    finally:
+        state['after'] = get_state()