vcssync: implement a context manager for monitoring an hg repo (bug 1357597); r?glob draft
author Gregory Szorc <gps@mozilla.com>
Tue, 18 Apr 2017 17:25:51 -0700
changeset 10853 40aaed94c3c84b701fdc8c9a16540e5163324f2b
parent 10852 a40ca8daccdb7b2433148fc587f31e1d0bdf899c
child 10854 11f3682b65fa2ee7aceb39cc4bf9dd736a8b95db
push id 1638
push user bmo:gps@mozilla.com
push date Sat, 22 Apr 2017 00:35:48 +0000
reviewers glob
bugs 1357597
vcssync: implement a context manager for monitoring an hg repo (bug 1357597); r?glob

Previously, linearize_git_repo_to_hg() had a bunch of low-level code to grab state from a Mercurial repo before and after an operation. This type of probing is generic and can be extracted to its own function. Furthermore, this pattern of performing actions before and after an event is something that context managers can do for us. So this commit extracts the code to a utility function that is used as a context manager. At this point, linearize_git_repo_to_hg() is much easier to read and most of its generic logic is in standalone functions, facilitating reuse.

MozReview-Commit-ID: qTjRXeJlIW
vcssync/mozvcssync/git2hg.py
vcssync/mozvcssync/util.py
--- a/vcssync/mozvcssync/git2hg.py
+++ b/vcssync/mozvcssync/git2hg.py
@@ -1,30 +1,32 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, unicode_literals
 
 import errno
-import hashlib
 import logging
 import os
 import subprocess
 import tempfile
 
 import dulwich.repo
 import hglib
 
 from .gitrewrite import (
     commit_metadata_rewriter,
 )
 from .gitrewrite.linearize import (
     linearize_git_repo,
 )
+from .util import (
+    monitor_hg_repo,
+)
 
 
 logger = logging.getLogger(__name__)
 
 
 def source_commits_in_map_file(path, commits):
     """Determine whether all source commits are present in a map file.
 
@@ -243,59 +245,47 @@ def linearize_git_repo_to_hg(git_source_
             maybe_push_hg()
             return result
 
     logger.warn('converting %d Git commits' % len(git_state['commit_map']))
 
     if not os.path.exists(hg_repo_path):
         hglib.init(hg_repo_path)
 
-    with hglib.open(hg_repo_path) as hrepo:
-        tip = hrepo[b'tip']
-        before_hg_tip_rev = tip.rev()
-        before_hg_tip_node = tip.node()
-
-    def get_shamap_hash():
-        if not os.path.exists(rev_map):
-            return None
-
-        with open(rev_map, 'rb') as fh:
-            return hashlib.sha256(fh.read()).digest()
-
-    old_shamap_hash = get_shamap_hash()
+    with monitor_hg_repo(hg_repo_path, [b'shamap']) as changes:
+        run_hg_convert(git_repo_path, hg_repo_path, rev_map,
+                       rev=b'refs/convert/dest/heads/%s' % ref,
+                       similarity=similarity,
+                       find_copies_harder=find_copies_harder,
+                       skip_submodules=skip_submodules,
+                       move_to_subdir=move_to_subdir)
 
-    run_hg_convert(git_repo_path, hg_repo_path, rev_map,
-                   rev=b'refs/convert/dest/heads/%s' % ref,
-                   similarity=similarity,
-                   find_copies_harder=find_copies_harder,
-                   skip_submodules=skip_submodules,
-                   move_to_subdir=move_to_subdir)
+    # Aliasing makes this slightly easier to read.
+    before = changes['before']
+    after = changes['after']
 
-    with hglib.open(hg_repo_path) as hrepo:
-        tip = hrepo[b'tip']
-        after_hg_tip_rev = tip.rev()
-        after_hg_tip_node = tip.node()
+    if before['tip_rev'] == -1:
+        convert_count = after['tip_rev'] + 1
+    else:
+        convert_count = after['tip_rev'] - before['tip_rev']
 
-    if before_hg_tip_rev == -1:
-        convert_count = after_hg_tip_rev + 1
-    else:
-        convert_count = after_hg_tip_rev - before_hg_tip_rev
-
-    result['hg_before_tip_rev'] = before_hg_tip_rev
-    result['hg_after_tip_rev'] = after_hg_tip_rev
-    result['hg_before_tip_node'] = before_hg_tip_node
-    result['hg_after_tip_node'] = after_hg_tip_node
+    result['hg_before_tip_rev'] = before['tip_rev']
+    result['hg_after_tip_rev'] = after['tip_rev']
+    result['hg_before_tip_node'] = before['tip_node']
+    result['hg_after_tip_node'] = after['tip_node']
     result['hg_convert_count'] = convert_count
 
     logger.warn('%d Git commits converted to Mercurial; '
                 'previous tip: %d:%s; current tip: %d:%s' % (
-        convert_count, before_hg_tip_rev, before_hg_tip_node,
-        after_hg_tip_rev, after_hg_tip_node))
+        convert_count, before['tip_rev'], before['tip_node'],
+        after['tip_rev'], after['tip_node']))
 
     maybe_push_hg()
 
+    shamap_changed = before['hashes']['shamap'] != after['hashes']['shamap']
+
     # TODO so hacky. Relies on credentials in the environment.
-    if shamap_s3_upload_url and old_shamap_hash != get_shamap_hash():
+    if shamap_s3_upload_url and shamap_changed:
         subprocess.check_call([
             b'aws', b's3', b'cp', rev_map, shamap_s3_upload_url
         ])
 
     return result
--- a/vcssync/mozvcssync/util.py
+++ b/vcssync/mozvcssync/util.py
@@ -1,14 +1,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, unicode_literals
 
+import contextlib
+import errno
+import hashlib
 import os
 import pipes
 
 import github3
 import hglib
 
 
 def run_hg(logger, client, args):
@@ -56,8 +59,62 @@ def get_github_client(token):
 
         recorder = betamax.Betamax(gh._session)
         recorder.use_cassette(betamax_cassette)
         recorder.start()
 
     gh.login(token=token)
 
     return gh
+
+
+def hash_path(path):
+    """Return the SHA-256 digest of the file at ``path`` (None if absent)."""
+    try:
+        with open(path, 'rb') as handle:
+            return hashlib.sha256(handle.read()).digest()
+    except IOError as exc:
+        if exc.errno == errno.ENOENT:
+            return None
+        raise
+
+
+@contextlib.contextmanager
+def monitor_hg_repo(repo_path, hg_paths=None):
+    """Context manager to monitor a Mercurial repo for changes.
+
+    The repo at ``repo_path`` is sampled before the body runs and again
+    when the context manager exits, even if the body raises.
+
+    ``hg_paths`` lists files, relative to the repo's ``.hg`` directory,
+    whose SHA-256 digests are recorded (``None`` for a missing file).
+
+    Yields a dict whose ``before`` and ``after`` keys each hold a dict
+    with ``tip_rev``, ``tip_node`` and ``hashes``. ``after`` is only set
+    once the body has finished. It is up to the caller to perform diffing.
+
+    Note: only the tip rev and node are recorded for the changelog. This
+    is insufficient to detect all changes, e.g. certain strip operations.
+    """
+    hg_paths = hg_paths or []
+
+    def get_state():
+        with hglib.open(repo_path) as repo:
+            tip = repo[b'tip']
+            tip_rev = tip.rev()
+            tip_node = tip.node()
+
+        # Resolve against the repo, not the current working directory.
+        hashes = {path: hash_path(os.path.join(repo_path, b'.hg', path))
+                  for path in hg_paths}
+
+        return {
+            'tip_rev': tip_rev,
+            'tip_node': tip_node,
+            'hashes': hashes,
+        }
+
+    state = {'before': get_state()}
+
+    try:
+        yield state
+    finally:
+        state['after'] = get_state()