vcssync: add function for searching for original commit (bug 1357597); r?glob draft
author Gregory Szorc <gps@mozilla.com>
Fri, 21 Apr 2017 15:18:01 -0700
changeset 10858 6fa6b7f01a10d492927edbb65e041ea368fae4b0
parent 10857 f867f6d5354ce934fa0bba4fe6853e9fa0ccc96f
child 10859 9aeb58f3dae697dd58b3ea9a5efbc693751b8eba
push id 1638
push user bmo:gps@mozilla.com
push date Sat, 22 Apr 2017 00:35:48 +0000
reviewers glob
bugs 1357597
vcssync: add function for searching for original commit (bug 1357597); r?glob

When rewriting Git commits, we sometimes add an annotation recording the original commit ID. This commit introduces functionality for parsing that annotation to find the rewritten version of a commit.

MozReview-Commit-ID: 6S6MuMGyVt0
vcssync/mozvcssync/gitrewrite/__init__.py
vcssync/tests/test_find_source_commit.py
--- a/vcssync/mozvcssync/gitrewrite/__init__.py
+++ b/vcssync/mozvcssync/gitrewrite/__init__.py
@@ -6,16 +6,17 @@ from __future__ import absolute_import, 
 
 import collections
 import json
 import os
 import re
 import stat
 import uuid
 
+import dulwich.walk
 import github3.pulls
 
 from ..util import (
     get_github_client,
 )
 
 
 class RewriteError(Exception):
@@ -426,8 +427,49 @@ def commit_metadata_rewriter(
                 lines.append(b'%s: %s' % (source_repo_key, source_repo))
             if source_revision_key:
                 lines.append(b'%s: %s' % (source_revision_key,
                                           source_commit.id))
 
             dest_commit.message = b'%s\n' % b'\n'.join(lines)
 
     return rewrite_commit
+
+
+def find_source_commit(repo, commit_id, search_revision, source_revision_key):
+    """Find the original commit from a rewritten commit.
+
+    Some commit rewriting operations do not store an explicit map of revisions.
+    Instead, the original commit is annotated in the commit message. e.g. using
+    ``Source-Revision: %s`` syntax. This function is used to find the original
+    Git commit post rewrite.
+
+    It works by walking ancestors of ``search_revision`` and looking for
+    matches. If the exact commit is found, it is returned. Otherwise, it
+    parses for the ``source_revision_key`` specified. If no match is found,
+    returns None.
+
+    Note: this function assumes we can trust the annotation in the read
+    commits. Any user with write privileges to the repo could produce these
+    annotations and mislead this function. So consider the implications before
+    using this function.
+    """
+    search = re.compile('^%s: %s$' % (re.escape(source_revision_key),
+                                      re.escape(commit_id)),
+                        re.MULTILINE)
+
+    walker = dulwich.walk.Walker(repo.object_store,
+                                 include=[repo[search_revision].id])
+
+    for entry in walker:
+        c = entry.commit
+
+        # Exact match.
+        if c.id == commit_id:
+            return c
+
+        # Parse metadata.
+        match = search.search(c.message)
+
+        if match:
+            return c
+
+    return None
new file mode 100644
--- /dev/null
+++ b/vcssync/tests/test_find_source_commit.py
@@ -0,0 +1,95 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, unicode_literals
+
+import unittest
+
+import dulwich.objects
+import dulwich.repo
+
+from mozvcssync.gitrewrite import (
+    find_source_commit,
+)
+
+
+class TestFindSourceCommit(unittest.TestCase):
+    def setUp(self):
+        self.repo = dulwich.repo.MemoryRepo()
+
+        os = self.repo.object_store
+
+        tree = dulwich.objects.Tree()
+        tree[b'dummy'] = (0, b'0' * 40)
+        os.add_object(tree)
+
+        c = dulwich.objects.Commit()
+        c.tree = tree
+        c.author = b'dummy author'
+        c.author_time = 0
+        c.author_timezone = 0
+        c.committer = b'dummy committer'
+        c.commit_time = 0
+        c.commit_timezone = 0
+        c.message = b'initial'
+        os.add_object(c)
+
+        c2 = c.copy()
+        c2.parents = [c.id]
+        c2.message = b'commit 2'
+        os.add_object(c2)
+
+        c3 = c.copy()
+        c3.parents = [c2.id]
+        c3.message = b'commit 3'
+        os.add_object(c3)
+
+        self.repo[b'refs/heads/master'] = c3.id
+
+        # Now create a simulated rewrite of master.
+        c2a = c2.copy()
+        c2a.message = b'commit 2 rewritten\n\nSource-Revision: %s' % c2.id
+        os.add_object(c2a)
+
+        c3a = c3.copy()
+        c3a.parents = [c2a.id]
+        c3a.message = b'commit 3 rewritten\n\nSource-Revision: %s' % c3.id
+        os.add_object(c3a)
+
+        self.repo[b'refs/heads/rewritten'] = c3a.id
+
+    def test_no_match(self):
+        self.assertIsNone(find_source_commit(self.repo,
+                                             b'0' * 40,
+                                             b'refs/heads/master',
+                                             b'Source-Revision'))
+
+    def test_exact_match(self):
+        c3 = self.repo[b'refs/heads/master']
+
+        self.assertEqual(find_source_commit(self.repo,
+                                            c3.id,
+                                            b'refs/heads/master',
+                                            b'not relevant'),
+                         c3)
+
+    def test_match_base(self):
+        c2 = self.repo[self.repo[b'refs/heads/master'].parents[0]]
+        c2a = self.repo[self.repo[b'refs/heads/rewritten'].parents[0]]
+
+        self.assertEqual(find_source_commit(self.repo,
+                                            c2.id,
+                                            b'refs/heads/rewritten',
+                                            b'Source-Revision'),
+                         c2a)
+
+    def test_match_head(self):
+        c3 = self.repo[b'refs/heads/master']
+        c3a = self.repo[b'refs/heads/rewritten']
+
+        self.assertEqual(find_source_commit(self.repo,
+                                            c3.id,
+                                            b'refs/heads/rewritten',
+                                            b'Source-Revision'),
+                         c3a)