vcssync: add function for searching for original commit (bug 1357597); r?glob
When rewriting Git commits, we sometimes add an annotation recording
the original commit ID. This commit introduces functionality for
parsing that annotation to find the rewritten version of a commit.
MozReview-Commit-ID: 6S6MuMGyVt0
--- a/vcssync/mozvcssync/gitrewrite/__init__.py
+++ b/vcssync/mozvcssync/gitrewrite/__init__.py
@@ -6,16 +6,17 @@ from __future__ import absolute_import,
import collections
import json
import os
import re
import stat
import uuid
+import dulwich.walk
import github3.pulls
from ..util import (
get_github_client,
)
class RewriteError(Exception):
@@ -426,8 +427,49 @@ def commit_metadata_rewriter(
lines.append(b'%s: %s' % (source_repo_key, source_repo))
if source_revision_key:
lines.append(b'%s: %s' % (source_revision_key,
source_commit.id))
dest_commit.message = b'%s\n' % b'\n'.join(lines)
return rewrite_commit
+
+
+def find_source_commit(repo, commit_id, search_revision, source_revision_key):
+ """Find the rewritten counterpart of original commit ``commit_id``.
+
+ Some commit rewriting operations do not store an explicit map of revisions.
+ Instead, the original commit ID is annotated in the rewritten commit's
+ message, e.g. using ``Source-Revision: <id>`` syntax. This function uses
+ that annotation to locate the commit that ``commit_id`` was rewritten to.
+
+ It walks ``search_revision`` and its ancestors and returns the first
+ commit that either has ``commit_id`` as its own ID (an exact match) or
+ has a ``<source_revision_key>: <commit_id>`` line in its message. If no
+ commit matches, returns None.
+
+ Note: this function assumes we can trust the annotation in the read
+ commits. Any user with write privileges to the repo could produce these
+ annotations and mislead this function. So consider the implications before
+ using this function.
+ """
+ search = re.compile('^%s: %s$' % (re.escape(source_revision_key),
+ re.escape(commit_id)),
+ re.MULTILINE)
+
+ walker = dulwich.walk.Walker(repo.object_store,
+ include=[repo[search_revision].id])
+
+ for entry in walker:
+ c = entry.commit
+
+ # Exact match: the walked commit is ``commit_id`` itself.
+ if c.id == commit_id:
+ return c
+
+ # Otherwise look for the annotation line in the commit message.
+ match = search.search(c.message)
+
+ if match:
+ return c
+
+ return None
new file mode 100644
--- /dev/null
+++ b/vcssync/tests/test_find_source_commit.py
@@ -0,0 +1,95 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, unicode_literals
+
+import unittest
+
+import dulwich.objects
+import dulwich.repo
+
+from mozvcssync.gitrewrite import (
+ find_source_commit,
+)
+
+
+class TestFindSourceCommit(unittest.TestCase):
+ def setUp(self):
+ self.repo = dulwich.repo.MemoryRepo()
+
+ os = self.repo.object_store
+ # Every commit below shares this one tree; its single entry is a placeholder.
+ tree = dulwich.objects.Tree()
+ tree[b'dummy'] = (0, b'0' * 40)
+ os.add_object(tree)
+
+ c = dulwich.objects.Commit()
+ c.tree = tree
+ c.author = b'dummy author'
+ c.author_time = 0
+ c.author_timezone = 0
+ c.committer = b'dummy committer'
+ c.commit_time = 0
+ c.commit_timezone = 0
+ c.message = b'initial'
+ os.add_object(c)
+ # Original history on master: initial <- commit 2 <- commit 3.
+ c2 = c.copy()
+ c2.parents = [c.id]
+ c2.message = b'commit 2'
+ os.add_object(c2)
+
+ c3 = c.copy()
+ c3.parents = [c2.id]
+ c3.message = b'commit 3'
+ os.add_object(c3)
+
+ self.repo[b'refs/heads/master'] = c3.id
+
+ # Simulated rewrite of master: c2a keeps c2's parent, c3a is reparented onto c2a.
+ c2a = c2.copy()
+ c2a.message = b'commit 2 rewritten\n\nSource-Revision: %s' % c2.id
+ os.add_object(c2a)
+
+ c3a = c3.copy()
+ c3a.parents = [c2a.id]
+ c3a.message = b'commit 3 rewritten\n\nSource-Revision: %s' % c3.id
+ os.add_object(c3a)
+
+ self.repo[b'refs/heads/rewritten'] = c3a.id
+
+ def test_no_match(self):
+ self.assertIsNone(find_source_commit(self.repo,
+ b'0' * 40,
+ b'refs/heads/master',
+ b'Source-Revision'))
+
+ def test_exact_match(self):
+ c3 = self.repo[b'refs/heads/master']
+
+ self.assertEqual(find_source_commit(self.repo,
+ c3.id,
+ b'refs/heads/master',
+ b'not relevant'),
+ c3)
+
+ def test_match_base(self):
+ c2 = self.repo[self.repo[b'refs/heads/master'].parents[0]]
+ c2a = self.repo[self.repo[b'refs/heads/rewritten'].parents[0]]
+
+ self.assertEqual(find_source_commit(self.repo,
+ c2.id,
+ b'refs/heads/rewritten',
+ b'Source-Revision'),
+ c2a)
+
+ def test_match_head(self):
+ c3 = self.repo[b'refs/heads/master']
+ c3a = self.repo[b'refs/heads/rewritten']
+
+ self.assertEqual(find_source_commit(self.repo,
+ c3.id,
+ b'refs/heads/rewritten',
+ b'Source-Revision'),
+ c3a)