vcssync: drop commits without tree changes when linearizing (bug 1322769); r?glob
Currently, linearizing a Git repo can result in "empty" commits:
commits whose tree is identical to that of the previous commit. This
can happen when a rewritten commit only changes files in directories
that are filtered out as part of rewriting.
When these empty commits are fed into `hg convert`, they are silently
ignored. This makes the log output from conversion a bit confusing:
for example, you may see 8 Git commits being rewritten but only 5 new
Mercurial changesets.
In this commit, we drop empty commits when linearizing Git repos.
Dropped commits are logged as such to make it easier to understand
what's going on.
MozReview-Commit-ID: auwRtt8KON
--- a/vcssync/mozvcssync/gitrewrite/linearize.py
+++ b/vcssync/mozvcssync/gitrewrite/linearize.py
@@ -187,31 +187,42 @@ def linearize_git_repo(git_repo, ref, ex
if source_repo and source_repo.startswith(b'https://github.com/'):
orgrepo = source_repo[len(b'https://github.com/'):]
github_org, github_repo = orgrepo.split(b'/')
if github_client and github_repo and not os.path.exists(github_cache_dir):
os.mkdir(github_cache_dir)
+ last_tree = repo[dest_commit_id].tree if dest_commit_id else None
rewrite_count = 0
for i, source_commit in enumerate(source_commits):
logger.warn('%d/%d %s %s' % (
i + 1, len(source_commits), source_commit.id,
source_commit.message.splitlines()[0].decode('utf-8', 'replace')))
dest_commit = source_commit.copy()
# If we're pruning directories, we need to rewrite tree objects.
if exclude_dirs:
dest_commit.tree = prune_directories(repo.object_store,
dest_commit.tree,
exclude_dirs).id
+ # If the tree is identical to the last commit, the commit is empty.
+ # There is no value in keeping it. So we drop it.
+ #
+ # In some cases, retaining empty commits may be desirable. So this
+ # behavior could be controlled by a function argument if wanted.
+ if dest_commit.tree == last_tree:
+ logger.warn('dropping %s because no tree changes' %
+ source_commit.id)
+ continue
+
if use_p2_author and len(source_commit.parents) == 2:
c = repo[source_commit.parents[1]]
author = c.author
committer = c.committer
else:
author = source_commit.author
committer = source_commit.committer
@@ -266,16 +277,17 @@ def linearize_git_repo(git_repo, ref, ex
dest_commit.message = b'%s\n' % b'\n'.join(lines)
# Our commit object is fully transformed. Write it.
repo.object_store.add_object(dest_commit)
rewrite_count += 1
dest_commit_id = dest_commit.id
+ last_tree = dest_commit.tree
result['commit_map'][source_commit.id] = dest_commit_id
result['dest_commit'] = dest_commit_id
# Store refs to the converted source and dest commits. We use
# ``git update-ref`` so reflogs are written (Dulwich doesn't appear
# to write reflogs).
reflog_commands = []
new file mode 100644
--- /dev/null
+++ b/vcssync/tests/test-linearize-git-drop-empty.t
@@ -0,0 +1,96 @@
+ $ . $TESTDIR/vcssync/tests/helpers.sh
+
+ $ git init repo0
+ Initialized empty Git repository in $TESTTMP/repo0/.git/
+
+ $ cd repo0
+ $ touch file0
+ $ git add file0
+ $ git commit -m initial
+ [master (root-commit) 9a1c63e] initial
+ 1 file changed, 0 insertions(+), 0 deletions(-)
+ create mode 100644 file0
+
+ $ mkdir dir0 dir1
+ $ touch dir0/file0 dir1/file0
+ $ git add dir0 dir1
+ $ git commit -m 'add dir0/file0 and dir1/file0'
+ [master 91896bd] add dir0/file0 and dir1/file0
+ 2 files changed, 0 insertions(+), 0 deletions(-)
+ create mode 100644 dir0/file0
+ create mode 100644 dir1/file0
+ $ git branch before-dir1
+ $ touch dir1/file1
+ $ git add dir1/file1
+ $ git commit -m 'add dir1/file1'
+ [master 3219fc6] add dir1/file1
+ 1 file changed, 0 insertions(+), 0 deletions(-)
+ create mode 100644 dir1/file1
+ $ touch dir1/file2
+ $ git add dir1/file2
+ $ git commit -m 'add dir1/file2'
+ [master a870890] add dir1/file2
+ 1 file changed, 0 insertions(+), 0 deletions(-)
+ create mode 100644 dir1/file2
+ $ touch dir0/file1
+ $ git add dir0/file1
+ $ git commit -m 'add dir0/file1'
+ [master 9826ff3] add dir0/file1
+ 1 file changed, 0 insertions(+), 0 deletions(-)
+ create mode 100644 dir0/file1
+
+Linearizing master should drop the commits that only touch dir1
+
+ $ linearize-git --exclude-dir dir1 . heads/master
+ linearizing 5 commits from heads/master (9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b to 9826ff342e616ccbb358c7d6ea25c6d695a74a23)
+ 1/5 9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b initial
+ 2/5 91896bd2582da05399b9f2203676701b5ca7c86f add dir0/file0 and dir1/file0
+ 3/5 3219fc60822be2815a418dac5a355551aa79b60b add dir1/file1
+ dropping 3219fc60822be2815a418dac5a355551aa79b60b because no tree changes
+ 4/5 a87089085cdd007a4b176ae84dad07af750e0615 add dir1/file2
+ dropping a87089085cdd007a4b176ae84dad07af750e0615 because no tree changes
+ 5/5 9826ff342e616ccbb358c7d6ea25c6d695a74a23 add dir0/file1
+ 3 commits from heads/master converted; original: 9826ff342e616ccbb358c7d6ea25c6d695a74a23; rewritten: aba6d76d367154ca87f9b5b177852f0a85f50b65
+
+Test incremental conversion where empty commits are on edges
+
+ $ linearize-git --exclude-dir dir1 . heads/before-dir1
+ linearizing 2 commits from heads/before-dir1 (9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b to 91896bd2582da05399b9f2203676701b5ca7c86f)
+ 1/2 9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b initial
+ 2/2 91896bd2582da05399b9f2203676701b5ca7c86f add dir0/file0 and dir1/file0
+ 2 commits from heads/before-dir1 converted; original: 91896bd2582da05399b9f2203676701b5ca7c86f; rewritten: 66547f310f848f1024fbda495dafebd3c6c347f9
+
+ $ git checkout before-dir1
+ Switched to branch 'before-dir1'
+ $ git reset --hard 3219fc60822be2815a418dac5a355551aa79b60b
+ HEAD is now at 3219fc6 add dir1/file1
+ $ linearize-git --exclude-dir dir1 . heads/before-dir1
+ linearizing 1 commits from heads/before-dir1 (3219fc60822be2815a418dac5a355551aa79b60b to 3219fc60822be2815a418dac5a355551aa79b60b)
+ 1/1 3219fc60822be2815a418dac5a355551aa79b60b add dir1/file1
+ dropping 3219fc60822be2815a418dac5a355551aa79b60b because no tree changes
+ 0 commits from heads/before-dir1 converted; original: 3219fc60822be2815a418dac5a355551aa79b60b; rewritten: 66547f310f848f1024fbda495dafebd3c6c347f9
+
+ $ git reset --hard a87089085cdd007a4b176ae84dad07af750e0615
+ HEAD is now at a870890 add dir1/file2
+ $ linearize-git --exclude-dir dir1 . heads/before-dir1
+ linearizing 1 commits from heads/before-dir1 (a87089085cdd007a4b176ae84dad07af750e0615 to a87089085cdd007a4b176ae84dad07af750e0615)
+ 1/1 a87089085cdd007a4b176ae84dad07af750e0615 add dir1/file2
+ dropping a87089085cdd007a4b176ae84dad07af750e0615 because no tree changes
+ 0 commits from heads/before-dir1 converted; original: a87089085cdd007a4b176ae84dad07af750e0615; rewritten: 66547f310f848f1024fbda495dafebd3c6c347f9
+
+ $ git reset --hard 9826ff342e616ccbb358c7d6ea25c6d695a74a23
+ HEAD is now at 9826ff3 add dir0/file1
+ $ linearize-git --exclude-dir dir1 . heads/before-dir1
+ linearizing 1 commits from heads/before-dir1 (9826ff342e616ccbb358c7d6ea25c6d695a74a23 to 9826ff342e616ccbb358c7d6ea25c6d695a74a23)
+ 1/1 9826ff342e616ccbb358c7d6ea25c6d695a74a23 add dir0/file1
+ 1 commits from heads/before-dir1 converted; original: 9826ff342e616ccbb358c7d6ea25c6d695a74a23; rewritten: aba6d76d367154ca87f9b5b177852f0a85f50b65
+
+Converted commit SHA-1 for master should align with incremental result
+
+ $ git for-each-ref
+ aba6d76d367154ca87f9b5b177852f0a85f50b65 commit refs/convert/dest/heads/before-dir1
+ aba6d76d367154ca87f9b5b177852f0a85f50b65 commit refs/convert/dest/heads/master
+ 9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit refs/convert/source/heads/before-dir1
+ 9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit refs/convert/source/heads/master
+ 9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit refs/heads/before-dir1
+ 9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit refs/heads/master
--- a/vcssync/tests/test-linearize-git-exclude-dirs.t
+++ b/vcssync/tests/test-linearize-git-exclude-dirs.t
@@ -92,25 +92,25 @@ Directories can be excluded when lineari
$ git update-ref -d refs/convert/source/heads/master
$ git update-ref -d refs/convert/dest/heads/master
$ linearize-git --exclude-dir dir0 --exclude-dir dir1 . heads/master
linearizing 4 commits from heads/master (a547cc07d30f025e022b27310c713705158c21b4 to e9fb4537517445c07d491482211919591e4dae45)
1/4 a547cc07d30f025e022b27310c713705158c21b4 initial
2/4 0ac77c9293242a70f71defcee37a74659207b19e add file1s
3/4 b7b3abcd50597761f65c0a11846de6ebc98cc5b7 add dir0/file2
+ dropping b7b3abcd50597761f65c0a11846de6ebc98cc5b7 because no tree changes
4/4 e9fb4537517445c07d491482211919591e4dae45 Merge branch 'head1'
- 4 commits from heads/master converted; original: e9fb4537517445c07d491482211919591e4dae45; rewritten: d8230193bc11a2745bec8258c94b95324f3c4955
+ 3 commits from heads/master converted; original: e9fb4537517445c07d491482211919591e4dae45; rewritten: adc3f0cd6e97a4aaded01d4c68119b7566807b07
$ git log --graph --format=oneline refs/convert/dest/heads/master
- * d8230193bc11a2745bec8258c94b95324f3c4955 Merge branch 'head1'
- * 8a2c50c762f3483c5b3d26947d81a0cbe2ba8e69 add dir0/file2
+ * adc3f0cd6e97a4aaded01d4c68119b7566807b07 Merge branch 'head1'
* 925f1eab825ed50a1f80058c6a1f220c009a8bfd add file1s
* a547cc07d30f025e022b27310c713705158c21b4 initial
$ git show -m refs/convert/dest/heads/master
- commit d8230193bc11a2745bec8258c94b95324f3c4955
+ commit adc3f0cd6e97a4aaded01d4c68119b7566807b07
Author: test <test@example.com>
Date: Thu Jan 1 00:00:00 1970 +0000
Merge branch 'head1'
diff --git a/dir2/file0 b/dir2/file0
new file mode 100644
index 0000000..e69de29