vcssync: drop commits without tree changes when linearizing (bug 1322769); r?glob draft
author Gregory Szorc <gps@mozilla.com>
Fri, 27 Jan 2017 12:13:39 -0800
changeset 10265 97335960d69a78b334ff53792f9dc8b505ecdf05
parent 10264 41592751ae32a7b35b73f6511fadfd5695ba01be
push id 1494
push user bmo:gps@mozilla.com
push date Fri, 27 Jan 2017 22:32:04 +0000
reviewers glob
bugs 1322769
vcssync: drop commits without tree changes when linearizing (bug 1322769); r?glob

Currently, linearizing a Git repo can result in "empty" commits. These are commits where there are no tree changes from the previous commit. This can happen when a rewritten commit only changes files in directories that are filtered out as part of rewriting. When these empty commits are fed into `hg convert`, they are silently ignored. This makes the log output from conversion a bit confusing, as you'll e.g. see 8 Git commits being rewritten but only 5 new Mercurial changesets. In this commit, we drop empty commits when linearizing Git repos. Dropped commits are logged as such to make it easier to understand what's going on.

MozReview-Commit-ID: auwRtt8KON
vcssync/mozvcssync/gitrewrite/linearize.py
vcssync/tests/test-linearize-git-drop-empty.t
vcssync/tests/test-linearize-git-exclude-dirs.t
--- a/vcssync/mozvcssync/gitrewrite/linearize.py
+++ b/vcssync/mozvcssync/gitrewrite/linearize.py
@@ -187,31 +187,42 @@ def linearize_git_repo(git_repo, ref, ex
 
     if source_repo and source_repo.startswith(b'https://github.com/'):
         orgrepo = source_repo[len(b'https://github.com/'):]
         github_org, github_repo = orgrepo.split(b'/')
 
     if github_client and github_repo and not os.path.exists(github_cache_dir):
         os.mkdir(github_cache_dir)
 
+    last_tree = repo[dest_commit_id].tree if dest_commit_id else None
     rewrite_count = 0
 
     for i, source_commit in enumerate(source_commits):
         logger.warn('%d/%d %s %s' % (
             i + 1, len(source_commits), source_commit.id,
             source_commit.message.splitlines()[0].decode('utf-8', 'replace')))
 
         dest_commit = source_commit.copy()
 
         # If we're pruning directories, we need to rewrite tree objects.
         if exclude_dirs:
             dest_commit.tree = prune_directories(repo.object_store,
                                                  dest_commit.tree,
                                                  exclude_dirs).id
 
+        # If the tree is identical to the last commit, the commit is empty.
+        # There is no value in keeping it. So we drop it.
+        #
+        # In some cases, retaining empty commits may be desirable. So this
+        # behavior could be controlled by a function argument if wanted.
+        if dest_commit.tree == last_tree:
+            logger.warn('dropping %s because no tree changes' %
+                        source_commit.id)
+            continue
+
         if use_p2_author and len(source_commit.parents) == 2:
             c = repo[source_commit.parents[1]]
             author = c.author
             committer = c.committer
         else:
             author = source_commit.author
             committer = source_commit.committer
 
@@ -266,16 +277,17 @@ def linearize_git_repo(git_repo, ref, ex
 
             dest_commit.message = b'%s\n' % b'\n'.join(lines)
 
         # Our commit object is fully transformed. Write it.
         repo.object_store.add_object(dest_commit)
 
         rewrite_count += 1
         dest_commit_id = dest_commit.id
+        last_tree = dest_commit.tree
         result['commit_map'][source_commit.id] = dest_commit_id
 
     result['dest_commit'] = dest_commit_id
 
     # Store refs to the converted source and dest commits. We use
     # ``git update-ref`` so reflogs are written (Dulwich doesn't appear
     # to write reflogs).
     reflog_commands = []
new file mode 100644
--- /dev/null
+++ b/vcssync/tests/test-linearize-git-drop-empty.t
@@ -0,0 +1,96 @@
+  $ . $TESTDIR/vcssync/tests/helpers.sh
+
+  $ git init repo0
+  Initialized empty Git repository in $TESTTMP/repo0/.git/
+
+  $ cd repo0
+  $ touch file0
+  $ git add file0
+  $ git commit -m initial
+  [master (root-commit) 9a1c63e] initial
+   1 file changed, 0 insertions(+), 0 deletions(-)
+   create mode 100644 file0
+
+  $ mkdir dir0 dir1
+  $ touch dir0/file0 dir1/file0
+  $ git add dir0 dir1
+  $ git commit -m 'add dir0/file0 and dir1/file0'
+  [master 91896bd] add dir0/file0 and dir1/file0
+   2 files changed, 0 insertions(+), 0 deletions(-)
+   create mode 100644 dir0/file0
+   create mode 100644 dir1/file0
+  $ git branch before-dir1
+  $ touch dir1/file1
+  $ git add dir1/file1
+  $ git commit -m 'add dir1/file1'
+  [master 3219fc6] add dir1/file1
+   1 file changed, 0 insertions(+), 0 deletions(-)
+   create mode 100644 dir1/file1
+  $ touch dir1/file2
+  $ git add dir1/file2
+  $ git commit -m 'add dir1/file2'
+  [master a870890] add dir1/file2
+   1 file changed, 0 insertions(+), 0 deletions(-)
+   create mode 100644 dir1/file2
+  $ touch dir0/file1
+  $ git add dir0/file1
+  $ git commit -m 'add dir0/file1'
+  [master 9826ff3] add dir0/file1
+   1 file changed, 0 insertions(+), 0 deletions(-)
+   create mode 100644 dir0/file1
+
+Linearizing master should drop commits only touching dir1
+
+  $ linearize-git --exclude-dir dir1 . heads/master
+  linearizing 5 commits from heads/master (9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b to 9826ff342e616ccbb358c7d6ea25c6d695a74a23)
+  1/5 9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b initial
+  2/5 91896bd2582da05399b9f2203676701b5ca7c86f add dir0/file0 and dir1/file0
+  3/5 3219fc60822be2815a418dac5a355551aa79b60b add dir1/file1
+  dropping 3219fc60822be2815a418dac5a355551aa79b60b because no tree changes
+  4/5 a87089085cdd007a4b176ae84dad07af750e0615 add dir1/file2
+  dropping a87089085cdd007a4b176ae84dad07af750e0615 because no tree changes
+  5/5 9826ff342e616ccbb358c7d6ea25c6d695a74a23 add dir0/file1
+  3 commits from heads/master converted; original: 9826ff342e616ccbb358c7d6ea25c6d695a74a23; rewritten: aba6d76d367154ca87f9b5b177852f0a85f50b65
+
+Test incremental conversion where empty commits are on edges
+
+  $ linearize-git --exclude-dir dir1 . heads/before-dir1
+  linearizing 2 commits from heads/before-dir1 (9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b to 91896bd2582da05399b9f2203676701b5ca7c86f)
+  1/2 9a1c63edf1b4ddedd8b4c6ead8e7d9d613a40c4b initial
+  2/2 91896bd2582da05399b9f2203676701b5ca7c86f add dir0/file0 and dir1/file0
+  2 commits from heads/before-dir1 converted; original: 91896bd2582da05399b9f2203676701b5ca7c86f; rewritten: 66547f310f848f1024fbda495dafebd3c6c347f9
+
+  $ git checkout before-dir1
+  Switched to branch 'before-dir1'
+  $ git reset --hard 3219fc60822be2815a418dac5a355551aa79b60b
+  HEAD is now at 3219fc6 add dir1/file1
+  $ linearize-git --exclude-dir dir1 . heads/before-dir1
+  linearizing 1 commits from heads/before-dir1 (3219fc60822be2815a418dac5a355551aa79b60b to 3219fc60822be2815a418dac5a355551aa79b60b)
+  1/1 3219fc60822be2815a418dac5a355551aa79b60b add dir1/file1
+  dropping 3219fc60822be2815a418dac5a355551aa79b60b because no tree changes
+  0 commits from heads/before-dir1 converted; original: 3219fc60822be2815a418dac5a355551aa79b60b; rewritten: 66547f310f848f1024fbda495dafebd3c6c347f9
+
+  $ git reset --hard a87089085cdd007a4b176ae84dad07af750e0615
+  HEAD is now at a870890 add dir1/file2
+  $ linearize-git --exclude-dir dir1 . heads/before-dir1
+  linearizing 1 commits from heads/before-dir1 (a87089085cdd007a4b176ae84dad07af750e0615 to a87089085cdd007a4b176ae84dad07af750e0615)
+  1/1 a87089085cdd007a4b176ae84dad07af750e0615 add dir1/file2
+  dropping a87089085cdd007a4b176ae84dad07af750e0615 because no tree changes
+  0 commits from heads/before-dir1 converted; original: a87089085cdd007a4b176ae84dad07af750e0615; rewritten: 66547f310f848f1024fbda495dafebd3c6c347f9
+
+  $ git reset --hard 9826ff342e616ccbb358c7d6ea25c6d695a74a23
+  HEAD is now at 9826ff3 add dir0/file1
+  $ linearize-git --exclude-dir dir1 . heads/before-dir1
+  linearizing 1 commits from heads/before-dir1 (9826ff342e616ccbb358c7d6ea25c6d695a74a23 to 9826ff342e616ccbb358c7d6ea25c6d695a74a23)
+  1/1 9826ff342e616ccbb358c7d6ea25c6d695a74a23 add dir0/file1
+  1 commits from heads/before-dir1 converted; original: 9826ff342e616ccbb358c7d6ea25c6d695a74a23; rewritten: aba6d76d367154ca87f9b5b177852f0a85f50b65
+
+Converted commit SHA-1 for master should align with incremental result
+
+  $ git for-each-ref
+  aba6d76d367154ca87f9b5b177852f0a85f50b65 commit	refs/convert/dest/heads/before-dir1
+  aba6d76d367154ca87f9b5b177852f0a85f50b65 commit	refs/convert/dest/heads/master
+  9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit	refs/convert/source/heads/before-dir1
+  9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit	refs/convert/source/heads/master
+  9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit	refs/heads/before-dir1
+  9826ff342e616ccbb358c7d6ea25c6d695a74a23 commit	refs/heads/master
--- a/vcssync/tests/test-linearize-git-exclude-dirs.t
+++ b/vcssync/tests/test-linearize-git-exclude-dirs.t
@@ -92,25 +92,25 @@ Directories can be excluded when lineari
 
   $ git update-ref -d refs/convert/source/heads/master
   $ git update-ref -d refs/convert/dest/heads/master
   $ linearize-git --exclude-dir dir0 --exclude-dir dir1 . heads/master
   linearizing 4 commits from heads/master (a547cc07d30f025e022b27310c713705158c21b4 to e9fb4537517445c07d491482211919591e4dae45)
   1/4 a547cc07d30f025e022b27310c713705158c21b4 initial
   2/4 0ac77c9293242a70f71defcee37a74659207b19e add file1s
   3/4 b7b3abcd50597761f65c0a11846de6ebc98cc5b7 add dir0/file2
+  dropping b7b3abcd50597761f65c0a11846de6ebc98cc5b7 because no tree changes
   4/4 e9fb4537517445c07d491482211919591e4dae45 Merge branch 'head1'
-  4 commits from heads/master converted; original: e9fb4537517445c07d491482211919591e4dae45; rewritten: d8230193bc11a2745bec8258c94b95324f3c4955
+  3 commits from heads/master converted; original: e9fb4537517445c07d491482211919591e4dae45; rewritten: adc3f0cd6e97a4aaded01d4c68119b7566807b07
   $ git log --graph --format=oneline refs/convert/dest/heads/master
-  * d8230193bc11a2745bec8258c94b95324f3c4955 Merge branch 'head1'
-  * 8a2c50c762f3483c5b3d26947d81a0cbe2ba8e69 add dir0/file2
+  * adc3f0cd6e97a4aaded01d4c68119b7566807b07 Merge branch 'head1'
   * 925f1eab825ed50a1f80058c6a1f220c009a8bfd add file1s
   * a547cc07d30f025e022b27310c713705158c21b4 initial
   $ git show -m refs/convert/dest/heads/master
-  commit d8230193bc11a2745bec8258c94b95324f3c4955
+  commit adc3f0cd6e97a4aaded01d4c68119b7566807b07
   Author: test <test@example.com>
   Date:   Thu Jan 1 00:00:00 1970 +0000
   
       Merge branch 'head1'
   
   diff --git a/dir2/file0 b/dir2/file0
   new file mode 100644
   index 0000000..e69de29