pushlog: optimize revision resolution (bug 1337946); r?glob draft
author Gregory Szorc <gps@mozilla.com>
Mon, 19 Jun 2017 11:41:56 -0700
changeset 11246 9e9c834f20d75c458f735f2592c9b872d3edb8f4
parent 11245 96cf69fe524aecfc95a093fabcd1c784d4f686d5
push id 1709
push user gszorc@mozilla.com
push date Mon, 19 Jun 2017 18:45:20 +0000
reviewers glob
bugs 1337946, 108759, 110000, 100000, 10000, 752788, 750000, 740000, 26203, 20000, 0, 722270, 720000, 710000, 788897, 790000, 716771, 700000
pushlog: optimize revision resolution (bug 1337946); r?glob Some revsets iterate over the pushlog and emit revisions. Since the pushlog API returns nodes, they need to convert nodes to integer revs. Previously, they were using repo[x].rev() to resolve a node to a rev. This constructs a full changectx type, which can be expensive. We rewrite the code to perform node -> rev translations in the most efficient manner possible. The impact on revset performance for mozilla-central is clearly visible: $ hg perfrevset 'pushhead()' ! wall 1.108759 comb 1.110000 user 1.100000 sys 0.010000 (best of 9) ! wall 0.752788 comb 0.750000 user 0.740000 sys 0.010000 (best of 14) $ hg perfrevset 'pushdate(">2017")' ! wall 1.026203 comb 1.020000 user 1.020000 sys 0.000000 (best of 10) ! wall 0.722270 comb 0.720000 user 0.710000 sys 0.010000 (best of 14) $ hg perfrevset 'pushuser(gszorc)' ! wall 0.788897 comb 0.790000 user 0.790000 sys 0.000000 (best of 13) ! wall 0.716771 comb 0.720000 user 0.700000 sys 0.020000 (best of 14) MozReview-Commit-ID: 6Od0xOVBcav
hgext/pushlog/__init__.py
--- a/hgext/pushlog/__init__.py
+++ b/hgext/pushlog/__init__.py
@@ -633,35 +633,37 @@ def revset_pushhead(repo, subset, x):
     revset.getargs(x, 0, 0, 'pushhead takes no arguments')
 
     # Iterating over all pushlog data is unfortunate, as there is overhead
     # involved. However, this is less overhead than issuing a SQL query for
     # every changeset, especially on large repositories. There is room to make
     # this optimal by batching SQL, but that adds complexity. For now,
     # simplicity wins.
     def getrevs():
+        to_rev = repo.changelog.rev
         for push in repo.pushlog.pushes():
-            yield repo[push.nodes[-1]].rev()
+            yield to_rev(bin(push.nodes[-1]))
 
     return subset & revset.generatorset(getrevs())
 
 
 @revsetpredicate('pushdate(interval)', safe=True)
 def revset_pushdate(repo, subset, x):
     """Changesets that were pushed within the interval, see :hg:`help dates`."""
     l = revset.getargs(x, 1, 1, 'pushdate requires one argument')
 
     ds = revset.getstring(l[0], 'pushdate requires a string argument')
     dm = util.matchdate(ds)
 
     def getrevs():
+        to_rev = repo.changelog.rev
         for push in repo.pushlog.pushes():
             if dm(push.when):
                 for node in push.nodes:
-                    yield repo[node].rev()
+                    yield to_rev(bin(node))
 
     return subset & revset.generatorset(getrevs())
 
 
 @revsetpredicate('pushuser(string)', safe=True)
 def revset_pushuser(repo, subset, x):
     """User name that pushed the changeset contains string.
 
@@ -671,20 +673,21 @@ def revset_pushuser(repo, subset, x):
     a regular expression. To match a user that actually contains `re:`, use
     the prefix `literal:`.
     """
     l = revset.getargs(x, 1, 1, 'pushuser requires one argument')
     n = encoding.lower(revset.getstring(l[0], 'pushuser requires a string'))
     kind, pattern, matcher = revset._substringmatcher(n)
 
     def getrevs():
+        to_rev = repo.changelog.rev
         for push in repo.pushlog.pushes():
             if matcher(encoding.lower(push.user)):
                 for node in push.nodes:
-                    yield repo[node].rev()
+                    yield to_rev(bin(node))
 
     return subset & revset.generatorset(getrevs())
 
 
 @revsetpredicate('pushid(int)', safe=True)
 def revset_pushid(repo, subset, x):
     """Changesets that were part of the specified numeric push id."""
     l = revset.getargs(x, 1, 1, 'pushid requires one argument')
@@ -694,39 +697,41 @@ def revset_pushid(repo, subset, x):
         raise error.ParseError('pushid expects a number')
 
     with repo.pushlog.conn(readonly=True) as conn:
         push = repo.pushlog.pushfromid(conn, pushid) if conn else None
 
     if not push:
         return revset.baseset()
 
+    to_rev = repo.changelog.rev
     pushrevs = set()
     for node in push.nodes:
         try:
-            pushrevs.add(repo[node].rev())
+            pushrevs.add(to_rev(bin(node)))
         except RepoLookupError:
             pass
 
     return subset & pushrevs
 
 
 @revsetpredicate('pushrev(set)', safe=True)
 def revset_pushrev(repo, subset, x):
     """Changesets that were part of the same push as the specified changeset(s)."""
     l = revset.getset(repo, subset, x)
 
     # This isn't the most optimal implementation, especially if the input
     # set is large. But it gets the job done.
+    to_rev = repo.changelog.rev
     revs = set()
     for rev in l:
         push = repo.pushlog.pushfromchangeset(repo[rev])
         if push:
             for node in push.nodes:
-                revs.add(repo[node].rev())
+                revs.add(to_rev(bin(node)))
 
     return subset.filter(revs.__contains__)
 
 # Again, for performance reasons we read the entire pushlog database and cache
 # the results. Again, this is unfortunate. But, the alternative is a potential
 # very expensive series of database lookups.
 #
 # The justification for doing this for templates is even less than doing it for