pushlog: API to cache pushlog lookups (bug 1303904); r?glob draft
authorGregory Szorc <gps@mozilla.com>
Thu, 08 Jun 2017 13:32:37 -0700
changeset 11237 bb140762d07e7164199f302892a6ca058f4da014
parent 11236 6504363eb1aaf7d66a163e6df855fbd7735bcdc2
child 11238 10b4978a9f5f3e58beafc66b159620962abf89cc
push id1708
push usergszorc@mozilla.com
push dateMon, 19 Jun 2017 18:36:36 +0000
reviewersglob
bugs1303904
pushlog: API to cache pushlog lookups (bug 1303904); r?glob The pushlog API is not conducive to performance because various methods result in a SQLite database handle being opened for every lookup. When doing things like looking up pushlog entries for every node in a list, the overhead can really add up. This commit introduces a context manager for populating a cache to facilitate rapid node -> push lookups. This commit is somewhat hacky. There are a number of alternatives I'd prefer, such as a bulk lookup API or automatic caching. A bulk lookup API isn't usable in the case of the automationrelevance web command because the function that calls the pushlog API is per changeset, not per collection. And I'm not yet willing to implement automatic caching because caching is hard and I don't want to invite bugs. A context manager, however, allows us to create caches with scoped lifetimes thus limiting the caching (and bugs related to it) to consumers who opt into the cache. Yes, it's hacky. But it gets the job done. To prove the cache works, the automationrelevance web command has been updated to use it. This drops time to first byte on json-automationrelevance/5dddbefdf759f09b1411f33fa0920835b919fc81 HTTP requests against the mozilla-aurora repo from ~80s to ~1.5s. However, there is still a major source of clownshoes for that request... MozReview-Commit-ID: 37bP49sHxKZ
hgext/hgmo/__init__.py
hgext/pushlog/__init__.py
--- a/hgext/hgmo/__init__.py
+++ b/hgext/hgmo/__init__.py
@@ -439,38 +439,46 @@ def automationrelevancewebcommand(web, r
 
     csets = []
     # Query an unfiltered repo because sometimes automation wants to run against
     # changesets that have since become hidden. The response exposes whether the
     # requested node is visible, so consumers can make intelligent decisions
     # about what to do if the changeset isn't visible.
     urepo = repo.unfiltered()
     revs = list(urepo.revs('automationrelevant(%r)', req.form['node'][0]))
-    for rev in revs:
-        ctx = urepo[rev]
-        entry = webutil.changelistentry(web, ctx, tmpl)
-        # Some items in changelistentry are generators, which json.dumps()
-        # can't handle. So we expand them.
-        for k, v in entry.items():
-            # "files" is a generator that attempts to call a template.
-            # Don't even bother and just repopulate it.
-            if k == 'files':
-                entry['files'] = sorted(ctx.files())
-            elif k == 'allparents':
-                entry['parents'] = [p['node'] for p in v()]
-                del entry['allparents']
-            # These aren't interesting to us, so prune them. The
-            # original impetus for this was because "changelogtag"
-            # isn't part of the json template and adding it is non-trivial.
-            elif k in deletefields:
-                del entry[k]
-            elif isinstance(v, types.GeneratorType):
-                entry[k] = list(v)
+
+    # The pushlog extension wraps webutil.commonentry and the way it is called
+    # means pushlog opens a SQLite connection on every call. This is inefficient.
+    # So we preload and cache data for pushlog entries we care about.
+    cl = urepo.changelog
+    nodes = [cl.node(rev) for rev in revs]
 
-        csets.append(entry)
+    with repo.unfiltered().pushlog.cache_data_for_nodes(nodes):
+        for rev in revs:
+            ctx = urepo[rev]
+            entry = webutil.changelistentry(web, ctx, tmpl)
+            # Some items in changelistentry are generators, which json.dumps()
+            # can't handle. So we expand them.
+            for k, v in entry.items():
+                # "files" is a generator that attempts to call a template.
+                # Don't even bother and just repopulate it.
+                if k == 'files':
+                    entry['files'] = sorted(ctx.files())
+                elif k == 'allparents':
+                    entry['parents'] = [p['node'] for p in v()]
+                    del entry['allparents']
+                # These aren't interesting to us, so prune them. The
+                # original impetus for this was because "changelogtag"
+                # isn't part of the json template and adding it is non-trivial.
+                elif k in deletefields:
+                    del entry[k]
+                elif isinstance(v, types.GeneratorType):
+                    entry[k] = list(v)
+
+            csets.append(entry)
 
     # Advertise whether the requested revision is visible (non-obsolete).
     if csets:
         visible = csets[-1]['node'] in repo
     else:
         visible = None
 
     data = {
--- a/hgext/pushlog/__init__.py
+++ b/hgext/pushlog/__init__.py
@@ -185,16 +185,22 @@ class pushlog(object):
     '''An interface to pushlog data.'''
 
     def __init__(self, repo):
         '''Create an instance bound to a sqlite database in a path.'''
         # Use a weak ref to avoid cycle. This object's lifetime should be no
         # greater than the repo's.
         self.repo = repo
 
+        # Caches of pushlog data to avoid database I/O.
+        # int -> Push
+        self._push_by_id = {}
+        # bin node -> int
+        self._push_id_by_node = {}
+
     def _getconn(self, readonly=False, tr=None):
         """Get a SQLite connection to the pushlog.
 
         In normal operation, this will return a ``sqlite3.Connection``.
         If the database does not exist, it will be created. If the database
         schema is not present or up to date, it will be updated. An error will
         be raised if any of this could not be performed.
 
@@ -383,16 +389,20 @@ class pushlog(object):
     def pushfromnode(self, node):
         """Obtain info about a push that added the specified changeset.
 
         Returns a Push namedtuple of (pushid, who, when, [nodes]) or None if
         there is no pushlog info for this node.
 
         Argument is specified as binary node.
         """
+        if node in self._push_id_by_node:
+            pushid = self._push_id_by_node[node]
+            return self._push_by_id[pushid] if pushid is not None else None
+
         with self.conn(readonly=True) as c:
             if not c:
                 return None
 
             return self._push_from_node(c, node)
 
     def _push_from_node(self, conn, node):
         res = conn.execute('SELECT pushid from changesets WHERE node=?',
@@ -406,29 +416,62 @@ class pushlog(object):
         return self.pushfromnode(ctx.node())
 
     def pushfromid(self, conn, pushid):
         """Obtain a push from its numeric push id.
 
         Returns a Push namedtuple or None if there is no push with this push
         id.
         """
+        push = self._push_by_id.get(pushid)
+        if push:
+            return push
+
         res = conn.execute('SELECT id, user, date, node from pushlog '
                            'LEFT JOIN changesets on id=pushid '
                            'WHERE id=? ORDER BY rev ASC', (pushid,))
         nodes = []
         for pushid, who, when, node in res:
             who = who.encode('utf-8')
             nodes.append(node.encode('ascii'))
 
         if not nodes:
             return None
 
         return Push(pushid, who, when, nodes)
 
+    @contextlib.contextmanager
+    def cache_data_for_nodes(self, nodes):
+        """Given an iterable of nodes, cache pushlog data for them.
+
+        Due to current API design, many pushlog methods open a SQLite
+        database, perform a query, then close the database. Calling these
+        within tight loops can be slow.
+
+        This context manager can be used to pre-load pushlog data to
+        avoid inefficient SQLite access patterns.
+        """
+        # A @contextmanager generator must yield exactly once; returning
+        # before the yield raises RuntimeError("generator didn't yield").
+        with self.conn(readonly=True) as c:
+            try:
+                if c:
+                    for node in nodes:
+                        push = self._push_from_node(c, node)
+                        if push:
+                            self._push_id_by_node[node] = push.pushid
+                            self._push_by_id[push.pushid] = push
+                        else:
+                            self._push_id_by_node[node] = None
+
+                yield
+            finally:
+                self._push_id_by_node = {}
+                self._push_by_id = {}
+
     def verify(self):
         # Attempt to create database (since .pushes below won't).
         with self.conn():
             pass
 
         repo = self.repo
         ui = self.repo.ui