Bug 1289514 - Poll pushlog from last consumed push ID; r=catlee draft
authorGregory Szorc <gps@mozilla.com>
Tue, 26 Jul 2016 13:04:41 -0700
changeset 4881 1d93046a312c727ca90c0218b1022d596ffe02e0
parent 4880 2e73a62bc15a9761cc0b79f2417e4e682fa4863e
child 4882 ef442b9cae77c76f273516b402eae49b31b54430
push id3643
push userbmo:gps@mozilla.com
push dateTue, 26 Jul 2016 22:50:34 +0000
reviewerscatlee
bugs1289514
Bug 1289514 - Poll pushlog from last consumed push ID; r=catlee Previously, we polled pushlog from the last consumed changeset. According to https://mozilla-version-control-tools.readthedocs.io/en/latest/hgmo/pushlog.html the preferred mechanism for consuming pushlog is to use startID. So we do that. MozReview-Commit-ID: 8DradcwNlsu
changes/hgpoller.py
test/test_hgpoller.py
--- a/changes/hgpoller.py
+++ b/changes/hgpoller.py
@@ -227,17 +227,16 @@ class BaseHgPoller(BasePoller):
         if hgURL.endswith("/"):
             hgURL = hgURL[:-1]
         fragments = [hgURL, branch]
         if tree is not None:
             fragments.append(tree)
         self.baseURL = "/".join(fragments)
         self.pushlogUrlOverride = pushlogUrlOverride
         self.tipsOnly = tipsOnly
-        self.lastChangeset = None
         self.lastPushID = None
         self.startLoad = 0
         self.loadTime = None
         self.repo_branch = repo_branch
         self.maxChanges = maxChanges
         # With mergePushChanges=True we get one buildbot change per push to hg.
         # The files from all changes in the push will be accumulated in the buildbot change
         # and the comments of tipmost change of the push will be used
@@ -254,18 +253,18 @@ class BaseHgPoller(BasePoller):
     def _make_url(self):
         url = None
         if self.pushlogUrlOverride:
             url = self.pushlogUrlOverride
         else:
             url = "/".join((self.baseURL, 'json-pushes?version=2&full=1'))
 
         args = []
-        if self.lastChangeset is not None:
-            args.append('fromchange=' + self.lastChangeset)
+        if self.lastPushID is not None:
+            args.append('startID=%d' % self.lastPushID)
         if self.tipsOnly:
             args.append('tipsonly=1')
         if args:
             if '?' not in url:
                 url += '?'
             else:
                 url += '&'
             url += '&'.join(args)
@@ -300,21 +299,24 @@ class BaseHgPoller(BasePoller):
         # again.
         #
         # It's worth noting that a reset repo's pushlog could have *more*
         # entries than the former repo. In this case, this code will fail
         # to detect a reset repo from the pushlog alone.
         if self.lastPushID and push_data['lastpushid'] < self.lastPushID:
             self.emptyRepo = False
             self.lastPushID = None
-            self.lastChangeset = None
             log.msg('%s appears to have been reset; clearing state' %
                     self.baseURL)
             return
 
+        # No pushes to process. Exit early.
+        if not push_data['pushes']:
+            return
+
         # We want to add at most self.maxChanges changes per push. If
         # mergePushChanges is True, then we'll get up to maxChanges pushes,
         # each with up to maxChanges changes.
         # Go through the list of pushes backwards, since we want to keep the
         # latest ones and possibly discard earlier ones.
         change_list = []
         too_many = False
         for push in reversed(push_data['pushes']):
@@ -399,26 +401,26 @@ class BaseHgPoller(BasePoller):
                 desc='more than maxChanges(%i) received; ignoring the rest' % self.maxChanges,
                 date=time.time(),
             )
             change_list.append(c)
 
         # Un-reverse the list of changes so they get added in the right order
         change_list.reverse()
 
-        # If we have a lastChangeset we're comparing against, we've been
-        # running for a while and so any changes returned here are new.
+        # If we have a lastPushID, we've consumed data already so any changes
+        # returned here are new.
 
         # If the repository was previously empty (indicated by emptyRepo=True),
         # we also want to pay attention to all these pushes.
 
-        # If we don't have a lastChangeset and the repository isn't empty, then
-        # don't trigger any new builds, and start monitoring for changes since
-        # the latest changeset in the repository
-        if self.lastChangeset is not None or self.emptyRepo:
+        # If we don't have a lastPushID and the repository isn't empty, then
+        # don't trigger any new builds, and start monitoring for changes
+        # from the last push ID.
+        if self.lastPushID is not None or self.emptyRepo:
             for change in change_list:
                 link = "%s/rev/%s" % (self.baseURL, change["node"])
                 c = changes.Change(who=change["user"],
                                    files=change["files"],
                                    revision=change["node"],
                                    comments=change["desc"],
                                    revlink=link,
                                    when=change["date"],
@@ -428,23 +430,21 @@ class BaseHgPoller(BasePoller):
                                              change['commit_titles'],
                                              'BaseHgPoller')
                 self.changeHook(c)
                 self.parent.addChange(c)
 
         # The repository isn't empty any more!
         self.emptyRepo = False
         # Use the last change found by the poller, regardless of if it's on our
-        # branch or not. This is so we don't have to constantly ignore it in
-        # future polls.
-        self.lastChangeset = push_data['pushes'][-1]['changesets'][-1]['node']
+        # branch or not.
         self.lastPushID = push_data['pushes'][-1]['pushid']
         if self.verbose:
-            log.msg("last changeset %s on %s" %
-                    (self.lastChangeset, self.baseURL))
+            log.msg('last processed push id on %s is %d' %
+                    (self.baseURL, self.lastPushID))
 
     def changeHook(self, change):
         pass
 
 
 class HgPoller(base.ChangeSource, BaseHgPoller):
     """This source will poll a Mercurial server over HTTP using
     the built-in RSS feed for changes and submit them to the
--- a/test/test_hgpoller.py
+++ b/test/test_hgpoller.py
@@ -60,62 +60,50 @@ class TestHTTPServer(object):
 class UrlCreation(unittest.TestCase):
     def testSimpleUrl(self):
         correctUrl = 'https://hg.mozilla.org/mozilla-central/json-pushes?version=2&full=1'
         poller = hgpoller.BaseHgPoller(
             hgURL='https://hg.mozilla.org', branch='mozilla-central')
         url = poller._make_url()
         self.failUnlessEqual(url, correctUrl)
 
-    def testUrlWithLastChangeset(self):
-        correctUrl = 'https://hg.mozilla.org/mozilla-central/json-pushes?version=2&full=1&fromchange=123456'
+    def testUrlWithLastPushID(self):
+        correctUrl = 'https://hg.mozilla.org/mozilla-central/json-pushes?version=2&full=1&startID=42'
         poller = hgpoller.BaseHgPoller(
             hgURL='https://hg.mozilla.org', branch='mozilla-central')
-        poller.lastChangeset = '123456'
+        poller.lastPushID = 42
         url = poller._make_url()
         self.failUnlessEqual(url, correctUrl)
 
     def testTipsOnlyUrl(self):
         correctUrl = 'https://hg.mozilla.org/mozilla-central/json-pushes?version=2&full=1&tipsonly=1'
         poller = hgpoller.BaseHgPoller(
             hgURL='https://hg.mozilla.org', branch='mozilla-central',
             tipsOnly=True)
         url = poller._make_url()
         self.failUnlessEqual(url, correctUrl)
 
-    def testTipsOnlyWithLastChangeset(self):
+    def testTipsOnlyWithLastPushID(self):
         # there's two possible correct URLs in this case
-        correctUrls = [
-            'https://hg.mozilla.org/releases/mozilla-1.9.1/json-pushes?version=2&full=1&fromchange=123456&tipsonly=1',
-            'https://hg.mozilla.org/releases/mozilla-1.9.1/json-pushes?version=2&full=1&tipsonly=1&fromchange=123456'
-        ]
+        correctUrl = 'https://hg.mozilla.org/releases/mozilla-1.9.1/json-pushes?version=2&full=1&startID=42&tipsonly=1'
         poller = hgpoller.BaseHgPoller(hgURL='https://hg.mozilla.org',
                               branch='releases/mozilla-1.9.1', tipsOnly=True)
-        poller.lastChangeset = '123456'
+        poller.lastPushID = 42
         url = poller._make_url()
-        self.failUnlessIn(url, correctUrls)
+        self.failUnlessEqual(url, correctUrl)
 
     def testOverrideUrl(self):
-        correctUrl = 'https://hg.mozilla.org/other_repo/json-pushes?version=2&full=1&fromchange=123456'
+        correctUrl = 'https://hg.mozilla.org/other_repo/json-pushes?version=2&full=1&startID=42'
         poller = hgpoller.BaseHgPoller(
             hgURL='https://hg.mozilla.org', branch='mozilla-central',
             pushlogUrlOverride='https://hg.mozilla.org/other_repo/json-pushes?version=2&full=1')
-        poller.lastChangeset = '123456'
+        poller.lastPushID = 42
         url = poller._make_url()
         self.failUnlessEqual(url, correctUrl)
 
-    def testUrlWithUnicodeLastChangeset(self):
-        correctUrl = 'https://hg.mozilla.org/mozilla-central/json-pushes?version=2&full=1&fromchange=123456'
-        poller = hgpoller.BaseHgPoller(
-            hgURL='https://hg.mozilla.org', branch='mozilla-central')
-        poller.lastChangeset = u'123456'
-        url = poller._make_url()
-        self.failUnlessEqual(url, correctUrl)
-        self.failUnless(isinstance(url, str))
-
 
 fakeLocalesFile = """/l10n-central/af/
 /l10n-central/be/
 /l10n-central/de/
 /l10n-central/hi/
 /l10n-central/kk/
 /l10n-central/zh-TW/"""
 
@@ -372,22 +360,36 @@ class PushlogReset(PollingTest):
                     ],
                     "date": 1282358416,
                     "user": "dougt@mozilla.com"
                 }
             }
         }
         """)
         self.assertIsNone(poller.lastPushID, 'last push ID should be None')
-        self.assertIsNone(poller.lastChangeset, 'last changeset should be None')
         self.assertEqual(poller._make_url(),
                          'http://localhost/whatever/json-pushes?version=2&full=1',
                          'pushlog URL should start from the end')
 
 
+class EmptyPushes(PollingTest):
+    def testEmptyPushes(self):
+        poller = self.doTest(data=validPushlog)
+        self.assertEqual(poller._make_url(), 'http://localhost/whatever/json-pushes?version=2&full=1&startID=15227')
+
+        # This simulates what happens on subsequent polls when no new
+        # data is available.
+        poller.processData("""
+        {
+            "lastpushid": 15227,
+            "pushes": {}
+        }
+        """)
+
+
 class RepoBranchHandling(PollingTest):
     def testNoRepoBranch(self):
         self.doTest(repo_branch=None)
 
         self.assertEquals(len(self.changes), 2)
 
     def testDefaultRepoBranch(self):
         self.doTest(repo_branch='default')