Bug 1289514 - React intelligently to reset pushlogs; r?catlee draft
author Gregory Szorc <gps@mozilla.com>
Tue, 26 Jul 2016 12:32:58 -0700
changeset 4879 ff1a7a80c2f14aee675a16f73374015446e4b6ee
parent 4878 d76e0a6a3900f728240c404c2d1dc3beb2359241
child 4880 2e73a62bc15a9761cc0b79f2417e4e682fa4863e
push id 3643
push user bmo:gps@mozilla.com
push date Tue, 26 Jul 2016 22:50:34 +0000
reviewers catlee
bugs 1289514
Bug 1289514 - React intelligently to reset pushlogs; r?catlee Now that we're querying pushlog version 2 and the last push ID is exposed to us, we can detect when a repo/pushlog is reset by recording the last processed push ID and detecting when the "lastpushid" value exposed by the server is less than that. With this change, twig resets should no longer require the hgpoller to be restarted to pick up that reset! MozReview-Commit-ID: CKhNCNEyGP2
changes/hgpoller.py
test/test_hgpoller.py
--- a/changes/hgpoller.py
+++ b/changes/hgpoller.py
@@ -228,16 +228,17 @@ class BaseHgPoller(BasePoller):
             hgURL = hgURL[:-1]
         fragments = [hgURL, branch]
         if tree is not None:
             fragments.append(tree)
         self.baseURL = "/".join(fragments)
         self.pushlogUrlOverride = pushlogUrlOverride
         self.tipsOnly = tipsOnly
         self.lastChangeset = None
+        self.lastPushID = None
         self.startLoad = 0
         self.loadTime = None
         self.repo_branch = repo_branch
         self.maxChanges = maxChanges
         # With mergePushChanges=True we get one buildbot change per push to hg.
         # The files from all changes in the push will be accumulated in the buildbot change
         # and the comments of tipmost change of the push will be used
         self.mergePushChanges = mergePushChanges
@@ -286,20 +287,47 @@ class BaseHgPoller(BasePoller):
 
     def processData(self, query):
         push_data = parse_pushlog_json(query)
 
         # The payload tells us the most recent push ID. If it is the empty
         # string, the pushlog is empty and there is no data to consume.
         if not push_data['lastpushid']:
             self.emptyRepo = True
+            self.lastPushID = None
             if self.verbose:
                 log.msg('%s is empty' % self.baseURL)
             return
 
+        # If nothing has changed and we're fully caught up, the remote
+        # lastpushid will be the same as self.lastPushID.
+        #
+        # If the remote lastpushid is less than a previously observed value,
+        # this could mean one of the following:
+        #
+        #    a) Data from the pushlog was removed (perhaps the repo was
+        #       stripped)
+        #    b) The repo/pushlog was reset.
+        #
+        # These scenarios should be rare. In both of them, our assumption
+        # about the behavior of the pushlog always being monotonically
+        # increasing has been invalidated. So we reset state and start
+        # again.
+        #
+        # It's worth noting that a reset repo's pushlog could have *more*
+        # entries than the former repo. In this case, this code will fail
+        # to detect a reset repo from the pushlog alone.
+        if self.lastPushID and push_data['lastpushid'] < self.lastPushID:
+            self.emptyRepo = False
+            self.lastPushID = None
+            self.lastChangeset = None
+            log.msg('%s appears to have been reset; clearing state' %
+                    self.baseURL)
+            return
+
         # We want to add at most self.maxChanges changes per push. If
         # mergePushChanges is True, then we'll get up to maxChanges pushes,
         # each with up to maxChanges changes.
         # Go through the list of pushes backwards, since we want to keep the
         # latest ones and possibly discard earlier ones.
         change_list = []
         too_many = False
         for push in reversed(push_data['pushes']):
@@ -416,16 +444,17 @@ class BaseHgPoller(BasePoller):
                 self.parent.addChange(c)
 
         # The repository isn't empty any more!
         self.emptyRepo = False
         # Use the last change found by the poller, regardless of if it's on our
         # branch or not. This is so we don't have to constantly ignore it in
         # future polls.
         self.lastChangeset = push_data['pushes'][-1]['changesets'][-1]['node']
+        self.lastPushID = push_data['pushes'][-1]['pushid']
         if self.verbose:
             log.msg("last changeset %s on %s" %
                     (self.lastChangeset, self.baseURL))
 
     def changeHook(self, change):
         pass
 
 
--- a/test/test_hgpoller.py
+++ b/test/test_hgpoller.py
@@ -337,16 +337,57 @@ class EmptyLastPushID(PollingTest):
         {
             "lastpushid": "",
             "pushes": {}
         }
         """)
         self.assertTrue(poller.emptyRepo, 'repo marked as empty')
 
 
+class PushlogReset(PollingTest):
+    def testDecreasingLastPushID(self):
+        poller = self.doTest(data=validPushlog)
+        self.assertEqual(poller.lastPushID, 15227)
+
+        poller.processData("""
+        {
+            "lastpushid": 15225,
+            "pushes": {
+                "15200": {
+                    "changesets": [
+                        {
+                            "author": "Jim Chen <jchen@mozilla.com>",
+                            "branch": "GECKO20b5pre_20100820_RELBRANCH",
+                            "desc": "Bug 588456 - Properly commit Android IME composition on blur; r=mwu a=blocking-fennec",
+                            "files": [
+                                "embedding/android/GeckoInputConnection.java",
+                                "embedding/android/GeckoSurfaceView.java",
+                                "widget/src/android/nsWindow.cpp",
+                                "widget/src/android/nsWindow.h"
+                            ],
+                            "node": "4c23e51a484f077ea27af3ea4a4ee13da5aeb5e6",
+                            "parents": [
+                                "935c15d506516a2269cee35a1a80748aaec1ae08"
+                            ],
+                            "tags": []
+                        }
+                    ],
+                    "date": 1282358416,
+                    "user": "dougt@mozilla.com"
+                }
+            }
+        }
+        """)
+        self.assertIsNone(poller.lastPushID, 'last push ID should be None')
+        self.assertIsNone(poller.lastChangeset, 'last changeset should be None')
+        self.assertEqual(poller._make_url(),
+                         'http://localhost/whatever/json-pushes?version=2&full=1',
+                         'pushlog URL should start from the end')
+
+
 class RepoBranchHandling(PollingTest):
     def testNoRepoBranch(self):
         self.doTest(repo_branch=None)
 
         self.assertEquals(len(self.changes), 2)
 
     def testDefaultRepoBranch(self):
         self.doTest(repo_branch='default')