Bug 1289514 - Change API and semantics of pushlog parsing; r=catlee
Instead of returning the list of pushes, we now return a slightly
normalized version of the raw JSON. This allows us to expose things
like "lastpushid" to consumers.
We also sort the pushes by push id because that is the most
appropriate value to sort on since it is monotonically increasing
(unlike date, which can have clock skew and some pushlog may contain
out-of-order insertions with decreasing data values). We want
automation to process pushes in the order they were pushed, so
sorting by push ID makes sense.
MozReview-Commit-ID: 6cFynKvSpe4
--- a/changes/hgpoller.py
+++ b/changes/hgpoller.py
@@ -103,21 +103,39 @@ from twisted.python import log
from twisted.internet import defer, reactor
from twisted.internet.task import LoopingCall
from twisted.web.client import getPage
from buildbot.changes import base, changes
from buildbot.util import json
-def _parse_changes(data):
- pushes = json.loads(data)['pushes'].values()
- # Sort by push date
- pushes.sort(key=lambda p: p['date'])
- return pushes
+def parse_pushlog_json(data):
+ """Parse pushlog data version 2.
+
+ Returns a normalized version of the data. The "pushes" key
+ is a list of pushlog entries in order instead of a dict.
+ """
+ data = json.loads(data)
+
+ pushes = []
+ for pushid, push in data['pushes'].items():
+ # Keys are strings in the JSON. We convert to int so the sort
+ # below works appropriately. (A string sort may not always
+ # have appropriate ordering.)
+ push['pushid'] = int(pushid)
+ pushes.append(push)
+
+ # Sort by the push id because that is the monotonically incrementing
+ # key.
+ pushes.sort(key=lambda p: p['pushid'])
+
+ data['pushes'] = pushes
+
+ return data
class Pluggable(object):
'''The Pluggable class implements a forward for Deferred's that
can be thrown away.
This is in particular useful when a network request doesn't really
error in a reasonable time, and you want to make sure that if it
@@ -262,35 +280,35 @@ class BaseHgPoller(BasePoller):
## emptyRepo to True so we can trigger builds for new changes there
# if self.verbose:
# log.msg("%s has been reset" % self.baseURL)
# self.lastChangeset = None
# self.emptyRepo = True
return self.super_class.dataFailed(self, res)
def processData(self, query):
- pushes = _parse_changes(query)
- if len(pushes) == 0:
+ push_data = parse_pushlog_json(query)
+ if not push_data['pushes']:
if self.lastChangeset is None:
# We don't have a lastChangeset, and there are no changes. Assume
# the repository is empty.
self.emptyRepo = True
if self.verbose:
log.msg("%s is empty" % self.baseURL)
# Nothing else to do
return
# We want to add at most self.maxChanges changes per push. If
# mergePushChanges is True, then we'll get up to maxChanges pushes,
# each with up to maxChanges changes.
# Go through the list of pushes backwards, since we want to keep the
# latest ones and possibly discard earlier ones.
change_list = []
too_many = False
- for push in reversed(pushes):
+ for push in reversed(push_data['pushes']):
# Used for merging push changes
c = dict(
user=push['user'],
date=push['date'],
files=[],
desc="",
node=None,
commit_titles=[],
@@ -398,17 +416,17 @@ class BaseHgPoller(BasePoller):
self.changeHook(c)
self.parent.addChange(c)
# The repository isn't empty any more!
self.emptyRepo = False
# Use the last change found by the poller, regardless of if it's on our
# branch or not. This is so we don't have to constantly ignore it in
# future polls.
- self.lastChangeset = pushes[-1]["changesets"][-1]["node"]
+ self.lastChangeset = push_data['pushes'][-1]['changesets'][-1]['node']
if self.verbose:
log.msg("last changeset %s on %s" %
(self.lastChangeset, self.baseURL))
def changeHook(self, change):
pass
--- a/test/test_hgpoller.py
+++ b/test/test_hgpoller.py
@@ -266,17 +266,18 @@ validPushlog = """
malformedPushlog = """
{ "invalid json" }
"""
class PushlogParsing(unittest.TestCase):
def testValidPushlog(self):
- pushes = hgpoller._parse_changes(validPushlog)
+ push_data = hgpoller.parse_pushlog_json(validPushlog)
+ pushes = push_data['pushes']
self.failUnlessEqual(len(pushes), 2)
self.failUnlessEqual(pushes[0]['changesets'][0]['node'],
'4c23e51a484f077ea27af3ea4a4ee13da5aeb5e6')
self.failUnlessEqual(pushes[0]['date'], 1282358416)
self.failUnlessEqual(len(pushes[0]['changesets'][0]['files']), 4)
self.failUnlessEqual(pushes[0]['changesets'][0]['branch'],
'GECKO20b5pre_20100820_RELBRANCH')
@@ -297,20 +298,20 @@ class PushlogParsing(unittest.TestCase):
'33be08836cb164f9e546231fc59e9e4cf98ed991')
self.failUnlessEqual(len(pushes[1]['changesets'][1]['files']), 1)
self.failUnlessEqual(pushes[1]['changesets'][1]['branch'], 'default')
self.failUnlessEqual(pushes[1]['changesets'][1]['author'],
'Bobby Holley <bobbyholley@gmail.com>')
def testMalformedPushlog(self):
self.failUnlessRaises(
- JSONDecodeError, hgpoller._parse_changes, malformedPushlog)
+ JSONDecodeError, hgpoller.parse_pushlog_json, malformedPushlog)
def testEmptyPushlog(self):
- self.failUnlessRaises(JSONDecodeError, hgpoller._parse_changes, "")
+ self.failUnlessRaises(JSONDecodeError, hgpoller.parse_pushlog_json, "")
class RepoBranchHandling(unittest.TestCase):
def setUp(self):
self.changes = []
def doTest(self, repo_branch):
changes = self.changes