Bug 1281004: Temporary scripts to analyze and compare task graphs
MozReview-Commit-ID: 4MCHUmAduVo
new file mode 100644
--- /dev/null
+++ b/analyze.py
@@ -0,0 +1,82 @@
+import os
+import json
+
+# Terminal size, parsed from the "rows columns" output of `stty size`. When
+# stty prints nothing (e.g. stdin is not a terminal), fall back to 24x80
+# instead of failing the two-name unpack below with ValueError.
+ROWS, COLUMNS = map(int, os.popen('stty size', 'r').read().split() or (24, 80))
+
+def _unexpected_scopes(t):
+    """Return, sorted, the scopes of task `t` not matched by its payload's features, caches or devices."""
+    payload = t['task']['payload']
+    expected = set()
+    if payload.get('features', {}).get('allowPtrace'):
+        expected.add("docker-worker:feature:allowPtrace")
+    expected.update('docker-worker:cache:' + cache
+                    for cache in payload.get('cache', {}))
+    expected.update('docker-worker:capability:device:' + dev
+                    for dev in payload.get('capabilities', {}).get('devices', {}))
+    return sorted(set(t['task']['scopes']) - expected)
+
+
+def _command_summary(t):
+    """Return task `t`'s command as one string, minus its first two elements and a fixed set of options."""
+    dropped_prefixes = (
+        '--installer-url=',
+        '--test-packages-url=',
+        '--download-symbols',
+        '--this-chunk',
+        '--total-chunk',
+    )
+    # command elements are either plain strings or {'task-reference': ...} dicts
+    args = [c['task-reference'] if isinstance(c, dict) else c
+            for c in t['task']['payload']['command'][2:]]
+    return ' '.join(a for a in args
+                    if a != '--e10s' and not a.startswith(dropped_prefixes))
+
+
+def rowdata(t):
+    """Return the table row for task `t`, or None if the task is not listed.
+
+    Only tasks whose `unittest_suite` attribute starts with "firefox" get a
+    row. The row is the tuple (unittest_flavor, build_platform, build_type,
+    treeherder tier); attributes the task lacks appear as None.
+
+    Raises KeyError if a listed task has no task.extra.treeherder.tier.
+    """
+    attributes = t['attributes']
+
+    # Filter before touching anything else, and treat a missing suite as
+    # "not listed" rather than calling .startswith on None.
+    if not (attributes.get('unittest_suite') or '').startswith('firefox'):
+        return None
+
+    # Further columns can be appended here when needed, e.g.
+    # _command_summary(t) or ' '.join(_unexpected_scopes(t)).
+    return (
+        attributes.get('unittest_flavor'),
+        attributes.get('build_platform'),
+        attributes.get('build_type'),
+        t['task']['extra']['treeherder']['tier'],
+    )
+
+def main():
+    """Print a column-wrapped table of the test tasks found in tasks-old.json.
+
+    Reads the task graph from "tasks-old.json" in the current directory,
+    keeps the tasks whose `legacy_kind` attribute ends in "test", converts
+    each one to a row with rowdata() (tasks for which it returns None are
+    dropped) and prints the sorted rows side by side, fitting as many per
+    output line as COLUMNS allows. Prints nothing when no task yields a row.
+    """
+    with open("tasks-old.json") as f:
+        ftg = json.load(f)
+    test_tasks = [t for t in ftg.values()
+                  if t['attributes'].get('legacy_kind', '').endswith('test')]
+
+    rows = [[str(v) for v in r] for r in map(rowdata, test_tasks) if r]
+    if not rows:
+        # the layout below needs at least one row to size the columns
+        return
+
+    # pad every field to the widest value seen in its column
+    widths = [max(len(v) for v in column) for column in zip(*rows)]
+    formatted = sorted(
+        '\033[95m|\033[0m'.join(v.ljust(w) for v, w in zip(row, widths))
+        for row in rows)
+    rowlen = sum(widths) + len(widths) * 2
+
+    # Lay the entries out column-major over `stride` output lines: entry i
+    # lands on line i % stride. Ceiling division keeps every line within
+    # rows_per_line entries, and stride is at least 1 because rows is
+    # non-empty (a terminal narrower than one row still gets one per line).
+    rows_per_line = max(1, COLUMNS // (rowlen + 1))
+    stride = -(-len(formatted) // rows_per_line)
+
+    lines = [[] for _ in range(stride)]
+    for i, row in enumerate(formatted):
+        lines[i % stride].append(row.ljust(rowlen))
+
+    for line in lines:
+        print('\033[91m|\033[0m '.join(line))
+
+# Run only when executed as a script, so that importing this module does not
+# call main().
+if __name__ == '__main__':
+    main()
new file mode 100644
--- /dev/null
+++ b/graphdiff.py
@@ -0,0 +1,122 @@
+import json
+import difflib
+import re
+
+def tests_only(graph):
+    """Return a new dict with only the test tasks of `graph`.
+
+    A task counts as a test when its `legacy_kind` attribute ends in "test"
+    or its `kind` attribute is "desktop-test" or "android-test". Tasks with
+    neither attribute are left out.
+    """
+    def is_test(t):
+        attributes = t['attributes']
+        return (attributes.get('legacy_kind', '').endswith('test')
+                or attributes.get('kind') in ('desktop-test', 'android-test'))
+
+    return {l: t for l, t in graph.items() if is_test(t)}
+
+def replace_labels(str):
+    """Return `str` with every "<"..">" delimited span replaced by the literal "<...>"."""
+    # NOTE: the parameter name shadows the builtin `str`; it is kept because
+    # it is part of the function's signature.
+    masked, _count = re.subn("<[^>]*>", "<...>", str)
+    return masked
+
+def drop_stuff(graph):
+    """Normalize every task in `graph` in place and return `graph`.
+
+    For each task this deletes created/metadata/deadline, the label and
+    dependencies, the kind/test_platform attributes, the treeherder
+    groupName and each artifact's expires; it also removes schedulerId,
+    legacy_kind, treeherder build and task expires when they are present.
+    What remains is normalized: MOZHARNESS_CONFIG loses trailing whitespace,
+    the treeherder symbol becomes a string, command options after the first
+    two elements are sorted, env values of True become 'true', and scopes
+    are sorted.
+
+    Raises KeyError if a task lacks one of the unconditionally deleted keys.
+    """
+    for entry in graph.values():
+        task = entry['task']
+        del task['created']
+        del task['metadata']
+        del entry['label']
+        task.pop('schedulerId', None)
+        del entry['dependencies']
+
+        attributes = entry['attributes']
+        del attributes['kind']
+        del attributes['test_platform']  # changed
+        attributes.pop('legacy_kind', None)
+        del task['deadline']
+
+        payload = task['payload']
+        env = payload['env']
+        env['MOZHARNESS_CONFIG'] = env['MOZHARNESS_CONFIG'].rstrip()
+
+        treeherder = task['extra']['treeherder']
+        # groupName used to be inconsistent within a group; fixed since
+        del treeherder['groupName']  # changed
+
+        # 'build' is ignored according to
+        # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json#
+        treeherder.pop('build', None)
+
+        # old tasks only carry an explicit expires when they are try tasks
+        task.pop('expires', None)
+
+        # symbols should be strings
+        treeherder['symbol'] = str(treeherder['symbol'])
+
+        # option order is not consistent, so compare options sorted
+        command = payload['command']
+        payload['command'] = command[:2] + sorted(command[2:])
+
+        # treat 'true' and True as the same env value
+        payload['env'] = {
+            name: ('true' if value is True else value)
+            for name, value in env.items()
+        }
+
+        task['scopes'].sort()
+        for artifact in payload.get('artifacts', {}).values():
+            del artifact['expires']
+    return graph
+
+def relabel(graph):
+    """Return the tasks of `graph` re-keyed by a label derived from each task.
+
+    The label joins with "/" the string forms of the task's treeherder
+    groupSymbol, treeherder symbol, build_platform and build_type, in that
+    order, leaving out any of them whose lookup raises KeyError.
+
+    Raises Exception when two tasks produce the same label.
+    """
+    relabeled = {}
+    for task in graph.values():
+        parts = []
+        for path in [
+            'task.extra.treeherder.groupSymbol',
+            'task.extra.treeherder.symbol',
+            'attributes.build_platform',
+            'attributes.build_type',
+        ]:
+            # walk the dotted path down through the nested dicts
+            node = task
+            try:
+                for key in path.split('.'):
+                    node = node[key]
+            except KeyError:
+                continue
+            parts.append(str(node))
+
+        label = '/'.join(parts)
+        if label in relabeled:
+            raise Exception("duplicate label %s for %s / %s" % (label, task['label'], relabeled[label]['label']))
+        relabeled[label] = task
+    return relabeled
+
+def main():
+    """Print a unified diff between the test tasks of two task graphs.
+
+    Reads "tasks-old.json" and "tasks-new.json" from the current directory
+    and passes each through tests_only(), relabel() and drop_stuff(). It
+    then:
+
+    * reports how many labels exist only in the old graph and leaves those
+      tasks out of the comparison;
+    * prints one "+ added task" line per label that exists only in the new
+      graph and leaves those out too;
+    * prints a unified diff (4 lines of context) of the remaining tasks,
+      dumped as sorted, indented JSON with replace_labels() applied;
+    * prints "no diff" if nothing was added or changed, and finally the
+      labels of the skipped tasks.
+    """
+    with open("tasks-old.json") as f:
+        graph1 = json.load(f)
+    with open("tasks-new.json") as f:
+        graph2 = json.load(f)
+
+    graph1 = drop_stuff(relabel(tests_only(graph1)))
+    graph2 = drop_stuff(relabel(tests_only(graph2)))
+
+    skipped = set(graph1) - set(graph2)
+    print("skipping {} tasks defined only in tasks-old.json".format(len(skipped)))
+
+    # temporarily omit tasks from the old graph that are not present in the new
+    graph1 = {k: v for k, v in graph1.items() if k in graph2}
+
+    print("comparing {} tasks".format(len(graph1)))
+
+    # just print labels for added tasks, as that's usually what's useful
+    diff = False
+    for l in sorted(set(graph2) - set(graph1)):
+        print("+ added task: {}".format(l))
+        del graph2[l]
+        diff = True
+
+    old_lines = replace_labels(json.dumps(graph1, indent=4, sort_keys=True)).split('\n')
+    new_lines = replace_labels(json.dumps(graph2, indent=4, sort_keys=True)).split('\n')
+
+    for line in difflib.unified_diff(old_lines, new_lines, fromfile="tasks-old.json", tofile="tasks-new.json", lineterm='', n=4):
+        diff = True
+        print(line)
+
+    if not diff:
+        print("no diff")
+    # NOTE(review): the original indentation was lost, so it is unclear whether
+    # this line belonged inside the `if` above; it is assumed to run always.
+    print("skipped tasks:\n" + "\n".join(sorted(skipped)))
+
+# Run only when executed as a script, so that importing this module does not
+# call main().
+if __name__ == '__main__':
+    main()