Bug 1281004: Temporary scripts to analyze and compare task graphs draft
author Dustin J. Mitchell <dustin@mozilla.com>
Wed, 06 Jul 2016 02:02:31 +0000
changeset 384313 047543f20150a226ca32a79834fa4d7208059b7b
parent 384312 099606c610592223ca90dff57c0b9d87afd8ff88
child 384314 00a147bfc031f24d626ba8ffef0724a01dc1f4b1
push id 22238
push user dmitchell@mozilla.com
push date Wed, 06 Jul 2016 02:09:29 +0000
bugs 1281004
milestone 50.0a1
Bug 1281004: Temporary scripts to analyze and compare task graphs MozReview-Commit-ID: 4MCHUmAduVo
analyze.py
graphdiff.py
new file mode 100644
--- /dev/null
+++ b/analyze.py
@@ -0,0 +1,82 @@
+import os
+import json
+
+def _terminal_size(default=(24, 80)):
+    """Return the terminal size as ``(rows, columns)``.
+
+    The size is read from the output of ``stty size``.  When that output
+    is not exactly two integers (for example nothing is printed because
+    stdin is not a terminal), *default* is returned instead of raising.
+    """
+    pipe = os.popen('stty size', 'r')
+    try:
+        fields = pipe.read().split()
+    finally:
+        pipe.close()
+    try:
+        rows, columns = map(int, fields)
+    except ValueError:
+        # wrong number of fields, or fields that are not integers
+        return default
+    return rows, columns
+
+
+ROWS, COLUMNS = _terminal_size()
+
+def _unexpected_scopes(task):
+    """Return the sorted scopes of *task* that its payload does not explain.
+
+    A scope is expected when it matches the allowPtrace feature, a cache
+    or a device capability listed in the task payload.
+    """
+    payload = task['payload']
+    expected = set()
+    if payload.get('features', {}).get('allowPtrace'):
+        expected.add("docker-worker:feature:allowPtrace")
+    # not every task mounts caches, so a missing 'cache' means "none"
+    for cache in payload.get('cache', {}):
+        expected.add('docker-worker:cache:' + cache)
+    for dev in payload.get('capabilities', {}).get('devices', {}):
+        expected.add('docker-worker:capability:device:' + dev)
+    return sorted(set(task['scopes']) - expected)
+
+
+def _short_command(command):
+    """Return *command* as a single string without its noisy arguments.
+
+    The first two elements are dropped, ``{'task-reference': ...}`` entries
+    are replaced by their string, and the ``--e10s`` option as well as
+    every option starting with one of the prefixes below is removed.
+    """
+    dropped_prefixes = (
+        '--installer-url=',
+        '--test-packages-url=',
+        '--download-symbols',
+        '--this-chunk',
+        '--total-chunk',
+    )
+    args = [(c['task-reference'] if isinstance(c, dict) else c)
+            for c in command][2:]
+    return ' '.join(a for a in args
+                    if a != '--e10s' and not a.startswith(dropped_prefixes))
+
+
+def rowdata(t):
+    """Return the table row for test task *t*, or None to leave it out.
+
+    Only tasks whose ``unittest_suite`` attribute starts with ``firefox``
+    produce a row; a task without that attribute is left out as well.  The
+    row is the tuple (unittest_flavor, build_platform, build_type,
+    treeherder tier).  Uncomment entries below to show more columns.
+    """
+    attributes = t['attributes']
+    # decide first, so that skipped tasks need none of the other fields
+    if not (attributes.get('unittest_suite') or '').startswith('firefox'):
+        return None
+
+    task = t['task']
+    return (
+        #attributes.get('unittest_suite'),
+        attributes.get('unittest_flavor'),
+        attributes.get('build_platform'),
+        attributes.get('build_type'),
+        #task['extra']['treeherder']['groupSymbol'],
+        #task['extra']['treeherder']['symbol'],
+        #attributes.get('unittest_try_name'),
+        task['extra']['treeherder']['tier'],
+        #' '.join(_unexpected_scopes(task)),
+        #_short_command(task['payload']['command']),
+    )
+
+def _arrange(cells, cell_width, per_line):
+    """Distribute *cells* column-major over as few lines as possible.
+
+    The cells are sorted, padded to *cell_width* and spread so that no line
+    holds more than *per_line* of them; a *per_line* below one is treated
+    as one.  Returns a list of lines, each a list of padded cells.
+    """
+    if not cells:
+        return []
+    per_line = max(1, per_line)
+    # ceiling division: lines needed when each holds at most per_line cells
+    stride = -(-len(cells) // per_line)
+    lines = [[] for _ in range(stride)]
+    for i, cell in enumerate(sorted(cells)):
+        lines[i % stride].append(cell.ljust(cell_width))
+    return lines
+
+
+def main():
+    """Print the test tasks found in tasks-old.json as a compact table.
+
+    Each task accepted by rowdata() becomes one cell whose fields are
+    aligned on a common width; the sorted cells are then laid out in as
+    many side-by-side columns as fit in COLUMNS.  Nothing is printed when
+    no task is accepted.
+    """
+    with open("tasks-old.json") as f:
+        ftg = json.load(f)
+    test_tasks = [t for t in ftg.values()
+                  if t['attributes'].get('legacy_kind', '').endswith('test')]
+
+    rows = [[str(field) for field in row]
+            for row in (rowdata(t) for t in test_tasks) if row]
+    if not rows:
+        return
+
+    # widest value seen in each field position
+    widths = [max(len(field) for field in column) for column in zip(*rows)]
+    formatstr = '\033[95m|\033[0m'.join('{:%d}' % w for w in widths)
+    formatted = [formatstr.format(*row) for row in rows]
+
+    rowlen = sum(widths) + len(widths) * 2
+    for line in _arrange(formatted, rowlen, COLUMNS // (rowlen + 1)):
+        print('\033[91m|\033[0m '.join(line))
+
+
+if __name__ == '__main__':
+    main()
new file mode 100644
--- /dev/null
+++ b/graphdiff.py
@@ -0,0 +1,122 @@
+import json
+import difflib
+import re
+
+def tests_only(graph):
+    """Return the subset of *graph* (label -> task) that are test tasks.
+
+    A task is a test task when its ``legacy_kind`` attribute ends with
+    ``test`` or its ``kind`` attribute is ``desktop-test`` or
+    ``android-test``.  Tasks lacking either attribute are simply not
+    matched by that criterion.
+    """
+    test_kinds = ('desktop-test', 'android-test')
+    return {label: t for label, t in graph.items()
+            if t['attributes'].get('legacy_kind', '').endswith('test')
+            or t['attributes'].get('kind') in test_kinds}
+
+def replace_labels(str):
+    """Return *str* with each angle-bracketed span collapsed to ``<...>``."""
+    bracketed = re.compile("<[^>]*>")
+    return bracketed.sub("<...>", str)
+
+def drop_stuff(graph):
+    """Normalize every task of *graph* in place and return *graph*.
+
+    Fields that are expected to differ between the two graphs are removed
+    and a few values are brought into a canonical form, so that a textual
+    diff of the result shows only the remaining differences.
+
+    Raises KeyError when a task lacks one of the fields that is removed
+    unconditionally.
+    """
+    def command_key(arg):
+        # Commands may mix plain strings with {'task-reference': ...}
+        # dicts; order the dicts ahead of the strings and never compare
+        # the two kinds with each other.
+        if isinstance(arg, dict):
+            return (0, sorted(arg.items()))
+        return (1, arg)
+
+    for t in graph.values():
+        task = t['task']
+        attributes = t['attributes']
+        payload = task['payload']
+        treeherder = task['extra']['treeherder']
+
+        del task['created']
+        del task['metadata']
+        del t['label']
+        task.pop('schedulerId', None)
+        del t['dependencies']
+        del attributes['kind']
+        del attributes['test_platform']  # changed
+        attributes.pop('legacy_kind', None)
+        del task['deadline']
+
+        # groupName was inconsistent for the same group; fixed now
+        del treeherder['groupName']  # changed
+
+        # this is ignored per
+        # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json#
+        treeherder.pop('build', None)
+
+        # old tasks don't have an explicit expires unless they are try
+        task.pop('expires', None)
+
+        # symbols should be strings
+        treeherder['symbol'] = str(treeherder['symbol'])
+
+        # sort command options since option order is not consistent
+        command = payload['command']
+        payload['command'] = command[:2] + sorted(command[2:], key=command_key)
+
+        env = payload['env']
+        env['MOZHARNESS_CONFIG'] = env['MOZHARNESS_CONFIG'].rstrip()
+        # treat 'true' and True as the same for env values
+        payload['env'] = {name: 'true' if value is True else value
+                          for name, value in env.items()}
+
+        task['scopes'].sort()
+        for artifact in payload.get('artifacts', {}).values():
+            del artifact['expires']
+    return graph
+
+def relabel(graph):
+    """Return the tasks of *graph* keyed by a label built from their content.
+
+    The new label joins, with ``/``, the treeherder group symbol, the
+    treeherder symbol, the build platform and the build type; values a
+    task does not have are left out of its label.
+
+    Raises Exception when two tasks end up with the same new label.
+    """
+    paths = [
+        'task.extra.treeherder.groupSymbol',
+        'task.extra.treeherder.symbol',
+        'attributes.build_platform',
+        'attributes.build_type',
+    ]
+    rv = {}
+    for t in graph.values():
+        parts = []
+        for path in paths:
+            try:
+                x = t
+                for elt in path.split('.'):
+                    x = x[elt]
+            except KeyError:
+                # this task has no such value; omit it from the label
+                continue
+            parts.append(str(x))
+        new_label = '/'.join(parts)
+        if new_label in rv:
+            raise Exception("duplicate label %s for %s / %s"
+                            % (new_label, t['label'], rv[new_label]['label']))
+        rv[new_label] = t
+    return rv
+
+def main():
+    """Print the differences between tasks-old.json and tasks-new.json.
+
+    Both graphs are reduced to their test tasks, relabeled and normalized.
+    Tasks present only in the old graph are counted and left out of the
+    comparison; tasks present only in the new graph are reported by label.
+    The tasks common to both are compared as a unified diff of their
+    sorted, indented JSON with ``<...>`` references masked.  When nothing
+    differs, "no diff" and the labels of the skipped tasks are printed.
+    """
+    graphs = []
+    for filename in ("tasks-old.json", "tasks-new.json"):
+        with open(filename) as f:
+            graphs.append(drop_stuff(relabel(tests_only(json.load(f)))))
+    graph1, graph2 = graphs
+
+    skipped = set(graph1) - set(graph2)
+    print("skipping {} tasks defined only in tasks-old.json".format(len(skipped)))
+
+    # debugging aid: limit the comparison to just a few tasks
+    #graph2 = {k: graph2[k] for k in sorted(graph2)[:100]}
+
+    # temporarily omit tasks from the old graph that are not present in the new
+    graph1 = {k: v for k, v in graph1.items() if k in graph2}
+
+    print("comparing {} tasks".format(len(graph1)))
+
+    # just print labels for added tasks, as that's usually what's useful
+    diff = False
+    for label in sorted(set(graph2) - set(graph1)):
+        print("+ added task: {}".format(label))
+        del graph2[label]
+        diff = True
+
+    old_lines = replace_labels(json.dumps(graph1, indent=4, sort_keys=True)).split('\n')
+    new_lines = replace_labels(json.dumps(graph2, indent=4, sort_keys=True)).split('\n')
+
+    for line in difflib.unified_diff(old_lines, new_lines,
+                                     fromfile="tasks-old.json",
+                                     tofile="tasks-new.json",
+                                     lineterm='', n=4):
+        diff = True
+        print(line)
+
+    if not diff:
+        print("no diff")
+        print("skipped tasks:\n" + "\n".join(sorted(skipped)))
+
+
+if __name__ == '__main__':
+    main()