Bug 1449195 - Process raptor test results and format for output; r?jmaher draft
author Rob Wood <rwood@mozilla.com>
Thu, 03 May 2018 12:00:57 -0400
changeset 795044 796e6e83826efa1e91a56bcb3d13c1243b2cd47a
parent 794798 77845b8d2930ab73838be0329cb3f17a5f23828d
push id 109849
push user rwood@mozilla.com
push date Mon, 14 May 2018 22:59:17 +0000
reviewers jmaher
bugs 1449195
milestone 62.0a1
Bug 1449195 - Process raptor test results and format for output; r?jmaher MozReview-Commit-ID: JJ6shYJ2cG5
testing/mozharness/mozharness/mozilla/testing/raptor.py
testing/raptor/raptor/control_server.py
testing/raptor/raptor/filter.py
testing/raptor/raptor/gen_test_config.py
testing/raptor/raptor/manifest.py
testing/raptor/raptor/output.py
testing/raptor/raptor/playback/mitmproxy.py
testing/raptor/raptor/raptor.py
testing/raptor/raptor/results.py
testing/raptor/raptor/tests/raptor-firefox-tp6.ini
testing/raptor/test/test_control_server.py
testing/raptor/test/test_raptor.py
testing/raptor/webext/raptor/runner.js
--- a/testing/mozharness/mozharness/mozilla/testing/raptor.py
+++ b/testing/mozharness/mozharness/mozilla/testing/raptor.py
@@ -1,15 +1,16 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, print_function, unicode_literals
 
 import copy
+import json
 import os
 import re
 import sys
 
 import mozharness
 
 from mozharness.base.config import parse_config_file
 from mozharness.base.errors import PythonErrorList
--- a/testing/raptor/raptor/control_server.py
+++ b/testing/raptor/raptor/control_server.py
@@ -14,82 +14,93 @@ import threading
 
 from mozlog import get_proxy_logger
 
 LOG = get_proxy_logger(component='control_server')
 
 here = os.path.abspath(os.path.dirname(__file__))
 
 
-class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+def MakeCustomHandlerClass(results_handler):
+
+    class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler, object):
+
+        def __init__(self, *args, **kwargs):
+            self.results_handler = results_handler
+            super(MyHandler, self).__init__(*args, **kwargs)
 
-    def do_GET(self):
-        # get handler, received request for test settings from web ext runner
-        self.send_response(200)
-        validFiles = ['raptor-firefox-tp6.json']
-        head, tail = os.path.split(self.path)
-        if tail in validFiles:
-            LOG.info('reading test settings from ' + tail)
-            try:
-                with open(tail) as json_settings:
-                    self.send_header('Access-Control-Allow-Origin', '*')
-                    self.send_header('Content-type', 'application/json')
-                    self.end_headers()
-                    self.wfile.write(json.dumps(json.load(json_settings)))
-                    self.wfile.close()
-                    LOG.info('sent test settings to web ext runner')
-            except Exception as ex:
-                LOG.info('control server exception')
-                LOG.info(ex)
-        else:
-            LOG.info('received request for unknown file: ' + self.path)
+        def do_GET(self):
+            # get handler, received request for test settings from web ext runner
+            self.send_response(200)
+            validFiles = ['raptor-firefox-tp6.json']
+            head, tail = os.path.split(self.path)
+            if tail in validFiles:
+                LOG.info('reading test settings from ' + tail)
+                try:
+                    with open(tail) as json_settings:
+                        self.send_header('Access-Control-Allow-Origin', '*')
+                        self.send_header('Content-type', 'application/json')
+                        self.end_headers()
+                        self.wfile.write(json.dumps(json.load(json_settings)))
+                        self.wfile.close()
+                        LOG.info('sent test settings to web ext runner')
+                except Exception as ex:
+                    LOG.info('control server exception')
+                    LOG.info(ex)
+            else:
+                LOG.info('received request for unknown file: ' + self.path)
 
-    def do_POST(self):
-        # post handler, received something from webext
-        self.send_response(200)
-        self.send_header('Access-Control-Allow-Origin', '*')
-        self.send_header('Content-type', 'text/html')
-        self.end_headers()
-        content_len = int(self.headers.getheader('content-length'))
-        post_body = self.rfile.read(content_len)
-        # could have received a status update or test results
-        data = json.loads(post_body)
-        LOG.info("received " + data['type'] + ": " + str(data['data']))
+        def do_POST(self):
+            # post handler, received something from webext
+            self.send_response(200)
+            self.send_header('Access-Control-Allow-Origin', '*')
+            self.send_header('Content-type', 'text/html')
+            self.end_headers()
+            content_len = int(self.headers.getheader('content-length'))
+            post_body = self.rfile.read(content_len)
+            # could have received a status update or test results
+            data = json.loads(post_body)
+            LOG.info("received " + data['type'] + ": " + str(data['data']))
+            if data['type'] == 'webext_results':
+                self.results_handler.add(data['data'])
 
-    def do_OPTIONS(self):
-        self.send_response(200, "ok")
-        self.send_header('Access-Control-Allow-Origin', '*')
-        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
-        self.send_header("Access-Control-Allow-Headers", "X-Requested-With")
-        self.send_header("Access-Control-Allow-Headers", "Content-Type")
-        self.end_headers()
+        def do_OPTIONS(self):
+            self.send_response(200, "ok")
+            self.send_header('Access-Control-Allow-Origin', '*')
+            self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
+            self.send_header("Access-Control-Allow-Headers", "X-Requested-With")
+            self.send_header("Access-Control-Allow-Headers", "Content-Type")
+            self.end_headers()
+
+    return MyHandler
 
 
 class RaptorControlServer():
     """Container class for Raptor Control Server"""
 
-    def __init__(self):
+    def __init__(self, results_handler):
         self.raptor_venv = os.path.join(os.getcwd(), 'raptor-venv')
         self.server = None
         self._server_thread = None
         self.port = None
+        self.results_handler = results_handler
 
     def start(self):
         config_dir = os.path.join(here, 'tests')
         os.chdir(config_dir)
 
         # pick a free port
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         sock.bind(('', 0))
         self.port = sock.getsockname()[1]
         sock.close()
         server_address = ('', self.port)
 
         server_class = BaseHTTPServer.HTTPServer
-        handler_class = MyHandler
+        handler_class = MakeCustomHandlerClass(self.results_handler)
 
         httpd = server_class(server_address, handler_class)
 
         self._server_thread = threading.Thread(target=httpd.serve_forever)
         self._server_thread.setDaemon(True)  # don't hang on exit
         self._server_thread.start()
         LOG.info("raptor control server running on port %d..." % self.port)
         self.server = httpd
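
The handler factory above exists because BaseHTTPRequestHandler instances are
constructed by the HTTP server itself, so extra state such as the results
handler cannot be passed through the constructor; it is bound in via a closure
instead. A minimal standalone sketch of the same pattern, with hypothetical
names and Python 2 imports to match the patch:

    import BaseHTTPServer

    def make_handler_class(shared_state):
        # bind shared_state into the handler class via a closure; the server
        # constructs handler instances itself, so we cannot pass it directly
        class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
            def do_GET(self):
                shared_state.append(self.path)  # record each requested path
                self.send_response(200)
                self.end_headers()
                self.wfile.write("ok")
        return Handler

    seen_paths = []
    httpd = BaseHTTPServer.HTTPServer(('', 0), make_handler_class(seen_paths))
    # httpd.serve_forever() would then run it, typically on a daemon thread
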
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/filter.py
@@ -0,0 +1,260 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# originally taken from /testing/talos/talos/filter.py
+
+from __future__ import absolute_import
+
+import math
+
+"""
+data filters:
+takes a series of run data and applies statistical transforms to it
+
+Each filter is a simple function, but it also has a special `prepare`
+method attached that creates a tuple containing one instance of
+:class:`Filter`; this allows writing code like::
+
+  from raptor import filter
+  filters = filter.ignore_first.prepare(1) + filter.median.prepare()
+
+  for filter in filters:
+      data = filter.apply(data)
+  # data is filtered
+"""
+
+_FILTERS = {}
+
+
+class Filter(object):
+    def __init__(self, func, *args, **kwargs):
+        """
+        Takes a filter function and saves the args and kwargs that
+        should be used when the filter is applied.
+        """
+        self.func = func
+        self.args = args
+        self.kwargs = kwargs
+
+    def apply(self, data):
+        """
+        Apply the filter on the data, and return the new data
+        """
+        return self.func(data, *self.args, **self.kwargs)
+
+
+def define_filter(func):
+    """
+    decorator to attach the prepare method.
+    """
+    def prepare(*args, **kwargs):
+        return (Filter(func, *args, **kwargs),)
+    func.prepare = prepare
+    return func
+
+
+def register_filter(func):
+    """
+    all filters defined in this module
+    should be registered
+    """
+    global _FILTERS
+
+    _FILTERS[func.__name__] = func
+    return func
+
+
+def filters(*args):
+    global _FILTERS
+
+    filters_ = [_FILTERS[filter] for filter in args]
+    return filters_
+
+
+def apply(data, filters):
+    for filter in filters:
+        data = filter(data)
+
+    return data
+
+
+def parse(string_):
+
+    def to_number(string_number):
+        try:
+            return int(string_number)
+        except ValueError:
+            return float(string_number)
+
+    tokens = string_.split(":")
+
+    func = tokens[0]
+    digits = []
+    if len(tokens) > 1:
+        digits.extend(tokens[1].split(","))
+        digits = [to_number(digit) for digit in digits]
+
+    return [func, digits]
+
+
+# filters that return a scalar
+
+@register_filter
+@define_filter
+def mean(series):
+    """
+    mean of data; needs at least one data point
+    """
+    return sum(series)/float(len(series))
+
+
+@register_filter
+@define_filter
+def median(series):
+    """
+    median of data; needs at least one data point
+    """
+    series = sorted(series)
+    if len(series) % 2:
+        # odd
+        return series[len(series)/2]
+    else:
+        # even
+        middle = len(series)/2  # the higher of the middle 2, actually
+        return 0.5*(series[middle-1] + series[middle])
+
+
+@register_filter
+@define_filter
+def variance(series):
+    """
+    variance: http://en.wikipedia.org/wiki/Variance
+    """
+
+    _mean = mean(series)
+    variance = sum([(i-_mean)**2 for i in series])/float(len(series))
+    return variance
+
+
+@register_filter
+@define_filter
+def stddev(series):
+    """
+    standard deviation: http://en.wikipedia.org/wiki/Standard_deviation
+    """
+    return variance(series)**0.5
+
+
+@register_filter
+@define_filter
+def dromaeo(series):
+    """
+    dromaeo: https://wiki.mozilla.org/Dromaeo, pull the internal calculation
+    out
+      * This is for 'runs/s' based tests, not 'ms' tests.
+      * chunksize: defined in dromaeo: tests/dromaeo/webrunner.js#l8
+    """
+    means = []
+    chunksize = 5
+    series = list(dromaeo_chunks(series, chunksize))
+    for i in series:
+        means.append(mean(i))
+    return geometric_mean(means)
+
+
+@register_filter
+@define_filter
+def dromaeo_chunks(series, size):
+    for i in range(0, len(series), size):
+        yield series[i:i+size]
+
+
+@register_filter
+@define_filter
+def geometric_mean(series):
+    """
+    geometric_mean: http://en.wikipedia.org/wiki/Geometric_mean
+    """
+    total = 0
+    for i in series:
+        total += math.log(i+1)
+    return math.exp(total / len(series)) - 1
+
+# filters that return a list
+
+
+@register_filter
+@define_filter
+def ignore_first(series, number=1):
+    """
+    ignore the first `number` data points (default: 1)
+    """
+    if len(series) <= number:
+        # don't modify short series
+        return series
+    return series[number:]
+
+
+@register_filter
+@define_filter
+def ignore(series, function):
+    """
+    ignore (remove) the value selected by `function`, e.g. max or min
+    """
+    if len(series) <= 1:
+        # don't modify short series
+        return series
+    series = series[:]  # do not mutate the original series
+    value = function(series)
+    series.remove(value)
+    return series
+
+
+@register_filter
+@define_filter
+def ignore_max(series):
+    """
+    ignore maximum data point
+    """
+    return ignore(series, max)
+
+
+@register_filter
+@define_filter
+def ignore_min(series):
+    """
+    ignore minimum data point
+    """
+    return ignore(series, min)
+
+
+@register_filter
+@define_filter
+def v8_subtest(series, name):
+    """
+       v8 benchmark score - modified for no sub benchmarks.
+       * removed Crypto and kept Encrypt/Decrypt standalone
+       * removed EarlyBoyer and kept Earley/Boyer standalone
+
+       this is not 100% in parity with the original, but within 0.3%
+    """
+    reference = {'Encrypt': 266181.,
+                 'Decrypt': 266181.,
+                 'DeltaBlue': 66118.,
+                 'Earley': 666463.,
+                 'Boyer': 666463.,
+                 'NavierStokes': 1484000.,
+                 'RayTrace': 739989.,
+                 'RegExp': 910985.,
+                 'Richards': 35302.,
+                 'Splay': 81491.
+                 }
+
+    return reference[name] / geometric_mean(series)
+
+
+@register_filter
+@define_filter
+def responsiveness_Metric(val_list):
+    return sum([float(x)*float(x) / 1000000.0 for x in val_list])
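
How these filters are meant to be chained, mirroring what output.py below does
with ignore_first and median; a small sketch with hypothetical replicate
values (run from testing/raptor/raptor so `import filter` resolves):

    import filter

    # hypothetical fnbpaint replicates; the first page load is warm-up noise
    replicates = [900, 550, 560, 570, 580]

    trimmed = filter.ignore_first(replicates, 1)  # [550, 560, 570, 580]
    value = filter.median(trimmed)                # 0.5 * (560 + 570) = 565.0

    # equivalently, using the prepare()/apply() chain from the module docstring
    data = replicates
    for f in filter.ignore_first.prepare(1) + filter.median.prepare():
        data = f.apply(data)
    assert data == value
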
--- a/testing/raptor/raptor/gen_test_config.py
+++ b/testing/raptor/raptor/gen_test_config.py
@@ -15,20 +15,21 @@ LOG = get_proxy_logger(component="gen_te
 
 def gen_test_config(browser, test, cs_port):
     LOG.info("writing test settings url background js, so webext can get it")
 
     data = """// this file is auto-generated by raptor, do not edit directly
 function getTestConfig() {
     return {"browser": "%s",
             "cs_port": "%d",
+            "test_name": "%s",
             "test_settings_url": "http://localhost:%d/%s.json"};
 }
 
-""" % (browser, cs_port, cs_port, test)
+""" % (browser, cs_port, test, cs_port, test)
 
     webext_background_script = (os.path.join(webext_dir, "auto_gen_test_config.js"))
 
     file = open(webext_background_script, "w")
     file.write(data)
     file.close()
 
     LOG.info("finished writing test config into webext")
--- a/testing/raptor/raptor/manifest.py
+++ b/testing/raptor/raptor/manifest.py
@@ -9,16 +9,23 @@ import os
 from manifestparser import TestManifest
 from mozlog import get_proxy_logger
 
 here = os.path.abspath(os.path.dirname(__file__))
 raptor_ini = os.path.join(here, 'raptor.ini')
 tests_dir = os.path.join(here, 'tests')
 LOG = get_proxy_logger(component="manifest")
 
+required_settings = ['apps', 'type', 'page_cycles', 'test_url', 'measure',
+                     'unit', 'lower_is_better', 'alert_threshold']
+
+playback_settings = ['playback_binary_manifest', 'playback_binary_zip_mac',
+                     'playback_pageset_manifest', 'playback_pageset_zip_mac',
+                     'playback_recordings']
+
 
 def filter_app(tests, values):
     for test in tests:
         if values["app"] in test['apps']:
             yield test
 
 
 def get_browser_test_list(browser_app):
@@ -26,16 +33,37 @@ def get_browser_test_list(browser_app):
     test_manifest = TestManifest([raptor_ini], strict=False)
     info = {"app": browser_app}
     return test_manifest.active_tests(exists=False,
                                       disabled=False,
                                       filters=[filter_app],
                                       **info)
 
 
+def validate_test_ini(test_details):
+    # validate all required test details were found in the test INI
+    valid_settings = True
+
+    for setting in required_settings:
+        if setting not in test_details:
+            valid_settings = False
+            LOG.info("setting '%s' is required but not found in %s"
+                     % (setting, test_details['manifest']))
+
+    # if playback is specified, we need more playback settings
+    if 'playback' in test_details:
+        for setting in playback_settings:
+            if setting not in test_details:
+                valid_settings = False
+                LOG.info("setting '%s' is required but not found in %s"
+                         % (setting, test_details['manifest']))
+
+    return valid_settings
+
+
 def write_test_settings_json(test_details):
     # write test settings json file with test details that the control
     # server will provide for the web ext
     test_settings = {
         "raptor-options": {
             "type": test_details['type'],
             "test_url": test_details['test_url'],
             "page_cycles": int(test_details['page_cycles'])
@@ -47,16 +75,21 @@ def write_test_settings_json(test_detail
         if "fnbpaint" in test_details['measure']:
             test_settings['raptor-options']['measure']['fnbpaint'] = True
         if "fcp" in test_details['measure']:
             test_settings['raptor-options']['measure']['fcp'] = True
         if "hero" in test_details['measure']:
             test_settings['raptor-options']['measure']['hero'] = test_details['hero'].split()
     if test_details.get("page_timeout", None) is not None:
         test_settings['raptor-options']['page_timeout'] = int(test_details['page_timeout'])
+    test_settings['raptor-options']['unit'] = test_details.get("unit", "ms")
+    test_settings['raptor-options']['lower_is_better'] = \
+        bool(test_details.get("lower_is_better", True))
+    if test_details.get("alert_threshold", None) is not None:
+        test_settings['raptor-options']['alert_threshold'] = float(test_details['alert_threshold'])
 
     settings_file = os.path.join(tests_dir, test_details['name'] + '.json')
     try:
         with open(settings_file, 'w') as out_file:
             json.dump(test_settings, out_file, indent=4, ensure_ascii=False)
             out_file.close()
     except IOError:
         LOG.info("abort: exception writing test settings json!")
@@ -81,11 +114,16 @@ def get_raptor_test_list(args):
         if len(tests_to_run) == 0:
             LOG.critical("abort: specified test doesn't exist!")
     else:
         tests_to_run = available_tests
 
     # write out .json test setting files for the control server to read and send to web ext
     if len(tests_to_run) != 0:
         for test in tests_to_run:
-            write_test_settings_json(test)
+            if validate_test_ini(test):
+                write_test_settings_json(test)
+            else:
+                # test doesn't have valid settings, remove it from available list
+                LOG.info("test %s is not valid due to missing settings" % test['name'])
+                tests_to_run.remove(test)
 
     return tests_to_run
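
A quick sketch of how validate_test_ini() gates a test (hypothetical
test_details dict; real ones come from manifestparser via
get_browser_test_list, and a default mozlog logger is assumed to be set, as in
the unit tests):

    from mozlog.structuredlog import set_default_logger, StructuredLogger
    from manifest import validate_test_ini

    set_default_logger(StructuredLogger('example'))

    test_details = {
        'name': 'raptor-firefox-tp6',
        'manifest': 'raptor-firefox-tp6.ini',
        'apps': 'firefox',
        'type': 'pageload',
        'page_cycles': '25',
        'test_url': 'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
        'measure': 'fnbpaint',
        'unit': 'ms',
        'lower_is_better': 'true',
        # 'alert_threshold' is missing, so the test is reported and dropped
    }
    assert validate_test_ini(test_details) is False
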
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/output.py
@@ -0,0 +1,171 @@
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# some parts of this originally taken from /testing/talos/talos/output.py
+
+"""output raptor test results"""
+from __future__ import absolute_import
+
+import filter
+
+import json
+from mozlog import get_proxy_logger
+
+LOG = get_proxy_logger(component="output")
+
+
+class Output(object):
+    """class for raptor output"""
+
+    def __init__(self, results):
+        """
+        - results : list of RaptorTestResult instances
+        """
+        self.results = results
+        self.summarized_results = {}
+
+    def summarize(self):
+        suites = []
+        vals = []
+        test_results = {
+            'framework': {
+                'name': 'raptor',
+            },
+            'suites': suites,
+        }
+
+        # check if we actually have any results
+        if len(self.results) == 0:
+            LOG.error("error: no raptor test results found!")
+            return
+
+        for test in self.results:
+            subtests = []
+            suite = {
+                'name': test.name,
+                'extraOptions': test.extra_options,
+                'subtests': subtests
+            }
+
+            suites.append(suite)
+
+            # each test can report multiple measurements per pageload
+            # each measurement becomes a subtest inside the 'suite'
+            for key, values in test.measurements.iteritems():
+                new_subtest = {}
+                new_subtest['name'] = key
+                new_subtest['replicates'] = values
+                new_subtest['lower_is_better'] = test.lower_is_better
+                new_subtest['alert_threshold'] = float(test.alert_threshold)
+                new_subtest['value'] = 0
+                new_subtest['unit'] = test.unit
+
+                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
+                new_subtest['value'] = filter.median(filtered_values)
+                # keep [value, name] pairs so construct_results() can unpack them
+                vals.append([new_subtest['value'], new_subtest['name']])
+
+                subtests.append(new_subtest)
+
+        # if there is more than one subtest, calculate a summary result
+        if len(subtests) > 1:
+            suite['value'] = self.construct_results(vals, testname=test.name)
+
+        LOG.info("returning summarized test results:")
+        LOG.info(test_results)
+
+        self.summarized_results = test_results
+
+    def output(self):
+        """output to file and perfherder data json """
+        if self.summarized_results == {}:
+            LOG.error("error: no summarized raptor results found!")
+            return False
+
+        results_path = "raptor.json"
+
+        # write the summarized results to a json file on disk
+        with open(results_path, 'w') as f:
+            json.dump(self.summarized_results, f, indent=2, sort_keys=True)
+
+        # the output that treeherder expects to find
+        extra_opts = self.summarized_results['suites'][0].get('extraOptions', [])
+        if 'geckoProfile' not in extra_opts:
+            LOG.info("PERFHERDER_DATA: %s" % json.dumps(self.summarized_results))
+
+        return True
+
+    @classmethod
+    def v8_Metric(cls, val_list):
+        results = [i for i, j in val_list]
+        score = 100 * filter.geometric_mean(results)
+        return score
+
+    @classmethod
+    def JS_Metric(cls, val_list):
+        """v8 benchmark score"""
+        results = [i for i, j in val_list]
+        return sum(results)
+
+    @classmethod
+    def speedometer_score(cls, val_list):
+        """
+        speedometer_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
+        """
+        correctionFactor = 3
+        results = [i for i, j in val_list]
+        # speedometer has 16 tests, each made up of 9 subtests plus a sum
+        # of those 9 values.  We receive 160 values and want to use the 16
+        # test totals, not the subtest values.
+        if len(results) != 160:
+            raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
+
+        results = results[9::10]
+        score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
+        return score
+
+    @classmethod
+    def benchmark_score(cls, val_list):
+        """
+        benchmark_score: ares6/jetstream self reported as 'geomean'
+        """
+        results = [i for i, j in val_list if j == 'geomean']
+        return filter.mean(results)
+
+    @classmethod
+    def stylebench_score(cls, val_list):
+        """
+        stylebench_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
+        """
+        correctionFactor = 3
+        results = [i for i, j in val_list]
+        # stylebench has 4 tests, each made up of 12 subtests plus a sum
+        # of those 12 values.  We receive 52 values and want to use the 4
+        # test totals, not the subtest values.
+        if len(results) != 52:
+            raise Exception("StyleBench has 52 subtests, found: %s instead" % len(results))
+
+        results = results[12::13]
+        score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
+        return score
+
+    def construct_results(self, vals, testname):
+        if testname.startswith('v8_7'):
+            return self.v8_Metric(vals)
+        elif testname.startswith('kraken'):
+            return self.JS_Metric(vals)
+        elif testname.startswith('ares6'):
+            return self.benchmark_score(vals)
+        elif testname.startswith('jetstream'):
+            return self.benchmark_score(vals)
+        elif testname.startswith('speedometer'):
+            return self.speedometer_score(vals)
+        elif testname.startswith('stylebench'):
+            return self.stylebench_score(vals)
+        elif len(vals) > 1:
+            return filter.geometric_mean([i for i, j in vals])
+        else:
+            return filter.mean([i for i, j in vals])
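
The summarized structure that summarize() builds, and that output() writes to
raptor.json and logs as PERFHERDER_DATA, looks roughly like this for a single
pageload suite with one fnbpaint subtest (hypothetical numbers):

    summarized = {
        'framework': {'name': 'raptor'},
        'suites': [{
            'name': 'raptor-firefox-tp6',
            'extraOptions': [],
            'subtests': [{
                'name': 'fnbpaint',
                'replicates': [900, 550, 560, 570, 580],
                'lower_is_better': True,
                'alert_threshold': 2.0,
                'unit': 'ms',
                # median of the replicates with the first value ignored
                'value': 565.0,
            }],
        }],
    }

A suite-level 'value' is only added when there is more than one subtest.
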
--- a/testing/raptor/raptor/playback/mitmproxy.py
+++ b/testing/raptor/raptor/playback/mitmproxy.py
@@ -238,17 +238,17 @@ class Mitmproxy(Playback):
             sys.path.insert(1, mitmdump_path)
         else:
             # mac and linux
             param2 = param + ' ' + ' '.join(mitmproxy_recordings)
 
         # mitmproxy needs some DLL's that are a part of Firefox itself, so add to path
         env["PATH"] = os.path.dirname(browser_path) + ";" + env["PATH"]
 
-        command = [mitmdump_path, '-k', '-s', param2]
+        command = [mitmdump_path, '-k', '-q', '-s', param2]
 
         LOG.info("Starting mitmproxy playback using env path: %s" % env["PATH"])
         LOG.info("Starting mitmproxy playback using command: %s" % ' '.join(command))
         # to turn off mitmproxy log output, use these params for Popen:
         # Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
         mitmproxy_proc = subprocess.Popen(command, env=env)
         time.sleep(10)
         data = mitmproxy_proc.poll()
--- a/testing/raptor/raptor/raptor.py
+++ b/testing/raptor/raptor/raptor.py
@@ -26,18 +26,19 @@ try:
     build = MozbuildObject.from_environment(cwd=here)
 except ImportError:
     build = None
 
 from cmdline import parse_args
 from control_server import RaptorControlServer
 from gen_test_config import gen_test_config
 from outputhandler import OutputHandler
+from manifest import get_raptor_test_list
 from playback import get_playback
-from manifest import get_raptor_test_list
+from results import RaptorResultsHandler
 
 
 class Raptor(object):
     """Container class for Raptor"""
 
     def __init__(self, app, binary):
         self.config = {}
         self.config['app'] = app
@@ -56,16 +57,19 @@ class Raptor(object):
         with open(os.path.join(self.profile_data_dir, 'profiles.json'), 'r') as fh:
             base_profiles = json.load(fh)['raptor']
 
         for name in base_profiles:
             path = os.path.join(self.profile_data_dir, name)
             self.log.info("Merging profile: {}".format(path))
             self.profile.merge(path)
 
+        # create results holder
+        self.results_handler = RaptorResultsHandler()
+
         # Create the runner
         self.output_handler = OutputHandler()
         process_args = {
             'processOutputLine': [self.output_handler],
         }
         runner_cls = runners[app]
         self.runner = runner_cls(
             binary, profile=self.profile, process_args=process_args)
@@ -74,33 +78,35 @@ class Raptor(object):
     def profile_data_dir(self):
         if 'MOZ_DEVELOPER_REPO_DIR' in os.environ:
             return os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'testing', 'profiles')
         if build:
             return os.path.join(build.topsrcdir, 'testing', 'profiles')
         return os.path.join(here, 'profile_data')
 
     def start_control_server(self):
-        self.control_server = RaptorControlServer()
+        self.control_server = RaptorControlServer(self.results_handler)
         self.control_server.start()
 
     def get_playback_config(self, test):
         self.config['playback_tool'] = test.get('playback')
         self.log.info("test uses playback tool: %s " % self.config['playback_tool'])
         self.config['playback_binary_manifest'] = test.get('playback_binary_manifest', None)
         _key = 'playback_binary_zip_%s' % self.config['platform']
         self.config['playback_binary_zip'] = test.get(_key, None)
         self.config['playback_pageset_manifest'] = test.get('playback_pageset_manifest', None)
         _key = 'playback_pageset_zip_%s' % self.config['platform']
         self.config['playback_pageset_zip'] = test.get(_key, None)
         self.config['playback_recordings'] = test.get('playback_recordings', None)
 
     def run_test(self, test, timeout=None):
         self.log.info("starting raptor test: %s" % test['name'])
-        gen_test_config(self.config['app'], test['name'], self.control_server.port)
+        gen_test_config(self.config['app'],
+                        test['name'],
+                        self.control_server.port)
 
         self.profile.addons.install(os.path.join(webext_dir, 'raptor'))
 
         # some tests require tools to playback the test pages
         if test.get('playback', None) is not None:
             self.get_playback_config(test)
             # startup the playback tool
             self.playback = get_playback(self.config)
@@ -129,23 +135,22 @@ class Raptor(object):
         proc.output.append(
             "__startBeforeLaunchTimestamp%d__endBeforeLaunchTimestamp"
             % first_time)
         proc.output.append(
             "__startAfterTerminationTimestamp%d__endAfterTerminationTimestamp"
             % (int(time.time()) * 1000))
 
     def process_results(self):
-        self.log.info('todo: process results and dump in PERFHERDER_JSON blob')
-        self.log.info('- or - do we want the control server to do that?')
+        return self.results_handler.summarize_and_output(self.config)
 
     def clean_up(self):
         self.control_server.stop()
         self.runner.stop()
-        self.log.info("raptor finished")
+        self.log.info("finished")
 
 
 def main(args=sys.argv[1:]):
     args = parse_args()
     commandline.setup_logging('raptor', args, {'tbpl': sys.stdout})
     LOG = get_default_logger(component='raptor-main')
 
     # if a test name specified on command line, and it exists, just run that one
@@ -163,14 +168,19 @@ def main(args=sys.argv[1:]):
 
     raptor = Raptor(args.app, args.binary)
 
     raptor.start_control_server()
 
     for next_test in raptor_test_list:
         raptor.run_test(next_test)
 
-    raptor.process_results()
+    success = raptor.process_results()
     raptor.clean_up()
 
+    if not success:
+        # didn't get any test results; the test timed out or crashed, so fail the job
+        LOG.critical("error: no raptor test results were found")
+        os.sys.exit(1)
+
 
 if __name__ == "__main__":
     main()
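
Pulled together, the results path is: the web extension POSTs its results to
the control server, the server's request handler forwards anything of type
'webext_results' to the shared RaptorResultsHandler, and process_results()
asks that handler to summarize and emit PERFHERDER_DATA. A condensed sketch of
that wiring outside the Raptor class (hypothetical; run from
testing/raptor/raptor with a default mozlog logger set):

    from mozlog.structuredlog import set_default_logger, StructuredLogger
    from control_server import RaptorControlServer
    from results import RaptorResultsHandler

    set_default_logger(StructuredLogger('example'))

    handler = RaptorResultsHandler()
    server = RaptorControlServer(handler)  # handler is shared with the HTTP handler
    server.start()

    # ... the webext runs and POSTs {"type": "webext_results", "data": {...}} ...

    success = handler.summarize_and_output({})  # False if no results were received
    server.stop()
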
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/results.py
@@ -0,0 +1,44 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# class to process, format, and report raptor test results
+# received from the raptor control server
+from __future__ import absolute_import
+
+from output import Output
+
+from mozlog import get_proxy_logger
+
+LOG = get_proxy_logger(component='results-handler')
+
+
+class RaptorResultsHandler():
+    """Handle Raptor test results"""
+
+    def __init__(self):
+        self.results = []
+
+    def add(self, new_result_json):
+        # add to results
+        LOG.info("received results in RaptorResultsHandler.add")
+        LOG.info(new_result_json)
+        new_result = RaptorTestResult(new_result_json)
+        self.results.append(new_result)
+
+    def summarize_and_output(self, test_config):
+        # summarize the result data, write to file and output PERFHERDER_DATA
+        LOG.info("summarizing raptor test results")
+        output = Output(self.results)
+        output.summarize()
+        return output.output()
+
+
+class RaptorTestResult():
+    """Single Raptor test result class"""
+
+    def __init__(self, test_result_json):
+        self.extra_options = []
+        # convert test result json/dict (from control server) to test result object instance
+        for key, value in test_result_json.iteritems():
+            setattr(self, key, value)
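
Each 'webext_results' payload forwarded by the control server becomes one
RaptorTestResult, with the payload keys copied onto the instance. A sketch
with a hypothetical payload shaped like the results object built in runner.js
below (default mozlog logger assumed, as in the unit tests):

    from mozlog.structuredlog import set_default_logger, StructuredLogger
    from results import RaptorResultsHandler

    set_default_logger(StructuredLogger('example'))

    handler = RaptorResultsHandler()
    handler.add({
        'name': 'raptor-firefox-tp6',
        'page': 'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
        'type': 'pageload',
        'unit': 'ms',
        'lower_is_better': True,
        'alert_threshold': 2.0,
        'measurements': {'fnbpaint': [900, 550, 560, 570, 580]},
    })

    result = handler.results[0]
    assert result.measurements['fnbpaint'][0] == 900
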
--- a/testing/raptor/raptor/tests/raptor-firefox-tp6.ini
+++ b/testing/raptor/raptor/tests/raptor-firefox-tp6.ini
@@ -8,13 +8,16 @@
 apps = firefox
 type =  pageload
 playback = mitmproxy
 playback_binary_manifest = mitmproxy-rel-bin-osx.manifest
 playback_binary_zip_mac = mitmproxy-2.0.2-osx.tar.gz
 playback_pageset_manifest = mitmproxy-playback-set.manifest
 playback_pageset_zip_mac = mitmproxy-recording-set-win10.zip
 page_cycles = 25
+unit = ms
+lower_is_better = true
+alert_threshold = 2.0
 
 [raptor-firefox-tp6]
 test_url = https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop
 playback_recordings = mitmproxy-recording-amazon.mp
 measure = fnbpaint
--- a/testing/raptor/test/test_control_server.py
+++ b/testing/raptor/test/test_control_server.py
@@ -1,21 +1,33 @@
 from __future__ import absolute_import, unicode_literals
 
 import mozunit
+import os
+import sys
 
 from BaseHTTPServer import HTTPServer
 from mozlog.structuredlog import set_default_logger, StructuredLogger
 from raptor.control_server import RaptorControlServer
 
+# need this so the raptor unit tests can find output & filter classes
+here = os.path.abspath(os.path.dirname(__file__))
+raptor_dir = os.path.join(os.path.dirname(here), 'raptor')
+sys.path.insert(0, raptor_dir)
+
+from raptor.results import RaptorResultsHandler
+
+
 set_default_logger(StructuredLogger('test_control_server'))
 
 
 def test_start_and_stop():
-    control = RaptorControlServer()
+
+    results_handler = RaptorResultsHandler()
+    control = RaptorControlServer(results_handler)
 
     assert control.server is None
     control.start()
     assert isinstance(control.server, HTTPServer)
     assert control.server.fileno()
     assert control._server_thread.is_alive()
 
     control.stop()
--- a/testing/raptor/test/test_raptor.py
+++ b/testing/raptor/test/test_raptor.py
@@ -30,18 +30,16 @@ def test_create_profile(options, app, ge
     prefs_file = os.path.join(raptor.profile.profile, 'user.js')
     with open(prefs_file, 'r') as fh:
         prefs = fh.read()
         assert firefox_pref in prefs
         assert raptor_pref in prefs
 
 
 def test_start_and_stop_server(raptor):
-    print("*RW* control server is now:")
-    print(str(raptor.control_server))
     assert raptor.control_server is None
 
     raptor.start_control_server()
 
     assert raptor.control_server._server_thread.is_alive()
     assert raptor.control_server.port is not None
     assert raptor.control_server.server is not None
 
--- a/testing/raptor/webext/raptor/runner.js
+++ b/testing/raptor/webext/raptor/runner.js
@@ -11,51 +11,62 @@
 // inside the 'talos-pagesets' dir or 'heroes' dir (tarek's github
 // repo) or 'webkit/PerformanceTests' dir (for benchmarks) first run:
 // 'python -m SimpleHTTPServer 8081'
 // to serve out the pages that we want to prototype with. Also
 // update the manifest content 'matches' accordingly
 
 var browserName;
 var ext;
+var testName = null;
 var settingsURL = null;
-var cs_port = null;
+var csPort = null;
 var testType;
 var pageCycles = 0;
 var pageCycle = 0;
 var pageCycleDelay = 1000;
 var testURL;
 var testTabID = 0;
-var results = {"page": "", "measurements": {}};
 var getHero = false;
 var getFNBPaint = false;
 var getFCP = false;
 var isHeroPending = false;
 var pendingHeroes = [];
 var settings = {};
 var isFNBPaintPending = false;
 var isFCPPending = false;
 var isBenchmarkPending = false;
-var pageTimeout = 5000; // default pageload timeout
+var pageTimeout = 10000; // default pageload timeout
+
+var results = {"name": "",
+               "page": "",
+               "type": "",
+               "lower_is_better": true,
+               "alert_threshold": 2.0,
+               "measurements": {}};
 
 function getTestSettings() {
   console.log("getting test settings from control server");
   return new Promise(resolve => {
 
     fetch(settingsURL).then(function(response) {
       response.text().then(function(text) {
         console.log(text);
         settings = JSON.parse(text)["raptor-options"];
 
         // parse the test settings
         testType = settings.type;
         pageCycles = settings.page_cycles;
         testURL = settings.test_url;
         results.page = testURL;
         results.type = testType;
+        results.name = testName;
+        results.unit = settings.unit;
+        results.lower_is_better = settings.lower_is_better;
+        results.alert_threshold = settings.alert_threshold;
 
         if (settings.page_timeout !== undefined) {
           pageTimeout = settings.page_timeout;
         }
         console.log("using page timeout (ms): " + pageTimeout);
 
         if (testType == "pageload") {
           if (settings.measure !== undefined) {
@@ -199,19 +210,22 @@ function timeoutAlarmListener(alarm) {
   var text = alarm.name;
   console.error(text);
   postToControlServer("status", text);
   // call clean-up to shutdown gracefully
   cleanUp();
 }
 
 function setTimeoutAlarm(timeoutName, timeoutMS) {
-  var timeout_when = window.performance.now() + timeoutMS;
+  // webext alarms require Date.now(), NOT performance.now()
+  var now = Date.now(); // eslint-disable-line mozilla/avoid-Date-timing
+  var timeout_when = now + timeoutMS;
   ext.alarms.create(timeoutName, { when: timeout_when });
-  console.log("set " + timeoutName);
+  console.log("now is " + now + ", set raptor alarm " +
+              timeoutName + " to expire at " + timeout_when);
 }
 
 function cancelTimeoutAlarm(timeoutName) {
   if (browserName === "firefox") {
     var clearAlarm = ext.alarms.clear(timeoutName);
     clearAlarm.then(function(onCleared) {
       if (onCleared) {
         console.log("cancelled " + timeoutName);
@@ -282,17 +296,17 @@ function verifyResults() {
                   + x + " but only have " + count);
     }
   }
   postToControlServer("results", results);
 }
 
 function postToControlServer(msgType, msgData) {
   // requires 'control server' running at port 8000 to receive results
-  var url = "http://127.0.0.1:" + cs_port + "/";
+  var url = "http://127.0.0.1:" + csPort + "/";
   var client = new XMLHttpRequest();
   client.onreadystatechange = function() {
     if (client.readyState == XMLHttpRequest.DONE && client.status == 200) {
       console.log("post success");
     }
   };
 
   client.open("POST", url, true);
@@ -327,19 +341,21 @@ function cleanUp() {
   // this only works with Firefox as google chrome doesn't support dump()
   if (browserName === "firefox")
     window.dump("\n__raptor_shutdownBrowser\n");
 
 }
 
 function runner() {
   let config = getTestConfig();
+  testName = config.test_name;
   settingsURL = config.test_settings_url;
-  cs_port = config.cs_port;
+  csPort = config.cs_port;
   browserName = config.browser;
+
   getBrowserInfo().then(function() {
     getTestSettings().then(function() {
       if (testType == "benchmark") {
         // webkit benchmark type of test
         console.log("benchmark test start");
       } else if (testType == "pageload") {
         // standard pageload test
         console.log("pageloader test start");