Bug 1460741 - Add 'speedometer' benchmark to raptor for firefox; r?jmaher draft
author     Rob Wood <rwood@mozilla.com>
date       Tue, 15 May 2018 14:50:48 -0400
changeset  803636 b9ec43afd95e067a8c7da0fdba877113ea780ff7
parent     803635 9e133ed7e9b5c356236ce6edd61477fbc6e4abbb
push id    112163
push user  rwood@mozilla.com
push date  Mon, 04 Jun 2018 18:04:02 +0000
reviewers  jmaher
bugs       1460741
milestone  62.0a1
Bug 1460741 - Add 'speedometer' benchmark to raptor for firefox; r?jmaher MozReview-Commit-ID: 6eTJhUJv3y9
testing/mozharness/mozharness/mozilla/testing/raptor.py
testing/raptor/raptor/benchmark.py
testing/raptor/raptor/cmdline.py
testing/raptor/raptor/control_server.py
testing/raptor/raptor/gen_test_config.py
testing/raptor/raptor/manifest.py
testing/raptor/raptor/output.py
testing/raptor/raptor/outputhandler.py
testing/raptor/raptor/playback/mitmproxy.py
testing/raptor/raptor/raptor.ini
testing/raptor/raptor/raptor.py
testing/raptor/raptor/results.py
testing/raptor/raptor/tests/raptor-speedometer.ini
testing/raptor/requirements.txt
testing/raptor/webext/raptor/benchmark-relay.js
testing/raptor/webext/raptor/manifest.json
testing/raptor/webext/raptor/measure.js
testing/raptor/webext/raptor/runner.js
third_party/webkit/PerformanceTests/Speedometer/resources/benchmark-report.js
--- a/testing/mozharness/mozharness/mozilla/testing/raptor.py
+++ b/testing/mozharness/mozharness/mozilla/testing/raptor.py
@@ -136,22 +136,26 @@ class Raptor(TestingMixin, MercurialScri
         kw_options = {'binary': binary_path}
         # options overwritten from **kw
         if 'test' in self.config:
             kw_options['test'] = self.config['test']
         if self.config.get('branch'):
             kw_options['branchName'] = self.config['branch']
         if self.symbols_path:
             kw_options['symbolsPath'] = self.symbols_path
+        if self.config.get('obj_path', None) is not None:
+            kw_options['obj-path'] = self.config['obj_path']
         kw_options.update(kw)
         # configure profiling options
         options.extend(self.query_gecko_profile_options())
         # extra arguments
         if args is not None:
             options += args
+        if self.config.get('run_local', False):
+            options.extend(['--run-local'])
         if 'raptor_extra_options' in self.config:
             options += self.config['raptor_extra_options']
         if self.config.get('code_coverage', False):
             options.extend(['--code-coverage'])
         for key, value in kw_options.items():
             options.extend(['--%s' % key, value])
         return options
 
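The hunk above threads two new pieces of configuration (obj_path and run_local) from mozharness into the raptor command line. A minimal sketch of how the option dictionary is flattened, with placeholder values standing in for self.config; this is an illustration only, not part of the patch:

    # illustration of the option flattening in the hunk above (placeholder values)
    kw_options = {'binary': '/path/to/firefox',
                  'test': 'raptor-speedometer',
                  'obj-path': '/path/to/obj-ff-opt'}
    options = []
    run_local = True  # stands in for self.config.get('run_local', False)
    if run_local:
        options.extend(['--run-local'])
    for key, value in kw_options.items():
        options.extend(['--%s' % key, value])
    # e.g. ['--run-local', '--binary', '/path/to/firefox', '--test',
    #       'raptor-speedometer', '--obj-path', '/path/to/obj-ff-opt']
    # (the order of the --key/value pairs depends on dict iteration order)
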
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/benchmark.py
@@ -0,0 +1,114 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import
+
+import os
+import shutil
+import socket
+
+from mozlog import get_proxy_logger
+
+from wptserve import server, handlers
+
+LOG = get_proxy_logger(component="raptor-benchmark")
+here = os.path.abspath(os.path.dirname(__file__))
+
+
+class Benchmark(object):
+    """utility class for running benchmarks in raptor"""
+
+    def __init__(self, config, test):
+        self.config = config
+        self.test = test
+
+        # bench_dir is where the benchmark source will be copied to and served from;
+        # when running locally it comes from obj_path via mozharness/mach
+        if self.config.get("obj_path", None) is not None:
+            self.bench_dir = self.config.get("obj_path")
+        else:
+            # in production it is ../tasks/task_N/build/tests/raptor/raptor/...
+            # 'here' is that path, we can start with that
+            self.bench_dir = here
+
+        # now add path for benchmark source; locally we put it in a raptor benchmarks
+        # folder; in production the files are automatically copied to a different dir
+        if self.config.get('run_local', False):
+            self.bench_dir = os.path.join(self.bench_dir, 'testing', 'raptor', 'benchmarks')
+        else:
+            self.bench_dir = os.path.join(self.bench_dir, 'tests', 'webkit', 'PerformanceTests')
+
+        LOG.info("bench_dir to be used for benchmark source: %s" % self.bench_dir)
+        if not os.path.exists(self.bench_dir):
+            os.makedirs(self.bench_dir)
+
+        # when running locally we need to get the benchmark source
+        if self.config.get('run_local', False):
+            self.get_webkit_source()
+
+        LOG.info("bench_dir contains:")
+        LOG.info(os.listdir(self.bench_dir))
+
+        # now have the benchmark source ready, go ahead and serve it up!
+        self.start_http_server()
+
+    def get_webkit_source(self):
+        # in production the build system auto copies webkit source into place;
+        # but when run locally we need to do this manually, so that raptor can find it
+        if 'speedometer' in self.test['name']:
+            # we only want to copy over the source for the benchmark that is about to run
+            dest = os.path.join(self.bench_dir, 'Speedometer')
+            src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
+                               'webkit', 'PerformanceTests', 'Speedometer')
+        else:
+            # otherwise copy all, but be sure to add each benchmark above instead
+            dest = self.bench_dir
+            # source for all benchmarks is repo/third_party...
+            src = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'], 'third_party',
+                               'webkit', 'PerformanceTests')
+
+        if os.path.exists(dest):
+            LOG.info("benchmark source already exists at: %s" % dest)
+            return
+
+        LOG.info("copying webkit benchmarks from %s to %s" % (src, dest))
+        try:
+            shutil.copytree(src, dest)
+        except Exception:
+            LOG.critical("error copying webkit benchmarks from %s to %s" % (src, dest))
+
+    def start_http_server(self):
+        self.write_server_headers()
+
+        # pick a free port
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.bind(('', 0))
+        self.port = sock.getsockname()[1]
+        sock.close()
+        _webserver = '127.0.0.1:%d' % self.port
+
+        self.httpd = self.setup_webserver(_webserver)
+        self.httpd.start()
+
+    def write_server_headers(self):
+        # to add specific headers for serving files via wptserve, write out a __dir__.headers file
+        # see http://wptserve.readthedocs.io/en/latest/handlers.html#file-handlers
+        LOG.info("writing wptserve headers file")
+        headers_file = os.path.join(self.bench_dir, '__dir__.headers')
+        file = open(headers_file, 'w')
+        file.write("Access-Control-Allow-Origin: *")
+        file.close()
+        LOG.info("wrote wpt headers file: %s" % headers_file)
+
+    def setup_webserver(self, webserver):
+        LOG.info("starting webserver on %r" % webserver)
+        LOG.info("serving benchmarks from here: %s" % self.bench_dir)
+        self.host, self.port = webserver.split(':')
+
+        return server.WebTestHttpd(port=int(self.port), doc_root=self.bench_dir,
+                                   routes=[("GET", "*", handlers.file_handler)])
+
+    def stop_serve(self):
+        LOG.info("TODO: stop serving benchmark source")
+        pass
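For orientation, a rough standalone sketch of how the Benchmark helper above might be exercised; the config and test dicts use hypothetical values, run_local assumes MOZ_DEVELOPER_REPO_DIR is set, and in raptor itself the class is only driven from run_test in raptor.py:

    # hypothetical standalone use of the Benchmark helper above
    config = {'run_local': True, 'obj_path': '/path/to/obj-ff-opt'}
    test = {'name': 'raptor-speedometer', 'type': 'benchmark'}
    benchmark = Benchmark(config, test)   # copies the source, starts wptserve
    print("speedometer served on port %d" % benchmark.port)
    # pages are now reachable at http://127.0.0.1:<port>/Speedometer/index.html?raptor
    benchmark.httpd.stop()                # shut the wptserve instance down when finished
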
--- a/testing/raptor/raptor/cmdline.py
+++ b/testing/raptor/raptor/cmdline.py
@@ -16,20 +16,24 @@ def create_parser(mach_interface=False):
     add_arg('-t', '--test', required=True, dest='test',
             help="name of raptor test to run")
     if not mach_interface:
         add_arg('--app', default='firefox', dest='app',
                 help="name of the application we are testing (default: firefox)",
                 choices=['firefox', 'chrome'])
         add_arg('-b', '--binary', required=True, dest='binary',
                 help="path to the browser executable that we are testing")
-        add_arg('--branchName', dest="branch_name", default='',
+        add_arg('--branchName', dest="branch_name", default=None,
                 help="Name of the branch we are testing on")
         add_arg('--symbolsPath', dest='symbols_path',
                 help="Path to the symbols for the build we are testing")
+        add_arg('--run-local', dest="run_local", default=False, action="store_true",
+                help="Flag that indicates if raptor is running locally or in production")
+        add_arg('--obj-path', dest="obj_path", default=None,
+                help="Browser build obj_path (received when running in production)")
 
     add_logging_group(parser)
     return parser
 
 
 def verify_options(parser, args):
     ctx = vars(args)
 
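With the two new arguments in place, a local run might be invoked along these lines (paths are placeholders); --run-local selects the local code paths and --obj-path tells raptor where to stage the benchmark source:

    python testing/raptor/raptor/raptor.py -t raptor-speedometer \
        -b /path/to/obj-ff-opt/dist/bin/firefox \
        --run-local --obj-path /path/to/obj-ff-opt
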
--- a/testing/raptor/raptor/control_server.py
+++ b/testing/raptor/raptor/control_server.py
@@ -9,17 +9,17 @@ from __future__ import absolute_import
 import BaseHTTPServer
 import json
 import os
 import socket
 import threading
 
 from mozlog import get_proxy_logger
 
-LOG = get_proxy_logger(component='control_server')
+LOG = get_proxy_logger(component='raptor-control-server')
 
 here = os.path.abspath(os.path.dirname(__file__))
 
 
 def MakeCustomHandlerClass(results_handler, shutdown_browser):
 
     class MyHandler(BaseHTTPServer.BaseHTTPRequestHandler, object):
 
--- a/testing/raptor/raptor/gen_test_config.py
+++ b/testing/raptor/raptor/gen_test_config.py
@@ -5,31 +5,32 @@ from __future__ import absolute_import
 
 import os
 
 from mozlog import get_proxy_logger
 
 
 here = os.path.abspath(os.path.dirname(__file__))
 webext_dir = os.path.join(os.path.dirname(here), 'webext', 'raptor')
-LOG = get_proxy_logger(component="gen_test_url")
+LOG = get_proxy_logger(component="raptor-gen-test-config")
 
 
-def gen_test_config(browser, test, cs_port):
-    LOG.info("writing test settings url background js, so webext can get it")
+def gen_test_config(browser, test, cs_port, b_port=0):
+    LOG.info("writing test settings into background js, so webext can get it")
 
     data = """// this file is auto-generated by raptor, do not edit directly
 function getTestConfig() {
     return {"browser": "%s",
             "cs_port": "%d",
             "test_name": "%s",
-            "test_settings_url": "http://localhost:%d/%s.json"};
+            "test_settings_url": "http://localhost:%d/%s.json",
+            "benchmark_port": "%d"};
 }
 
-""" % (browser, cs_port, test, cs_port, test)
+""" % (browser, cs_port, test, cs_port, test, b_port)
 
     webext_background_script = (os.path.join(webext_dir, "auto_gen_test_config.js"))
 
     file = open(webext_background_script, "w")
     file.write(data)
     file.close()
 
-    LOG.info("finished writing test config into webext")
+    LOG.info("finished writing test config to %s" % webext_background_script)
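To illustrate the template above, a call with made-up port numbers would generate an auto_gen_test_config.js like the following (a sketch; the real ports are chosen at runtime):

    # hypothetical call; ports are examples only
    gen_test_config('firefox', 'raptor-speedometer', 8000, b_port=54321)
    # auto_gen_test_config.js then contains:
    #   function getTestConfig() {
    #       return {"browser": "firefox",
    #               "cs_port": "8000",
    #               "test_name": "raptor-speedometer",
    #               "test_settings_url": "http://localhost:8000/raptor-speedometer.json",
    #               "benchmark_port": "54321"};
    #   }
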
--- a/testing/raptor/raptor/manifest.py
+++ b/testing/raptor/raptor/manifest.py
@@ -7,17 +7,17 @@ import json
 import os
 
 from manifestparser import TestManifest
 from mozlog import get_proxy_logger
 
 here = os.path.abspath(os.path.dirname(__file__))
 raptor_ini = os.path.join(here, 'raptor.ini')
 tests_dir = os.path.join(here, 'tests')
-LOG = get_proxy_logger(component="manifest")
+LOG = get_proxy_logger(component="raptor-manifest")
 
 required_settings = ['apps', 'type', 'page_cycles', 'test_url', 'measure',
                      'unit', 'lower_is_better', 'alert_threshold']
 
 playback_settings = ['playback_binary_manifest', 'playback_binary_zip_mac',
                      'playback_pageset_manifest', 'playback_pageset_zip_mac',
                      'playback_recordings']
 
@@ -38,16 +38,19 @@ def get_browser_test_list(browser_app):
                                       **info)
 
 
 def validate_test_ini(test_details):
     # validate all required test details were found in the test INI
     valid_settings = True
 
     for setting in required_settings:
+        # measure setting not required for benchmark type tests
+        if setting == 'measure' and test_details['type'] == 'benchmark':
+            continue
         if setting not in test_details:
             valid_settings = False
             LOG.info("setting '%s' is required but not found in %s"
                      % (setting, test_details['manifest']))
 
     # if playback is specified, we need more playback settings
     if 'playback' in test_details:
         for setting in playback_settings:
@@ -76,18 +79,20 @@ def write_test_settings_json(test_detail
             test_settings['raptor-options']['measure']['fnbpaint'] = True
         if "fcp" in test_details['measure']:
             test_settings['raptor-options']['measure']['fcp'] = True
         if "hero" in test_details['measure']:
             test_settings['raptor-options']['measure']['hero'] = test_details['hero'].split()
     if test_details.get("page_timeout", None) is not None:
         test_settings['raptor-options']['page_timeout'] = int(test_details['page_timeout'])
     test_settings['raptor-options']['unit'] = test_details.get("unit", "ms")
-    test_settings['raptor-options']['lower_is_better'] = \
-        bool(test_details.get("lower_is_better", True))
+    if test_details.get("lower_is_better", "true") == "false":
+        test_settings['raptor-options']['lower_is_better'] = False
+    else:
+        test_settings['raptor-options']['lower_is_better'] = True
     if test_details.get("alert_threshold", None) is not None:
         test_settings['raptor-options']['alert_threshold'] = float(test_details['alert_threshold'])
 
     settings_file = os.path.join(tests_dir, test_details['name'] + '.json')
     try:
         with open(settings_file, 'w') as out_file:
             json.dump(test_settings, out_file, indent=4, ensure_ascii=False)
             out_file.close()
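The string comparison for lower_is_better above replaces the earlier bool() cast because manifestparser returns INI values as strings, and bool() of any non-empty string is True; a quick Python illustration:

    >>> bool("false")        # the old cast: every non-empty string is truthy
    True
    >>> "false" == "false"   # the new check reads the INI value literally
    True
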
--- a/testing/raptor/raptor/output.py
+++ b/testing/raptor/raptor/output.py
@@ -42,46 +42,113 @@ class Output(object):
         if len(self.results) == 0:
             LOG.error("error: no raptor test results found!")
             return
 
         for test in self.results:
             subtests = []
             suite = {
                 'name': test.name,
+                'type': test.type,
                 'extraOptions': test.extra_options,
-                'subtests': subtests
+                'subtests': subtests,
+                'lowerIsBetter': test.lower_is_better,
+                'alertThreshold': float(test.alert_threshold)
             }
 
             suites.append(suite)
 
-            # each test can report multiple measurements per pageload
-            # each measurement becomes a subtest inside the 'suite'
-            for key, values in test.measurements.iteritems():
-                new_subtest = {}
-                new_subtest['name'] = test.name + "-" + key
-                new_subtest['replicates'] = values
-                new_subtest['lower_is_better'] = test.lower_is_better
-                new_subtest['alert_threshold'] = float(test.alert_threshold)
-                new_subtest['value'] = 0
-                new_subtest['unit'] = test.unit
+            # process results for pageloader type of tests
+            if test.type == "pageload":
+                # each test can report multiple measurements per pageload
+                # each measurement becomes a subtest inside the 'suite'
+
+                # this is the format we receive the results in from the pageload test
+                # i.e. one test (subtest) in raptor-firefox-tp6:
+
+                # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
+                # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
+                # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
+                # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
+                # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
+                # u'unit': u'ms', u'alert_threshold': 2}
+
+                for key, values in test.measurements.iteritems():
+                    new_subtest = {}
+                    new_subtest['name'] = test.name + "-" + key
+                    new_subtest['replicates'] = values
+                    new_subtest['lowerIsBetter'] = test.lower_is_better
+                    new_subtest['alertThreshold'] = float(test.alert_threshold)
+                    new_subtest['value'] = 0
+                    new_subtest['unit'] = test.unit
+
+                    filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
+                    new_subtest['value'] = filter.median(filtered_values)
+                    vals.append(new_subtest['value'])
+
+                    subtests.append(new_subtest)
+
+            elif test.type == "benchmark":
+                # each benchmark 'index' becomes a subtest; each pagecycle / iteration
+                # of the test has multiple values per index/subtest
+
+                # this is the format we receive the results in from the benchmark
+                # i.e. this is ONE pagecycle of speedometer:
 
-                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
-                new_subtest['value'] = filter.median(filtered_values)
-                vals.append(new_subtest['value'])
+                # {u'name': u'raptor-speedometer', u'type': u'benchmark', u'measurements':
+                # {u'speedometer': [[{u'AngularJS-TodoMVC/DeletingAllItems': [147.3000000000011,
+                # 149.95999999999913, 143.29999999999927, 150.34000000000378, 257.6999999999971],
+                # u'Inferno-TodoMVC/CompletingAllItems/Sync': [88.03999999999996,
+                # 85.60000000000036, 94.18000000000029, 95.19999999999709, 86.47999999999593],
+                # u'AngularJS-TodoMVC': [518.2400000000016, 525.8199999999997, 610.5199999999968,
+                # 532.8200000000215, 640.1800000000003], ...(repeated for each index/subtest)}]]},
+                # u'browser': u'Firefox 62.0a1 20180528123052', u'lower_is_better': False, u'page':
+                # u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
+                # u'alert_threshold': 2}
+
+                for page_cycle in test.measurements['speedometer']:
+                    page_cycle_results = page_cycle[0]
 
-                subtests.append(new_subtest)
+                    for sub, replicates in page_cycle_results.iteritems():
+                        # for each pagecycle, replicates are appended to each subtest
+                        # so if it doesn't exist the first time create the subtest entry
+                        existing = False
+                        for existing_sub in subtests:
+                            if existing_sub['name'] == sub:
+                                # pagecycle, subtest already there, so append the replicates
+                                existing_sub['replicates'].extend(replicates)
+                                # update the value now that we have more replicates
+                                existing_sub['value'] = filter.median(existing_sub['replicates'])
+                                # now we need to update our vals list too since we have a new subtest value
+                                for existing_val in vals:
+                                    if existing_val[1] == sub:
+                                        existing_val[0] = existing_sub['value']
+                                        break
+                                existing = True
+                                break
+
+                        if not existing:
+                            # subtest not added yet, first pagecycle, so add new one
+                            new_subtest = {}
+                            new_subtest['name'] = sub
+                            new_subtest['replicates'] = replicates
+                            new_subtest['lowerIsBetter'] = test.lower_is_better
+                            new_subtest['alertThreshold'] = float(test.alert_threshold)
+                            new_subtest['value'] = filter.median(replicates)
+                            new_subtest['unit'] = test.unit
+                            subtests.append(new_subtest)
+                            vals.append([new_subtest['value'], sub])
+            else:
+                LOG.error("output.summarize received unsupported test results type")
+                return
 
         # if there is more than one subtest, calculate a summary result
         if len(subtests) > 1:
             suite['value'] = self.construct_results(vals, testname=test.name)
 
-        LOG.info("returning summarized test results:")
-        LOG.info(test_results)
-
         self.summarized_results = test_results
 
     def output(self):
         """output to file and perfherder data json """
         if self.summarized_results == {}:
             LOG.error("error: no summarized raptor results found!")
             return False
 
@@ -159,24 +226,22 @@ class Output(object):
         if len(results) != 52:
             raise Exception("StyleBench has 52 subtests, found: %s instead" % len(results))
 
         results = results[12::13]
         score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
         return score
 
     def construct_results(self, vals, testname):
-        if testname.startswith('v8_7'):
+        if testname.startswith('raptor-v8_7'):
             return self.v8_Metric(vals)
-        elif testname.startswith('kraken'):
+        elif testname.startswith('raptor-kraken'):
             return self.JS_Metric(vals)
-        elif testname.startswith('ares6'):
+        elif testname.startswith('raptor-jetstream'):
             return self.benchmark_score(vals)
-        elif testname.startswith('jetstream'):
-            return self.benchmark_score(vals)
-        elif testname.startswith('speedometer'):
+        elif testname.startswith('raptor-speedometer'):
             return self.speedometer_score(vals)
-        elif testname.startswith('stylebench'):
+        elif testname.startswith('raptor-stylebench'):
             return self.stylebench_score(vals)
         elif len(vals) > 1:
             return filter.geometric_mean([i for i, j in vals])
         else:
             return filter.mean([i for i, j in vals])
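To make the new benchmark branch concrete, here is a toy sketch of the replicate pooling across page cycles and the geometric-mean fallback shown in construct_results; speedometer itself is routed to speedometer_score (not part of this hunk), and raptor uses its own filter module rather than the statistics module assumed here (Python 3.8+):

    # toy illustration of the benchmark summarization above (not raptor code)
    import statistics

    page_cycles = [
        [{'AngularJS-TodoMVC': [518.2, 525.8], 'Inferno-TodoMVC': [88.0, 85.6]}],
        [{'AngularJS-TodoMVC': [610.5, 532.8], 'Inferno-TodoMVC': [94.1, 95.2]}],
    ]
    subtests = {}
    for page_cycle in page_cycles:
        for name, replicates in page_cycle[0].items():
            # replicates from every page cycle are pooled per subtest
            subtests.setdefault(name, []).extend(replicates)
    # each subtest value is the median of its pooled replicates
    values = [statistics.median(reps) for reps in subtests.values()]
    # suites without a dedicated scoring function fall back to a geometric mean
    suite_value = statistics.geometric_mean(values)
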
--- a/testing/raptor/raptor/outputhandler.py
+++ b/testing/raptor/raptor/outputhandler.py
@@ -5,17 +5,17 @@
 # originally from talos_process.py
 from __future__ import absolute_import
 
 import json
 
 from mozlog import get_proxy_logger
 
 
-LOG = get_proxy_logger(component='raptor_process')
+LOG = get_proxy_logger(component='raptor-output-handler')
 
 
 class OutputHandler(object):
     def __init__(self):
         self.proc = None
 
     def __call__(self, line):
         if not line.strip():
--- a/testing/raptor/raptor/playback/mitmproxy.py
+++ b/testing/raptor/raptor/playback/mitmproxy.py
@@ -14,17 +14,17 @@ import time
 import mozinfo
 
 from mozlog import get_proxy_logger
 from mozprocess import ProcessHandler
 
 from .base import Playback
 
 here = os.path.dirname(os.path.realpath(__file__))
-LOG = get_proxy_logger(component='mitmproxy')
+LOG = get_proxy_logger(component='raptor-mitmproxy')
 
 mozharness_dir = os.path.join(here, '../../../mozharness')
 sys.path.insert(0, mozharness_dir)
 
 external_tools_path = os.environ.get('EXTERNALTOOLSPATH', None)
 
 if external_tools_path is not None:
     # running in production via mozharness
@@ -67,63 +67,59 @@ pref("network.proxy.ssl_port", 8080);
 class Mitmproxy(Playback):
 
     def __init__(self, config):
         self.config = config
         self.mitmproxy_proc = None
         self.recordings = config.get('playback_recordings', None)
         self.browser_path = config.get('binary', None)
 
-        # bindir is where we will download all mitmproxy required files
-        # if invoved via mach we will have received this in config; otherwise
-        # not running via mach (invoved direcdtly in testing/raptor) so figure it out
+        # raptor_dir is where we will download all mitmproxy required files
+        # when running locally it comes from obj_path via mozharness/mach
         if self.config.get("obj_path", None) is not None:
-            self.bindir = self.config.get("obj_path")
+            self.raptor_dir = self.config.get("obj_path")
         else:
-            # bit of a pain to get object dir when not running via mach - need to go from
-            # the binary folder i.e.
-            # /mozilla-unified/obj-x86_64-apple-darwin17.4.0/dist/Nightly.app/Contents/MacOS/
-            # back to:
-            # mozilla-unified/obj-x86_64-apple-darwin17.4.0/
-            # note, this may need to be updated per platform
-            self.bindir = os.path.normpath(os.path.join(self.config['binary'],
-                                                        '..', '..', '..', '..',
-                                                        '..', 'testing', 'raptor'))
+            # in production it is ../tasks/task_N/build/; that dir is not available
+            # as an envvar, however MOZ_UPLOAD_DIR is set as
+            # ../tasks/task_N/build/blobber_upload_dir so take that and go up 1 level
+            self.raptor_dir = os.path.dirname(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']))
 
-        self.recordings_path = self.bindir
-        LOG.info("bindir to be used for mitmproxy downloads and exe files: %s" % self.bindir)
+        # add raptor to raptor_dir
+        self.raptor_dir = os.path.join(self.raptor_dir, "testing", "raptor")
+        self.recordings_path = self.raptor_dir
+        LOG.info("raptor_dir used for mitmproxy downloads and exe files: %s" % self.raptor_dir)
 
         # go ahead and download and setup mitmproxy
         self.download()
         # mitmproxy must be started before setup, so that the CA cert is available
         self.start()
         self.setup()
 
     def _tooltool_fetch(self, manifest):
         def outputHandler(line):
             LOG.info(line)
         command = [sys.executable, TOOLTOOL_PATH, 'fetch', '-o', '-m', manifest]
 
         proc = ProcessHandler(
             command, processOutputLine=outputHandler, storeOutput=False,
-            cwd=self.bindir)
+            cwd=self.raptor_dir)
 
         proc.run()
 
         try:
             proc.wait()
         except Exception:
             if proc.poll() is None:
                 proc.kill(signal.SIGTERM)
 
     def download(self):
         # download mitmproxy binary and pageset using tooltool
         # note: tooltool automatically unpacks the files as well
-        if not os.path.exists(self.bindir):
-            os.makedirs(self.bindir)
+        if not os.path.exists(self.raptor_dir):
+            os.makedirs(self.raptor_dir)
         LOG.info("downloading mitmproxy binary")
         _manifest = os.path.join(here, self.config['playback_binary_manifest'])
         self._tooltool_fetch(_manifest)
         LOG.info("downloading mitmproxy pageset")
         _manifest = os.path.join(here, self.config['playback_pageset_manifest'])
         self._tooltool_fetch(_manifest)
         return
 
@@ -134,17 +130,17 @@ class Mitmproxy(Playback):
         scripts_path = os.environ.get('SCRIPTSPATH')
         LOG.info('scripts_path: %s' % str(scripts_path))
         self.install_mitmproxy_cert(self.mitmproxy_proc,
                                     self.browser_path,
                                     str(scripts_path))
         return
 
     def start(self):
-        mitmdump_path = os.path.join(self.bindir, 'mitmdump')
+        mitmdump_path = os.path.join(self.raptor_dir, 'mitmdump')
         recordings_list = self.recordings.split()
         self.mitmproxy_proc = self.start_mitmproxy_playback(mitmdump_path,
                                                             self.recordings_path,
                                                             recordings_list,
                                                             self.browser_path)
         return
 
     def stop(self):
--- a/testing/raptor/raptor/raptor.ini
+++ b/testing/raptor/raptor/raptor.ini
@@ -1,2 +1,3 @@
 # raptor tests
 [include:tests/raptor-firefox-tp6.ini]
+[include:tests/raptor-speedometer.ini]
--- a/testing/raptor/raptor/raptor.py
+++ b/testing/raptor/raptor/raptor.py
@@ -21,39 +21,41 @@ webext_dir = os.path.join(os.path.dirnam
 sys.path.insert(0, here)
 
 try:
     from mozbuild.base import MozbuildObject
     build = MozbuildObject.from_environment(cwd=here)
 except ImportError:
     build = None
 
+from benchmark import Benchmark
 from cmdline import parse_args
 from control_server import RaptorControlServer
 from gen_test_config import gen_test_config
 from outputhandler import OutputHandler
 from manifest import get_raptor_test_list
 from playback import get_playback
 from results import RaptorResultsHandler
 
 
 class Raptor(object):
     """Container class for Raptor"""
 
-    def __init__(self, app, binary):
+    def __init__(self, app, binary, run_local=False, obj_path=None):
         self.config = {}
         self.config['app'] = app
         self.config['binary'] = binary
         self.config['platform'] = mozinfo.os
-
+        self.config['run_local'] = run_local
+        self.config['obj_path'] = obj_path
         self.raptor_venv = os.path.join(os.getcwd(), 'raptor-venv')
-        self.log = get_default_logger(component='raptor')
-        self.addons_installed = False
+        self.log = get_default_logger(component='raptor-main')
         self.control_server = None
         self.playback = None
+        self.benchmark = None
 
         # Create the profile
         self.profile = create_profile(self.config['app'])
 
         # Merge in base profiles
         with open(os.path.join(self.profile_data_dir, 'profiles.json'), 'r') as fh:
             base_profiles = json.load(fh)['raptor']
 
@@ -94,19 +96,30 @@ class Raptor(object):
         self.config['playback_binary_zip'] = test.get(_key, None)
         self.config['playback_pageset_manifest'] = test.get('playback_pageset_manifest', None)
         _key = 'playback_pageset_zip_%s' % self.config['platform']
         self.config['playback_pageset_zip'] = test.get(_key, None)
         self.config['playback_recordings'] = test.get('playback_recordings', None)
 
     def run_test(self, test, timeout=None):
         self.log.info("starting raptor test: %s" % test['name'])
+        self.log.info("test settings: %s" % str(test))
+        self.log.info("raptor config: %s" % str(self.config))
+
+        # benchmark-type tests require the benchmark test to be served out
+        if test.get('type') == "benchmark":
+            self.benchmark = Benchmark(self.config, test)
+            benchmark_port = int(self.benchmark.port)
+        else:
+            benchmark_port = 0
+
         gen_test_config(self.config['app'],
                         test['name'],
-                        self.control_server.port)
+                        self.control_server.port,
+                        benchmark_port)
 
         # must install raptor addon each time because we dynamically update some content
         raptor_webext = os.path.join(webext_dir, 'raptor')
         self.log.info("installing webext %s" % raptor_webext)
         self.profile.addons.install(raptor_webext)
         webext_id = self.profile.addons.addon_details(raptor_webext)['id']
 
         # some tests require tools to playback the test pages
@@ -136,16 +149,28 @@ class Raptor(object):
         self.log.info("removing webext %s" % raptor_webext)
         self.profile.addons.remove_addon(webext_id)
 
         if self.runner.is_running():
             self.log("Application timed out after {} seconds".format(timeout))
             self.runner.stop()
 
     def process_results(self):
+        # when running locally output results in build/raptor.json; when running
+        # in production output to a local.json to be turned into tc job artifact
+        if self.config.get('run_local', False):
+            if 'MOZ_DEVELOPER_REPO_DIR' in os.environ:
+                raptor_json_path = os.path.join(os.environ['MOZ_DEVELOPER_REPO_DIR'],
+                                                'testing', 'mozharness', 'build', 'raptor.json')
+            else:
+                raptor_json_path = os.path.join(here, 'raptor.json')
+        else:
+            raptor_json_path = os.path.join(os.getcwd(), 'local.json')
+
+        self.config['raptor_json_path'] = raptor_json_path
         return self.results_handler.summarize_and_output(self.config)
 
     def clean_up(self):
         self.control_server.stop()
         self.runner.stop()
         self.log.info("finished")
 
 
@@ -162,17 +187,17 @@ def main(args=sys.argv[1:]):
     if len(raptor_test_list) == 0:
         LOG.critical("abort: no tests found")
         sys.exit(1)
 
     LOG.info("raptor tests scheduled to run:")
     for next_test in raptor_test_list:
         LOG.info(next_test['name'])
 
-    raptor = Raptor(args.app, args.binary)
+    raptor = Raptor(args.app, args.binary, args.run_local, args.obj_path)
 
     raptor.start_control_server()
 
     for next_test in raptor_test_list:
         raptor.run_test(next_test)
 
     success = raptor.process_results()
     raptor.clean_up()
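The benchmark wiring added to run_test above can be condensed as follows; this is a hypothetical summary tying raptor.py, gen_test_config and runner.js together, not code from the patch:

    # hypothetical condensation of the benchmark port handoff
    def prepare_benchmark(config, test, control_server_port):
        benchmark_port = 0
        if test.get('type') == 'benchmark':
            benchmark = Benchmark(config, test)       # serve the benchmark via wptserve
            benchmark_port = int(benchmark.port)
        # the port is written into the webext config ...
        gen_test_config(config['app'], test['name'], control_server_port, benchmark_port)
        # ... and runner.js substitutes it into the INI's test_url placeholder:
        #   http://localhost:<port>/Speedometer/index.html?raptor
        return benchmark_port
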
--- a/testing/raptor/raptor/results.py
+++ b/testing/raptor/raptor/results.py
@@ -17,17 +17,16 @@ class RaptorResultsHandler():
     """Handle Raptor test results"""
 
     def __init__(self):
         self.results = []
 
     def add(self, new_result_json):
         # add to results
         LOG.info("received results in RaptorResultsHandler.add")
-        LOG.info(new_result_json)
         new_result = RaptorTestResult(new_result_json)
         self.results.append(new_result)
 
     def summarize_and_output(self, test_config):
         # summarize the result data, write to file and output PERFHERDER_DATA
         LOG.info("summarizing raptor test results")
         output = Output(self.results)
         output.summarize()
new file mode 100644
--- /dev/null
+++ b/testing/raptor/raptor/tests/raptor-speedometer.ini
@@ -0,0 +1,15 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# speedometer benchmark for firefox and chrome
+
+[raptor-speedometer]
+apps = firefox
+type =  benchmark
+test_url = http://localhost:<port>/Speedometer/index.html?raptor
+page_cycles = 5
+page_timeout = 120000
+unit = score
+lower_is_better = false
+alert_threshold = 2.0
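Based on write_test_settings_json in manifest.py and the fields runner.js reads back, this INI is expected to be served to the webext as a raptor-speedometer.json roughly like the following (a sketch; exact field types and layout are generated at runtime, and the <port> placeholder is swapped in by runner.js):

    {
        "raptor-options": {
            "type": "benchmark",
            "page_cycles": 5,
            "test_url": "http://localhost:<port>/Speedometer/index.html?raptor",
            "page_timeout": 120000,
            "unit": "score",
            "lower_is_better": false,
            "alert_threshold": 2.0
        }
    }
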
--- a/testing/raptor/requirements.txt
+++ b/testing/raptor/requirements.txt
@@ -1,3 +1,4 @@
 mozrunner ~= 7.0
 mozprofile ~= 1.1
 manifestparser >= 1.1
+wptserve ~= 1.4.0
--- a/testing/raptor/webext/raptor/benchmark-relay.js
+++ b/testing/raptor/webext/raptor/benchmark-relay.js
@@ -1,19 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 // receives result from benchmark and relays onto our background runner
 
 function receiveMessage(event) {
-  console.log("received message!");
-  console.log(event.origin);
-  if (event.origin == "http://localhost:8081") {
-    sendResult("speedometer", event.data);
+  console.log("raptor benchmark-relay received message");
+  console.log(event.data);
+  // raptor benchmark message data [0] is the raptor tag, [1] is the benchmark
+  // name, and the rest is the actual benchmark results that we want to forward
+  if (event.data[0] == "raptor-benchmark") {
+    sendResult(event.data[1], event.data.slice(2));
   }
 }
 
 function sendResult(_type, _value) {
   // send result back to background runner script
   console.log("sending result back to runner: " + _type + " " + _value);
   chrome.runtime.sendMessage({"type": _type, "value": _value}, function(response) {
     console.log(response.text);
--- a/testing/raptor/webext/raptor/manifest.json
+++ b/testing/raptor/webext/raptor/manifest.json
@@ -8,23 +8,26 @@
   "name": "Raptor",
   "version": "0.1",
   "description": "Performance measurement framework prototype",
   "background": {
     "scripts": ["auto_gen_test_config.js", "runner.js"]
   },
   "content_scripts": [
     {
-      "matches": ["<all_urls>"],
+      "matches": ["*://*.amazon.com/*",
+                  "*://*.facebook.com/*",
+                  "*://*.google.com/*",
+                  "*://*.youtube.com/*"],
       "js": ["measure.js"]
     },
     {
-      "matches": ["http://*/Speedometer/index.html*"],
+      "matches": ["*://*/Speedometer/index.html*"],
       "js": ["benchmark-relay.js"]
     }
   ],
   "permissions": [
-    "http://127.0.0.1:8000/",
+    "<all_urls>",
     "tabs",
     "storage",
     "alarms"
   ]
 }
--- a/testing/raptor/webext/raptor/measure.js
+++ b/testing/raptor/webext/raptor/measure.js
@@ -35,41 +35,48 @@ function contentHandler() {
     // chrome, no promise so use callback
     chrome.storage.local.get("settings", function(item) {
       setup(item.settings);
     });
   }
 }
 
 function setup(settings) {
-  if (settings.measure !== undefined) {
-    if (settings.measure.fnbpaint !== undefined) {
-      getFNBPaint = settings.measure.fnbpaint;
-      if (getFNBPaint) {
-        console.log("will be measuring fnbpaint");
-        measureFNBPaint();
-      }
+  if (settings.type != "pageload") {
+    return;
+  }
+
+  if (settings.measure == undefined) {
+    console.log("abort: 'measure' key not found in test settings");
+    return;
+  }
+
+  if (settings.measure.fnbpaint !== undefined) {
+    getFNBPaint = settings.measure.fnbpaint;
+    if (getFNBPaint) {
+      console.log("will be measuring fnbpaint");
+      measureFNBPaint();
     }
-    if (settings.measure.fcp !== undefined) {
-      getFCP = settings.measure.fcp;
-      if (getFCP) {
-        console.log("will be measuring first-contentful-paint");
-        measureFirstContentfulPaint();
-      }
+  }
+
+  if (settings.measure.fcp !== undefined) {
+    getFCP = settings.measure.fcp;
+    if (getFCP) {
+      console.log("will be measuring first-contentful-paint");
+      measureFirstContentfulPaint();
     }
-    if (settings.measure.hero !== undefined) {
-      if (settings.measure.hero.length !== 0) {
-        getHero = true;
-        heroesToCapture = settings.measure.hero;
-        console.log("hero elements to measure: " + heroesToCapture);
-        measureHero();
-      }
+  }
+
+  if (settings.measure.hero !== undefined) {
+    if (settings.measure.hero.length !== 0) {
+      getHero = true;
+      heroesToCapture = settings.measure.hero;
+      console.log("hero elements to measure: " + heroesToCapture);
+      measureHero();
     }
-  } else {
-    console.log("abort: 'measure' key not found in test settings");
   }
 }
 
 function measureHero() {
   var obs = null;
 
   var heroElementsFound = window.document.querySelectorAll("[elementtiming]");
   console.log("found " + heroElementsFound.length + " hero elements in the page");
--- a/testing/raptor/webext/raptor/runner.js
+++ b/testing/raptor/webext/raptor/runner.js
@@ -13,28 +13,28 @@
 // 'python -m SimpleHTTPServer 8081'
 // to serve out the pages that we want to prototype with. Also
 // update the manifest content 'matches' accordingly
 
 // when the browser starts this webext runner will start automatically; we
 // want to give the browser some time (ms) to settle before starting tests
 var postStartupDelay = 30000;
 
-// have an optional delay (ms) between pageload cycles
-var pageloadDelay = 1000;
+// delay (ms) between pageload cycles
+var pageCycleDelay = 1000;
 
 var browserName;
 var ext;
 var testName = null;
 var settingsURL = null;
 var csPort = null;
+var benchmarkPort = null;
 var testType;
 var pageCycles = 0;
 var pageCycle = 0;
-var pageCycleDelay = 1000;
 var testURL;
 var testTabID = 0;
 var getHero = false;
 var getFNBPaint = false;
 var getFCP = false;
 var isHeroPending = false;
 var pendingHeroes = [];
 var settings = {};
@@ -58,16 +58,26 @@ function getTestSettings() {
       response.text().then(function(text) {
         console.log(text);
         settings = JSON.parse(text)["raptor-options"];
 
         // parse the test settings
         testType = settings.type;
         pageCycles = settings.page_cycles;
         testURL = settings.test_url;
+
+        // for pageload type tests, the testURL is fine as is - we don't have
+        // to add a port as it's accessed via proxy and the playback tool
+        // however for benchmark tests, their source is served out on a local
+        // webserver, so we need to swap in the webserver port into the testURL
+        if (testType == "benchmark") {
+          // just replace the '<port>' keyword in the URL with actual benchmarkPort
+          testURL = testURL.replace("<port>", benchmarkPort);
+        }
+
         results.page = testURL;
         results.type = testType;
         results.name = testName;
         results.unit = settings.unit;
         results.lower_is_better = settings.lower_is_better;
         results.alert_threshold = settings.alert_threshold;
 
         if (settings.page_timeout !== undefined) {
@@ -138,25 +148,25 @@ function getBrowserInfo() {
       resolve();
     }
   });
 }
 
 function testTabCreated(tab) {
   testTabID = tab.id;
   console.log("opened new empty tab " + testTabID);
-  setTimeout(nextCycle, pageloadDelay);
+  nextCycle();
 }
 
 async function testTabUpdated(tab) {
   console.log("tab " + tab.id + " reloaded");
   // wait for pageload test result from content
   await waitForResult();
   // move on to next cycle (or test complete)
-  setTimeout(nextCycle, pageloadDelay);
+  nextCycle();
 }
 
 function waitForResult() {
   console.log("awaiting results...");
   return new Promise(resolve => {
     function checkForResult() {
       if (testType == "pageload") {
         if (!isHeroPending && !isFNBPaintPending && !isFCPPending) {
@@ -200,17 +210,17 @@ function nextCycle() {
         }
         if (getFNBPaint)
           isFNBPaintPending = true;
         if (getFCP)
           isFCPPending = true;
       } else if (testType == "benchmark") {
         isBenchmarkPending = true;
       }
-      // reload the test page
+      // (re)load the test page
       ext.tabs.update(testTabID, {url: testURL}, testTabUpdated);
     }, pageCycleDelay);
   } else {
     verifyResults();
   }
 }
 
 function timeoutAlarmListener(alarm) {
@@ -351,16 +361,17 @@ function cleanUp() {
 function runner() {
   let config = getTestConfig();
   console.log("test name is: " + config.test_name);
   console.log("test settings url is: " + config.test_settings_url);
   testName = config.test_name;
   settingsURL = config.test_settings_url;
   csPort = config.cs_port;
   browserName = config.browser;
+  benchmarkPort = config.benchmark_port;
 
   getBrowserInfo().then(function() {
     getTestSettings().then(function() {
       if (testType == "benchmark") {
         // webkit benchmark type of test
         console.log("benchmark test start");
       } else if (testType == "pageload") {
         // standard pageload test
--- a/third_party/webkit/PerformanceTests/Speedometer/resources/benchmark-report.js
+++ b/third_party/webkit/PerformanceTests/Speedometer/resources/benchmark-report.js
@@ -1,12 +1,13 @@
 // This file can be customized to report results as needed.
 
 (function () {
-    if ((!window.testRunner && location.search != '?webkit' && location.hash != '#webkit') && location.search != '?gecko')
+    if ((!window.testRunner && location.search != '?webkit' && location.hash != '#webkit')
+         && location.search != '?gecko' && location.search != '?raptor')
         return;
 
     if (window.testRunner)
         testRunner.waitUntilDone();
 
     var scriptElement = document.createElement('script');
     scriptElement.src = '../resources/runner.js';
     document.head.appendChild(scriptElement);
@@ -68,28 +69,33 @@
                     addToMeasuredValue(suite.total, suiteName, 'Total');
                 }
             });
 
             var fullNames = new Array;
             for (var fullName in measuredValuesByFullName)
                 fullNames.push(fullName);
 
-            if (typeof tpRecordTime !== "undefined") {
+            if (typeof tpRecordTime !== "undefined" || location.search == '?raptor') {
                 var values = new Array;
                 for (var i = 0; i < fullNames.length; i++) {
                     values.push(measuredValuesByFullName[fullNames[i]]);
                 }
                 fullNames = new Array;
                 for (var fullName in measuredValuesByFullName) {
                     for (var count=0; count < this.iterationCount; count++) {
                         fullNames.push(fullName);
                     }
                 }
-                tpRecordTime(values.join(','), 0, fullNames.join(','));
+                if (location.search == '?raptor') {
+                    _data = ['raptor-benchmark', 'speedometer', measuredValuesByFullName];
+                    window.postMessage(_data, '*');
+                } else {
+                    tpRecordTime(values.join(','), 0, fullNames.join(','));
+                }
             } else {
                 for (var i = 0; i < fullNames.length; i++) {
                     var values = measuredValuesByFullName[fullNames[i]];
                     PerfTestRunner.reportValues(createTest(fullNames[i], values.aggregator, i + 1 == fullNames.length), values);
                 }
             }
         }
     };