Bug 1353461 - [reftest] remove the start-after-crash feature, r?jmaher draft
authorAndrew Halberstadt <ahalberstadt@mozilla.com>
Mon, 05 Feb 2018 14:24:49 -0500
changeset 759225 160501b1348a8a404072135fc88cf9f6617842d0
parent 759224 bd8a7c41f61e27e311a35fd0f1535853742e9fae
child 759226 e7b720539186896156d5da19b95080fb1860007a
push id100313
push userahalberstadt@mozilla.com
push dateFri, 23 Feb 2018 22:02:04 +0000
reviewersjmaher
bugs1353461
milestone60.0a1
Bug 1353461 - [reftest] remove the start-after-crash feature, r?jmaher The point of start-after-crash was to resume running tests after a crash so we don't miss out on test coverage when a crash happens. Now that we're close to landing run-by-manifest, this feature isn't as necessary since only tests that appear later in the same manifest as the crashing test will be skipped. Another reason to remove it is that its current implementation is buggy. It assumes tests have a unique ID (they don't), which means we could accidentally get into an infinite loop if the wrong test happens to crash at the wrong time. A third reason to remove it is that it adds a lot of complexity to the harness. Especially now with run-by-manifest, supporting both would require handling a lot of different edge cases and make things much more complicated than they already are. All that being said, it would still provide value. Not only would it allow us to resume tests in the same manifest, but run-by-manifest isn't yet enabled on Android, jsreftest or crashtest (though we hope to get it enabled in those places in the future). So it may be worth re-implementing start-after-crash again based on the new manifest parsing mechanism that run-by-manifest uses. It should be possible to implement it a lot more cleanly now. This work will have to be left to a follow-up. MozReview-Commit-ID: P2hh5VecKW
layout/tools/reftest/reftest.jsm
layout/tools/reftest/remotereftest.py
layout/tools/reftest/runreftest.py
--- a/layout/tools/reftest/reftest.jsm
+++ b/layout/tools/reftest/reftest.jsm
@@ -263,22 +263,16 @@ function InitAndStartRefTests()
         g.thisChunk = 0;
     }
 
     try {
         g.focusFilterMode = prefs.getCharPref("reftest.focusFilterMode");
     } catch(e) {}
 
     try {
-        g.startAfter = prefs.getCharPref("reftest.startAfter");
-    } catch(e) {
-        g.startAfter = undefined;
-    }
-
-    try {
         g.compareRetainedDisplayLists = prefs.getBoolPref("reftest.compareRetainedDisplayLists");
     } catch (e) {}
 #ifdef MOZ_STYLO
     try {
         g.compareStyloToGecko = prefs.getBoolPref("reftest.compareStyloToGecko");
     } catch(e) {}
 #endif
 
@@ -504,48 +498,30 @@ function StartTests()
             end = g.thisChunk == g.totalChunks ? g.urls.length : g.urls.indexOf(tURLs[end + 1]) - 1;
 
             logger.info("Running chunk " + g.thisChunk + " out of " + g.totalChunks + " chunks.  " +
                 "tests " + (start+1) + "-" + end + "/" + g.urls.length);
 
             g.urls = g.urls.slice(start, end);
         }
 
-        if (g.manageSuite && g.startAfter === undefined && !g.suiteStarted) {
+        if (g.manageSuite && !g.suiteStarted) {
             var ids = g.urls.map(function(obj) {
                 return obj.identifier;
             });
             var suite = prefs.getCharPref('reftest.suite', 'reftest');
             logger.suiteStart(ids, suite, {"skipped": g.urls.length - numActiveTests});
             g.suiteStarted = true
         }
 
         if (g.shuffle) {
-            if (g.startAfter !== undefined) {
-                logger.error("Can't resume from a crashed test when " +
-                             "--shuffle is enabled, continue by shuffling " +
-                             "all the tests");
-                DoneTests();
-                return;
-            }
             Shuffle(g.urls);
-        } else if (g.startAfter !== undefined) {
-            // Skip through previously crashed test
-            // We have to do this after chunking so we don't break the numbers
-            var crash_idx = g.urls.map(function(url) {
-                return url['url1']['spec'];
-            }).indexOf(g.startAfter);
-            if (crash_idx == -1) {
-                throw "Can't find the previously crashed test";
-            }
-            g.urls = g.urls.slice(crash_idx + 1);
         }
 
         g.totalTests = g.urls.length;
-
         if (!g.totalTests && !g.verify)
             throw "No tests to run";
 
         g.uriCanvases = {};
         StartCurrentTest();
     } catch (ex) {
         //g.browser.loadURI('data:text/plain,' + ex);
         ++g.testResults.Exception;
--- a/layout/tools/reftest/remotereftest.py
+++ b/layout/tools/reftest/remotereftest.py
@@ -255,27 +255,23 @@ class RemoteReftest(RefTest):
                         except Exception as e:
                             self.log.info("Failed to kill process %d: %s" % (proc.pid, str(e)))
                     else:
                         self.log.info("NOT killing %s (not an orphan?)" % procd)
             except Exception:
                 # may not be able to access process info for all processes
                 continue
 
-    def createReftestProfile(self, options, startAfter=None, **kwargs):
+    def createReftestProfile(self, options, **kwargs):
         profile = RefTest.createReftestProfile(self,
                                                options,
                                                server=options.remoteWebServer,
                                                port=options.httpPort,
                                                **kwargs)
-        if startAfter is not None:
-            print ("WARNING: Continuing after a crash is not supported for remote "
-                   "reftest yet.")
         profileDir = profile.profile
-
         prefs = {}
         prefs["app.update.url.android"] = ""
         prefs["browser.firstrun.show.localepicker"] = False
         prefs["reftest.remote"] = True
         prefs["datareporting.policy.dataSubmissionPolicyBypassAcceptance"] = True
         # move necko cache to a location that can be cleaned up
         prefs["browser.cache.disk.parent_directory"] = self.remoteCache
 
@@ -349,31 +345,28 @@ class RemoteReftest(RefTest):
 
         binary = options.app
         profile = self.createReftestProfile(options, **profileArgs)
 
         # browser environment
         env = self.buildBrowserEnv(options, profile.profile)
 
         self.log.info("Running with e10s: {}".format(options.e10s))
-        status, lastTestSeen = self.automation.runApp(None, env,
-                                                      binary,
-                                                      profile.profile,
-                                                      cmdargs,
-                                                      utilityPath=options.utilityPath,
-                                                      xrePath=options.xrePath,
-                                                      debuggerInfo=debuggerInfo,
-                                                      symbolsPath=symbolsPath,
-                                                      timeout=timeout)
-        if status == 1:
-            # when max run time exceeded, avoid restart
-            lastTestSeen = RefTest.TEST_SEEN_FINAL
+        status, self.lastTestSeen = self.automation.runApp(None, env,
+                                                           binary,
+                                                           profile.profile,
+                                                           cmdargs,
+                                                           utilityPath=options.utilityPath,
+                                                           xrePath=options.xrePath,
+                                                           debuggerInfo=debuggerInfo,
+                                                           symbolsPath=symbolsPath,
+                                                           timeout=timeout)
 
         self.cleanup(profile.profile)
-        return status, lastTestSeen, self.outputHandler.results
+        return status, self.outputHandler.results
 
     def cleanup(self, profileDir):
         # Pull results back from device
         if self.remoteLogFile and \
                 self._devicemanager.fileExists(self.remoteLogFile):
             self._devicemanager.getFile(self.remoteLogFile, self.localLogName)
         else:
             print "WARNING: Unable to retrieve log file (%s) from remote " \
--- a/layout/tools/reftest/runreftest.py
+++ b/layout/tools/reftest/runreftest.py
@@ -3,17 +3,16 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 """
 Runs the reftest test harness.
 """
 
 import collections
 import copy
-import itertools
 import json
 import multiprocessing
 import os
 import platform
 import re
 import shutil
 import signal
 import subprocess
@@ -222,26 +221,24 @@ class ReftestResolver(object):
             if None in manifests[key]:
                 manifests[key] = None
             else:
                 manifests[key] = "|".join(list(manifests[key]))
         return manifests
 
 
 class RefTest(object):
-    TEST_SEEN_INITIAL = 'reftest'
-    TEST_SEEN_FINAL = 'Main app process exited normally'
     oldcwd = os.getcwd()
     parse_manifest = True
     resolver_cls = ReftestResolver
     use_marionette = True
 
     def __init__(self):
         update_mozinfo()
-        self.lastTestSeen = self.TEST_SEEN_INITIAL
+        self.lastTestSeen = None
         self.haveDumpedScreen = False
         self.resolver = self.resolver_cls()
         self.log = None
         self.testDumpFile = os.path.join(tempfile.gettempdir(), 'reftests.json')
 
     def _populate_logger(self, options):
         if self.log:
             return
@@ -260,28 +257,26 @@ class RefTest(object):
         self.log = mozlog.commandline.setup_logging(
             "reftest harness", options, {"tbpl": sys.stdout}, fmt_options)
 
     def getFullPath(self, path):
         "Get an absolute path relative to self.oldcwd."
         return os.path.normpath(os.path.join(self.oldcwd, os.path.expanduser(path)))
 
     def createReftestProfile(self, options, tests=None, manifests=None,
-                             server='localhost', port=0, profile_to_clone=None,
-                             startAfter=None, prefs=None):
+                             server='localhost', port=0, profile_to_clone=None, prefs=None):
         """Sets up a profile for reftest.
 
         :param options: Object containing command line options
         :param tests: List of test objects to run
         :param manifests: List of manifest files to parse (only takes effect
                           if tests were not passed in)
         :param server: Server name to use for http tests
         :param profile_to_clone: Path to a profile to use as the basis for the
                                  test profile
-        :param startAfter: Start running tests after the specified test id
         :param prefs: Extra preferences to set in the profile
         """
         locations = mozprofile.permissions.ServerLocations()
         locations.add_host(server, scheme='http', port=port)
         locations.add_host(server, scheme='https', port=port)
 
         # Set preferences for communication between our command line arguments
         # and the reftest harness.  Preferences that are required for reftest
@@ -301,20 +296,16 @@ class RefTest(object):
         if options.verify:
             prefs['reftest.verify'] = True
         if options.cleanupCrashes:
             prefs['reftest.cleanupPendingCrashes'] = True
         prefs['reftest.focusFilterMode'] = options.focusFilterMode
         prefs['reftest.logLevel'] = options.log_tbpl_level or 'info'
         prefs['reftest.suite'] = options.suite
 
-        if startAfter not in (None, self.TEST_SEEN_INITIAL, self.TEST_SEEN_FINAL):
-            self.log.info("Setting reftest.startAfter to %s" % startAfter)
-            prefs['reftest.startAfter'] = startAfter
-
         # Unconditionally update the e10s pref.
         if options.e10s:
             prefs['browser.tabs.remote.autostart'] = True
         else:
             prefs['browser.tabs.remote.autostart'] = False
 
         # Bug 1262954: For winXP + e10s disable acceleration
         if platform.system() in ("Windows", "Microsoft") and \
@@ -777,46 +768,44 @@ class RefTest(object):
                 marionette_exception = sys.exc_info()
 
         status = runner.wait()
         runner.process_handler = None
         outputHandler.proc_name = None
 
         if status:
             msg = "TEST-UNEXPECTED-FAIL | %s | application terminated with exit code %s" % \
-                (self.lastTestSeen, status)
+                    (self.lastTestSeen, status)
             # use process_output so message is logged verbatim
             self.log.process_output(None, msg)
-        else:
-            self.lastTestSeen = self.TEST_SEEN_FINAL
 
         crashed = mozcrash.log_crashes(self.log, os.path.join(profile.profile, 'minidumps'),
                                        symbolsPath, test=self.lastTestSeen)
         if not status and crashed:
             status = 1
 
         runner.cleanup()
         self.cleanup(profile.profile)
 
         if marionette_exception is not None:
             exc, value, tb = marionette_exception
             raise exc, value, tb
 
         self.log.info("Process mode: {}".format('e10s' if options.e10s else 'non-e10s'))
-        return status, self.lastTestSeen, outputHandler.results
+        return status, outputHandler.results
 
     def getActiveTests(self, manifests, options, testDumpFile=None):
         # These prefs will cause reftest.jsm to parse the manifests,
         # dump the resulting tests to a file, and exit.
         prefs = {
             'reftest.manifests': json.dumps(manifests),
             'reftest.manifests.dumpTests': testDumpFile or self.testDumpFile,
         }
         cmdargs = []  # ['-headless']
-        status, _, _ = self.runApp(options, cmdargs=cmdargs, prefs=prefs)
+        status, _ = self.runApp(options, cmdargs=cmdargs, prefs=prefs)
 
         with open(self.testDumpFile, 'r') as fh:
             tests = json.load(fh)
 
         if os.path.isfile(self.testDumpFile):
             mozfile.remove(self.testDumpFile)
 
         for test in tests:
@@ -841,67 +830,39 @@ class RefTest(object):
 
         tests = None
         if self.parse_manifest:
             tests = self.getActiveTests(manifests, options)
 
             ids = [t['identifier'] for t in tests]
             self.log.suite_start(ids, name=options.suite)
 
-        startAfter = None  # When the previous run crashed, we skip the tests we ran before
-        prevStartAfter = None
-        for i in itertools.count():
-            status, startAfter, results = self.runApp(
-                options,
-                tests=tests,
-                manifests=manifests,
-                cmdargs=cmdargs,
-                # We generally want the JS harness or marionette
-                # to handle timeouts if they can.
-                # The default JS harness timeout is currently
-                # 300 seconds (default options.timeout).
-                # The default Marionette socket timeout is
-                # currently 360 seconds.
-                # Give the JS harness extra time to deal with
-                # its own timeouts and try to usually exceed
-                # the 360 second marionette socket timeout.
-                # See bug 479518 and bug 1414063.
-                timeout=options.timeout + 70.0,
-                symbolsPath=options.symbolsPath,
-                debuggerInfo=debuggerInfo
-            )
-            mozleak.process_leak_log(self.leakLogFile,
-                                     leak_thresholds=options.leakThresholds,
-                                     stack_fixer=get_stack_fixer_function(options.utilityPath,
-                                                                          options.symbolsPath))
-
-            if status == 0:
-                break
-
-            if startAfter == self.TEST_SEEN_FINAL:
-                self.log.info("Finished running all tests, skipping resume "
-                              "despite non-zero status code: %s" % status)
-                break
-
-            if startAfter is not None and options.shuffle:
-                self.log.error("Can not resume from a crash with --shuffle "
-                               "enabled. Please consider disabling --shuffle")
-                break
-            if startAfter is not None and options.maxRetries <= i:
-                self.log.error("Hit maximum number of allowed retries ({}) "
-                               "in the test run".format(options.maxRetries))
-                break
-            if startAfter == prevStartAfter:
-                # If the test stuck on the same test, or there the crashed
-                # test appeared more then once, stop
-                self.log.error("Force stop because we keep running into "
-                               "test \"{}\"".format(startAfter))
-                break
-            prevStartAfter = startAfter
-            # TODO: we need to emit an SUITE-END log if it crashed
+        status, results = self.runApp(
+            options,
+            tests=tests,
+            manifests=manifests,
+            cmdargs=cmdargs,
+            # We generally want the JS harness or marionette
+            # to handle timeouts if they can.
+            # The default JS harness timeout is currently
+            # 300 seconds (default options.timeout).
+            # The default Marionette socket timeout is
+            # currently 360 seconds.
+            # Give the JS harness extra time to deal with
+            # its own timeouts and try to usually exceed
+            # the 360 second marionette socket timeout.
+            # See bug 479518 and bug 1414063.
+            timeout=options.timeout + 70.0,
+            symbolsPath=options.symbolsPath,
+            debuggerInfo=debuggerInfo
+        )
+        mozleak.process_leak_log(self.leakLogFile,
+                                 leak_thresholds=options.leakThresholds,
+                                 stack_fixer=get_stack_fixer_function(options.utilityPath,
+                                                                      options.symbolsPath))
 
         if self.parse_manifest:
             self.log.suite_end(extra={'results': results})
         return status
 
     def copyExtraFilesToProfile(self, options, profile):
         "Copy extra files or dirs specified on the command line to the testing profile."
         profileDir = profile.profile