Bug 1341466 - Stop Marionette test run when Android emulator is unresponsive; r?AutomatedTester
The clean-up code in FennecInstance now counts and logs consecutive DMErrors.
The Marionette clean-up code then throws an UnresponsiveInstanceException
if we hit consecutive errors more than twice, which interrupts the
test run entirely. (Previously, MarionetteTestRunner would just keep running
tests despite consecutive clean-up failures, and eventually it would time out.)
This change allows us to take advantage of the retry mechanism in the
mozharness script that runs all Android tests: it sets the job status to "retry"
if it finds DMError in the test log after the run_tests step is done.
MozReview-Commit-ID: J36XuFVK1aK
--- a/testing/marionette/client/marionette_driver/errors.py
+++ b/testing/marionette/client/marionette_driver/errors.py
@@ -162,16 +162,20 @@ class UnknownCommandException(Marionette
class UnknownException(MarionetteException):
status = "unknown error"
class UnsupportedOperationException(MarionetteException):
status = "unsupported operation"
+class UnresponsiveInstanceException(Exception):
+ pass
+
+
es_ = [e for e in locals().values() if type(e) == type and issubclass(e, MarionetteException)]
by_string = {e.status: e for e in es_}
def lookup(identifier):
"""Finds error exception class by associated Selenium JSON wire
protocol number code, or W3C WebDriver protocol string.
--- a/testing/marionette/client/marionette_driver/geckoinstance.py
+++ b/testing/marionette/client/marionette_driver/geckoinstance.py
@@ -1,21 +1,23 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/
import os
import sys
import tempfile
import time
+import traceback
from copy import deepcopy
import mozversion
+from mozdevice import DMError
from mozprofile import Profile
from mozrunner import Runner, FennecEmulatorRunner
class GeckoInstance(object):
required_prefs = {
# Increase the APZ content response timeout in tests to 1 minute.
# This is to accommodate the fact that test environments tends to be slower
@@ -135,16 +137,18 @@ class GeckoInstance(object):
self.required_prefs = deepcopy(self.required_prefs)
if prefs:
self.required_prefs.update(prefs)
self._gecko_log_option = gecko_log
self._gecko_log = None
self.verbose = verbose
self.headless = headless
+ # keep track of errors to decide whether instance is unresponsive
+ self.unresponsive_count = 0
@property
def gecko_log(self):
if self._gecko_log:
return self._gecko_log
path = self._gecko_log_option
if path != "-":
@@ -397,18 +401,23 @@ class FennecInstance(GeckoInstance):
If `clean` is True and the Fennec instance is running in an
emulator managed by mozrunner, this will stop the emulator.
:param clean: If True, also perform runner cleanup.
"""
super(FennecInstance, self).close(clean)
if clean and self.runner and self.runner.device.connected:
- self.runner.device.dm.remove_forward(
- "tcp:{}".format(self.marionette_port))
+ try:
+ self.runner.device.dm.remove_forward(
+ "tcp:{}".format(self.marionette_port))
+ self.unresponsive_count = 0
+ except DMError:
+ self.unresponsive_count += 1
+ traceback.print_exception(*sys.exc_info())
class DesktopInstance(GeckoInstance):
desktop_prefs = {
# Disable application updates
"app.update.enabled": False,
# Enable output of dump()
--- a/testing/marionette/client/marionette_driver/marionette.py
+++ b/testing/marionette/client/marionette_driver/marionette.py
@@ -648,16 +648,19 @@ class Marionette(object):
except (errors.MarionetteException, IOError):
# These exceptions get thrown if the Marionette server
# hit an exception/died or the connection died. We can
# do no further server-side cleanup in this case.
pass
if self.instance:
# stop application and, if applicable, stop emulator
self.instance.close(clean=True)
+ if self.instance.unresponsive_count >= 3:
+ raise errors.UnresponsiveInstanceException(
+ "Application clean-up has failed >2 consecutive times.")
def __del__(self):
self.cleanup()
@staticmethod
def is_port_available(port, host=''):
port = int(port)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
--- a/testing/marionette/harness/marionette_harness/marionette_test/testcases.py
+++ b/testing/marionette/harness/marionette_harness/marionette_test/testcases.py
@@ -15,16 +15,17 @@ import weakref
from unittest.case import (
_ExpectedFailure,
_UnexpectedSuccess,
SkipTest,
)
from marionette_driver.errors import (
TimeoutException,
+ UnresponsiveInstanceException
)
from mozlog import get_default_logger
def _wraps_parameterized(func, func_suffix, args, kwargs):
"""Internal: Decorator used in class MetaParameterized."""
def wrapper(self):
return func(self, *args, **kwargs)
@@ -132,17 +133,17 @@ class CommonTestCase(unittest.TestCase):
try:
self.setUp()
except Exception:
raise _ExpectedFailure(sys.exc_info())
else:
self.setUp()
except SkipTest as e:
self._addSkip(result, str(e))
- except KeyboardInterrupt:
+ except (KeyboardInterrupt, UnresponsiveInstanceException) as e:
raise
except _ExpectedFailure as e:
expected_failure(result, e.exc_info)
except:
self._enter_pm()
result.addError(self, sys.exc_info())
else:
try:
@@ -152,17 +153,17 @@ class CommonTestCase(unittest.TestCase):
except:
raise _ExpectedFailure(sys.exc_info())
raise _UnexpectedSuccess
else:
testMethod()
except self.failureException:
self._enter_pm()
result.addFailure(self, sys.exc_info())
- except KeyboardInterrupt:
+ except (KeyboardInterrupt, UnresponsiveInstanceException) as e:
raise
except _ExpectedFailure as e:
expected_failure(result, e.exc_info)
except _UnexpectedSuccess:
addUnexpectedSuccess = getattr(result, 'addUnexpectedSuccess', None)
if addUnexpectedSuccess is not None:
addUnexpectedSuccess(self)
else:
@@ -180,17 +181,17 @@ class CommonTestCase(unittest.TestCase):
try:
if self.expected == "fail":
try:
self.tearDown()
except:
raise _ExpectedFailure(sys.exc_info())
else:
self.tearDown()
- except KeyboardInterrupt:
+ except (KeyboardInterrupt, UnresponsiveInstanceException) as e:
raise
except _ExpectedFailure as e:
expected_failure(result, e.exc_info)
except:
self._enter_pm()
result.addError(self, sys.exc_info())
success = False
# Here we could handle doCleanups() instead of calling cleanTest directly