Bug 1341466 - Stop Marionette test run when Android emulator is unresponsive; r?AutomatedTester draft
authorMaja Frydrychowicz <mjzffr@gmail.com>
Thu, 16 Nov 2017 12:00:49 -0500
changeset 714046 12bbc0f2927132ac9516ee4d8cf3d81ae3f84621
parent 713529 2ff08db67b917fba1558986f3f2f796260f970f8
child 744500 f8704fa4afddb186e326f7e1065f098f558240de
push id93826
push userbmo:mjzffr@gmail.com
push dateThu, 21 Dec 2017 16:33:33 +0000
reviewersAutomatedTester
bugs1341466
milestone59.0a1
Bug 1341466 - Stop Marionette test run when Android emulator is unresponsive; r?AutomatedTester The clean-up code in FennecInstance now counts and logs consecutive DMErrors. The Marionette clean-up code then throws an UnresponsiveInstanceException if we hit consecutive errors more than twice, which interrupts the test run entirely. (Previously, MarionetteTestRunner would just keep running tests despite consecutive clean-up failures, and eventually it would time out.) This change allows us to take advantage of the retry mechanism in the mozharness script that runs all Android tests: it sets the job status to "retry" if it finds DMError in the test log after the run_tests step is done. MozReview-Commit-ID: J36XuFVK1aK
testing/marionette/client/marionette_driver/errors.py
testing/marionette/client/marionette_driver/geckoinstance.py
testing/marionette/client/marionette_driver/marionette.py
testing/marionette/harness/marionette_harness/marionette_test/testcases.py
--- a/testing/marionette/client/marionette_driver/errors.py
+++ b/testing/marionette/client/marionette_driver/errors.py
@@ -162,16 +162,20 @@ class UnknownCommandException(Marionette
 class UnknownException(MarionetteException):
     status = "unknown error"
 
 
 class UnsupportedOperationException(MarionetteException):
     status = "unsupported operation"
 
 
+class UnresponsiveInstanceException(Exception):
+    pass
+
+
 es_ = [e for e in locals().values() if type(e) == type and issubclass(e, MarionetteException)]
 by_string = {e.status: e for e in es_}
 
 
 def lookup(identifier):
     """Finds error exception class by associated Selenium JSON wire
     protocol number code, or W3C WebDriver protocol string.
 
--- a/testing/marionette/client/marionette_driver/geckoinstance.py
+++ b/testing/marionette/client/marionette_driver/geckoinstance.py
@@ -1,21 +1,23 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/
 
 import os
 import sys
 import tempfile
 import time
+import traceback
 
 from copy import deepcopy
 
 import mozversion
 
+from mozdevice import DMError
 from mozprofile import Profile
 from mozrunner import Runner, FennecEmulatorRunner
 
 
 class GeckoInstance(object):
     required_prefs = {
         # Increase the APZ content response timeout in tests to 1 minute.
         # This is to accommodate the fact that test environments tends to be slower
@@ -135,16 +137,18 @@ class GeckoInstance(object):
         self.required_prefs = deepcopy(self.required_prefs)
         if prefs:
             self.required_prefs.update(prefs)
 
         self._gecko_log_option = gecko_log
         self._gecko_log = None
         self.verbose = verbose
         self.headless = headless
+        # keep track of errors to decide whether instance is unresponsive
+        self.unresponsive_count = 0
 
     @property
     def gecko_log(self):
         if self._gecko_log:
             return self._gecko_log
 
         path = self._gecko_log_option
         if path != "-":
@@ -397,18 +401,23 @@ class FennecInstance(GeckoInstance):
 
         If `clean` is True and the Fennec instance is running in an
         emulator managed by mozrunner, this will stop the emulator.
 
         :param clean: If True, also perform runner cleanup.
         """
         super(FennecInstance, self).close(clean)
         if clean and self.runner and self.runner.device.connected:
-            self.runner.device.dm.remove_forward(
-                "tcp:{}".format(self.marionette_port))
+            try:
+                self.runner.device.dm.remove_forward(
+                    "tcp:{}".format(self.marionette_port))
+                self.unresponsive_count = 0
+            except DMError:
+                self.unresponsive_count += 1
+                traceback.print_exception(*sys.exc_info())
 
 
 class DesktopInstance(GeckoInstance):
     desktop_prefs = {
         # Disable application updates
         "app.update.enabled": False,
 
         # Enable output of dump()
--- a/testing/marionette/client/marionette_driver/marionette.py
+++ b/testing/marionette/client/marionette_driver/marionette.py
@@ -648,16 +648,19 @@ class Marionette(object):
             except (errors.MarionetteException, IOError):
                 # These exceptions get thrown if the Marionette server
                 # hit an exception/died or the connection died. We can
                 # do no further server-side cleanup in this case.
                 pass
         if self.instance:
             # stop application and, if applicable, stop emulator
             self.instance.close(clean=True)
+            if self.instance.unresponsive_count >= 3:
+                raise errors.UnresponsiveInstanceException(
+                    "Application clean-up has failed >2 consecutive times.")
 
     def __del__(self):
         self.cleanup()
 
     @staticmethod
     def is_port_available(port, host=''):
         port = int(port)
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
--- a/testing/marionette/harness/marionette_harness/marionette_test/testcases.py
+++ b/testing/marionette/harness/marionette_harness/marionette_test/testcases.py
@@ -15,16 +15,17 @@ import weakref
 from unittest.case import (
     _ExpectedFailure,
     _UnexpectedSuccess,
     SkipTest,
 )
 
 from marionette_driver.errors import (
     TimeoutException,
+    UnresponsiveInstanceException
 )
 from mozlog import get_default_logger
 
 
 def _wraps_parameterized(func, func_suffix, args, kwargs):
     """Internal: Decorator used in class MetaParameterized."""
     def wrapper(self):
         return func(self, *args, **kwargs)
@@ -132,17 +133,17 @@ class CommonTestCase(unittest.TestCase):
                     try:
                         self.setUp()
                     except Exception:
                         raise _ExpectedFailure(sys.exc_info())
                 else:
                     self.setUp()
             except SkipTest as e:
                 self._addSkip(result, str(e))
-            except KeyboardInterrupt:
+            except (KeyboardInterrupt, UnresponsiveInstanceException) as e:
                 raise
             except _ExpectedFailure as e:
                 expected_failure(result, e.exc_info)
             except:
                 self._enter_pm()
                 result.addError(self, sys.exc_info())
             else:
                 try:
@@ -152,17 +153,17 @@ class CommonTestCase(unittest.TestCase):
                         except:
                             raise _ExpectedFailure(sys.exc_info())
                         raise _UnexpectedSuccess
                     else:
                         testMethod()
                 except self.failureException:
                     self._enter_pm()
                     result.addFailure(self, sys.exc_info())
-                except KeyboardInterrupt:
+                except (KeyboardInterrupt, UnresponsiveInstanceException) as e:
                     raise
                 except _ExpectedFailure as e:
                     expected_failure(result, e.exc_info)
                 except _UnexpectedSuccess:
                     addUnexpectedSuccess = getattr(result, 'addUnexpectedSuccess', None)
                     if addUnexpectedSuccess is not None:
                         addUnexpectedSuccess(self)
                     else:
@@ -180,17 +181,17 @@ class CommonTestCase(unittest.TestCase):
                 try:
                     if self.expected == "fail":
                         try:
                             self.tearDown()
                         except:
                             raise _ExpectedFailure(sys.exc_info())
                     else:
                         self.tearDown()
-                except KeyboardInterrupt:
+                except (KeyboardInterrupt, UnresponsiveInstanceException) as e:
                     raise
                 except _ExpectedFailure as e:
                     expected_failure(result, e.exc_info)
                 except:
                     self._enter_pm()
                     result.addError(self, sys.exc_info())
                     success = False
             # Here we could handle doCleanups() instead of calling cleanTest directly