Bug 1397675 - Immediately kill the process if no connection can be made after startup. draft
authorHenrik Skupin <mail@hskupin.info>
Thu, 07 Sep 2017 15:40:19 +0200
changeset 663637 eec6dd39d6b2723c6eae8350788edfc372f1daec
parent 663134 a73cc4e08bf5a005722c95b43f84ab0c8ff2bc7c
child 731252 0aa756206c690f6b483a8e76a1fa7516122930b8
push id79490
push userbmo:hskupin@gmail.com
push dateWed, 13 Sep 2017 07:57:45 +0000
bugs1397675
milestone57.0a1
Bug 1397675 - Immediately kill the process if no connection can be made after startup. When the binary gets launched we do not immediately kill the process if the connection to Marionette server cannot be established within the given amount of seconds. Instead "_handle_socket_failure" is getting called because the utility method `raise_for_port` inappropriately uses the `@do_process_check` decorator. By removing the decorator the initial connection attempt can be handled differently. As such the process if handled by Marionette will be immediately killed. Currently we are waiting for the process to quit itself within 120s, which will actually never happen due to no active session. Further `start_session` defaults to a timeout of 60s which itself is problematic for test harnesses using Marionette but controlling the binary themselves. In those cases timeouts can happen often for slow starting browser processes like debug builds. Instead this timeout should default to the `startup_timeout` value. MozReview-Commit-ID: BZvX5KT45mK
testing/marionette/client/marionette_driver/marionette.py
testing/marionette/harness/marionette_harness/tests/unit/test_cli_arguments.py
--- a/testing/marionette/client/marionette_driver/marionette.py
+++ b/testing/marionette/client/marionette_driver/marionette.py
@@ -614,26 +614,39 @@ class Marionette(object):
 
         if self.bin:
             if not Marionette.is_port_available(self.port, host=self.host):
                 ex_msg = "{0}:{1} is unavailable.".format(self.host, self.port)
                 raise errors.MarionetteException(message=ex_msg)
 
             self.instance = GeckoInstance.create(
                 app, host=self.host, port=self.port, bin=self.bin, **instance_args)
-            self.instance.start()
-            self.raise_for_port(timeout=self.startup_timeout)
+            self.start_binary(self.startup_timeout)
 
         self.timeout = Timeouts(self)
 
     @property
     def profile_path(self):
         if self.instance and self.instance.profile:
             return self.instance.profile.profile
 
+    def start_binary(self, timeout):
+        try:
+            self.instance.start()
+            self.raise_for_port(timeout=timeout)
+        except socket.timeout:
+            # Something went wrong with starting up Marionette server. Given
+            # that the process will not quit itself, force a shutdown immediately.
+            self.cleanup()
+
+            msg = "Process killed after {}s because no connection to Marionette "\
+                  "server could be established. Check gecko.log for errors"
+            _, _, tb = sys.exc_info()
+            raise IOError, msg.format(timeout), tb
+
     def cleanup(self):
         if self.session is not None:
             try:
                 self.delete_session()
             except (errors.MarionetteException, IOError):
                 # These exceptions get thrown if the Marionette server
                 # hit an exception/died or the connection died. We can
                 # do no further server-side cleanup in this case.
@@ -692,17 +705,16 @@ class Marionette(object):
             finally:
                 if sock is not None:
                     sock.close()
 
             time.sleep(poll_interval)
 
         return False
 
-    @do_process_check
     def raise_for_port(self, timeout=None):
         """Raise socket.timeout if no connection can be established.
 
         :param timeout: Timeout in seconds for the server to be ready.
 
         """
         if not self.wait_for_port(timeout):
             raise socket.timeout("Timed out waiting for connection on {0}:{1}!".format(
@@ -1173,47 +1185,52 @@ class Marionette(object):
         '''
         Returns an absolute url for files served from Marionette's www directory.
 
         :param relative_url: The url of a static file, relative to Marionette's www directory.
         '''
         return "{0}{1}".format(self.baseurl, relative_url)
 
     @do_process_check
-    def start_session(self, capabilities=None, timeout=60):
+    def start_session(self, capabilities=None, timeout=None):
         """Create a new WebDriver session.
         This method must be called before performing any other action.
 
         :param capabilities: An optional dictionary of
             Marionette-recognised capabilities.  It does not
             accept a WebDriver conforming capabilities dictionary
             (including alwaysMatch, firstMatch, desiredCapabilities,
             or requriedCapabilities), and only recognises extension
             capabilities that are specific to Marionette.
-        :param timeout: Timeout in seconds for the server to be ready.
+        :param timeout: Optional timeout in seconds for the server to be ready.
         :returns: A dictionary of the capabilities offered.
         """
+        if timeout is None:
+            timeout = self.startup_timeout
+
         self.crashed = 0
 
         if self.instance:
             returncode = self.instance.runner.returncode
+            # We're managing a binary which has terminated. Start it again
+            # and implicitely wait for the Marionette server to be ready.
             if returncode is not None:
-                # We're managing a binary which has terminated, so start it again.
-                self.instance.start()
+                self.start_binary(timeout)
+
+        else:
+            # In the case when Marionette doesn't manage the binary wait until
+            # its server component has been started.
+            self.wait_for_port(timeout=timeout)
 
         self.client = transport.TcpTransport(
             self.host,
             self.port,
             self.socket_timeout)
+        self.protocol, _ = self.client.connect()
 
-        # Call wait_for_port() before attempting to connect in
-        # the event gecko hasn't started yet.
-        timeout = timeout or self.startup_timeout
-        self.wait_for_port(timeout=timeout)
-        self.protocol, _ = self.client.connect()
         body = capabilities
         if body is None:
             body = {}
 
         # Duplicate capabilities object so the body we end up
         # sending looks like this:
         #
         #     {acceptInsecureCerts: true, {capabilities: {acceptInsecureCerts: true}}}
--- a/testing/marionette/harness/marionette_harness/tests/unit/test_cli_arguments.py
+++ b/testing/marionette/harness/marionette_harness/tests/unit/test_cli_arguments.py
@@ -29,8 +29,23 @@ class TestCommandLineArguments(Marionett
         with self.marionette.using_context("chrome"):
             safe_mode = self.marionette.execute_script("""
               Cu.import("resource://gre/modules/Services.jsm");
 
               return Services.appinfo.inSafeMode;
             """)
 
             self.assertTrue(safe_mode, "Safe Mode has not been enabled")
+
+    def test_startup_timeout(self):
+        startup_timeout = self.marionette.startup_timeout
+
+        # Use a timeout which always cause an IOError
+        self.marionette.startup_timeout = 1
+        msg = "Process killed after {}s".format(self.marionette.startup_timeout)
+
+        try:
+            self.marionette.quit()
+            with self.assertRaisesRegexp(IOError, msg):
+                self.marionette.start_session()
+        finally:
+            self.marionette.startup_timeout = startup_timeout
+            self.marionette.start_session()