Bug 1166033 - Notice when the process has died, and restart it, r=dburns draft
author Jonathan Griffin <jgriffin@mozilla.com>
Wed, 10 Jun 2015 10:24:29 -0700
changeset 271927 71db7103fbc797049f8d87aa9557be3c96d9c07d
parent 270794 3d11cb4f31b997bb8de5c8d858a3bc1deb4a03e9
child 506916 e0b9d71ebd658eac1e1c5740dd2766eb2c17ee9b
push id 2843
push user jgriffin@mozilla.com
push date Fri, 12 Jun 2015 22:28:56 +0000
reviewers dburns
bugs 1166033
milestone 41.0a1
Bug 1166033 - Notice when the process has died, and restart it, r=dburns
testing/marionette/driver/marionette_driver/marionette.py
testing/marionette/transport/marionette_transport/transport.py
--- a/testing/marionette/driver/marionette_driver/marionette.py
+++ b/testing/marionette/driver/marionette_driver/marionette.py
@@ -623,17 +623,21 @@ class Marionette(object):
             self.runner = B2GEmulatorRunner(b2g_home=homedir,
                                             logdir=logdir,
                                             process_args=process_args)
             self.emulator = self.runner.device
             self.emulator.connect()
             self.port = self.emulator.setup_port_forwarding(remote_port=self.port)
             assert(self.emulator.wait_for_port(self.port)), "Timed out waiting for port!"
 
-        self.client = MarionetteTransport(self.host, self.port, self.socket_timeout)
+        self.client = MarionetteTransport(
+            self.host,
+            self.port,
+            self.socket_timeout,
+            instance=self.instance)
 
         if emulator:
             if busybox:
                 self.emulator.install_busybox(busybox=busybox)
             self.emulator.wait_for_system_message(self)
 
     def cleanup(self):
         if self.session:
@@ -967,16 +971,19 @@ class Marionette(object):
             # Values here correspond to constants in nsIAppStartup.
             # See https://developer.mozilla.org/en-US/docs/Mozilla/Tech/XPCOM/Reference/Interface/nsIAppStartup
             restart_flags = [
                 "eForceQuit",
                 "eRestart",
             ]
             self._send_message('quitApplication', flags=restart_flags)
             self.client.close()
+            # The instance is restarting itself; we will no longer be able to
+            # track it by pid, so mark it as 'detached'.
+            self.instance.detached = True
         else:
             self.delete_session()
             self.instance.restart(clean=clean)
         assert(self.wait_for_port()), "Timed out waiting for port!"
         self.start_session(session_id=self.session_id)
         self._reset_timeouts()
 
     def absolute_url(self, relative_url):
@@ -994,16 +1001,21 @@ class Marionette(object):
 
         :param desired_capabilities: An optional dict of desired
             capabilities.  This is currently ignored.
         :param timeout: Timeout in seconds for the server to be ready.
         :param session_id: unique identifier for the session. If no session id is
             passed in then one will be generated by the marionette server.
 
         :returns: A dict of the capabilities offered."""
+        if self.instance:
+            returncode = self.instance.runner.process_handler.proc.returncode
+            if returncode is not None:
+                # We're managing a binary which has terminated, so restart it.
+                self.instance.restart()
         self.wait_for_port(timeout=timeout)
         self.session = self._send_message('newSession', 'value', capabilities=desired_capabilities, sessionId=session_id)
         self.b2g = 'b2g' in self.session
         return self.session
 
     @property
     def test_name(self):
         return self._test_name
--- a/testing/marionette/transport/marionette_transport/transport.py
+++ b/testing/marionette/transport/marionette_transport/transport.py
@@ -15,24 +15,25 @@ class MarionetteTransport(object):
         always preceded by the message length and a colon, e.g.,
 
         20:{'command': 'test'}
     """
 
     max_packet_length = 4096
     connection_lost_msg = "Connection to Marionette server is lost. Check gecko.log (desktop firefox) or logcat (b2g) for errors."
 
-    def __init__(self, addr, port, socket_timeout=360.0):
+    def __init__(self, addr, port, socket_timeout=360.0, instance=None):
         self.addr = addr
         self.port = port
         self.socket_timeout = socket_timeout
         self.sock = None
         self.traits = None
         self.applicationType = None
         self.actor = 'root'
+        self.instance = instance
 
     def _recv_n_bytes(self, n):
         """ Convenience method for receiving exactly n bytes from
             self.sock (assuming it's open and connected).
         """
         data = ''
         while len(data) < n:
             chunk = self.sock.recv(n - len(data))
@@ -42,41 +43,54 @@ class MarionetteTransport(object):
         return data
 
     def receive(self):
         """ Receive the next complete response from the server, and return
             it as a dict.  Each response from the server is prepended by
             len(message) + ':'.
         """
         assert(self.sock)
-        response = self.sock.recv(10)
-        initial_size = len(response)
-        sep = response.find(':')
-        length = response[0:sep]
-        if length != '':
-            response = response[sep + 1:]
-            remaining_size = int(length) + 1 + len(length) - initial_size
-            response += self._recv_n_bytes(remaining_size)
-            return json.loads(response)
-        else:
-            raise IOError(self.connection_lost_msg)
+        now = time.time()
+        response = ''
+        bytes_to_recv = 10
+        while time.time() - now < self.socket_timeout:
+            try:
+                response += self.sock.recv(bytes_to_recv)
+            except socket.timeout:
+                pass
+            if self.instance and not hasattr(self.instance, 'detached'):
+                # If we've launched the binary we've connected to, make
+                # sure it hasn't died.
+                poll = self.instance.runner.process_handler.proc.poll()
+                if poll is not None:
+                    # process isn't alive
+                    raise IOError("process has died with return code %d" % poll)
+            sep = response.find(':')
+            if sep > -1:
+                length = response[0:sep]
+                remaining = response[sep + 1:]
+                if len(remaining) == int(length):
+                    return json.loads(remaining)
+                bytes_to_recv = int(length) - len(remaining)
+        raise IOError(self.connection_lost_msg)
 
     def connect(self):
         """ Connect to the server and process the hello message we expect
             to receive in response.
         """
         self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         self.sock.settimeout(self.socket_timeout)
         try:
             self.sock.connect((self.addr, self.port))
         except:
             # Unset self.sock so that the next attempt to send will cause
             # another connection attempt.
             self.sock = None
             raise
+        self.sock.settimeout(2.0)
         hello = self.receive()
         self.traits = hello.get('traits')
         self.applicationType = hello.get('applicationType')
 
         # get the marionette actor id
         response = self.send({'to': 'root', 'name': 'getMarionetteID'})
         self.actor = response['id']