robustcheckout: retry after socket errors (bug 1317594); r?glob draft
author: Gregory Szorc <gps@mozilla.com>
Mon, 10 Apr 2017 13:30:27 -0700
changeset 10756 143b91c4339cd391787fd512ab5827a23897d12d
parent 10755 209c4529830574a9c5287c9b5dff5e4e43a4b1c5
push id: 1617
push user: bmo:gps@mozilla.com
push date: Mon, 10 Apr 2017 21:00:30 +0000
reviewers: glob
bugs: 1317594
robustcheckout: retry after socket errors (bug 1317594); r?glob

Previously, we didn't handle urllib.URLError, and Mercurial's default exception handler converted it to an error.Abort for display to the user.

This commit explicitly catches urllib.URLError (aliased as pycompat.urlerr.urlerror in Mercurial for compatibility with Python 3) and detects when the underlying error is a socket.error. It then invokes the retry code for this scenario.

This should detect a whole class of network-related failures, including connection refused, routing errors, and connection timeout. It might make sense not to retry on some of these. But I'm inclined to cast a wide net to ensure we aren't chasing the long tail of one-off conditions that can contribute to an intermittent network event.

This patch also doesn't handle non-socket errors in URLError. I'm not as confident that we should retry on these. Let's see what happens and we can evaluate things later.

MozReview-Commit-ID: JZ7mYBpnddD
hgext/robustcheckout/__init__.py
hgext/robustcheckout/tests/test-server-failure.t
--- a/hgext/robustcheckout/__init__.py
+++ b/hgext/robustcheckout/__init__.py
@@ -12,27 +12,29 @@ times and storage efficiency.
 from __future__ import absolute_import
 
 import contextlib
 import errno
 import functools
 import os
 import random
 import re
+import socket
 import time
 
 from mercurial.i18n import _
 from mercurial.node import hex
 from mercurial import (
     commands,
     error,
     exchange,
     extensions,
     cmdutil,
     hg,
+    pycompat,
     scmutil,
     util,
 )
 
 testedwith = '3.8 3.9 4.0 4.1'
 minimumhgversion = '3.8'
 
 cmdtable = {}
@@ -260,16 +262,21 @@ def _docheckout(ui, url, dest, upstream,
                 ui.warn('(repository is unrelated; deleting)\n')
                 destvfs.rmtree(forcibly=True)
                 return True
             elif e.args[0].startswith(_('stream ended unexpectedly')):
                 ui.warn('%s\n' % e.args[0])
                 # Will raise if failure limit reached.
                 handlenetworkfailure()
                 return True
+        elif isinstance(e, pycompat.urlerr.urlerror):
+            if isinstance(e.reason, socket.error):
+                ui.warn('socket error: %s\n' % e.reason)
+                handlenetworkfailure()
+                return True
 
         return False
 
     created = False
 
     if not destvfs.exists():
         # Ensure parent directories of destination exist.
         # Mercurial 3.8 removed ensuredirs and made makedirs race safe.
@@ -283,17 +290,17 @@ def _docheckout(ui, url, dest, upstream,
 
         if upstream:
             ui.write('(cloning from upstream repo %s)\n' % upstream)
         cloneurl = upstream or url
 
         try:
             res = hg.clone(ui, {}, cloneurl, dest=dest, update=False,
                            shareopts={'pool': sharebase, 'mode': 'identity'})
-        except error.Abort as e:
+        except (error.Abort, pycompat.urlerr.urlerror) as e:
             if handlepullerror(e):
                 return callself()
             raise
         except error.RepoError as e:
             return handlerepoerror(e)
         except error.RevlogError as e:
             ui.warn('(repo corruption: %s; deleting shared store)\n' % e.message)
             deletesharedstore()
@@ -342,17 +349,17 @@ def _docheckout(ui, url, dest, upstream,
                         (branch, checkoutrevision))
 
             if checkoutrevision in repo:
                 ui.warn('(revision already present locally; not pulling)\n')
             else:
                 pullop = exchange.pull(repo, remote, heads=pullrevs)
                 if not pullop.rheads:
                     raise error.Abort('unable to pull requested revision')
-        except error.Abort as e:
+        except (error.Abort, pycompat.urlerr.urlerror) as e:
             if handlepullerror(e):
                 return callself()
             raise
         except error.RepoError as e:
             return handlerepoerror(e)
         except error.RevlogError as e:
             ui.warn('(repo corruption: %s; deleting shared store)\n' % e.message)
             deletesharedstore()
--- a/hgext/robustcheckout/tests/test-server-failure.t
+++ b/hgext/robustcheckout/tests/test-server-failure.t
@@ -12,19 +12,25 @@ Extension works with default config
   added 2 changesets with 2 changes to 1 files
   searching for changes
   no changes found
   1 files updated, 0 files merged, 0 files removed, 0 files unresolved
   updated to 94086d65796fd7fc8f957a2c5548db17a13f1f1f
 
 Connecting to non-running server fails
 
-  $ hg robustcheckout http://localhost:$HGPORT1/repo0 no-server --revision 94086d65796f
+  $ hg robustcheckout http://localhost:$HGPORT1/repo0 no-server --revision 94086d65796f --networkattempts 2
   ensuring http://localhost:$HGPORT1/repo0@94086d65796f is available at no-server
-  abort: error: Connection refused
+  socket error: [Errno 111] Connection refused
+  (retrying after network failure on attempt 1 of 2)
+  (waiting *s before retry) (glob)
+  ensuring http://localhost:$HGPORT1/repo0@94086d65796f is available at no-server
+  socket error: [Errno 111] Connection refused
+  abort: reached maximum number of network attempts; giving up
+  
   [255]
 
 Server abort part way through response results in retries
 
   $ cp -a server/bad-server server/bad-server-bytelimit
 
   $ cat >> server/bad-server-bytelimit/.hg/hgrc << EOF
   > [badserver]