author | Gregory Szorc <gps@mozilla.com> |
Tue, 10 Jan 2017 18:18:37 -0800 | |
changeset 10185 | 7befbd63bff207f1e742f17bed5b1415207e2e5d |
parent 10184 | df7c2d353db35c1ac7f00194e8f5318f877c71c7 |
push id | 1465 |
push user | bmo:gps@mozilla.com |
push date | Fri, 20 Jan 2017 06:33:50 +0000 |
reviewers | glob |
bugs | 1291926 |
--- a/hgext/robustcheckout/__init__.py +++ b/hgext/robustcheckout/__init__.py @@ -10,17 +10,19 @@ times and storage efficiency. """ from __future__ import absolute_import import contextlib import errno import functools import os +import random import re +import time from mercurial.i18n import _ from mercurial.node import hex from mercurial import ( commands, error, exchange, extensions, @@ -111,21 +113,23 @@ def purgewrapper(orig, ui, *args, **kwar @command('robustcheckout', [ ('', 'upstream', '', 'URL of upstream repo to clone from'), ('r', 'revision', '', 'Revision to check out'), ('b', 'branch', '', 'Branch to check out'), ('', 'purge', False, 'Whether to purge the working directory'), ('', 'sharebase', '', 'Directory where shared repos should be placed'), + ('', 'networkattempts', 3, 'Maximum number of attempts for network ' + 'operations'), ], '[OPTION]... URL DEST', norepo=True) def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None, - purge=False, sharebase=None): + purge=False, sharebase=None, networkattempts=None): """Ensure a working copy has the specified revision checked out.""" if not revision and not branch: raise error.Abort('must specify one of --revision or --branch') if revision and branch: raise error.Abort('cannot specify both --revision and --branch') # Require revision to look like a SHA-1. @@ -151,22 +155,26 @@ def robustcheckout(ui, url, dest, upstre # otherwise we're at the whim of whatever configs are used in automation. ui.setconfig('progress', 'delay', 1.0) ui.setconfig('progress', 'refresh', 1.0) ui.setconfig('progress', 'assume-tty', True) sharebase = os.path.realpath(sharebase) return _docheckout(ui, url, dest, upstream, revision, branch, purge, - sharebase) + sharebase, networkattempts) -def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase): +def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase, + networkattemptlimit, networkattempts=None): + if not networkattempts: + networkattempts = [1] + def callself(): return _docheckout(ui, url, dest, upstream, revision, branch, purge, - sharebase) + sharebase, networkattemptlimit, networkattempts) ui.write('ensuring %s@%s is available at %s\n' % (url, revision or branch, dest)) destvfs = scmutil.vfs(dest, audit=False, realpath=True) if destvfs.exists() and not destvfs.exists('.hg'): raise error.Abort('destination exists but no .hg directory') @@ -222,16 +230,42 @@ def _docheckout(ui, url, dest, upstream, """Handle an error.Abort raised during a pull. Returns True if caller should call ``callself()`` to retry. """ if e.args[0] == _('repository is unrelated'): ui.warn('(repository is unrelated; deleting)\n') destvfs.rmtree(forcibly=True) return True + elif e.args[0].startswith(_('stream ended unexpectedly')): + ui.warn('%s\n' % e.args[0]) + if networkattempts[0] < networkattemptlimit: + ui.warn('(retrying after network failure on attempt %d of %d)\n' % + (networkattempts[0], networkattemptlimit)) + + # Do a backoff on retries to mitigate the thundering herd + # problem. This is an exponential backoff with a multipler + # plus random jitter thrown in for good measure. + # With the default settings, backoffs will be: + # 1) 2.5 - 6.5 + # 2) 5.5 - 9.5 + # 3) 11.5 - 15.5 + backoff = (2 ** networkattempts[0] - 1) * 1.5 + jittermin = ui.configint('robustcheckout', 'retryjittermin', 1000) + jittermax = ui.configint('robustcheckout', 'retryjittermax', 5000) + backoff += float(random.randint(jittermin, jittermax)) / 1000.0 + ui.warn('(waiting %.2fs before retry)\n' % backoff) + time.sleep(backoff) + + networkattempts[0] += 1 + + return True + else: + raise error.Abort('reached maximum number of network attempts; ' + 'giving up\n') return False created = False if not destvfs.exists(): # Ensure parent directories of destination exist. # Mercurial 3.8 removed ensuredirs and made makedirs race safe. @@ -245,16 +279,20 @@ def _docheckout(ui, url, dest, upstream, if upstream: ui.write('(cloning from upstream repo %s)\n' % upstream) cloneurl = upstream or url try: res = hg.clone(ui, {}, cloneurl, dest=dest, update=False, shareopts={'pool': sharebase, 'mode': 'identity'}) + except error.Abort as e: + if handlepullabort(e): + return callself() + raise except error.RepoError as e: return handlerepoerror(e) except error.RevlogError as e: ui.warn('(repo corruption: %s; deleting shared store)\n' % e.message) deletesharedstore() return callself() # TODO retry here.
--- a/hgext/robustcheckout/tests/helpers.sh +++ b/hgext/robustcheckout/tests/helpers.sh @@ -1,14 +1,18 @@ cat >> $HGRCPATH << EOF [share] pool = $TESTTMP/share [extensions] robustcheckout = $TESTDIR/hgext/robustcheckout + +[robustcheckout] +retryjittermin = 0 +retryjittermax = 100 EOF mkdir server hg init server/repo0 hg init server/repo1 hg init server/bad-server cd server/repo0
--- a/hgext/robustcheckout/tests/test-server-failure.t +++ b/hgext/robustcheckout/tests/test-server-failure.t @@ -10,25 +10,77 @@ Extension works with default config adding manifests adding file changes added 2 changesets with 2 changes to 1 files searching for changes no changes found 1 files updated, 0 files merged, 0 files removed, 0 files unresolved updated to 94086d65796fd7fc8f957a2c5548db17a13f1f1f -Server abort part way through response is handled -TODO not yet implemented properly +Server abort part way through response results in retries $ cp -a server/bad-server server/bad-server-bytelimit $ cat >> server/bad-server-bytelimit/.hg/hgrc << EOF > [badserver] > bytelimit = 11 > EOF - $ hg robustcheckout http://localhost:$HGPORT/bad-server-bytelimit byte-limit --revision 94086d65796f + $ hg robustcheckout http://localhost:$HGPORT/bad-server-bytelimit byte-limit --revision 94086d65796f --sharebase $TESTTMP/bad-server-share + ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + stream ended unexpectedly (got 0 bytes, expected 4) + (retrying after network failure on attempt 1 of 3) + (waiting *s before retry) (glob) + ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + stream ended unexpectedly (got 0 bytes, expected 4) + (retrying after network failure on attempt 2 of 3) + (waiting *s before retry) (glob) + ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + stream ended unexpectedly (got 0 bytes, expected 4) + abort: reached maximum number of network attempts; giving up + + [255] + +Adjusting the network limit works + + $ hg robustcheckout http://localhost:$HGPORT/bad-server-bytelimit byte-limit --networkattempts 2 --revision 94086d65796f --sharebase $TESTTMP/bad-server-share ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit - (sharing from existing pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + stream ended unexpectedly (got 0 bytes, expected 4) + (retrying after network failure on attempt 1 of 2) + (waiting *s before retry) (glob) + ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + stream ended unexpectedly (got 0 bytes, expected 4) + abort: reached maximum number of network attempts; giving up + + [255] + +Recovering server will result in good clone + + $ echo 5 > server/bad-server-bytelimit/.hg/badserveruntilgood + + $ hg robustcheckout http://localhost:$HGPORT/bad-server-bytelimit byte-limit --revision 94086d65796f --sharebase $TESTTMP/bad-server-share + ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + stream ended unexpectedly (got 0 bytes, expected 4) + (retrying after network failure on attempt 1 of 3) + (waiting *s before retry) (glob) + ensuring http://localhost:$HGPORT/bad-server-bytelimit@94086d65796f is available at byte-limit + (sharing from new pooled repository 96ee1d7354c4ad7372047672c36a1f561e3a6a4c) + requesting all changes + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 1 files searching for changes no changes found - abort: stream ended unexpectedly (got 3 bytes, expected 4) - [255] + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved + updated to 94086d65796fd7fc8f957a2c5548db17a13f1f1f