Bug 1371489 - Reuse HTTP connections after HTTP errors. r?dustin
When a response has unconsumed content, requests closes the connection
instead of releasing it to the connection pool, such that a new
connection has to be created when another request to the same host is
done within the same Session.
When calling response.raise_for_status, the response content is left
unconsumed, meaning that without manually consuming the content first,
every time raise_for_status raises, a connection is closed.
While here, we move the raise_for_status to the _do_request function, so
that there's only one place where this is dealt with.
--- a/taskcluster/taskgraph/util/taskcluster.py
+++ b/taskcluster/taskgraph/util/taskcluster.py
@@ -21,17 +21,23 @@ def get_session():
status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))
return session
def _do_request(url):
session = get_session()
- return session.get(url, stream=True)
+ response = session.get(url, stream=True)
+ if response.status_code >= 400:
+ # Consume content before raise_for_status, so that the connection can be
+ # reused.
+ response.content
+ response.raise_for_status()
+ return response
def get_artifact_url(task_id, path, use_proxy=False):
if use_proxy:
ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
else:
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
return ARTIFACT_URL.format(task_id, path)
@@ -42,36 +48,33 @@ def get_artifact(task_id, path, use_prox
Returns the artifact with the given path for the given task id.
If the path ends with ".json" or ".yml", the content is deserialized as,
respectively, json or yaml, and the corresponding python data (usually
dict) is returned.
For other types of content, a file-like object is returned.
"""
response = _do_request(get_artifact_url(task_id, path, use_proxy))
- response.raise_for_status()
if path.endswith('.json'):
return response.json()
if path.endswith('.yml'):
return yaml.load(response.text)
response.raw.read = functools.partial(response.raw.read,
decode_content=True)
return response.raw
def list_artifacts(task_id, use_proxy=False):
response = _do_request(get_artifact_url(task_id, '', use_proxy).rstrip('/'))
- response.raise_for_status()
return response.json()['artifacts']
def get_index_url(index_path, use_proxy=False):
if use_proxy:
INDEX_URL = 'http://taskcluster/index/v1/task/{}'
else:
INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
return INDEX_URL.format(index_path)
def find_task_id(index_path, use_proxy=False):
response = _do_request(get_index_url(index_path, use_proxy))
- response.raise_for_status()
return response.json()['taskId']