Bug 1405408 - Post: Use taskcluster-proxy /bewit endpoint to download private toolchains. r=dustin
This is a work-around until Bug 1405889 is deployed. Using the /bewit
endpoint also has the advantage of avoiding another issue in
taskcluster-proxy: fetching through the proxy does not stream from the
upstream resource. Instead, the upstream resource is fetched and stored
in taskcluster-proxy's memory, increasing operational costs. The /bewit
approach, by contrast, streams.
MozReview-Commit-ID: 8yS7zKLALhd
--- a/taskcluster/taskgraph/util/taskcluster.py
+++ b/taskcluster/taskgraph/util/taskcluster.py
@@ -29,22 +29,22 @@ def get_session():
session = requests.Session()
retry = Retry(total=5, backoff_factor=0.1,
status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))
return session
-def _do_request(url, content=None):
+def _do_request(url, **kwargs):
session = get_session()
- if content is None:
+ if kwargs:
+ response = session.post(url, **kwargs)
+ else:
response = session.get(url, stream=True)
- else:
- response = session.post(url, json=content)
if response.status_code >= 400:
# Consume content before raise_for_status, so that the connection can be
# reused.
response.content
response.raise_for_status()
return response
@@ -54,20 +54,26 @@ def _handle_artifact(path, response):
if path.endswith('.yml'):
return yaml.load(response.text)
response.raw.read = functools.partial(response.raw.read,
decode_content=True)
return response.raw
def get_artifact_url(task_id, path, use_proxy=False):
+ ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
if use_proxy:
- ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
- else:
- ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+ # Until Bug 1405889 is deployed, we can't download directly
+ # from the taskcluster-proxy. Work around by using the /bewit
+ # endpoint instead.
+ data = ARTIFACT_URL.format(task_id, path)
+ # The bewit URL is the body of a 303 redirect, which we don't
+ # want to follow (which fetches a potentially large resource).
+ response = _do_request('http://taskcluster/bewit', data=data, allow_redirects=False)
+ return response.text
return ARTIFACT_URL.format(task_id, path)
def get_artifact(task_id, path, use_proxy=False):
"""
Returns the artifact with the given path for the given task id.
If the path ends with ".json" or ".yml", the content is deserialized as,
@@ -111,17 +117,17 @@ def get_artifact_from_index(index_path,
def list_tasks(index_path, use_proxy=False):
"""
Returns a list of task_ids where each task_id is indexed under a path
in the index. Results are sorted by expiration date from oldest to newest.
"""
results = []
data = {}
while True:
- response = _do_request(get_index_url(index_path, use_proxy, multiple=True), data)
+ response = _do_request(get_index_url(index_path, use_proxy, multiple=True), json=data)
response = response.json()
results += response['tasks']
if response.get('continuationToken'):
data = {'continuationToken': response.get('continuationToken')}
else:
break
# We can sort on expires because in the general case