Bug 1405408 - Post: Use taskcluster-proxy /bewit endpoint to download private toolchains. r=dustin draft
author Nick Alexander <nalexander@mozilla.com>
Thu, 05 Oct 2017 16:17:58 -0700
changeset 678724 23e1bc683248f69f6e4c90204e9bc0701f4a778a
parent 678723 8628be21bacacbd782dffc47933d000c57dc2fb2
child 678756 73c27146ab14ae122fa6afe8e43d4a8d9bc0f168
push id 84017
push user nalexander@mozilla.com
push date Wed, 11 Oct 2017 19:53:59 +0000
reviewers dustin
bugs 1405408, 1405889
milestone 58.0a1
Bug 1405408 - Post: Use taskcluster-proxy /bewit endpoint to download private toolchains. r=dustin

This is a work-around until Bug 1405889 is deployed. Using the /bewit endpoint also has the advantage of avoiding another issue in taskcluster-proxy: fetching through the proxy does not stream from the upstream resource; instead, the upstream resource is fetched and stored in taskcluster-proxy's memory, increasing operational costs. The /bewit approach, by contrast, streams.

MozReview-Commit-ID: 8yS7zKLALhd
taskcluster/taskgraph/util/taskcluster.py
--- a/taskcluster/taskgraph/util/taskcluster.py
+++ b/taskcluster/taskgraph/util/taskcluster.py
@@ -29,22 +29,22 @@ def get_session():
     session = requests.Session()
     retry = Retry(total=5, backoff_factor=0.1,
                   status_forcelist=[500, 502, 503, 504])
     session.mount('http://', HTTPAdapter(max_retries=retry))
     session.mount('https://', HTTPAdapter(max_retries=retry))
     return session
 
 
-def _do_request(url, content=None):
+def _do_request(url, **kwargs):
     session = get_session()
-    if content is None:
+    if kwargs:
+        response = session.post(url, **kwargs)
+    else:
         response = session.get(url, stream=True)
-    else:
-        response = session.post(url, json=content)
     if response.status_code >= 400:
         # Consume content before raise_for_status, so that the connection can be
         # reused.
         response.content
     response.raise_for_status()
     return response
 
 
@@ -54,20 +54,26 @@ def _handle_artifact(path, response):
     if path.endswith('.yml'):
         return yaml.load(response.text)
     response.raw.read = functools.partial(response.raw.read,
                                           decode_content=True)
     return response.raw
 
 
 def get_artifact_url(task_id, path, use_proxy=False):
+    ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
     if use_proxy:
-        ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
-    else:
-        ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+        # Until Bug 1405889 is deployed, we can't download directly
+        # from the taskcluster-proxy.  Work around by using the /bewit
+        # endpoint instead.
+        data = ARTIFACT_URL.format(task_id, path)
+        # The bewit URL is the body of a 303 redirect, which we don't
+        # want to follow (which fetches a potentially large resource).
+        response = _do_request('http://taskcluster/bewit', data=data, allow_redirects=False)
+        return response.text
     return ARTIFACT_URL.format(task_id, path)
 
 
 def get_artifact(task_id, path, use_proxy=False):
     """
     Returns the artifact with the given path for the given task id.
 
     If the path ends with ".json" or ".yml", the content is deserialized as,
@@ -111,17 +117,17 @@ def get_artifact_from_index(index_path, 
 def list_tasks(index_path, use_proxy=False):
     """
     Returns a list of task_ids where each task_id is indexed under a path
     in the index. Results are sorted by expiration date from oldest to newest.
     """
     results = []
     data = {}
     while True:
-        response = _do_request(get_index_url(index_path, use_proxy, multiple=True), data)
+        response = _do_request(get_index_url(index_path, use_proxy, multiple=True), json=data)
         response = response.json()
         results += response['tasks']
         if response.get('continuationToken'):
             data = {'continuationToken': response.get('continuationToken')}
         else:
             break
 
     # We can sort on expires because in the general case