Bug 1390693 - Use thread pool for S3 uploads; r?dustin
author Gregory Szorc <gps@mozilla.com>
Thu, 24 Aug 2017 12:38:01 -0700
changeset 652435 894334de4a9ef5bc67b65e164fd31690cbad4b44
parent 652434 44548d7131582b817723fb040e4a6819d14096fc
child 728087 372738981bb16656032269421dd865bc6c6f8384
push id 76055
push user gszorc@mozilla.com
push date Thu, 24 Aug 2017 20:42:35 +0000
reviewers dustin
bugs 1390693
milestone 57.0a1
Bug 1390693 - Use thread pool for S3 uploads; r?dustin

This reduces the time taken to upload the Firefox docs from ~30s to ~5s
(per invocation).

MozReview-Commit-ID: DxOrvxvVn42
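The heart of the change is the upload pattern sketched below: a minimal,
standalone approximation, with the region, bucket name, and payload as
hypothetical placeholders (none of them come from the patch; the real code
builds its session from fetched secrets or local AWS credentials).

import concurrent.futures as futures
import io

import boto3
import botocore

# Placeholder region for illustration only.
session = boto3.session.Session(region_name='us-west-2')

# boto3 clients are documented as thread safe; size the HTTP connection
# pool to match the worker count so threads don't block on checkout.
s3 = session.client('s3',
                    config=botocore.client.Config(max_pool_connections=20))


def upload(data, bucket, key):
    s3.upload_fileobj(io.BytesIO(data), bucket, key)


files = {'index.html': b'<html></html>'}  # hypothetical payload

fs = []
with futures.ThreadPoolExecutor(20) as e:
    for key, data in files.items():
        fs.append(e.submit(upload, data, 'example-docs-bucket', key))

# result() re-raises any exception from a worker thread.
for f in fs:
    f.result()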
tools/docs/moztreedocs/upload.py
--- a/tools/docs/moztreedocs/upload.py
+++ b/tools/docs/moztreedocs/upload.py
@@ -2,18 +2,21 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import, unicode_literals
 
 import io
 import mimetypes
 import os
+import sys
 
+import botocore
 import boto3
+import concurrent.futures as futures
 import requests
 
 
 def s3_upload(files, key_prefix=None):
     """Upload files to an S3 bucket.
 
     ``files`` is an iterable of ``(path, BaseFile)`` (typically from a
     mozpack Finder).
@@ -42,29 +45,43 @@ def s3_upload(files, key_prefix=None):
         secret = res.json()['secret']
         session = boto3.session.Session(
             aws_access_key_id=secret['AWS_ACCESS_KEY_ID'],
             aws_secret_access_key=secret['AWS_SECRET_ACCESS_KEY'],
             region_name=region)
     else:
         print("Trying to use your AWS credentials..")
         session = boto3.session.Session(region_name=region)
-    s3 = session.client('s3')
+
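+    # botocore pools 10 HTTP connections by default; raise it to match the
+    # 20 worker threads below so uploads don't contend for connections.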
+    s3 = session.client('s3',
+                        config=botocore.client.Config(max_pool_connections=20))
 
-    for path, f in files:
-        content_type, content_encoding = mimetypes.guess_type(path)
-        extra_args = {}
-        if content_type:
-            extra_args['ContentType'] = content_type
-        if content_encoding:
-            extra_args['ContentEncoding'] = content_encoding
+    def upload(f, path, bucket, key, extra_args):
+        # Need to flush to avoid buffering/interleaving from multiple threads.
+        sys.stdout.write('uploading %s to %s\n' % (path, key))
+        sys.stdout.flush()
+        s3.upload_fileobj(f, bucket, key, ExtraArgs=extra_args)
 
-        if key_prefix:
-            key = '%s/%s' % (key_prefix, path)
-        else:
-            key = path
+    fs = []
+    with futures.ThreadPoolExecutor(20) as e:
+        for path, f in files:
+            content_type, content_encoding = mimetypes.guess_type(path)
+            extra_args = {}
+            if content_type:
+                extra_args['ContentType'] = content_type
+            if content_encoding:
+                extra_args['ContentEncoding'] = content_encoding
 
-        print('uploading %s to %s' % (path, key))
+            if key_prefix:
+                key = '%s/%s' % (key_prefix, path)
+            else:
+                key = path
 
-        # The file types returned by mozpack behave like file objects. But they
-        # don't accept an argument to read(). So we wrap in a BytesIO.
-        s3.upload_fileobj(io.BytesIO(f.read()), bucket, key,
-                          ExtraArgs=extra_args)
+            # The file types returned by mozpack behave like file objects. But
+            # they don't accept an argument to read(). So we wrap in a BytesIO.
+            fs.append(e.submit(upload, io.BytesIO(f.read()), path, bucket,
+                               key, extra_args))
+
+    # result() re-raises any exception raised by an upload thread.
+    for f in fs:
+        f.result()