Bug 1305752 - If the file we download is a corrupted zip file, store it for inspection. r=dustin draft
author Armen Zambrano Gasparnian <armenzg@mozilla.com>
Wed, 28 Sep 2016 12:35:56 -0400
changeset 418545 05f29616c90f36991582d285c6fa00d62fe06b40
parent 418456 b1d60f2f68c7cccc96fcf9a2075bb430a500a0f2
child 532368 8896df7f42743ebf68cb7c0429e23777856daf6e
push id 30705
push user armenzg@mozilla.com
push date Wed, 28 Sep 2016 17:55:14 +0000
reviewers dustin
bugs 1305752
milestone 52.0a1
Bug 1305752 - If the file we download is a corrupted zip file, store it for inspection. r=dustin

download_unpack() downloads files correctly; however, sometimes we get an exception saying the zip file is corrupted. This change adds more logging and saves the fetched file to disk so that it can be uploaded as an artifact for inspection.

MozReview-Commit-ID: 2KCK6qGNor4
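For context, here is a minimal standalone sketch (not mozharness code; unpack_or_store, hello.txt and corrupted.zip are made-up names for illustration) of the failure mode this patch handles: a truncated in-memory download raises zipfile.BadZipfile when opened, and rewinding the buffer and writing its raw bytes to disk preserves them for later inspection.

import io
import zipfile

def unpack_or_store(raw_bytes, artifact_path):
    """Try to unzip an in-memory download; on corruption, keep the raw bytes."""
    payload = io.BytesIO(raw_bytes)
    try:
        with zipfile.ZipFile(payload) as bundle:
            return bundle.namelist()
    except zipfile.BadZipfile:
        # Rewind first: ZipFile may have moved the read position before failing.
        payload.seek(0)
        with open(artifact_path, 'wb') as f:
            f.write(payload.read())
        raise

if __name__ == '__main__':
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, 'w') as z:
        z.writestr('hello.txt', 'hello world')
    truncated = buf.getvalue()[:-15]  # simulate an incomplete/corrupted download
    try:
        unpack_or_store(truncated, 'corrupted.zip')
    except zipfile.BadZipfile:
        print('BadZipfile raised; raw bytes kept in corrupted.zip for inspection')

Note that the sketch rewinds the buffer and writes in binary mode ('wb'): the zip reader can leave the in-memory file positioned at its end, and text mode would alter the bytes on Windows.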
testing/mozharness/mozharness/base/script.py
--- a/testing/mozharness/mozharness/base/script.py
+++ b/testing/mozharness/mozharness/base/script.py
@@ -387,24 +387,25 @@ class ScriptMixin(PlatformMixin):
         # Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out>
         # Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer>
         # Bug 1301807 - BadStatusLine: ''
         response = urllib2.urlopen(request)
 
         if parsed_url.scheme in ('http', 'https'):
             expected_file_size = int(response.headers.get('Content-Length'))
 
-        self.info('Expected file size: {}'.format(expected_file_size))
-        self.debug('Url: {}'.format(url))
-        self.info('Content-Encoding {}'.format(response.headers.get('Content-Encoding')))
-        self.info('Content-Type {}'.format(response.headers.get('Content-Type')))
-        self.info('Http code {}'.format(response.getcode()))
+        self.info('Http code: {}'.format(response.getcode()))
+        for k in ('Content-Encoding', 'Content-Type', 'via', 'x-amz-cf-id',
+                  'x-amz-version-id', 'x-cache'):
+            self.info('{}: {}'.format(k, response.headers.get(k)))
 
         file_contents = response.read()
         obtained_file_size = len(file_contents)
+        self.info('Expected file size: {}'.format(expected_file_size))
+        self.info('Obtained file size: {}'.format(obtained_file_size))
 
         if obtained_file_size != expected_file_size:
             raise FetchedIncorrectFilesize(
                 'The expected file size is {} while we got instead {}'.format(
                     expected_file_size, obtained_file_size)
             )
 
         # Use BytesIO instead of StringIO
@@ -548,17 +549,17 @@ class ScriptMixin(PlatformMixin):
             compressed_file (object): File-like object with the contents of a compressed zip file.
             extract_to (str): where to extract the compressed file.
             extract_dirs (list, optional): directories inside the archive file to extract.
                                            Defaults to '*'.
             verbose (bool, optional): whether or not extracted content should be displayed.
                                       Defaults to False.
 
         Raises:
-            zipfile.BadZipFile: on contents of zipfile being invalid
+            zipfile.BadZipfile: on contents of zipfile being invalid
         """
         with zipfile.ZipFile(compressed_file) as bundle:
             entries = self._filter_entries(bundle.namelist(), extract_dirs)
 
             for entry in entries:
                 if verbose:
                     self.info(' {}'.format(entry))
 
@@ -678,17 +679,30 @@ class ScriptMixin(PlatformMixin):
             self.fetch_url_into_memory,
             kwargs={'url': url},
             **retry_args
         )
 
         # 2) We're guaranteed to have downloaded the file with error_level=FATAL
         #    Let's unpack the file
         function, kwargs = _determine_extraction_method_and_kwargs(url)
-        function(**kwargs)
+        try:
+            function(**kwargs)
+        except zipfile.BadZipfile:
+            # Bug 1305752 - Sometimes a good download turns out to be a
+            # corrupted zipfile. Let's upload the file for inspection
+            filepath = os.path.join(self.query_abs_dirs()['abs_upload_dir'], url.split('/')[-1])
+            self.info('Storing corrupted file to {}'.format(filepath))
+            # Rewind; the failed unzip attempt may have moved the read position
+            compressed_file.seek(0)
+            with open(filepath, 'wb') as f:
+                f.write(compressed_file.read())
+
+            # Dump the exception and exit
+            self.exception(level=FATAL)
 
 
     def load_json_url(self, url, error_level=None, *args, **kwargs):
         """ Returns a json object from a url (it retries). """
         contents = self._retry_download(
             url=url, error_level=error_level, *args, **kwargs
         )
         return json.loads(contents.read())