Bug 1305752 - If the file we download is a corrupted zip file store it for inspection. r=dustin
download_unpack() downloads files correctly; however, we sometimes get an
exception saying that the zip file is corrupted.
This change adds more logging and saves the fetched file to disk so that it
can be uploaded as an artifact for inspection.
MozReview-Commit-ID: 2KCK6qGNor4
--- a/testing/mozharness/mozharness/base/script.py
+++ b/testing/mozharness/mozharness/base/script.py
@@ -387,24 +387,25 @@ class ScriptMixin(PlatformMixin):
# Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out>
# Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer>
# Bug 1301807 - BadStatusLine: ''
response = urllib2.urlopen(request)
if parsed_url.scheme in ('http', 'https'):
expected_file_size = int(response.headers.get('Content-Length'))
- self.info('Expected file size: {}'.format(expected_file_size))
- self.debug('Url: {}'.format(url))
- self.info('Content-Encoding {}'.format(response.headers.get('Content-Encoding')))
- self.info('Content-Type {}'.format(response.headers.get('Content-Type')))
- self.info('Http code {}'.format(response.getcode()))
+ self.info('Http code: {}'.format(response.getcode()))
+ for k in ('Content-Encoding', 'Content-Type', 'via', 'x-amz-cf-id',
+ 'x-amz-version-id', 'x-cache'):
+ self.info('{}: {}'.format(k, response.headers.get(k)))
file_contents = response.read()
obtained_file_size = len(file_contents)
+ self.info('Expected file size: {}'.format(expected_file_size))
+ self.info('Obtained file size: {}'.format(obtained_file_size))
if obtained_file_size != expected_file_size:
raise FetchedIncorrectFilesize(
'The expected file size is {} while we got instead {}'.format(
expected_file_size, obtained_file_size)
)
# Use BytesIO instead of StringIO
@@ -548,17 +549,17 @@ class ScriptMixin(PlatformMixin):
compressed_file (object): File-like object with the contents of a compressed zip file.
extract_to (str): where to extract the compressed file.
extract_dirs (list, optional): directories inside the archive file to extract.
Defaults to '*'.
verbose (bool, optional): whether or not extracted content should be displayed.
Defaults to False.
Raises:
- zipfile.BadZipFile: on contents of zipfile being invalid
+ zipfile.BadZipfile: on contents of zipfile being invalid
"""
with zipfile.ZipFile(compressed_file) as bundle:
entries = self._filter_entries(bundle.namelist(), extract_dirs)
for entry in entries:
if verbose:
self.info(' {}'.format(entry))
@@ -678,17 +679,28 @@ class ScriptMixin(PlatformMixin):
self.fetch_url_into_memory,
kwargs={'url': url},
**retry_args
)
# 2) We're guaranteed to have download the file with error_level=FATAL
# Let's unpack the file
function, kwargs = _determine_extraction_method_and_kwargs(url)
- function(**kwargs)
+ try:
+ function(**kwargs)
+ except zipfile.BadZipfile:
+ # Bug 1305752 - Sometimes a good download turns out to be a
+ # corrupted zipfile. Let's upload the file for inspection
+ filepath = os.path.join(self.query_abs_dirs()['abs_upload_dir'], url.split('/')[-1])
+ self.info('Storing corrupted file to {}'.format(filepath))
+            with open(filepath, 'wb') as f:
+ f.write(compressed_file.read())
+
+ # Dump the exception and exit
+ self.exception(level=FATAL)
def load_json_url(self, url, error_level=None, *args, **kwargs):
""" Returns a json object from a url (it retries). """
contents = self._retry_download(
url=url, error_level=error_level, *args, **kwargs
)
return json.loads(contents.read())