--- a/testing/mozharness/mozharness/base/script.py
+++ b/testing/mozharness/mozharness/base/script.py
@@ -45,16 +45,18 @@ if os.name == 'nt':
PYWIN32 = False
try:
import simplejson as json
assert json
except ImportError:
import json
+from cStringIO import StringIO
+
from mozprocess import ProcessHandler
from mozharness.base.config import BaseConfig
from mozharness.base.log import SimpleFileLogger, MultiFileLogger, \
LogMixin, OutputParser, DEBUG, INFO, ERROR, FATAL
def platform_name():
pm = PlatformMixin()
@@ -453,38 +455,167 @@ class ScriptMixin(PlatformMixin):
kwargs = {"url": url, "file_name": file_name}
return self.retry(
download_func,
kwargs=kwargs,
**retry_args
)
- def download_unpack(self, url, extract_to, extract_dirs=None,
- error_level=FATAL):
- """Generic method to download and extract a compressed file.
+
+ def _filter_entries(self, namelist, extract_dirs):
+ """Filter entries of the archive based on the specified list of directories to extract."""
+ filter_partial = functools.partial(fnmatch.filter, namelist)
+ entries = itertools.chain(*map(filter_partial, extract_dirs or ['*']))
+
+ for entry in entries:
+ yield entry
+
+
+ def unzip(self, file_object, extract_to='.', extract_dirs='*', verbose=False):
+ """Extract a zip file from a file-like object without writing the archive to disk first.
+
+ Args:
+ file_object (object): Any file-like object providing read(); it need not be seekable.
+ extract_to (str, optional): where to extract the compressed file.
+ extract_dirs (list, optional): directories inside the archive file to extract.
+ Defaults to '*'.
+ """
+ compressed_file = StringIO(file_object.read())
+ try:
+ with zipfile.ZipFile(compressed_file) as bundle:
+ entries = self._filter_entries(bundle.namelist(), extract_dirs)
+
+ for entry in entries:
+ if verbose:
+ self.info(' {}'.format(entry))
+ bundle.extract(entry, path=extract_to)
- The downloaded file will always be saved to the working directory and is not getting
- deleted after extracting.
+ # ZipFile doesn't preserve permissions during extraction:
+ # http://bugs.python.org/issue15795
+ fname = os.path.realpath(os.path.join(extract_to, entry))
+ mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
+ # Only set permissions if attributes are available. Otherwise all
+ # permissions will be removed eg. on Windows.
+ if mode:
+ os.chmod(fname, mode)
+
+ except zipfile.BadZipfile as e:
+ self.exception('{}'.format(e.message))
+
+
+ def deflate(self, file_object, mode, extract_to='.', extract_dirs='*', verbose=False):
+ """Extract a tar, tar.bz2 or tar.gz archive from `file_object` without writing it to disk first.
+
+ Args:
+ file_object (object): Any file-like object providing read(); it need not be seekable.
+ extract_to (str, optional): where to extract the compressed file.
+ extract_dirs (list, optional): directories inside the archive file to extract.
+ Defaults to `*`. Currently ignored: the whole archive is always extracted.
+ verbose (bool, optional): whether or not extracted content should be displayed.
+ Defaults to False. Currently unused.
+ """
+ compressed_file = StringIO(file_object.read())
+ with tarfile.open(fileobj=compressed_file, mode=mode) as bundle:
+ bundle.extractall(path=extract_to)
+
+
+ def download_unpack(self, url, extract_to='.', extract_dirs='*', verbose=False):
+ """Generic method to download and extract a compressed file without writing it to disk first.
Args:
url (str): URL where the file to be downloaded is located.
- extract_to (str): directory where the downloaded file will
- be extracted to.
+ extract_to (str, optional): directory where the downloaded file will
+ be extracted to.
extract_dirs (list, optional): directories inside the archive to extract.
- Defaults to `None`.
- error_level (str, optional): log level to use in case an error occurs.
- Defaults to `FATAL`.
+ Defaults to `*`. It currently only applies to zip files.
+
+ Raises:
+ IOError: if `url` has no scheme and is not an existing local file.
"""
- dirs = self.query_abs_dirs()
- archive = self.download_file(url, parent_dir=dirs['abs_work_dir'],
- error_level=error_level)
- self.unpack(archive, extract_to, extract_dirs=extract_dirs,
- error_level=error_level)
+ # Many scripts overwrite this method and set extract_dirs to None
+ extract_dirs = '*' if extract_dirs is None else extract_dirs
+ EXTENSION_TO_MIMETYPE = {
+ 'bz2': 'application/x-bzip2',
+ 'gz': 'application/x-gzip',
+ 'tar': 'application/x-tar',
+ 'zip': 'application/zip',
+ }
+ MIMETYPES = {
+ 'application/x-bzip2': {
+ 'function': self.deflate,
+ 'kwargs': {'mode': 'r:bz2'},
+ },
+ 'application/x-gzip': {
+ 'function': self.deflate,
+ 'kwargs': {'mode': 'r:gz'},
+ },
+ 'application/x-tar': {
+ 'function': self.deflate,
+ 'kwargs': {'mode': 'r'},
+ },
+ 'application/zip': {
+ 'function': self.unzip,
+ },
+ }
+
+ parsed_url = urlparse.urlparse(url)
+
+ # In case we're referencing a file without file://
+ if parsed_url.scheme == '':
+ if not os.path.isfile(url):
+ raise IOError('Could not find file to extract: {}'.format(url))
+
+ url = 'file://%s' % os.path.abspath(url)
+ parsed_url = urlparse.urlparse(url)
+
+ request = urllib2.Request(url)
+ response = urllib2.urlopen(request)
+
+ if parsed_url.scheme == 'file':
+ filename = url.split('/')[-1]
+ # XXX: bz2/gz instead of tar.{bz2/gz}
+ extension = filename[filename.rfind('.')+1:]
+ mimetype = EXTENSION_TO_MIMETYPE[extension]
+ else:
+ mimetype = response.headers.type
+
+ self.debug('Url: {}'.format(url))
+ self.debug('Mimetype: {}'.format(mimetype))
+ self.debug('Content-Encoding {}'.format(response.headers.get('Content-Encoding')))
+
+ function = MIMETYPES[mimetype]['function']
+ kwargs = {
+ 'file_object': response,
+ 'extract_to': extract_to,
+ 'extract_dirs': extract_dirs,
+ 'verbose': verbose,
+ }
+ kwargs.update(MIMETYPES[mimetype].get('kwargs', {}))
+
+ self.info('Downloading and extracting to {} these dirs {} from {}'.format(
+ extract_to,
+ ', '.join(extract_dirs),
+ url,
+ ))
+ retry_args = dict(
+ failure_status=None,
+ retry_exceptions=(urllib2.HTTPError, urllib2.URLError,
+ httplib.BadStatusLine,
+ socket.timeout, socket.error),
+ error_message="Can't download from {}".format(url),
+ error_level=FATAL,
+ )
+ self.retry(
+ function,
+ kwargs=kwargs,
+ **retry_args
+ )
+
def load_json_url(self, url, error_level=None, *args, **kwargs):
""" Returns a json object from a url (it retries). """
contents = self._retry_download(
url=url, error_level=error_level, *args, **kwargs
)
return json.loads(contents.read())
@@ -1404,30 +1535,24 @@ class ScriptMixin(PlatformMixin):
of the command is not in `success_codes`. Defaults to 2.
verbose (bool, optional): whether or not extracted content should be displayed.
Defaults to False.
Raises:
IOError: on `filename` file not found.
"""
- def _filter_entries(namelist):
- """Filter entries of the archive based on the specified list of to extract dirs."""
- filter_partial = functools.partial(fnmatch.filter, namelist)
- for entry in itertools.chain(*map(filter_partial, extract_dirs or ['*'])):
- yield entry
-
if not os.path.isfile(filename):
raise IOError('Could not find file to extract: %s' % filename)
if zipfile.is_zipfile(filename):
try:
self.info('Using ZipFile to extract {} to {}'.format(filename, extract_to))
with zipfile.ZipFile(filename) as bundle:
- for entry in _filter_entries(bundle.namelist()):
+ for entry in self._filter_entries(bundle.namelist(), extract_dirs):
if verbose:
self.info(' %s' % entry)
bundle.extract(entry, path=extract_to)
# ZipFile doesn't preserve permissions during extraction:
# http://bugs.python.org/issue15795
fname = os.path.realpath(os.path.join(extract_to, entry))
mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
@@ -1439,17 +1564,17 @@ class ScriptMixin(PlatformMixin):
self.log('%s (%s)' % (e.message, filename),
level=error_level, exit_code=fatal_exit_code)
# Bug 1211882 - is_tarfile cannot be trusted for dmg files
elif tarfile.is_tarfile(filename) and not filename.lower().endswith('.dmg'):
try:
self.info('Using TarFile to extract {} to {}'.format(filename, extract_to))
with tarfile.open(filename) as bundle:
- for entry in _filter_entries(bundle.getnames()):
+ for entry in self._filter_entries(bundle.getnames(), extract_dirs):
if verbose:
self.info(' %s' % entry)
bundle.extract(entry, path=extract_to)
except tarfile.TarError as e:
self.log('%s (%s)' % (e.message, filename),
level=error_level, exit_code=fatal_exit_code)
else:
self.log('No extraction method found for: %s' % filename,
--- a/testing/mozharness/test/test_base_script.py
+++ b/testing/mozharness/test/test_base_script.py
@@ -254,16 +254,60 @@ class TestScript(unittest.TestCase):
'regex': re.compile(',$'), 'level': IGNORE,
}, {
'substr': ']$', 'level': WARNING,
}])
error_logsize = os.path.getsize("test_logs/test_error.log")
self.assertTrue(error_logsize > 0,
msg="error list not working properly")
+ def test_download_unpack(self):
+ # NOTE: The action is called *download*; however, it also works for files on disk
+ self.s = get_debug_script_obj()
+
+ archives_path = os.path.join(here, 'helper_files', 'archives')
+
+ # Test basic decompression
+ for archive in ('archive.tar', 'archive.tar.bz2', 'archive.tar.gz', 'archive.zip'):
+ self.s.download_unpack(
+ url=os.path.join(archives_path, archive),
+ extract_to=self.tmpdir
+ )
+ self.assertIn('script.sh', os.listdir(os.path.join(self.tmpdir, 'bin')))
+ self.assertIn('lorem.txt', os.listdir(self.tmpdir))
+ shutil.rmtree(self.tmpdir)
+
+ # Test permissions for extracted entries from zip archive
+ self.s.download_unpack(
+ url=os.path.join(archives_path, 'archive.zip'),
+ extract_to=self.tmpdir,
+ )
+ file_stats = os.stat(os.path.join(self.tmpdir, 'bin', 'script.sh'))
+ orig_fstats = os.stat(os.path.join(archives_path, 'reference', 'bin', 'script.sh'))
+ self.assertEqual(file_stats.st_mode, orig_fstats.st_mode)
+ shutil.rmtree(self.tmpdir)
+
+ # Test unzip specific dirs only
+ self.s.download_unpack(
+ url=os.path.join(archives_path, 'archive.zip'),
+ extract_to=self.tmpdir,
+ extract_dirs=['bin/*']
+ )
+ self.assertIn('bin', os.listdir(self.tmpdir))
+ self.assertNotIn('lorem.txt', os.listdir(self.tmpdir))
+ shutil.rmtree(self.tmpdir)
+
+ # Test for invalid filenames (Windows only)
+ if PYWIN32:
+ with self.assertRaises(IOError):
+ self.s.download_unpack(
+ url=os.path.join(archives_path, 'archive_invalid_filename.zip'),
+ extract_to=self.tmpdir
+ )
+
def test_unpack(self):
self.s = get_debug_script_obj()
archives_path = os.path.join(here, 'helper_files', 'archives')
# Test basic decompression
for archive in ('archive.tar', 'archive.tar.bz2', 'archive.tar.gz', 'archive.zip'):
self.s.unpack(os.path.join(archives_path, archive), self.tmpdir)