author Armen Zambrano Gasparnian <armenzg@mozilla.com>

Thu, 25 Aug 2016 11:04:16 -0400

changeset 408706 f54fc25438431bce343254709fe8cb313231f353

parent 408083 b3ec8a3373e8faca26c39e9ff577a2d4f8b6927a

child 530167 55bcf824be266553721f846117b8fb91442f245e

push id 28275

push user bmo:armenzg@mozilla.com

push date Thu, 01 Sep 2016 15:16:29 +0000

bugs 1272083

milestone 51.0a1

testing/mozharness/mozharness/base/script.py file | annotate | diff | comparison | revisions

testing/mozharness/test/test_base_script.py file | annotate | diff | comparison | revisions
--- a/testing/mozharness/mozharness/base/script.py
+++ b/testing/mozharness/mozharness/base/script.py
@@ -45,16 +45,18 @@ if os.name == 'nt':
         PYWIN32 = False
 
 try:
     import simplejson as json
     assert json
 except ImportError:
     import json
 
+from cStringIO import StringIO
+
 from mozprocess import ProcessHandler
 from mozharness.base.config import BaseConfig
 from mozharness.base.log import SimpleFileLogger, MultiFileLogger, \
     LogMixin, OutputParser, DEBUG, INFO, ERROR, FATAL
 
 
 def platform_name():
     pm = PlatformMixin()
@@ -453,38 +455,167 @@ class ScriptMixin(PlatformMixin):
             kwargs = {"url": url, "file_name": file_name}
 
         return self.retry(
             download_func,
             kwargs=kwargs,
             **retry_args
         )
 
-    def download_unpack(self, url, extract_to, extract_dirs=None,
-                        error_level=FATAL):
-        """Generic method to download and extract a compressed file.
+
+    def _filter_entries(self, namelist, extract_dirs):
+        """Yield the entries of `namelist` matching any fnmatch pattern in `extract_dirs` ('*' if empty)."""
+        filter_partial = functools.partial(fnmatch.filter, namelist)
+        entries = itertools.chain(*map(filter_partial, extract_dirs or ['*']))
+
+        for entry in entries:
+            yield entry
+
+
+    def unzip(self, file_object, extract_to='.', extract_dirs='*', verbose=False):
+        """Extract a zip archive from a file-like object without writing the archive to disk first.
+
+        Args:
+            file_object (object): Any file-like object providing read(); it is read fully into memory.
+            extract_to (str, optional): where to extract the compressed file.
+            extract_dirs (list, optional): patterns of archive entries to extract. Defaults to '*'.
+            verbose (bool, optional): whether each extracted entry is logged. Defaults to False.
+        """
+        compressed_file = StringIO(file_object.read())
+        try:
+            with zipfile.ZipFile(compressed_file) as bundle:
+                entries = self._filter_entries(bundle.namelist(), extract_dirs)
+
+                for entry in entries:
+                    if verbose:
+                        self.info(' {}'.format(entry))
+                    bundle.extract(entry, path=extract_to)
 
-        The downloaded file will always be saved to the working directory and is not getting
-        deleted after extracting.
+                    # ZipFile doesn't preserve permissions during extraction:
+                    # http://bugs.python.org/issue15795
+                    fname = os.path.realpath(os.path.join(extract_to, entry))
+                    mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
+                    # Only set permissions if attributes are available. Otherwise all
+                    # permissions will be removed eg. on Windows.
+                    if mode:
+                        os.chmod(fname, mode)
+
+        except zipfile.BadZipfile as e:
+            self.exception('{}'.format(e.message))
+
+
+    def deflate(self, file_object, mode, extract_to='.', extract_dirs='*', verbose=False):
+        """Extract a tar, tar.bz2 or tar.gz archive from a file-like object without writing it to disk first.
+
+        Args:
+            file_object (object): Any file-like object providing read(); it is read fully into memory.
+            mode (str): mode string passed to tarfile.open(), e.g. 'r', 'r:gz' or 'r:bz2'.
+            extract_to (str, optional): where to extract the compressed file.
+            extract_dirs (list, optional): currently unused; the whole archive is always extracted.
+                                           Defaults to `*`.
+            verbose (bool, optional): currently unused. Defaults to False.
+        """
+        compressed_file = StringIO(file_object.read())
+        t = tarfile.open(fileobj=compressed_file, mode=mode)
+        t.extractall(path=extract_to)
+
+
+    def download_unpack(self, url, extract_to='.', extract_dirs='*', verbose=False):
+        """Generic method to download and extract a compressed file without writing it to disk first.
 
         Args:
             url (str): URL where the file to be downloaded is located.
-            extract_to (str): directory where the downloaded file will
-                              be extracted to.
+            extract_to (str, optional): directory where the downloaded file will
+                                        be extracted to.
             extract_dirs (list, optional): directories inside the archive to extract.
-                                           Defaults to `None`.
-            error_level (str, optional): log level to use in case an error occurs.
-                                         Defaults to `FATAL`.
+                                           Defaults to `*`. It currently only applies to zip files.
+
+        Raises:
+            IOError: if `url` has no scheme and is not an existing local file.
 
         """
-        dirs = self.query_abs_dirs()
-        archive = self.download_file(url, parent_dir=dirs['abs_work_dir'],
-                                     error_level=error_level)
-        self.unpack(archive, extract_to, extract_dirs=extract_dirs,
-                    error_level=error_level)
+        # Many scripts override this method and set extract_dirs to None
+        extract_dirs = '*' if extract_dirs is None else extract_dirs
+        EXTENSION_TO_MIMETYPE = {
+            'bz2': 'application/x-bzip2',
+            'gz':  'application/x-gzip',
+            'tar': 'application/x-tar',
+            'zip': 'application/zip',
+        }
+        MIMETYPES = {
+            'application/x-bzip2': {
+                'function': self.deflate,
+                'kwargs': {'mode': 'r:bz2'},
+            },
+            'application/x-gzip': {
+                'function': self.deflate,
+                'kwargs': {'mode': 'r:gz'},
+            },
+            'application/x-tar': {
+                'function': self.deflate,
+                'kwargs': {'mode': 'r'},
+            },
+            'application/zip': {
+                'function': self.unzip,
+            },
+        }
+
+        parsed_url = urlparse.urlparse(url)
+
+        # In case we're referencing a file without file:// (NOTE(review): parsed_fd below is never read)
+        if parsed_url.scheme == '':
+            if not os.path.isfile(url):
+                raise IOError('Could not find file to extract: {}'.format(url))
+
+            url = 'file://%s' % os.path.abspath(url)
+            parsed_fd = urlparse.urlparse(url)
+
+        request = urllib2.Request(url)
+        response = urllib2.urlopen(request)
+
+        if parsed_url.scheme == 'file':
+            filename = url.split('/')[-1]
+            # XXX: bz2/gz instead of tar.{bz2/gz}
+            extension = filename[filename.rfind('.')+1:]
+            mimetype = EXTENSION_TO_MIMETYPE[extension]
+        else:
+            mimetype = response.headers.type
+
+        self.debug('Url: {}'.format(url))
+        self.debug('Mimetype: {}'.format(mimetype))
+        self.debug('Content-Encoding {}'.format(response.headers.get('Content-Encoding')))
+
+        function = MIMETYPES[mimetype]['function']
+        kwargs = {
+            'file_object': response,
+            'extract_to': extract_to,
+            'extract_dirs': extract_dirs,
+            'verbose': verbose,
+        }
+        kwargs.update(MIMETYPES[mimetype].get('kwargs', {}))
+
+        self.info('Downloading and extracting to {} these dirs {} from {}'.format(
+            extract_to,
+            ', '.join(extract_dirs),
+            url,
+        ))
+        retry_args = dict(
+            failure_status=None,
+            retry_exceptions=(urllib2.HTTPError, urllib2.URLError,
+                              httplib.BadStatusLine,
+                              socket.timeout, socket.error),
+            error_message="Can't download from {}".format(url),
+            error_level=FATAL,
+        )
+        self.retry(
+            function,
+            kwargs=kwargs,
+            **retry_args
+        )
+
 
     def load_json_url(self, url, error_level=None, *args, **kwargs):
         """ Returns a json object from a url (it retries). """
         contents = self._retry_download(
             url=url, error_level=error_level, *args, **kwargs
         )
         return json.loads(contents.read())
 
@@ -1404,30 +1535,24 @@ class ScriptMixin(PlatformMixin):
               of the command is not in `success_codes`. Defaults to 2.
             verbose (bool, optional): whether or not extracted content should be displayed.
                                       Defaults to False.
 
         Raises:
             IOError: on `filename` file not found.
 
         """
-        def _filter_entries(namelist):
-            """Filter entries of the archive based on the specified list of to extract dirs."""
-            filter_partial = functools.partial(fnmatch.filter, namelist)
-            for entry in itertools.chain(*map(filter_partial, extract_dirs or ['*'])):
-                yield entry
-
         if not os.path.isfile(filename):
             raise IOError('Could not find file to extract: %s' % filename)
 
         if zipfile.is_zipfile(filename):
             try:
                 self.info('Using ZipFile to extract {} to {}'.format(filename, extract_to))
                 with zipfile.ZipFile(filename) as bundle:
-                    for entry in _filter_entries(bundle.namelist()):
+                    for entry in self._filter_entries(bundle.namelist(), extract_dirs):
                         if verbose:
                             self.info(' %s' % entry)
                         bundle.extract(entry, path=extract_to)
 
                         # ZipFile doesn't preserve permissions during extraction:
                         # http://bugs.python.org/issue15795
                         fname = os.path.realpath(os.path.join(extract_to, entry))
                         mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
@@ -1439,17 +1564,17 @@ class ScriptMixin(PlatformMixin):
                 self.log('%s (%s)' % (e.message, filename),
                          level=error_level, exit_code=fatal_exit_code)
 
         # Bug 1211882 - is_tarfile cannot be trusted for dmg files
         elif tarfile.is_tarfile(filename) and not filename.lower().endswith('.dmg'):
             try:
                 self.info('Using TarFile to extract {} to {}'.format(filename, extract_to))
                 with tarfile.open(filename) as bundle:
-                    for entry in _filter_entries(bundle.getnames()):
+                    for entry in self._filter_entries(bundle.getnames(), extract_dirs):
                         if verbose:
                             self.info(' %s' % entry)
                         bundle.extract(entry, path=extract_to)
             except tarfile.TarError as e:
                 self.log('%s (%s)' % (e.message, filename),
                          level=error_level, exit_code=fatal_exit_code)
         else:
             self.log('No extraction method found for: %s' % filename,
--- a/testing/mozharness/test/test_base_script.py
+++ b/testing/mozharness/test/test_base_script.py
@@ -254,16 +254,60 @@ class TestScript(unittest.TestCase):
                 'regex': re.compile(',$'), 'level': IGNORE,
             }, {
                 'substr': ']$', 'level': WARNING,
             }])
         error_logsize = os.path.getsize("test_logs/test_error.log")
         self.assertTrue(error_logsize > 0,
                         msg="error list not working properly")
 
+    def test_download_unpack(self):
+        # NOTE: The action is called *download*; however, it also works for files on disk
+        self.s = get_debug_script_obj()
+
+        archives_path = os.path.join(here, 'helper_files', 'archives')
+
+        # Test basic decompression
+        for archive in ('archive.tar', 'archive.tar.bz2', 'archive.tar.gz', 'archive.zip'):
+            self.s.download_unpack(
+                url=os.path.join(archives_path, archive),
+                extract_to=self.tmpdir
+            )
+            self.assertIn('script.sh', os.listdir(os.path.join(self.tmpdir, 'bin')))
+            self.assertIn('lorem.txt', os.listdir(self.tmpdir))
+            shutil.rmtree(self.tmpdir)
+
+        # Test permissions for extracted entries from zip archive
+        self.s.download_unpack(
+            url=os.path.join(archives_path, 'archive.zip'),
+            extract_to=self.tmpdir,
+        )
+        file_stats = os.stat(os.path.join(self.tmpdir, 'bin', 'script.sh'))
+        orig_fstats = os.stat(os.path.join(archives_path, 'reference', 'bin', 'script.sh'))
+        self.assertEqual(file_stats.st_mode, orig_fstats.st_mode)
+        shutil.rmtree(self.tmpdir)
+
+        # Test unzip specific dirs only
+        self.s.download_unpack(
+            url=os.path.join(archives_path, 'archive.zip'),
+            extract_to=self.tmpdir,
+            extract_dirs=['bin/*']
+        )
+        self.assertIn('bin', os.listdir(self.tmpdir))
+        self.assertNotIn('lorem.txt', os.listdir(self.tmpdir))
+        shutil.rmtree(self.tmpdir)
+
+        # Test for invalid filenames (Windows only)
+        if PYWIN32:
+            with self.assertRaises(IOError):
+                self.s.download_unpack(
+                    url=os.path.join(archives_path, 'archive_invalid_filename.zip'),
+                    extract_to=self.tmpdir
+                )
+
+
     def test_unpack(self):
         self.s = get_debug_script_obj()
 
         archives_path = os.path.join(here, 'helper_files', 'archives')
 
         # Test basic decompression
         for archive in ('archive.tar', 'archive.tar.bz2', 'archive.tar.gz', 'archive.zip'):
             self.s.unpack(os.path.join(archives_path, archive), self.tmpdir)
author	Armen Zambrano Gasparnian <armenzg@mozilla.com>
	Thu, 25 Aug 2016 11:04:16 -0400
changeset 408706	f54fc25438431bce343254709fe8cb313231f353
parent 408083	b3ec8a3373e8faca26c39e9ff577a2d4f8b6927a
child 530167	55bcf824be266553721f846117b8fb91442f245e
push id	28275
push user	bmo:armenzg@mozilla.com
push date	Thu, 01 Sep 2016 15:16:29 +0000
bugs	1272083
milestone	51.0a1
testing/mozharness/mozharness/base/script.py		file \| annotate \| diff \| comparison \| revisions
testing/mozharness/test/test_base_script.py		file \| annotate \| diff \| comparison \| revisions