Bug 1240134 - Implement a TarFinder to facilitate extracting files from compressed Firefox archives. draft
author Chris Manchester <cmanchester@mozilla.com>
Tue, 16 Aug 2016 15:16:47 -0700
changeset 401345 2b07820788d02f0486f68116bf40f6721e1885b4
parent 401344 a2ed59c1266787994df41378098018935f41ee85
child 401346 d44d97d4ab7e2ad63a8408a7bd5233bdd4abe0fc
push id 26441
push user cmanchester@mozilla.com
push date Tue, 16 Aug 2016 22:27:47 +0000
bugs 1240134
milestone 51.0a1
Bug 1240134 - Implement a TarFinder to facilitate extracting files from compressed Firefox archives. MozReview-Commit-ID: F4l8505bvwR
python/mozbuild/mozpack/files.py
python/mozbuild/mozpack/test/test_files.py
--- a/python/mozbuild/mozpack/files.py
+++ b/python/mozbuild/mozpack/files.py
@@ -30,17 +30,20 @@ from mozpack.errors import (
 from mozpack.mozjar import JarReader
 import mozpack.path as mozpath
 from collections import OrderedDict
 from jsmin import JavascriptMinify
 from tempfile import (
     mkstemp,
     NamedTemporaryFile,
 )
-
+from tarfile import (
+    TarFile,
+    TarInfo,
+)
 try:
     import hglib
 except ImportError:
     hglib = None
 
 
 # For clean builds, copying files on win32 using CopyFile through ctypes is
 # ~2x as fast as using shutil.copyfile.
@@ -131,16 +134,32 @@ class BaseFile(object):
         # shutil.copystat only copies milliseconds, and seconds is not
         # enough precision.
         dest_mtime = int(os.path.getmtime(dest) * 1000)
         for input in inputs:
             if dest_mtime < int(os.path.getmtime(input) * 1000):
                 return True
         return False
 
+    @staticmethod
+    def normalize_mode(mode):
+        '''Normalize a unix file mode, applying the rules noted below.'''
+        # - keep file type (e.g. S_IFREG)
+        ret = stat.S_IFMT(mode)
+        # - expand user read and execute permissions to everyone
+        if mode & 0o400:
+            ret |= 0o444
+        if mode & 0o100:
+            ret |= 0o111
+        # - keep user write permissions
+        if mode & 0o200:
+            ret |= 0o200
+        # - leave away sticky bit, setuid, setgid
+        return ret
+
     def copy(self, dest, skip_if_older=True):
         '''
         Copy the BaseFile content to the destination given as a string or a
         Dest instance. Avoids replacing existing files if the BaseFile content
         matches that of the destination, or in case of plain files, if the
         destination is newer than the original file. This latter behaviour is
         disabled when skip_if_older is False.
         Returns whether a copy was actually performed (True) or not (False).
@@ -219,29 +238,17 @@ class File(BaseFile):
     def mode(self):
         '''
         Return the file's unix mode, as returned by os.stat().st_mode.
         '''
         if platform.system() == 'Windows':
             return None
         assert self.path is not None
         mode = os.stat(self.path).st_mode
-        # Normalize file mode:
-        # - keep file type (e.g. S_IFREG)
-        ret = stat.S_IFMT(mode)
-        # - expand user read and execute permissions to everyone
-        if mode & 0400:
-            ret |= 0444
-        if mode & 0100:
-            ret |= 0111
-        # - keep user write permissions
-        if mode & 0200:
-            ret |= 0200
-        # - leave away sticky bit, setuid, setgid
-        return ret
+        return self.normalize_mode(mode)
 
     def read(self):
         '''Return the contents of the file.'''
         with open(self.path, 'rb') as fh:
             return fh.read()
 
 
 class ExecutableFile(File):
@@ -500,16 +507,33 @@ class DeflatedFile(BaseFile):
         from mozpack.mozjar import JarFileReader
         assert isinstance(file, JarFileReader)
         self.file = file
 
     def open(self):
         self.file.seek(0)
         return self.file
 
+class ExtractedTarFile(GeneratedFile):
+    '''
+    File class for members of a tar archive. The member's contents are read
+    in full when the instance is created, and are then kept in memory.
+    '''
+    def __init__(self, tar, info):
+        assert isinstance(info, TarInfo)
+        assert isinstance(tar, TarFile)
+        GeneratedFile.__init__(self, tar.extractfile(info).read())
+        self._mode = self.normalize_mode(info.mode)  # mode from tar header
+
+    @property
+    def mode(self):
+        return self._mode
+
+    def read(self):
+        return self.content  # assumes GeneratedFile.__init__ stored it
 
 class XPTFile(GeneratedFile):
     '''
     File class for a linked XPT file. It takes several XPT files as input
     (using the add() and remove() member functions), and links them at copy()
     time.
     '''
     def __init__(self):
@@ -945,16 +969,40 @@ class JarFinder(BaseFinder):
         '''
         Actual implementation of JarFinder.find(), dispatching to specialized
         member functions depending on what kind of pattern was given.
         '''
         return self._find_helper(pattern, self._files,
                                  lambda x: DeflatedFile(self._files[x]))
 
 
+class TarFinder(BaseFinder):
+    '''
+    Helper to get files from a TarFile; members are indexed by name.
+    '''
+    def __init__(self, base, tar, **kargs):
+        '''
+        Create a TarFinder for the regular files in the given TarFile. The
+        base argument is used as an indication of the Tar file location.
+        '''
+        assert isinstance(tar, TarFile)
+        self._tar = tar
+        BaseFinder.__init__(self, base, **kargs)
+        self._files = OrderedDict((f.name, f) for f in tar if f.isfile())
+
+    def _find(self, pattern):
+        '''
+        Actual implementation of TarFinder.find(). Delegates to
+        _find_helper(), passing a callback that builds an ExtractedTarFile.
+        '''
+        return self._find_helper(pattern, self._files,
+                                 lambda x: ExtractedTarFile(self._tar,
+                                                            self._files[x]))
+
+
 class ComposedFinder(BaseFinder):
     '''
     Composes multiple File Finders in some sort of virtual file system.
 
     A ComposedFinder is initialized from a dictionary associating paths to
     *Finder instances.
 
     Note this could be optimized to be smarter than getting all the files
--- a/python/mozbuild/mozpack/test/test_files.py
+++ b/python/mozbuild/mozpack/test/test_files.py
@@ -9,20 +9,22 @@ from mozpack.errors import (
     errors,
 )
 from mozpack.files import (
     AbsoluteSymlinkFile,
     ComposedFinder,
     DeflatedFile,
     Dest,
     ExistingFile,
+    ExtractedTarFile,
     FileFinder,
     File,
     GeneratedFile,
     JarFinder,
+    TarFinder,
     ManifestFile,
     MercurialFile,
     MercurialRevisionFinder,
     MinifiedJavaScript,
     MinifiedProperties,
     PreprocessedFile,
     XPTFile,
 )
@@ -50,16 +52,17 @@ from mozpack.chrome.manifest import (
 )
 import unittest
 import mozfile
 import mozunit
 import os
 import random
 import string
 import sys
+import tarfile
 import mozpack.path as mozpath
 from tempfile import mkdtemp
 from io import BytesIO
 from StringIO import StringIO
 from xpt import Typelib
 
 
 class TestWithTmpDir(unittest.TestCase):
@@ -1006,16 +1009,36 @@ class TestJarFinder(MatchTestTemplate, T
         self.jar.finish()
         reader = JarReader(file=self.tmppath('test.jar'))
         self.finder = JarFinder(self.tmppath('test.jar'), reader)
         self.do_match_test()
 
         self.assertIsNone(self.finder.get('does-not-exist'))
         self.assertIsInstance(self.finder.get('bar'), DeflatedFile)
 
+class TestTarFinder(MatchTestTemplate, TestWithTmpDir):
+    def add(self, path):
+        self.tar.addfile(tarfile.TarInfo(name=path))  # zero-length member
+
+    def do_check(self, pattern, result):
+        do_check(self, self.finder, pattern, result)
+
+    def test_tar_finder(self):
+        self.tar = tarfile.open(name=self.tmppath('test.tar.bz2'),
+                                mode='w:bz2')
+        self.prepare_match_test()
+        self.tar.close()
+        tarreader = tarfile.open(name=self.tmppath('test.tar.bz2'),
+                                 mode='r:bz2')
+        self.addCleanup(tarreader.close)  # don't leak the archive handle
+        self.finder = TarFinder(self.tmppath('test.tar.bz2'), tarreader)
+        self.do_match_test()
+        self.assertIsNone(self.finder.get('does-not-exist'))
+        self.assertIsInstance(self.finder.get('bar'), ExtractedTarFile)
+
 
 class TestComposedFinder(MatchTestTemplate, TestWithTmpDir):
     def add(self, path, content=None):
         # Put foo/qux files under $tmp/b.
         if path.startswith('foo/qux/'):
             real_path = mozpath.join('b', path[8:])
         else:
             real_path = mozpath.join('a', path)