Bug 1352595 - Add basic support for brotli compression to the packager. r?gps draft
author Mike Hommey <mh+mozilla@glandium.org>
Fri, 18 Aug 2017 05:37:18 +0900
changeset 651808 29a44d7b94587be6b573ce4ed4a1b9384ec80710
parent 651395 c86b7150523c10e1d1dbc0be2d8ed96f205be35f
child 651809 58ce23729e2d06c873e9bc4389e9adfda8674deb
push id 75807
push user bmo:mh+mozilla@glandium.org
push date Thu, 24 Aug 2017 00:18:34 +0000
reviewers gps
bugs 1352595, 1355661, 1355671
milestone 57.0a1
Bug 1352595 - Add basic support for brotli compression to the packager. r?gps Bug 1355661 added support for brotli streams in "jar" files handled by Gecko, and bug 1355671 made us build the `bro` command line utility, which allows compressing and decompressing brotli streams. This change uses the `bro` command line utility in the packager so that it can create and handle "jar" files using brotli streams. However, the `bro` command line utility is not available to l10n repacks. Since, at the moment, we're only hoping that the outcome of using brotli will be good, we avoid doing all the work to make l10n repacks fully support it, and just hook things up enough to enable brotli while ensuring l10n repacks don't break. This involves forcing some files to be deflated and disabling some optimizations in the packager. Things will need to be figured out more properly if the experiment proves brotli to be worthwhile.
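
The packager does not use a Python brotli binding; it shells out to the standalone `bro` host tool for both compression and decompression. The following is a minimal sketch of that interaction, mirroring the `Brotli` helper added to mozjar.py below. The helper names (`bro_path`, `run_bro`) and the example object directory are illustrative only; the dist/host/bin location, the `--window 17` argument and the `--decompress` flag come from the patch itself.

    import os
    import subprocess

    def bro_path(topobjdir, bin_suffix=''):
        # The host-built `bro` binary (bug 1355671) lands in dist/host/bin.
        return os.path.join(topobjdir, 'dist', 'host', 'bin', 'bro' + bin_suffix)

    def run_bro(tool, args, data):
        # `bro` reads the input stream on stdin and writes the result to stdout.
        proc = subprocess.Popen([tool] + args, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate(data)
        if proc.returncode != 0:
            raise Exception('bro failed with exit code %d' % proc.returncode)
        return stdout

    tool = bro_path('/path/to/objdir')
    compressed = run_bro(tool, ['--window', '17'], b'some uncompressed data')
    original = run_bro(tool, ['--decompress'], compressed)
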
python/mozbuild/mozpack/copier.py
python/mozbuild/mozpack/mozjar.py
python/mozbuild/mozpack/packager/l10n.py
python/mozbuild/mozpack/packager/unpack.py
toolkit/mozapps/installer/packager.mk
toolkit/mozapps/installer/packager.py
toolkit/mozapps/installer/upload-files.mk
--- a/python/mozbuild/mozpack/copier.py
+++ b/python/mozbuild/mozpack/copier.py
@@ -6,17 +6,20 @@ from __future__ import absolute_import
 
 import os
 import stat
 import sys
 
 from mozpack.errors import errors
 from mozpack.files import (
     BaseFile,
+    DeflatedFile,
     Dest,
+    ManifestFile,
+    XPTFile,
 )
 import mozpack.path as mozpath
 import errno
 from collections import (
     defaultdict,
     Counter,
     OrderedDict,
 )
@@ -559,30 +562,47 @@ class Jarrer(FileRegistry, BaseFile):
 
             def exists(self):
                 return self.deflater is not None
 
         if isinstance(dest, basestring):
             dest = Dest(dest)
         assert isinstance(dest, Dest)
 
-        from mozpack.mozjar import JarWriter, JarReader
+        from mozpack.mozjar import JarWriter, JarReader, JAR_BROTLI
         try:
             old_jar = JarReader(fileobj=dest)
         except Exception:
             old_jar = []
 
         old_contents = dict([(f.filename, f) for f in old_jar])
 
         with JarWriter(fileobj=dest, compress=self.compress,
                        optimize=self.optimize) as jar:
             for path, file in self:
                 compress = self._compress_options.get(path, self.compress)
+                # Temporary: because l10n repacks can't handle brotli just yet,
+                # but still need to be able to decompress these files (through
+                # UnpackFinder and the formatters), we force deflate on them.
+                if compress == JAR_BROTLI and (
+                        isinstance(file, (ManifestFile, XPTFile)) or
+                        mozpath.basename(path) == 'install.rdf'):
+                    compress = True
 
-                if path in old_contents:
+                # If the added content already comes from a jar file, we just add
+                # the raw data from the original jar file to the new one.
+                if isinstance(file, DeflatedFile):
+                    jar.add(path, file.file, mode=file.mode,
+                            compress=file.file.compress)
+                    continue
+                # If the file is already in the old contents for this jar,
+                # we can avoid recompressing it when the contents match, but
+                # checking that requires decompressing the old content. Since
+                # e.g. l10n repacks can't decompress brotli, skip this for
+                # brotli-compressed entries.
+                elif path in old_contents and old_contents[path].compress != JAR_BROTLI:
                     deflater = DeflaterDest(old_contents[path], compress)
                 else:
                     deflater = DeflaterDest(compress=compress)
                 file.copy(deflater, skip_if_older)
                 jar.add(path, deflater.deflater, mode=file.mode, compress=compress)
             if self._preload:
                 jar.preload(self._preload)
 
--- a/python/mozbuild/mozpack/mozjar.py
+++ b/python/mozbuild/mozpack/mozjar.py
@@ -1,28 +1,32 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 from __future__ import absolute_import
 
 from io import BytesIO
 import struct
+import subprocess
 import zlib
 import os
 from zipfile import (
     ZIP_STORED,
     ZIP_DEFLATED,
 )
 from collections import OrderedDict
 from urlparse import urlparse, ParseResult
 import mozpack.path as mozpath
+from mozbuild.util import memoize
+
 
 JAR_STORED = ZIP_STORED
 JAR_DEFLATED = ZIP_DEFLATED
+JAR_BROTLI = 0x81
 MAX_WBITS = 15
 
 
 class JarReaderError(Exception):
     '''Error type for Jar reader errors.'''
 
 
 class JarWriterError(Exception):
@@ -257,23 +261,24 @@ class JarFileReader(object):
     within a Jar archive.
     '''
     def __init__(self, header, data):
         '''
         Initialize a JarFileReader. header is the local file header
         corresponding to the file in the jar archive, data a buffer containing
         the file data.
         '''
-        assert header['compression'] in [JAR_DEFLATED, JAR_STORED]
+        assert header['compression'] in [JAR_DEFLATED, JAR_STORED, JAR_BROTLI]
         self._data = data
         # Copy some local file header fields.
         for name in ['filename', 'compressed_size',
                      'uncompressed_size', 'crc32']:
             setattr(self, name, header[name])
-        self.compressed = header['compression'] == JAR_DEFLATED
+        self.compressed = header['compression'] != JAR_STORED
+        self.compress = header['compression']
 
     def read(self, length=-1):
         '''
         Read some amount of uncompressed data.
         '''
         return self.uncompressed_data.read(length)
 
     def readlines(self):
@@ -312,20 +317,24 @@ class JarFileReader(object):
     @property
     def uncompressed_data(self):
         '''
         Return the uncompressed data.
         '''
         if hasattr(self, '_uncompressed_data'):
             return self._uncompressed_data
         data = self.compressed_data
-        if self.compressed:
+        if self.compress == JAR_STORED:
+            data = data.tobytes()
+        elif self.compress == JAR_BROTLI:
+            data = Brotli.decompress(data.tobytes())
+        elif self.compress == JAR_DEFLATED:
             data = zlib.decompress(data.tobytes(), -MAX_WBITS)
         else:
-            data = data.tobytes()
+            assert False  # Can't be another value per __init__
         if len(data) != self.uncompressed_size:
             raise JarReaderError('Corrupted file? %s' % self.filename)
         self._uncompressed_data = BytesIO(data)
         return self._uncompressed_data
 
 
 class JarReader(object):
     '''
@@ -356,16 +365,23 @@ class JarReader(object):
 
     def close(self):
         '''
         Free some resources associated with the Jar.
         '''
         del self._data
 
     @property
+    def compression(self):
+        '''
+        Return the highest compression type used by the archive's entries,
+        where JAR_BROTLI > JAR_DEFLATED > JAR_STORED.
+        '''
+        entries = self.entries
+        if not entries:
+            return JAR_STORED
+        return max(f['compression'] for f in entries.itervalues())
+
+    @property
     def entries(self):
         '''
         Return an ordered dict of central directory entries, indexed by
         filename, in the order they appear in the Jar archive central
         directory. Directory entries are skipped.
         '''
         if hasattr(self, '_entries'):
             return self._entries
@@ -468,16 +484,18 @@ class JarWriter(object):
         archive should be optimized for Gecko or not. ``compress_level``
         defines the zlib compression level. It must be a value between 0 and 9
         and defaults to 9, the highest and slowest level of compression.
         '''
         if fileobj:
             self._data = fileobj
         else:
             self._data = open(file, 'wb')
+        if compress is True:
+            compress = JAR_DEFLATED
         self._compress = compress
         self._compress_level = compress_level
         self._contents = OrderedDict()
         self._last_preloaded = None
         self._optimize = optimize
 
     def __enter__(self):
         '''
@@ -569,38 +587,43 @@ class JarWriter(object):
         # Store the end of central directory.
         self._data.write(end.serialize())
         self._data.close()
 
     def add(self, name, data, compress=None, mode=None, skip_duplicates=False):
         '''
         Add a new member to the jar archive, with the given name and the given
         data.
-        The compress option indicates if the given data should be compressed
-        (True), not compressed (False), or compressed according to the default
-        defined when creating the JarWriter (None).
-        When the data should be compressed (True or None with self.compress ==
-        True), it is only really compressed if the compressed size is smaller
-        than the uncompressed size.
+        The compress option indicates how the given data should be compressed
+        (one of JAR_STORED, JAR_DEFLATED or JAR_BROTLI), or None to use the
+        default defined when creating the JarWriter. True and False are also
+        accepted for backwards compatibility, mapping respectively to
+        JAR_DEFLATED and JAR_STORED.
+        When the data should be compressed, it is only really compressed if
+        the compressed size is smaller than the uncompressed size.
         The mode option gives the unix permissions that should be stored
         for the jar entry.
         If a duplicated member is found skip_duplicates will prevent raising
         an exception if set to True.
         The given data may be a buffer, a file-like instance, a Deflater or a
         JarFileReader instance. The latter two allow to avoid uncompressing
         data to recompress it.
         '''
         name = mozpath.normsep(name)
 
         if name in self._contents and not skip_duplicates:
             raise JarWriterError("File %s already in JarWriter" % name)
         if compress is None:
             compress = self._compress
-        if (isinstance(data, JarFileReader) and data.compressed == compress) \
-                or (isinstance(data, Deflater) and data.compress == compress):
+        if compress is True:
+            compress = JAR_DEFLATED
+        if compress is False:
+            compress = JAR_STORED
+        if (isinstance(data, (JarFileReader, Deflater)) and
+                data.compress == compress):
             deflater = data
         else:
             deflater = Deflater(compress, compress_level=self._compress_level)
             if isinstance(data, basestring):
                 deflater.write(data)
             elif hasattr(data, 'read'):
                 if hasattr(data, 'seek'):
                     data.seek(0)
@@ -614,17 +637,17 @@ class JarWriter(object):
         if mode is not None:
             # Set creator host system (upper byte of creator_version)
             # to 3 (Unix) so mode is honored when there is one.
             entry['creator_version'] |= 3 << 8
             entry['external_attr'] = (mode & 0xFFFF) << 16L
         if deflater.compressed:
             entry['min_version'] = 20  # Version 2.0 supports deflated streams
             entry['general_flag'] = 2  # Max compression
-            entry['compression'] = JAR_DEFLATED
+            entry['compression'] = deflater.compress
         else:
             entry['min_version'] = 10  # Version 1.0 for stored streams
             entry['general_flag'] = 0
             entry['compression'] = JAR_STORED
         # January 1st, 2010. See bug 592369.
         entry['lastmod_date'] = ((2010 - 1980) << 9) | (1 << 5) | 1
         entry['lastmod_time'] = 0
         entry['crc32'] = deflater.crc32
@@ -654,26 +677,34 @@ class JarWriter(object):
 class Deflater(object):
     '''
     File-like interface to zlib compression. The data is actually not
     compressed unless the compressed form is smaller than the uncompressed
     data.
     '''
     def __init__(self, compress=True, compress_level=9):
         '''
-        Initialize a Deflater. The compress argument determines whether to
-        try to compress at all.
+        Initialize a Deflater. The compress argument determines the
+        compression to use: one of JAR_STORED, JAR_DEFLATED or JAR_BROTLI,
+        or True/False, which map to JAR_DEFLATED/JAR_STORED respectively.
         '''
         self._data = BytesIO()
+        if compress is True:
+            compress = JAR_DEFLATED
+        elif compress is False:
+            compress = JAR_STORED
         self.compress = compress
-        if compress:
-            self._deflater = zlib.compressobj(compress_level, zlib.DEFLATED,
-                                              -MAX_WBITS)
+        if compress in (JAR_DEFLATED, JAR_BROTLI):
+            if compress == JAR_DEFLATED:
+                self._deflater = zlib.compressobj(
+                    compress_level, zlib.DEFLATED, -MAX_WBITS)
+            else:
+                self._deflater = BrotliCompress()
             self._deflated = BytesIO()
         else:
+            assert compress == JAR_STORED
             self._deflater = None
 
     def write(self, data):
         '''
         Append a buffer to the Deflater.
         '''
         self._data.write(data)
         if self.compress:
@@ -754,16 +785,57 @@ class Deflater(object):
         compressed size smaller than the uncompressed size), or the
         uncompressed data otherwise.
         '''
         if self.compressed:
             return self._deflated.getvalue()
         return self._data.getvalue()
 
 
+class Brotli(object):
+    '''
+    Helper to compress and decompress brotli streams by shelling out to the
+    host-built `bro` command line tool.
+    '''
+    @staticmethod
+    @memoize
+    def brotli_tool():
+        from buildconfig import topobjdir, substs
+        return os.path.join(topobjdir, 'dist', 'host', 'bin',
+                            'bro' + substs.get('BIN_SUFFIX', ''))
+
+    @staticmethod
+    def run_brotli_tool(args, input):
+        proc = subprocess.Popen([Brotli.brotli_tool()] + args,
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)
+        (stdout, _) = proc.communicate(input)
+        ret = proc.wait()
+        if ret != 0:
+            raise Exception("Brotli compression failed")
+        return stdout
+
+    @staticmethod
+    def compress(data):
+        return Brotli.run_brotli_tool(['--window', '17'], data)
+
+    @staticmethod
+    def decompress(data):
+        return Brotli.run_brotli_tool(['--decompress'], data)
+
+
+class BrotliCompress(object):
+    '''
+    Compressor with a zlib.compressobj-like interface. Because compression
+    is delegated to the external `bro` tool, data is only buffered by
+    compress() and actually compressed when flush() is called.
+    '''
+    def __init__(self):
+        self._buf = BytesIO()
+
+    def compress(self, data):
+        self._buf.write(data)
+        return b''
+
+    def flush(self):
+        return Brotli.compress(self._buf.getvalue())
+
+
 class JarLog(dict):
     '''
     Helper to read the file Gecko generates when setting MOZ_JAR_LOG_FILE.
     The jar log is then available as a dict with the jar path as key (see
     canonicalize for more details on the key value), and the corresponding
     access log as a list value. Only the first access to a given member of
     a jar is stored.
     '''
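
With the mozjar.py changes above, callers select a compression type per archive or per entry by passing one of the JAR_* constants (True and False keep working for deflate and stored). A hypothetical usage sketch, assuming the `bro` host tool has been built so brotli entries can actually be produced; the jar name and contents are made up:

    from mozpack.mozjar import JarReader, JarWriter, JAR_BROTLI

    with JarWriter('example.jar', compress=JAR_BROTLI) as jar:
        # Uses the writer's default compression type (brotli here).
        jar.add('chrome/content/foo.js', 'var foo = 1;')
        # Per-entry override: force deflate, e.g. for files that l10n
        # repacks still need to be able to read.
        jar.add('chrome.manifest', 'manifest foo.manifest\n', compress=True)

    # Entries expose which compression they actually ended up with; tiny
    # payloads may end up stored when compressing doesn't make them smaller.
    for entry in JarReader('example.jar'):
        print entry.filename, entry.compress == JAR_BROTLI
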
--- a/python/mozbuild/mozpack/packager/l10n.py
+++ b/python/mozbuild/mozpack/packager/l10n.py
@@ -32,16 +32,17 @@ from mozpack.copier import (
 from mozpack.chrome.manifest import (
     ManifestLocale,
     ManifestEntryWithRelPath,
     is_manifest,
     ManifestChrome,
     Manifest,
 )
 from mozpack.errors import errors
+from mozpack.mozjar import JAR_DEFLATED
 from mozpack.packager.unpack import UnpackFinder
 from createprecomplete import generate_precomplete
 
 
 class LocaleManifestFinder(object):
     def __init__(self, finder):
         entries = self.entries = []
         bases = self.bases = []
@@ -246,24 +247,25 @@ def repack(source, l10n, extra_l10n={}, 
     if extra_l10n:
         finders = {
             '': l10n_finder,
         }
         for base, path in extra_l10n.iteritems():
             finders[base] = UnpackFinder(path)
         l10n_finder = ComposedFinder(finders)
     copier = FileCopier()
+    # L10n repacks can't use brotli (the `bro` tool is not available to
+    # them), so cap the compression type at deflate.
+    compress = min(app_finder.compressed, JAR_DEFLATED)
     if app_finder.kind == 'flat':
         formatter = FlatFormatter(copier)
     elif app_finder.kind == 'jar':
         formatter = JarFormatter(copier,
                                  optimize=app_finder.optimizedjars,
-                                 compress=app_finder.compressed)
+                                 compress=compress)
     elif app_finder.kind == 'omni':
         formatter = OmniJarFormatter(copier, app_finder.omnijar,
                                      optimize=app_finder.optimizedjars,
-                                     compress=app_finder.compressed,
+                                     compress=compress,
                                      non_resources=non_resources)
 
     with errors.accumulate():
         _repack(app_finder, l10n_finder, copier, formatter, non_chrome)
     copier.copy(source, skip_if_older=False)
     generate_precomplete(source)
--- a/python/mozbuild/mozpack/packager/unpack.py
+++ b/python/mozbuild/mozpack/packager/unpack.py
@@ -46,17 +46,17 @@ class UnpackFinder(BaseFinder):
         else:
             self._finder = FileFinder(source)
         self.base = self._finder.base
         self.files = FileRegistry()
         self.kind = 'flat'
         self.omnijar = None
         self.jarlogs = {}
         self.optimizedjars = False
-        self.compressed = True
+        self.compressed = False
 
         jars = set()
 
         for p, f in self._finder.find('*'):
             # Skip the precomplete file, which is generated at packaging time.
             if p == 'precomplete':
                 continue
             base = mozpath.dirname(p)
@@ -138,18 +138,17 @@ class UnpackFinder(BaseFinder):
     def _open_jar(self, path, file):
         '''
         Return a JarReader for the given BaseFile instance, keeping a log of
         the preloaded entries it has.
         '''
         jar = JarReader(fileobj=file.open())
         if jar.is_optimized:
             self.optimizedjars = True
-        if not any(f.compressed for f in jar):
-            self.compressed = False
+        self.compressed = max(self.compressed, jar.compression)
         if jar.last_preloaded:
             jarlog = jar.entries.keys()
             self.jarlogs[path] = jarlog[:jarlog.index(jar.last_preloaded) + 1]
         return jar
 
     def find(self, path):
         for p in self.files.match(path):
             yield p, self.files[p]
--- a/toolkit/mozapps/installer/packager.mk
+++ b/toolkit/mozapps/installer/packager.mk
@@ -40,17 +40,17 @@ stage-package: $(MOZ_PKG_MANIFEST) $(MOZ
 		$(addprefix --removals ,$(MOZ_PKG_REMOVALS)) \
 		$(if $(filter-out 0,$(MOZ_PKG_FATAL_WARNINGS)),,--ignore-errors) \
 		$(if $(MOZ_PACKAGER_MINIFY),--minify) \
 		$(if $(MOZ_PACKAGER_MINIFY_JS),--minify-js \
 		  $(addprefix --js-binary ,$(JS_BINARY)) \
 		) \
 		$(if $(JARLOG_DIR),$(addprefix --jarlog ,$(wildcard $(JARLOG_FILE_AB_CD)))) \
 		$(if $(OPTIMIZEJARS),--optimizejars) \
-		$(if $(DISABLE_JAR_COMPRESSION),--disable-compression) \
+		$(addprefix --compress ,$(JAR_COMPRESSION)) \
 		$(MOZ_PKG_MANIFEST) $(DIST) $(DIST)/$(MOZ_PKG_DIR)$(if $(MOZ_PKG_MANIFEST),,$(_BINPATH)) \
 		$(if $(filter omni,$(MOZ_PACKAGER_FORMAT)),$(if $(NON_OMNIJAR_FILES),--non-resource $(NON_OMNIJAR_FILES)))
 	$(PYTHON) $(MOZILLA_DIR)/toolkit/mozapps/installer/find-dupes.py $(DEFINES) $(ACDEFINES) $(MOZ_PKG_DUPEFLAGS) $(DIST)/$(MOZ_PKG_DIR)
 ifndef MOZ_THUNDERBIRD
 	# Package mozharness
 	$(call py_action,test_archive, \
 		mozharness \
 		$(ABS_DIST)/$(PKG_PATH)$(MOZHARNESS_PACKAGE))
--- a/toolkit/mozapps/installer/packager.py
+++ b/toolkit/mozapps/installer/packager.py
@@ -18,16 +18,17 @@ from mozpack.files import (
     FileFinder,
     File,
 )
 from mozpack.copier import (
     FileCopier,
     Jarrer,
 )
 from mozpack.errors import errors
+from mozpack.mozjar import JAR_BROTLI
 import mozpack.path as mozpath
 import buildconfig
 from argparse import ArgumentParser
 from createprecomplete import generate_precomplete
 import os
 from StringIO import StringIO
 import subprocess
 import mozinfo
@@ -208,19 +209,19 @@ def main():
     parser.add_argument('--js-binary',
                         help='Path to js binary. This is used to verify '
                         'minified JavaScript. If this is not defined, '
                         'minification verification will not be performed.')
     parser.add_argument('--jarlog', default='', help='File containing jar ' +
                         'access logs')
     parser.add_argument('--optimizejars', action='store_true', default=False,
                         help='Enable jar optimizations')
-    parser.add_argument('--disable-compression', action='store_false',
-                        dest='compress', default=True,
-                        help='Disable jar compression')
+    parser.add_argument('--compress', choices=('none', 'deflate', 'brotli'),
+                        default='deflate',
+                        help='Use given jar compression (default: deflate)')
     parser.add_argument('manifest', default=None, nargs='?',
                         help='Manifest file name')
     parser.add_argument('source', help='Source directory')
     parser.add_argument('destination', help='Destination directory')
     parser.add_argument('--non-resource', nargs='+', metavar='PATTERN',
                         default=[],
                         help='Extra files not to be considered as resources')
     args = parser.parse_args()
@@ -228,25 +229,31 @@ def main():
     defines = dict(buildconfig.defines)
     if args.ignore_errors:
         errors.ignore_errors()
 
     if args.defines:
         for name, value in [split_define(d) for d in args.defines]:
             defines[name] = value
 
+    compress = {
+        'none': False,
+        'deflate': True,
+        'brotli': JAR_BROTLI,
+    }[args.compress]
+
     copier = FileCopier()
     if args.format == 'flat':
         formatter = FlatFormatter(copier)
     elif args.format == 'jar':
-        formatter = JarFormatter(copier, compress=args.compress, optimize=args.optimizejars)
+        formatter = JarFormatter(copier, compress=compress, optimize=args.optimizejars)
     elif args.format == 'omni':
         formatter = OmniJarFormatter(copier,
                                      buildconfig.substs['OMNIJAR_NAME'],
-                                     compress=args.compress,
+                                     compress=compress,
                                      optimize=args.optimizejars,
                                      non_resources=args.non_resource)
     else:
         errors.fatal('Unknown format: %s' % args.format)
 
     # Adjust defines according to the requested format.
     if isinstance(formatter, OmniJarFormatter):
         defines['MOZ_OMNIJAR'] = 1
--- a/toolkit/mozapps/installer/upload-files.mk
+++ b/toolkit/mozapps/installer/upload-files.mk
@@ -330,17 +330,17 @@ ifndef MOZ_PKG_MANIFEST
 endif # MOZ_PKG_MANIFEST
 
 ifndef MOZ_PACKAGER_FORMAT
   MOZ_PACKAGER_FORMAT = $(error MOZ_PACKAGER_FORMAT is not set)
 endif
 
 ifneq (android,$(MOZ_WIDGET_TOOLKIT))
   OPTIMIZEJARS = 1
-  DISABLE_JAR_COMPRESSION = 1
+  JAR_COMPRESSION ?= none
 endif
 
 # A js binary is needed to perform verification of JavaScript minification.
 # We can only use the built binary when not cross-compiling. Environments
 # (such as release automation) can provide their own js binary to enable
 # verification when cross-compiling.
 ifndef JS_BINARY
   ifndef CROSS_COMPILE