Bug 1355661 - Add support for brotli streams in Jar archives. r?aklotz
Modern compression algorithms are better than zlib both in terms of
space and time. The jar format, used for e.g. omni.ja and addons,
could benefit from using such modern algorithms, but the format only
allows a limited set of compression algorithms.
However, the format in itself is flexible, in that it can be extended
with arbitrary compression algorithms. This breaks compatibility with
programs like unzip, obviously, but we've never promised the files
shipped with Firefox will always remain "valid" zips (which they already
aren't, but they currently work with most zip readers).
With this change, we allow those archives to contain brotli streams,
using an arbitrarily chosen large value for the compression type in the
Zip local file header. This only allows reading such archives, not
producing them, and, for now, support for brotli streams is kept
Nightly-only, until everything is pieced together and we're happy to
ship it.
--- a/modules/libjar/moz.build
+++ b/modules/libjar/moz.build
@@ -39,8 +39,12 @@ UNIFIED_SOURCES += [
'nsJARProtocolHandler.cpp',
'nsJARURI.cpp',
'nsZipArchive.cpp',
]
include('/ipc/chromium/chromium-config.mozbuild')
FINAL_LIBRARY = 'xul'
+
+LOCAL_INCLUDES += [
+ '/modules/brotli/dec',
+]
--- a/modules/libjar/nsJARInputStream.cpp
+++ b/modules/libjar/nsJARInputStream.cpp
@@ -2,16 +2,19 @@
/* nsJARInputStream.cpp
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsJARInputStream.h"
#include "zipstruct.h" // defines ZIP compression codes
+#ifdef MOZ_JAR_BROTLI
+#include "decode.h" // brotli
+#endif
#include "nsZipArchive.h"
#include "nsEscape.h"
#include "nsIFile.h"
#include "nsDebug.h"
#include <algorithm>
#if defined(XP_WIN)
#include <windows.h>
@@ -46,16 +49,25 @@ nsJARInputStream::InitFile(nsJAR *aJar,
rv = gZlibInit(&mZs);
NS_ENSURE_SUCCESS(rv, rv);
mMode = MODE_INFLATE;
mInCrc = item->CRC32();
mOutCrc = crc32(0L, Z_NULL, 0);
break;
+#ifdef MOZ_JAR_BROTLI
+ case MOZ_JAR_BROTLI:
+ mBrotliState = BrotliCreateState(nullptr, nullptr, nullptr);
+ mMode = MODE_BROTLI;
+ mInCrc = item->CRC32();
+ mOutCrc = crc32(0L, Z_NULL, 0);
+ break;
+#endif
+
default:
return NS_ERROR_NOT_IMPLEMENTED;
}
// Must keep handle to filepointer and mmap structure as long as we need access to the mmapped data
mFd = aJar->mZip->GetFD();
mZs.next_in = (Bytef *)aJar->mZip->GetData(item);
if (!mZs.next_in) {
@@ -161,16 +173,19 @@ nsJARInputStream::Available(uint64_t *_r
case MODE_CLOSED:
return NS_BASE_STREAM_CLOSED;
case MODE_DIRECTORY:
*_retval = mBuffer.Length();
break;
case MODE_INFLATE:
+#ifdef MOZ_JAR_BROTLI
+ case MODE_BROTLI:
+#endif
case MODE_COPY:
*_retval = mOutSize - mZs.total_out;
break;
}
return NS_OK;
}
@@ -190,16 +205,19 @@ MOZ_WIN_MEM_TRY_BEGIN
case MODE_CLOSED:
return NS_BASE_STREAM_CLOSED;
case MODE_DIRECTORY:
return ReadDirectory(aBuffer, aCount, aBytesRead);
case MODE_INFLATE:
+#ifdef MOZ_JAR_BROTLI
+ case MODE_BROTLI:
+#endif
if (mZs.total_out < mOutSize) {
rv = ContinueInflate(aBuffer, aCount, aBytesRead);
}
// be aggressive about releasing the file!
// note that sometimes, we will release mFd before we've finished
// deflating - this is because zlib buffers the input
if (mZs.avail_in == 0) {
mFd = nullptr;
@@ -241,52 +259,89 @@ nsJARInputStream::IsNonBlocking(bool *aN
}
NS_IMETHODIMP
nsJARInputStream::Close()
{
if (mMode == MODE_INFLATE) {
inflateEnd(&mZs);
}
+#ifdef MOZ_JAR_BROTLI
+ if (mMode == MODE_BROTLI) {
+ BrotliDestroyState(mBrotliState);
+ }
+#endif
mMode = MODE_CLOSED;
mFd = nullptr;
return NS_OK;
}
nsresult
nsJARInputStream::ContinueInflate(char* aBuffer, uint32_t aCount,
uint32_t* aBytesRead)
{
+ bool finished = false;
+
// No need to check the args, ::Read did that, but assert them at least
NS_ASSERTION(aBuffer,"aBuffer parameter must not be null");
NS_ASSERTION(aBytesRead,"aBytesRead parameter must not be null");
// Keep old total_out count
const uint32_t oldTotalOut = mZs.total_out;
// make sure we aren't reading too much
mZs.avail_out = std::min(aCount, (mOutSize-oldTotalOut));
mZs.next_out = (unsigned char*)aBuffer;
- // now inflate
- int zerr = inflate(&mZs, Z_SYNC_FLUSH);
- if ((zerr != Z_OK) && (zerr != Z_STREAM_END)) {
- nsZipArchive::sFileCorruptedReason = "nsJARInputStream: error while inflating";
- return NS_ERROR_FILE_CORRUPTED;
+#ifndef MOZ_JAR_BROTLI
+ MOZ_ASSERT(mMode == MODE_INFLATE);
+#endif
+ if (mMode == MODE_INFLATE) {
+ // now inflate
+ int zerr = inflate(&mZs, Z_SYNC_FLUSH);
+ if ((zerr != Z_OK) && (zerr != Z_STREAM_END)) {
+ nsZipArchive::sFileCorruptedReason = "nsJARInputStream: error while inflating";
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+ finished = (zerr == Z_STREAM_END);
+#ifdef MOZ_JAR_BROTLI
+ } else {
+ MOZ_ASSERT(mMode == MODE_BROTLI);
+ /* The brotli library wants size_t, but z_stream only contains
+ * unsigned int for avail_* and unsigned long for total_*.
+ * So use temporary stack values. */
+ size_t avail_in = mZs.avail_in;
+ size_t avail_out = mZs.avail_out;
+ size_t total_out = mZs.total_out;
+ BrotliResult result = BrotliDecompressStream(
+ &avail_in, const_cast<const unsigned char**>(&mZs.next_in),
+ &avail_out, &mZs.next_out, &total_out, mBrotliState);
+ /* We don't need to update avail_out, it's not used outside this
+ * function. */
+ mZs.total_out = total_out;
+ mZs.avail_in = avail_in;
+ if (result == BROTLI_RESULT_ERROR) {
+ nsZipArchive::sFileCorruptedReason = "nsJARInputStream: brotli decompression error";
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+ finished = (result == BROTLI_RESULT_SUCCESS);
+#endif
}
*aBytesRead = (mZs.total_out - oldTotalOut);
// Calculate the CRC on the output
mOutCrc = crc32(mOutCrc, (unsigned char*)aBuffer, *aBytesRead);
// be aggressive about ending the inflation
// for some reason we don't always get Z_STREAM_END
- if (zerr == Z_STREAM_END || mZs.total_out == mOutSize) {
- inflateEnd(&mZs);
+ if (finished || mZs.total_out == mOutSize) {
+ if (mMode == MODE_INFLATE) {
+ inflateEnd(&mZs);
+ }
// stop returning valid data as soon as we know we have a bad CRC
if (mOutCrc != mInCrc) {
nsZipArchive::sFileCorruptedReason = "nsJARInputStream: crc mismatch";
return NS_ERROR_FILE_CORRUPTED;
}
}
--- a/modules/libjar/nsJARInputStream.h
+++ b/modules/libjar/nsJARInputStream.h
@@ -7,27 +7,39 @@
#ifndef nsJARINPUTSTREAM_h__
#define nsJARINPUTSTREAM_h__
#include "nsIInputStream.h"
#include "nsJAR.h"
#include "nsTArray.h"
#include "mozilla/Attributes.h"
+#ifdef MOZ_JAR_BROTLI
+struct BrotliStateStruct;
+#endif
+
/*-------------------------------------------------------------------------
* Class nsJARInputStream declaration. This class defines the type of the
* object returned by calls to nsJAR::GetInputStream(filename) for the
* purpose of reading a file item out of a JAR file.
*------------------------------------------------------------------------*/
class nsJARInputStream final : public nsIInputStream
{
public:
- nsJARInputStream() :
- mOutSize(0), mInCrc(0), mOutCrc(0), mNameLen(0),
- mCurPos(0), mArrPos(0), mMode(MODE_NOTINITED)
+ nsJARInputStream()
+ : mOutSize(0)
+ , mInCrc(0)
+ , mOutCrc(0)
+#ifdef MOZ_JAR_BROTLI
+ , mBrotliState(nullptr)
+#endif
+ , mNameLen(0)
+ , mCurPos(0)
+ , mArrPos(0)
+ , mMode(MODE_NOTINITED)
{
memset(&mZs, 0, sizeof(z_stream));
}
NS_DECL_THREADSAFE_ISUPPORTS
NS_DECL_NSIINPUTSTREAM
// takes ownership of |fd|, even on failure
@@ -40,30 +52,36 @@ class nsJARInputStream final : public ns
private:
~nsJARInputStream() { Close(); }
RefPtr<nsZipHandle> mFd; // handle for reading
uint32_t mOutSize; // inflated size
uint32_t mInCrc; // CRC as provided by the zipentry
uint32_t mOutCrc; // CRC as calculated by me
z_stream mZs; // zip data structure
+#ifdef MOZ_JAR_BROTLI
+ BrotliStateStruct* mBrotliState; // Brotli decoder state
+#endif
/* For directory reading */
RefPtr<nsJAR> mJar; // string reference to zipreader
uint32_t mNameLen; // length of dirname
nsCString mBuffer; // storage for generated text of stream
uint32_t mCurPos; // Current position in buffer
uint32_t mArrPos; // current position within mArray
nsTArray<nsCString> mArray; // array of names in (zip) directory
typedef enum {
MODE_NOTINITED,
MODE_CLOSED,
MODE_DIRECTORY,
MODE_INFLATE,
+#ifdef MOZ_JAR_BROTLI
+ MODE_BROTLI,
+#endif
MODE_COPY
} JISMode;
JISMode mMode; // Modus of the stream
nsresult ContinueInflate(char* aBuf, uint32_t aCount, uint32_t* aBytesRead);
nsresult ReadDirectory(char* aBuf, uint32_t aCount, uint32_t* aBytesRead);
uint32_t CopyDataToBuffer(char* &aBuffer, uint32_t &aCount);
--- a/modules/libjar/nsZipArchive.cpp
+++ b/modules/libjar/nsZipArchive.cpp
@@ -7,16 +7,19 @@
* This module implements a simple archive extractor for the PKZIP format.
*
* The underlying nsZipArchive is NOT thread-safe. Do not pass references
* or pointers to it across thread boundaries.
*/
#define READTYPE int32_t
#include "zlib.h"
+#ifdef MOZ_JAR_BROTLI
+#include "decode.h" // brotli
+#endif
#include "nsISupportsUtils.h"
#include "prio.h"
#include "plstr.h"
#include "mozilla/Logging.h"
#include "mozilla/UniquePtrExtensions.h"
#include "stdlib.h"
#include "nsWildCard.h"
#include "nsZipArchive.h"
@@ -1163,40 +1166,54 @@ bool nsZipItem::IsSymlink()
}
#endif
nsZipCursor::nsZipCursor(nsZipItem *item, nsZipArchive *aZip, uint8_t* aBuf,
uint32_t aBufSize, bool doCRC)
: mItem(item)
, mBuf(aBuf)
, mBufSize(aBufSize)
+#ifdef MOZ_JAR_BROTLI
+ , mBrotliState(nullptr)
+#endif
, mCRC(0)
, mDoCRC(doCRC)
{
if (mItem->Compression() == DEFLATED) {
#ifdef DEBUG
nsresult status =
#endif
gZlibInit(&mZs);
NS_ASSERTION(status == NS_OK, "Zlib failed to initialize");
NS_ASSERTION(aBuf, "Must pass in a buffer for DEFLATED nsZipItem");
}
mZs.avail_in = item->Size();
mZs.next_in = (Bytef*)aZip->GetData(item);
+
+#ifdef MOZ_JAR_BROTLI
+ if (mItem->Compression() == MOZ_JAR_BROTLI) {
+ mBrotliState = BrotliCreateState(nullptr, nullptr, nullptr);
+ }
+#endif
if (doCRC)
mCRC = crc32(0L, Z_NULL, 0);
}
nsZipCursor::~nsZipCursor()
{
if (mItem->Compression() == DEFLATED) {
inflateEnd(&mZs);
}
+#ifdef MOZ_JAR_BROTLI
+ if (mItem->Compression() == MOZ_JAR_BROTLI) {
+ BrotliDestroyState(mBrotliState);
+ }
+#endif
}
uint8_t* nsZipCursor::ReadOrCopy(uint32_t *aBytesRead, bool aCopy) {
int zerr;
uint8_t *buf = nullptr;
bool verifyCRC = true;
if (!mZs.next_in)
@@ -1223,16 +1240,40 @@ MOZ_WIN_MEM_TRY_BEGIN
zerr = inflate(&mZs, Z_PARTIAL_FLUSH);
if (zerr != Z_OK && zerr != Z_STREAM_END)
return nullptr;
*aBytesRead = mZs.next_out - buf;
verifyCRC = (zerr == Z_STREAM_END);
break;
+#ifdef MOZ_JAR_BROTLI
+ case MOZ_JAR_BROTLI: {
+ buf = mBuf;
+ mZs.next_out = buf;
+ /* The brotli library wants size_t, but z_stream only contains
+ * unsigned int for avail_*. So use temporary stack values. */
+ size_t avail_out = mBufSize;
+ size_t avail_in = mZs.avail_in;
+ BrotliResult result = BrotliDecompressStream(
+ &avail_in, const_cast<const unsigned char**>(&mZs.next_in),
+ &avail_out, &mZs.next_out, nullptr, mBrotliState);
+ /* We don't need to update avail_out, it's not used outside this
+ * function. */
+ mZs.avail_in = avail_in;
+
+ if (result == BROTLI_RESULT_ERROR) {
+ return nullptr;
+ }
+
+ *aBytesRead = mZs.next_out - buf;
+ verifyCRC = (result == BROTLI_RESULT_SUCCESS);
+ break;
+ }
+#endif
default:
return nullptr;
}
if (mDoCRC) {
mCRC = crc32(mCRC, (const unsigned char*)buf, *aBytesRead);
if (verifyCRC && mCRC != mItem->CRC32())
return nullptr;
@@ -1249,17 +1290,21 @@ nsZipItemPtr_base::nsZipItemPtr_base(nsZ
// make sure the ziparchive hangs around
mZipHandle = aZip->GetFD();
nsZipItem* item = aZip->GetItem(aEntryName);
if (!item)
return;
uint32_t size = 0;
- if (item->Compression() == DEFLATED) {
+ bool compressed = (item->Compression() == DEFLATED);
+#ifdef MOZ_JAR_BROTLI
+ compressed |= (item->Compression() == MOZ_JAR_BROTLI);
+#endif
+ if (compressed) {
size = item->RealSize();
mAutoBuf = MakeUniqueFallible<uint8_t[]>(size);
if (!mAutoBuf) {
return;
}
}
nsZipCursor cursor(item, aZip, mAutoBuf.get(), size, doCRC);
--- a/modules/libjar/nsZipArchive.h
+++ b/modules/libjar/nsZipArchive.h
@@ -32,16 +32,19 @@
}
#else
#define MOZ_WIN_MEM_TRY_BEGIN {
#define MOZ_WIN_MEM_TRY_CATCH(cmd) }
#endif
class nsZipFind;
struct PRFileDesc;
+#ifdef MOZ_JAR_BROTLI
+struct BrotliStateStruct;
+#endif
/**
* This file defines some of the basic structures used by libjar to
* read Zip files. It makes use of zlib in order to do the decompression.
*
* A few notes on the classes/structs:
* nsZipArchive represents a single Zip file, and maintains an index
* of all the items in the file.
@@ -309,16 +312,19 @@ public:
private:
/* Actual implementation for both Read and Copy above */
uint8_t* ReadOrCopy(uint32_t *aBytesRead, bool aCopy);
nsZipItem *mItem;
uint8_t *mBuf;
uint32_t mBufSize;
z_stream mZs;
+#ifdef MOZ_JAR_BROTLI
+ BrotliStateStruct* mBrotliState;
+#endif
uint32_t mCRC;
bool mDoCRC;
};
/**
* nsZipItemPtr - a RAII convenience class for reading the individual items in a zip.
* It reads whole files and does zero-copy IO for stored files. A buffer is allocated
* for decompression.
--- a/modules/libjar/zipstruct.h
+++ b/modules/libjar/zipstruct.h
@@ -97,11 +97,14 @@ typedef struct ZipEnd_
#define REDUCED1 2
#define REDUCED2 3
#define REDUCED3 4
#define REDUCED4 5
#define IMPLODED 6
#define TOKENIZED 7
#define DEFLATED 8
#define UNSUPPORTED 0xFF
-
+/* non-standard extension */
+#ifdef NIGHTLY_BUILD
+#define MOZ_JAR_BROTLI 0x81
+#endif
#endif /* _zipstruct_h */