new file mode 100644
--- /dev/null
+++ b/toolkit/components/url-classifier/RiceDeltaDecoder.cpp
@@ -0,0 +1,308 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "RiceDeltaDecoder.h"
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////
+// BitBuffer is copied and modified from webrtc/base/bitbuffer.h
+//
+
+/*
+ * Copyright 2015 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree (webrtc/base/bitbuffer.h/cc). An additional intellectual property
+ * rights grant can be found in the file PATENTS. All contributing
+ * project authors may be found in the AUTHORS file in the root of
+ * the source tree.
+ */
+
+class BitBuffer {
+ public:
+ BitBuffer(const uint8_t* bytes, size_t byte_count);
+
+ // Gets the current offset, in bytes/bits, from the start of the buffer. The
+ // bit offset is the offset into the current byte, in the range [0,7].
+ void GetCurrentOffset(size_t* out_byte_offset, size_t* out_bit_offset);
+
+ // The remaining bits in the byte buffer.
+ uint64_t RemainingBitCount() const;
+
+ // Reads byte-sized values from the buffer. Returns false if there isn't
+ // enough data left for the specified type.
+ bool ReadUInt8(uint8_t* val);
+ bool ReadUInt16(uint16_t* val);
+ bool ReadUInt32(uint32_t* val);
+
+ // Reads bit-sized values from the buffer. Returns false if there isn't enough
+ // data left for the specified bit count..
+ bool ReadBits(uint32_t* val, size_t bit_count);
+
+ // Peeks bit-sized values from the buffer. Returns false if there isn't enough
+ // data left for the specified number of bits. Doesn't move the current
+ // offset.
+ bool PeekBits(uint32_t* val, size_t bit_count);
+
+ // Reads the exponential golomb encoded value at the current offset.
+ // Exponential golomb values are encoded as:
+ // 1) x = source val + 1
+ // 2) In binary, write [countbits(x) - 1] 1s, then x
+ // To decode, we count the number of leading 1 bits, read that many + 1 bits,
+ // and increment the result by 1.
+ // Returns false if there isn't enough data left for the specified type, or if
+ // the value wouldn't fit in a uint32_t.
+ bool ReadExponentialGolomb(uint32_t* val);
+ // Reads signed exponential golomb values at the current offset. Signed
+ // exponential golomb values are just the unsigned values mapped to the
+ // sequence 0, 1, -1, 2, -2, etc. in order.
+ bool ReadSignedExponentialGolomb(int32_t* val);
+
+ // Moves current position |byte_count| bytes forward. Returns false if
+ // there aren't enough bytes left in the buffer.
+ bool ConsumeBytes(size_t byte_count);
+ // Moves current position |bit_count| bits forward. Returns false if
+ // there aren't enough bits left in the buffer.
+ bool ConsumeBits(size_t bit_count);
+
+ // Sets the current offset to the provied byte/bit offsets. The bit
+ // offset is from the given byte, in the range [0,7].
+ bool Seek(size_t byte_offset, size_t bit_offset);
+
+ protected:
+ const uint8_t* const bytes_;
+ // The total size of |bytes_|.
+ size_t byte_count_;
+ // The current offset, in bytes, from the start of |bytes_|.
+ size_t byte_offset_;
+ // The current offset, in bits, into the current byte.
+ size_t bit_offset_;
+};
+
+} // end of unnamed namespace
+
+static void
+ReverseByte(uint8_t& b)
+{
+ b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
+ b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
+ b = (b & 0xAA) >> 1 | (b & 0x55) << 1;
+}
+
+namespace mozilla {
+namespace safebrowsing {
+
+RiceDeltaDecoder::RiceDeltaDecoder(uint8_t* aEncodedData,
+ size_t aEncodedDataSize)
+ : mEncodedData(aEncodedData)
+ , mEncodedDataSize(aEncodedDataSize)
+{
+}
+
+bool
+RiceDeltaDecoder::Decode(uint32_t aRiceParameter,
+ uint32_t aFirstValue,
+ uint32_t aNumEntries,
+ uint32_t* aDecodedData)
+{
+ // Reverse each byte before reading bits from the byte buffer.
+ for (size_t i = 0; i < mEncodedDataSize; i++) {
+ ReverseByte(mEncodedData[i]);
+ }
+
+ BitBuffer bitBuffer(mEncodedData, mEncodedDataSize);
+
+ // q = quotient
+ // r = remainder
+ // k = RICE parameter
+ const uint32_t k = aRiceParameter;
+ uint32_t previous = aFirstValue;
+ for (uint32_t i = 0; i < aNumEntries; i++) {
+ // Read the quotient of N.
+ uint32_t q;
+ if (!bitBuffer.ReadExponentialGolomb(&q)) {
+ LOG(("Encoded data underflow!"));
+ return false;
+ }
+
+ // Read the remainder of N, one bit at a time.
+ uint32_t r = 0;
+ for (uint32_t j = 0; j < k; j++) {
+ uint32_t b = 0;
+ if (!bitBuffer.ReadBits(&b, 1)) {
+ // Insufficient bits. Just leave them as zeros.
+ break;
+ }
+ // Add the bit to the right position so that it's in Little Endian order.
+ r |= b << j;
+ }
+
+ // Caculate N from q,r,k.
+ aDecodedData[i] = ((q << k) + r) + previous;
+ previous = aDecodedData[i];
+ }
+
+ return true;
+}
+
+} // end of namespace mozilla
+} // end of namespace safebrowsing
+
+namespace {
+//////////////////////////////////////////////////////////////////////////
+// The BitBuffer impl is copied and modified from webrtc/base/bitbuffer.cc
+//
+
+// Returns the lowest (right-most) |bit_count| bits in |byte|.
+uint8_t LowestBits(uint8_t byte, size_t bit_count) {
+ return byte & ((1 << bit_count) - 1);
+}
+
+// Returns the highest (left-most) |bit_count| bits in |byte|, shifted to the
+// lowest bits (to the right).
+uint8_t HighestBits(uint8_t byte, size_t bit_count) {
+ MOZ_ASSERT(bit_count < 8u);
+ uint8_t shift = 8 - static_cast<uint8_t>(bit_count);
+ uint8_t mask = 0xFF << shift;
+ return (byte & mask) >> shift;
+}
+
+BitBuffer::BitBuffer(const uint8_t* bytes, size_t byte_count)
+ : bytes_(bytes), byte_count_(byte_count), byte_offset_(), bit_offset_() {
+ MOZ_ASSERT(static_cast<uint64_t>(byte_count_) <=
+ std::numeric_limits<uint32_t>::max());
+}
+
+uint64_t BitBuffer::RemainingBitCount() const {
+ return (static_cast<uint64_t>(byte_count_) - byte_offset_) * 8 - bit_offset_;
+}
+
+bool BitBuffer::ReadUInt8(uint8_t* val) {
+ uint32_t bit_val;
+ if (!ReadBits(&bit_val, sizeof(uint8_t) * 8)) {
+ return false;
+ }
+ MOZ_ASSERT(bit_val <= std::numeric_limits<uint8_t>::max());
+ *val = static_cast<uint8_t>(bit_val);
+ return true;
+}
+
+bool BitBuffer::ReadUInt16(uint16_t* val) {
+ uint32_t bit_val;
+ if (!ReadBits(&bit_val, sizeof(uint16_t) * 8)) {
+ return false;
+ }
+ MOZ_ASSERT(bit_val <= std::numeric_limits<uint16_t>::max());
+ *val = static_cast<uint16_t>(bit_val);
+ return true;
+}
+
+bool BitBuffer::ReadUInt32(uint32_t* val) {
+ return ReadBits(val, sizeof(uint32_t) * 8);
+}
+
+bool BitBuffer::PeekBits(uint32_t* val, size_t bit_count) {
+ if (!val || bit_count > RemainingBitCount() || bit_count > 32) {
+ return false;
+ }
+ const uint8_t* bytes = bytes_ + byte_offset_;
+ size_t remaining_bits_in_current_byte = 8 - bit_offset_;
+ uint32_t bits = LowestBits(*bytes++, remaining_bits_in_current_byte);
+ // If we're reading fewer bits than what's left in the current byte, just
+ // return the portion of this byte that we need.
+ if (bit_count < remaining_bits_in_current_byte) {
+ *val = HighestBits(bits, bit_offset_ + bit_count);
+ return true;
+ }
+ // Otherwise, subtract what we've read from the bit count and read as many
+ // full bytes as we can into bits.
+ bit_count -= remaining_bits_in_current_byte;
+ while (bit_count >= 8) {
+ bits = (bits << 8) | *bytes++;
+ bit_count -= 8;
+ }
+ // Whatever we have left is smaller than a byte, so grab just the bits we need
+ // and shift them into the lowest bits.
+ if (bit_count > 0) {
+ bits <<= bit_count;
+ bits |= HighestBits(*bytes, bit_count);
+ }
+ *val = bits;
+ return true;
+}
+
+bool BitBuffer::ReadBits(uint32_t* val, size_t bit_count) {
+ return PeekBits(val, bit_count) && ConsumeBits(bit_count);
+}
+
+bool BitBuffer::ConsumeBytes(size_t byte_count) {
+ return ConsumeBits(byte_count * 8);
+}
+
+bool BitBuffer::ConsumeBits(size_t bit_count) {
+ if (bit_count > RemainingBitCount()) {
+ return false;
+ }
+
+ byte_offset_ += (bit_offset_ + bit_count) / 8;
+ bit_offset_ = (bit_offset_ + bit_count) % 8;
+ return true;
+}
+
+bool BitBuffer::ReadExponentialGolomb(uint32_t* val) {
+ if (!val) {
+ return false;
+ }
+
+ *val = 0;
+
+ // Count the number of leading 0 bits by peeking/consuming them one at a time.
+ size_t one_bit_count = 0;
+ uint32_t peeked_bit;
+ while (PeekBits(&peeked_bit, 1) && peeked_bit == 1) {
+ one_bit_count++;
+ ConsumeBits(1);
+ }
+ if (!ConsumeBits(1)) {
+ return false; // The stream is incorrectly terminated at '1'.
+ }
+
+ *val = one_bit_count;
+ return true;
+}
+
+bool BitBuffer::ReadSignedExponentialGolomb(int32_t* val) {
+ uint32_t unsigned_val;
+ if (!ReadExponentialGolomb(&unsigned_val)) {
+ return false;
+ }
+ if ((unsigned_val & 1) == 0) {
+ *val = -static_cast<int32_t>(unsigned_val / 2);
+ } else {
+ *val = (unsigned_val + 1) / 2;
+ }
+ return true;
+}
+
+void BitBuffer::GetCurrentOffset(
+ size_t* out_byte_offset, size_t* out_bit_offset) {
+ MOZ_ASSERT(out_byte_offset != NULL);
+ MOZ_ASSERT(out_bit_offset != NULL);
+ *out_byte_offset = byte_offset_;
+ *out_bit_offset = bit_offset_;
+}
+
+bool BitBuffer::Seek(size_t byte_offset, size_t bit_offset) {
+ if (byte_offset > byte_count_ || bit_offset > 7 ||
+ (byte_offset == byte_count_ && bit_offset > 0)) {
+ return false;
+ }
+ byte_offset_ = byte_offset;
+ bit_offset_ = bit_offset;
+ return true;
+}
+}
+
new file mode 100644
--- /dev/null
+++ b/toolkit/components/url-classifier/RiceDeltaDecoder.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RICE_DELTA_DECODER_H
+#define RICE_DELTA_DECODER_H
+
+namespace mozilla {
+namespace safebrowsing {
+
+class RiceDeltaDecoder {
+public:
+ // This decoder is tailored for safebrowsing v4, including the
+ // bit reading order and how the remainder part is interpreted.
+ // The caller just needs to feed the byte stream received from
+ // network directly. Note that the input buffer must be mutable
+ // since the decoder will do some pre-processing before decoding.
+ RiceDeltaDecoder(uint8_t* aEncodedData, size_t aEncodedDataSize);
+
+ bool Decode(uint32_t aRiceParameter,
+ uint32_t aFirstValue,
+ uint32_t aNumEntries,
+ uint32_t* aDecodedData);
+
+private:
+ uint8_t* mEncodedData;
+ size_t mEncodedDataSize;
+};
+
+} // namespace safebrowsing
+} // namespace mozilla
+
+#endif // UPDATE_V4_DECODER_H
--- a/toolkit/components/url-classifier/moz.build
+++ b/toolkit/components/url-classifier/moz.build
@@ -26,16 +26,17 @@ UNIFIED_SOURCES += [
'LookupCache.cpp',
'LookupCacheV4.cpp',
'nsCheckSummedOutputStream.cpp',
'nsUrlClassifierDBService.cpp',
'nsUrlClassifierProxies.cpp',
'nsUrlClassifierUtils.cpp',
'protobuf/safebrowsing.pb.cc',
'ProtocolParser.cpp',
+ 'RiceDeltaDecoder.cpp',
]
# define conflicting LOG() macros
SOURCES += [
'nsUrlClassifierPrefixSet.cpp',
'nsUrlClassifierStreamUpdater.cpp',
'VariableLengthPrefixSet.cpp',
]
new file mode 100644
--- /dev/null
+++ b/toolkit/components/url-classifier/tests/gtest/TestRiceDeltaDecoder.cpp
@@ -0,0 +1,159 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+#include "gtest/gtest.h"
+#include "RiceDeltaDecoder.h"
+#include "mozilla/ArrayUtils.h"
+
+using namespace mozilla;
+using namespace mozilla::safebrowsing;
+
+struct TestingData {
+ std::vector<uint32_t> mExpectedDecoded;
+ std::vector<uint8_t> mEncoded;
+ uint32_t mRiceParameter;
+};
+
+static bool runOneTest(TestingData& aData);
+
+// In this batch of tests, the encoded data would be like
+// what we originally receive from the network. See comment
+// in |runOneTest| for more detail.
+TEST(RiceDeltaDecoder, Empty) {
+
+ // The following structure and testing data is copied from Chromium source code:
+ //
+ // https://chromium.googlesource.com/chromium/src.git/+/950f9975599768b6a08c7146cb4befa161be87aa/components/safe_browsing_db/v4_rice_unittest.cc#75
+ //
+ // and will be translated to our own testing format.
+
+ struct RiceDecodingTestInfo {
+ uint32_t mRiceParameter;
+ std::vector<uint32_t> mDeltas;
+ std::string mEncoded;
+
+ RiceDecodingTestInfo(uint32_t aRiceParameter,
+ const std::vector<uint32_t>& aDeltas,
+ const std::string& aEncoded)
+ : mRiceParameter(aRiceParameter)
+ , mDeltas(aDeltas)
+ , mEncoded(aEncoded)
+ {
+ }
+ };
+
+ // Copyright 2016 The Chromium Authors. All rights reserved.
+ // Use of this source code is governed by a BSD-style license that can be
+ // found in the media/webrtc/trunk/webrtc/LICENSE.
+
+ // ----- Start of Chromium test code ----
+ const std::vector<RiceDecodingTestInfo> TESTING_DATA_CHROMIUM = {
+ RiceDecodingTestInfo(2, {15, 9}, "\xf7\x2"),
+ RiceDecodingTestInfo(
+ 28, {1777762129, 2093280223, 924369848},
+ "\xbf\xa8\x3f\xfb\xfc\xfb\x5e\x27\xe6\xc3\x1d\xc6\x38"),
+ RiceDecodingTestInfo(
+ 28, {62763050, 1046523781, 192522171, 1800511020, 4442775, 582142548},
+ "\x54\x60\x7b\xe7\x0a\x5f\xc1\xdc\xee\x69\xde"
+ "\xfe\x58\x3c\xa3\xd6\xa5\xf2\x10\x8c\x4a\x59"
+ "\x56\x00"),
+ RiceDecodingTestInfo(
+ 28, {26067715, 344823336, 8420095, 399843890, 95029378, 731622412,
+ 35811335, 1047558127, 1117722715, 78698892},
+ "\x06\x86\x1b\x23\x14\xcb\x46\xf2\xaf\x07\x08\xc9\x88\x54\x1f\x41\x04"
+ "\xd5\x1a\x03\xeb\xe6\x3a\x80\x13\x91\x7b\xbf\x83\xf3\xb7\x85\xf1\x29"
+ "\x18\xb3\x61\x09"),
+ RiceDecodingTestInfo(
+ 27, {225846818, 328287420, 166748623, 29117720, 552397365, 350353215,
+ 558267528, 4738273, 567093445, 28563065, 55077698, 73091685,
+ 339246010, 98242620, 38060941, 63917830, 206319759, 137700744},
+ "\x89\x98\xd8\x75\xbc\x44\x91\xeb\x39\x0c\x3e\x30\x9a\x78\xf3\x6a\xd4"
+ "\xd9\xb1\x9f\xfb\x70\x3e\x44\x3e\xa3\x08\x67\x42\xc2\x2b\x46\x69\x8e"
+ "\x3c\xeb\xd9\x10\x5a\x43\x9a\x32\xa5\x2d\x4e\x77\x0f\x87\x78\x20\xb6"
+ "\xab\x71\x98\x48\x0c\x9e\x9e\xd7\x23\x0c\x13\x43\x2c\xa9\x01"),
+ RiceDecodingTestInfo(
+ 28, {339784008, 263128563, 63871877, 69723256, 826001074, 797300228,
+ 671166008, 207712688},
+ std::string("\x21\xc5\x02\x91\xf9\x82\xd7\x57\xb8\xe9\x3c\xf0\xc8\x4f"
+ "\xe8\x64\x8d\x77\x62\x04\xd6\x85\x3f\x1c\x97\x00\x04\x1b"
+ "\x17\xc6",
+ 30)),
+ RiceDecodingTestInfo(
+ 28, {471820069, 196333855, 855579133, 122737976, 203433838, 85354544,
+ 1307949392, 165938578, 195134475, 553930435, 49231136},
+ "\x95\x9c\x7d\xb0\x8f\xe8\xd9\xbd\xfe\x8c\x7f\x81\x53\x0d\x75\xdc\x4e"
+ "\x40\x18\x0c\x9a\x45\x3d\xa8\xdc\xfa\x26\x59\x40\x9e\x16\x08\x43\x77"
+ "\xc3\x4e\x04\x01\xa4\xe6\x5d\x00"),
+ RiceDecodingTestInfo(
+ 27, {87336845, 129291033, 30906211, 433549264, 30899891, 53207875,
+ 11959529, 354827862, 82919275, 489637251, 53561020, 336722992,
+ 408117728, 204506246, 188216092, 9047110, 479817359, 230317256},
+ "\x1a\x4f\x69\x2a\x63\x9a\xf6\xc6\x2e\xaf\x73\xd0\x6f\xd7\x31\xeb\x77"
+ "\x1d\x43\xe3\x2b\x93\xce\x67\x8b\x59\xf9\x98\xd4\xda\x4f\x3c\x6f\xb0"
+ "\xe8\xa5\x78\x8d\x62\x36\x18\xfe\x08\x1e\x78\xd8\x14\x32\x24\x84\x61"
+ "\x1c\xf3\x37\x63\xc4\xa0\x88\x7b\x74\xcb\x64\xc8\x5c\xba\x05"),
+ RiceDecodingTestInfo(
+ 28, {297968956, 19709657, 259702329, 76998112, 1023176123, 29296013,
+ 1602741145, 393745181, 177326295, 55225536, 75194472},
+ "\xf1\x94\x0a\x87\x6c\x5f\x96\x90\xe3\xab\xf7\xc0\xcb\x2d\xe9\x76\xdb"
+ "\xf8\x59\x63\xc1\x6f\x7c\x99\xe3\x87\x5f\xc7\x04\xde\xb9\x46\x8e\x54"
+ "\xc0\xac\x4a\x03\x0d\x6c\x8f\x00"),
+ RiceDecodingTestInfo(
+ 28, {532220688, 780594691, 436816483, 163436269, 573044456, 1069604,
+ 39629436, 211410997, 227714491, 381562898, 75610008, 196754597,
+ 40310339, 15204118, 99010842},
+ "\x41\x2c\xe4\xfe\x06\xdc\x0d\xbd\x31\xa5\x04\xd5\x6e\xdd\x9b\x43\xb7"
+ "\x3f\x11\x24\x52\x10\x80\x4f\x96\x4b\xd4\x80\x67\xb2\xdd\x52\xc9\x4e"
+ "\x02\xc6\xd7\x60\xde\x06\x92\x52\x1e\xdd\x35\x64\x71\x26\x2c\xfe\xcf"
+ "\x81\x46\xb2\x79\x01"),
+ RiceDecodingTestInfo(
+ 28, {219354713, 389598618, 750263679, 554684211, 87381124, 4523497,
+ 287633354, 801308671, 424169435, 372520475, 277287849},
+ "\xb2\x2c\x26\x3a\xcd\x66\x9c\xdb\x5f\x07\x2e\x6f\xe6\xf9\x21\x10\x52"
+ "\xd5\x94\xf4\x82\x22\x48\xf9\x9d\x24\xf6\xff\x2f\xfc\x6d\x3f\x21\x65"
+ "\x1b\x36\x34\x56\xea\xc4\x21\x00"),
+ };
+
+ // ----- End of Chromium test code ----
+
+ for (auto tdc : TESTING_DATA_CHROMIUM) {
+ // Populate chromium testing data to our native testing data struct.
+ TestingData d;
+
+ d.mRiceParameter = tdc.mRiceParameter; // Populate rice parameter.
+
+ // Populate encoded data from std::string to vector<uint8>.
+ d.mEncoded.resize(tdc.mEncoded.size());
+ memcpy(&d.mEncoded[0], tdc.mEncoded.c_str(), tdc.mEncoded.size());
+
+ // Populate deltas to expected decoded data. The first value would be just
+ // set to an arbitrary value, say 7, to avoid any assumption to the
+ // first value in the implementation.
+ d.mExpectedDecoded.resize(tdc.mDeltas.size() + 1);
+ for (size_t i = 0; i < d.mExpectedDecoded.size(); i++) {
+ if (0 == i) {
+ d.mExpectedDecoded[i] = 7; // "7" is an arbitrary starting value
+ } else {
+ d.mExpectedDecoded[i] = d.mExpectedDecoded[i - 1] + tdc.mDeltas[i - 1];
+ }
+ }
+
+ ASSERT_TRUE(runOneTest(d));
+ }
+}
+
+static bool
+runOneTest(TestingData& aData)
+{
+ RiceDeltaDecoder decoder(&aData.mEncoded[0], aData.mEncoded.size());
+
+ std::vector<uint32_t> decoded(aData.mExpectedDecoded.size());
+
+ decoded[0] = aData.mExpectedDecoded[0];
+ bool rv = decoder.Decode(aData.mRiceParameter,
+ decoded[0], // first value.
+ decoded.size() - 1, // # of entries (first value not included).
+ &decoded[1]);
+
+ return rv && decoded == aData.mExpectedDecoded;
+}
--- a/toolkit/components/url-classifier/tests/gtest/moz.build
+++ b/toolkit/components/url-classifier/tests/gtest/moz.build
@@ -6,16 +6,17 @@
LOCAL_INCLUDES += [
'../..',
]
UNIFIED_SOURCES += [
'TestChunkSet.cpp',
'TestPerProviderDirectory.cpp',
+ 'TestRiceDeltaDecoder.cpp',
'TestSafebrowsingHash.cpp',
'TestSafeBrowsingProtobuf.cpp',
'TestTable.cpp',
'TestUrlClassifierTableUpdateV4.cpp',
'TestUrlClassifierUtils.cpp',
'TestVariableLengthPrefixSet.cpp',
]