Bug 1285848 - Part 2: Request and parse RICE encoded prefix by default. r=francois
MozReview-Commit-ID: Cd0lT5VTM7t
--- a/toolkit/components/url-classifier/HashStore.cpp
+++ b/toolkit/components/url-classifier/HashStore.cpp
@@ -163,16 +163,36 @@ TableUpdateV2::NewSubComplete(uint32_t a
}
void
TableUpdateV4::NewPrefixes(int32_t aSize, std::string& aPrefixes)
{
NS_ENSURE_TRUE_VOID(aPrefixes.size() % aSize == 0);
NS_ENSURE_TRUE_VOID(!mPrefixesMap.Get(aSize));
+ if (LOG_ENABLED() && 4 == aSize) {
+ int numOfPrefixes = aPrefixes.size() / 4;
+ uint32_t* p = (uint32_t*)aPrefixes.c_str();
+
+ // Dump the first/last 10 fixed-length prefixes for debugging.
+ LOG(("* The first 10 (maximum) fixed-length prefixes: "));
+ for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
+ uint8_t* c = (uint8_t*)&p[i];
+ LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
+ }
+
+ LOG(("* The last 10 (maximum) fixed-length prefixes: "));
+ for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
+ uint8_t* c = (uint8_t*)&p[i];
+ LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
+ }
+
+ LOG(("---- %d fixed-length prefixes in total.", aPrefixes.size() / aSize));
+ }
+
PrefixStdString* prefix = new PrefixStdString(aPrefixes);
mPrefixesMap.Put(aSize, prefix);
}
void
TableUpdateV4::NewRemovalIndices(const uint32_t* aIndices, size_t aNumOfIndices)
{
for (size_t i = 0; i < aNumOfIndices; i++) {
--- a/toolkit/components/url-classifier/ProtocolParser.cpp
+++ b/toolkit/components/url-classifier/ProtocolParser.cpp
@@ -8,16 +8,18 @@
#include "nsNetCID.h"
#include "mozilla/Logging.h"
#include "prnetdb.h"
#include "prprf.h"
#include "nsUrlClassifierUtils.h"
#include "nsPrintfCString.h"
#include "mozilla/Base64.h"
+#include "RiceDeltaDecoder.h"
+#include "mozilla/EndianUtils.h"
// MOZ_LOG=UrlClassifierProtocolParser:5
mozilla::LazyLogModule gUrlClassifierProtocolParserLog("UrlClassifierProtocolParser");
#define PARSER_LOG(args) MOZ_LOG(gUrlClassifierProtocolParserLog, mozilla::LogLevel::Debug, args)
namespace mozilla {
namespace safebrowsing {
@@ -903,18 +905,18 @@ ProtocolParserProtobuf::ProcessAdditionO
break;
case RAW:
ret = (aIsAddition ? ProcessRawAddition(aTableUpdate, update)
: ProcessRawRemoval(aTableUpdate, update));
break;
case RICE:
- // Not implemented yet (see bug 1285848),
- NS_WARNING("Encoded table update is not supported yet.");
+ ret = (aIsAddition ? ProcessEncodedAddition(aTableUpdate, update)
+ : ProcessEncodedRemoval(aTableUpdate, update));
break;
}
}
return ret;
}
nsresult
@@ -972,11 +974,137 @@ ProtocolParserProtobuf::ProcessRawRemova
PARSER_LOG((" - # of removal: %d", indices.size()));
aTableUpdate.NewRemovalIndices((const uint32_t*)indices.data(),
indices.size());
return NS_OK;
}
+static nsresult
+DoRiceDeltaDecode(const RiceDeltaEncoding& aEncoding,
+ nsTArray<uint32_t>& aDecoded)
+{
+ // Sanity check of the encoding info.
+ if (!aEncoding.has_first_value() ||
+ !aEncoding.has_rice_parameter() ||
+ !aEncoding.has_num_entries() ||
+ !aEncoding.has_encoded_data()) {
+ PARSER_LOG(("The encoding info is incomplete."));
+ return NS_ERROR_FAILURE;
+ }
+
+ PARSER_LOG(("* Encoding info:"));
+ PARSER_LOG((" - First value: %d", aEncoding.first_value()));
+ PARSER_LOG((" - Num of entries: %d", aEncoding.num_entries()));
+ PARSER_LOG((" - Rice parameter: %d", aEncoding.rice_parameter()));
+
+ // Set up the input buffer. Note that the bits should be read
+ // from LSB to MSB so that we in-place reverse the bits before
+ // feeding to the decoder.
+ auto encoded = const_cast<RiceDeltaEncoding&>(aEncoding).mutable_encoded_data();
+ RiceDeltaDecoder decoder((uint8_t*)encoded->c_str(), encoded->size());
+
+ // Setup the output buffer. The "first value" is included in
+ // the output buffer.
+ aDecoded.SetLength(aEncoding.num_entries() + 1);
+ aDecoded[0] = aEncoding.first_value();
+
+ // Decode!
+ bool rv = decoder.Decode(aEncoding.rice_parameter(),
+ aEncoding.first_value(), // first value.
+ aEncoding.num_entries(), // # of entries (first value not included).
+ &aDecoded[1]);
+
+ NS_ENSURE_TRUE(rv, NS_ERROR_FAILURE);
+
+ return NS_OK;
+}
+
+nsresult
+ProtocolParserProtobuf::ProcessEncodedAddition(TableUpdateV4& aTableUpdate,
+ const ThreatEntrySet& aAddition)
+{
+ if (!aAddition.has_rice_hashes()) {
+ PARSER_LOG(("* No rice encoded addition."));
+ return NS_OK;
+ }
+
+ nsTArray<uint32_t> decoded;
+ nsresult rv = DoRiceDeltaDecode(aAddition.rice_hashes(), decoded);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // Say we have the following raw prefixes
+ // BE LE
+ // 00 00 00 01 1 16777216
+ // 00 00 02 00 512 131072
+ // 00 03 00 00 196608 768
+ // 04 00 00 00 67108864 4
+ //
+ // which can be treated as uint32 (big-endian) sorted in increasing order:
+ //
+ // [1, 512, 196608, 67108864]
+ //
+ // According to https://developers.google.com/safe-browsing/v4/compression,
+ // the following should be done prior to compression:
+ //
+ // 1) re-interpret in little-endian ==> [16777216, 131072, 768, 4]
+ // 2) sort in increasing order ==> [4, 768, 131072, 16777216]
+ //
+ // In order to get the original byte stream from |decoded|
+ // ([4, 768, 131072, 16777216] in this case), we have to:
+ //
+ // 1) sort in big-endian order ==> [16777216, 131072, 768, 4]
+ // 2) copy each uint32 in little-endian to the result string
+ //
+
+ // The 4-byte prefixes have to be re-sorted in Big-endian increasing order.
+ struct CompareBigEndian
+ {
+ bool Equals(const uint32_t& aA, const uint32_t& aB) const
+ {
+ return aA == aB;
+ }
+
+ bool LessThan(const uint32_t& aA, const uint32_t& aB) const
+ {
+ return NativeEndian::swapToBigEndian(aA) <
+ NativeEndian::swapToBigEndian(aB);
+ }
+ };
+ decoded.Sort(CompareBigEndian());
+
+ // The encoded prefixes are always 4 bytes.
+ std::string prefixes;
+ for (size_t i = 0; i < decoded.Length(); i++) {
+ // Note that the third argument is the number of elements we want
+ // to copy (and swap) but not the number of bytes we want to copy.
+ char p[4];
+ NativeEndian::copyAndSwapToLittleEndian(p, &decoded[i], 1);
+ prefixes.append(p, 4);
+ }
+
+ aTableUpdate.NewPrefixes(4, prefixes);
+
+ return NS_OK;
+}
+
+nsresult
+ProtocolParserProtobuf::ProcessEncodedRemoval(TableUpdateV4& aTableUpdate,
+ const ThreatEntrySet& aRemoval)
+{
+ if (!aRemoval.has_rice_indices()) {
+ PARSER_LOG(("* No rice encoded removal."));
+ return NS_OK;
+ }
+
+ nsTArray<uint32_t> decoded;
+ nsresult rv = DoRiceDeltaDecode(aRemoval.rice_indices(), decoded);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // The encoded prefixes are always 4 bytes.
+ aTableUpdate.NewRemovalIndices(&decoded[0], decoded.Length());
+
+ return NS_OK;
+}
} // namespace safebrowsing
} // namespace mozilla
--- a/toolkit/components/url-classifier/ProtocolParser.h
+++ b/toolkit/components/url-classifier/ProtocolParser.h
@@ -179,14 +179,20 @@ private:
const ThreatEntrySetList& aUpdate,
bool aIsAddition);
nsresult ProcessRawAddition(TableUpdateV4& aTableUpdate,
const ThreatEntrySet& aAddition);
nsresult ProcessRawRemoval(TableUpdateV4& aTableUpdate,
const ThreatEntrySet& aRemoval);
+
+ nsresult ProcessEncodedAddition(TableUpdateV4& aTableUpdate,
+ const ThreatEntrySet& aAddition);
+
+ nsresult ProcessEncodedRemoval(TableUpdateV4& aTableUpdate,
+ const ThreatEntrySet& aRemoval);
};
} // namespace safebrowsing
} // namespace mozilla
#endif
--- a/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
@@ -102,20 +102,18 @@ static void
InitListUpdateRequest(ThreatType aThreatType,
const char* aStateBase64,
ListUpdateRequest* aListUpdateRequest)
{
aListUpdateRequest->set_threat_type(aThreatType);
aListUpdateRequest->set_platform_type(GetPlatformType());
aListUpdateRequest->set_threat_entry_type(URL);
- // Only RAW data is supported for now.
- // TODO: Bug 1285848 Supports Rice-Golomb encoding.
Constraints* contraints = new Constraints();
- contraints->add_supported_compressions(RAW);
+ contraints->add_supported_compressions(RICE);
aListUpdateRequest->set_allocated_constraints(contraints);
// Only set non-empty state.
if (aStateBase64[0] != '\0') {
nsCString stateBinary;
nsresult rv = Base64Decode(nsCString(aStateBase64), stateBinary);
if (NS_SUCCEEDED(rv)) {
aListUpdateRequest->set_state(stateBinary.get(), stateBinary.Length());