Bug 1285848 - Part 2: Request and parse RICE encoded prefix by default. r=francois draft
authorHenry Chang <hchang@mozilla.com>
Wed, 05 Oct 2016 14:59:53 +0800
changeset 421906 08724d3c8039039b8c73f58230b1cdad94ccd786
parent 421905 d8b00fcf83cb6a8ec859825929da95e60468aadd
child 533202 7603aa1b80fe454809e5c6040ab33f138d580fe1
push id31633
push userhchang@mozilla.com
push dateFri, 07 Oct 2016 03:13:11 +0000
reviewersfrancois
bugs1285848
milestone52.0a1
Bug 1285848 - Part 2: Request and parse RICE encoded prefix by default. r=francois MozReview-Commit-ID: Cd0lT5VTM7t
toolkit/components/url-classifier/HashStore.cpp
toolkit/components/url-classifier/ProtocolParser.cpp
toolkit/components/url-classifier/ProtocolParser.h
toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
--- a/toolkit/components/url-classifier/HashStore.cpp
+++ b/toolkit/components/url-classifier/HashStore.cpp
@@ -163,16 +163,36 @@ TableUpdateV2::NewSubComplete(uint32_t a
 }
 
 void
 TableUpdateV4::NewPrefixes(int32_t aSize, std::string& aPrefixes)
 {
   NS_ENSURE_TRUE_VOID(aPrefixes.size() % aSize == 0);
   NS_ENSURE_TRUE_VOID(!mPrefixesMap.Get(aSize));
 
+  if (LOG_ENABLED() && 4 == aSize) {
+    int numOfPrefixes = aPrefixes.size() / 4;
+    uint32_t* p = (uint32_t*)aPrefixes.c_str();
+
+    // Dump the first/last 10 fixed-length prefixes for debugging.
+    LOG(("* The first 10 (maximum) fixed-length prefixes: "));
+    for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
+      uint8_t* c = (uint8_t*)&p[i];
+      LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
+    }
+
+    LOG(("* The last 10 (maximum) fixed-length prefixes: "));
+    for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
+      uint8_t* c = (uint8_t*)&p[i];
+      LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
+    }
+
+    LOG(("---- %d fixed-length prefixes in total.", aPrefixes.size() / aSize));
+  }
+
   PrefixStdString* prefix = new PrefixStdString(aPrefixes);
   mPrefixesMap.Put(aSize, prefix);
 }
 
 void
 TableUpdateV4::NewRemovalIndices(const uint32_t* aIndices, size_t aNumOfIndices)
 {
   for (size_t i = 0; i < aNumOfIndices; i++) {
--- a/toolkit/components/url-classifier/ProtocolParser.cpp
+++ b/toolkit/components/url-classifier/ProtocolParser.cpp
@@ -8,16 +8,18 @@
 #include "nsNetCID.h"
 #include "mozilla/Logging.h"
 #include "prnetdb.h"
 #include "prprf.h"
 
 #include "nsUrlClassifierUtils.h"
 #include "nsPrintfCString.h"
 #include "mozilla/Base64.h"
+#include "RiceDeltaDecoder.h"
+#include "mozilla/EndianUtils.h"
 
 // MOZ_LOG=UrlClassifierProtocolParser:5
 mozilla::LazyLogModule gUrlClassifierProtocolParserLog("UrlClassifierProtocolParser");
 #define PARSER_LOG(args) MOZ_LOG(gUrlClassifierProtocolParserLog, mozilla::LogLevel::Debug, args)
 
 namespace mozilla {
 namespace safebrowsing {
 
@@ -903,18 +905,18 @@ ProtocolParserProtobuf::ProcessAdditionO
       break;
 
     case RAW:
       ret = (aIsAddition ? ProcessRawAddition(aTableUpdate, update)
                          : ProcessRawRemoval(aTableUpdate, update));
       break;
 
     case RICE:
-      // Not implemented yet (see bug 1285848),
-      NS_WARNING("Encoded table update is not supported yet.");
+      ret = (aIsAddition ? ProcessEncodedAddition(aTableUpdate, update)
+                         : ProcessEncodedRemoval(aTableUpdate, update));
       break;
     }
   }
 
   return ret;
 }
 
 nsresult
@@ -972,11 +974,137 @@ ProtocolParserProtobuf::ProcessRawRemova
   PARSER_LOG(("  - # of removal: %d", indices.size()));
 
   aTableUpdate.NewRemovalIndices((const uint32_t*)indices.data(),
                                  indices.size());
 
   return NS_OK;
 }
 
+static nsresult
+DoRiceDeltaDecode(const RiceDeltaEncoding& aEncoding,
+                  nsTArray<uint32_t>& aDecoded)
+{
+  // Sanity check of the encoding info.
+  if (!aEncoding.has_first_value() ||
+      !aEncoding.has_rice_parameter() ||
+      !aEncoding.has_num_entries() ||
+      !aEncoding.has_encoded_data()) {
+    PARSER_LOG(("The encoding info is incomplete."));
+    return NS_ERROR_FAILURE;
+  }
+
+  PARSER_LOG(("* Encoding info:"));
+  PARSER_LOG(("  - First value: %d", aEncoding.first_value()));
+  PARSER_LOG(("  - Num of entries: %d", aEncoding.num_entries()));
+  PARSER_LOG(("  - Rice parameter: %d", aEncoding.rice_parameter()));
+
+  // Set up the input buffer. Note that the bits should be read
+  // from LSB to MSB so that we in-place reverse the bits before
+  // feeding to the decoder.
+  auto encoded = const_cast<RiceDeltaEncoding&>(aEncoding).mutable_encoded_data();
+  RiceDeltaDecoder decoder((uint8_t*)encoded->c_str(), encoded->size());
+
+  // Setup the output buffer. The "first value" is included in
+  // the output buffer.
+  aDecoded.SetLength(aEncoding.num_entries() + 1);
+  aDecoded[0] = aEncoding.first_value();
+
+  // Decode!
+  bool rv = decoder.Decode(aEncoding.rice_parameter(),
+                           aEncoding.first_value(), // first value.
+                           aEncoding.num_entries(), // # of entries (first value not included).
+                           &aDecoded[1]);
+
+  NS_ENSURE_TRUE(rv, NS_ERROR_FAILURE);
+
+  return NS_OK;
+}
+
+nsresult
+ProtocolParserProtobuf::ProcessEncodedAddition(TableUpdateV4& aTableUpdate,
+                                               const ThreatEntrySet& aAddition)
+{
+  if (!aAddition.has_rice_hashes()) {
+    PARSER_LOG(("* No rice encoded addition."));
+    return NS_OK;
+  }
+
+  nsTArray<uint32_t> decoded;
+  nsresult rv = DoRiceDeltaDecode(aAddition.rice_hashes(), decoded);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  //  Say we have the following raw prefixes
+  //                              BE            LE
+  //   00 00 00 01                 1      16777216
+  //   00 00 02 00               512        131072
+  //   00 03 00 00            196608           768
+  //   04 00 00 00          67108864             4
+  //
+  // which can be treated as uint32 (big-endian) sorted in increasing order:
+  //
+  // [1, 512, 196608, 67108864]
+  //
+  // According to https://developers.google.com/safe-browsing/v4/compression,
+  // the following should be done prior to compression:
+  //
+  // 1) re-interpret in little-endian ==> [16777216, 131072, 768, 4]
+  // 2) sort in increasing order       ==> [4, 768, 131072, 16777216]
+  //
+  // In order to get the original byte stream from |decoded|
+  // ([4, 768, 131072, 16777216] in this case), we have to:
+  //
+  // 1) sort in big-endian order      ==> [16777216, 131072, 768, 4]
+  // 2) copy each uint32 in little-endian to the result string
+  //
+
+  // The 4-byte prefixes have to be re-sorted in Big-endian increasing order.
+  struct CompareBigEndian
+  {
+    bool Equals(const uint32_t& aA, const uint32_t& aB) const
+    {
+      return aA == aB;
+    }
+
+    bool LessThan(const uint32_t& aA, const uint32_t& aB) const
+    {
+      return NativeEndian::swapToBigEndian(aA) <
+             NativeEndian::swapToBigEndian(aB);
+    }
+  };
+  decoded.Sort(CompareBigEndian());
+
+  // The encoded prefixes are always 4 bytes.
+  std::string prefixes;
+  for (size_t i = 0; i < decoded.Length(); i++) {
+    // Note that the third argument is the number of elements we want
+    // to copy (and swap) but not the number of bytes we want to copy.
+    char p[4];
+    NativeEndian::copyAndSwapToLittleEndian(p, &decoded[i], 1);
+    prefixes.append(p, 4);
+  }
+
+  aTableUpdate.NewPrefixes(4, prefixes);
+
+  return NS_OK;
+}
+
+nsresult
+ProtocolParserProtobuf::ProcessEncodedRemoval(TableUpdateV4& aTableUpdate,
+                                              const ThreatEntrySet& aRemoval)
+{
+  if (!aRemoval.has_rice_indices()) {
+    PARSER_LOG(("* No rice encoded removal."));
+    return NS_OK;
+  }
+
+  nsTArray<uint32_t> decoded;
+  nsresult rv = DoRiceDeltaDecode(aRemoval.rice_indices(), decoded);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // The encoded prefixes are always 4 bytes.
+  aTableUpdate.NewRemovalIndices(&decoded[0], decoded.Length());
+
+  return NS_OK;
+}
 
 } // namespace safebrowsing
 } // namespace mozilla
--- a/toolkit/components/url-classifier/ProtocolParser.h
+++ b/toolkit/components/url-classifier/ProtocolParser.h
@@ -179,14 +179,20 @@ private:
                                     const ThreatEntrySetList& aUpdate,
                                     bool aIsAddition);
 
   nsresult ProcessRawAddition(TableUpdateV4& aTableUpdate,
                               const ThreatEntrySet& aAddition);
 
   nsresult ProcessRawRemoval(TableUpdateV4& aTableUpdate,
                              const ThreatEntrySet& aRemoval);
+
+  nsresult ProcessEncodedAddition(TableUpdateV4& aTableUpdate,
+                                  const ThreatEntrySet& aAddition);
+
+  nsresult ProcessEncodedRemoval(TableUpdateV4& aTableUpdate,
+                                 const ThreatEntrySet& aRemoval);
 };
 
 } // namespace safebrowsing
 } // namespace mozilla
 
 #endif
--- a/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
@@ -102,20 +102,18 @@ static void
 InitListUpdateRequest(ThreatType aThreatType,
                       const char* aStateBase64,
                       ListUpdateRequest* aListUpdateRequest)
 {
   aListUpdateRequest->set_threat_type(aThreatType);
   aListUpdateRequest->set_platform_type(GetPlatformType());
   aListUpdateRequest->set_threat_entry_type(URL);
 
-  // Only RAW data is supported for now.
-  // TODO: Bug 1285848 Supports Rice-Golomb encoding.
   Constraints* contraints = new Constraints();
-  contraints->add_supported_compressions(RAW);
+  contraints->add_supported_compressions(RICE);
   aListUpdateRequest->set_allocated_constraints(contraints);
 
   // Only set non-empty state.
   if (aStateBase64[0] != '\0') {
     nsCString stateBinary;
     nsresult rv = Base64Decode(nsCString(aStateBase64), stateBinary);
     if (NS_SUCCEEDED(rv)) {
       aListUpdateRequest->set_state(stateBinary.get(), stateBinary.Length());