Bug 1298257 - Implement url matching for variable-length prefix set. r=gcp draft
author Thomas Nguyen <tnguyen@mozilla.com>
Fri, 04 Nov 2016 12:00:33 +0800
changeset 433725 678a0e8cb8b8f48aec09164de7d3585ed2bc758c
parent 431996 2c773b97167252cedcba0be0c7af9d4cab192ef5
child 535935 e480dd55f3a6c3b1d3256602a524d2fa9385e8a9
push id 34628
push user tnguyen@mozilla.com
push date Fri, 04 Nov 2016 04:01:05 +0000
reviewers gcp
bugs 1298257
milestone 52.0a1
Bug 1298257 - Implement url matching for variable-length prefix set. r=gcp MozReview-Commit-ID: 8Goh7yyAotN
toolkit/components/telemetry/Histograms.json
toolkit/components/url-classifier/Classifier.cpp
toolkit/components/url-classifier/LookupCacheV4.cpp
toolkit/components/url-classifier/tests/gtest/Common.cpp
toolkit/components/url-classifier/tests/gtest/Common.h
toolkit/components/url-classifier/tests/gtest/TestLookupCacheV4.cpp
toolkit/components/url-classifier/tests/gtest/TestPerProviderDirectory.cpp
toolkit/components/url-classifier/tests/gtest/TestUrlClassifierTableUpdateV4.cpp
toolkit/components/url-classifier/tests/gtest/moz.build
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -3873,16 +3873,24 @@
   "URLCLASSIFIER_UPDATE_ERROR_TYPE": {
     "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
     "expires_in_version": "58",
     "kind": "enumerated",
     "n_values": 10,
     "bug_numbers": [1305801],
     "description": "An error was encountered while parsing a partial update returned by a Safe Browsing V4 server (0 = addition of an already existing prefix, 1 = parser got into an infinite loop, 2 = removal index out of bounds, 3 = checksum mismatch, 4 = missing checksum)"
   },
+  "URLCLASSIFIER_PREFIX_MATCH": {
+    "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
+    "expires_in_version": "58",
+    "kind": "enumerated",
+    "n_values": 4,
+    "bug_numbers": [1298257],
+    "description": "Classifier prefix matching result (0 = no match, 1 = match only V2, 2 = match only V4, 3 = match both V2 and V4)"
+  },
   "CSP_DOCUMENTS_COUNT": {
     "alert_emails": ["seceng@mozilla.com"],
     "bug_numbers": [1252829],
     "expires_in_version": "55",
     "kind": "count",
     "description": "Number of unique pages that contain a CSP"
   },
   "CSP_UNSAFE_INLINE_DOCUMENTS_COUNT": {
--- a/toolkit/components/url-classifier/Classifier.cpp
+++ b/toolkit/components/url-classifier/Classifier.cpp
@@ -15,16 +15,17 @@
 #include "nsNetCID.h"
 #include "nsPrintfCString.h"
 #include "nsThreadUtils.h"
 #include "mozilla/Telemetry.h"
 #include "mozilla/Logging.h"
 #include "mozilla/SyncRunnable.h"
 #include "mozilla/Base64.h"
 #include "mozilla/Unused.h"
+#include "mozilla/TypedEnumBits.h"
 
 // MOZ_LOG=UrlClassifierDbService:5
 extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
 #define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
 #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
 
 #define STORE_DIRECTORY      NS_LITERAL_CSTRING("safebrowsing")
 #define TO_DELETE_DIR_SUFFIX NS_LITERAL_CSTRING("-to_delete")
@@ -471,16 +472,26 @@ Classifier::TableRequest(nsACString& aRe
   // Load meta data from *.metadata files in the root directory.
   // Specifically for v4 tables.
   nsCString metadata;
   nsresult rv = LoadMetadata(mRootStoreDirectory, metadata);
   NS_ENSURE_SUCCESS_VOID(rv);
   aResult.Append(metadata);
 }
 
+// Bit flags recording v2/v4 prefix-matching statistics, reported to telemetry.
+enum class PrefixMatch : uint8_t {
+  eNoMatch = 0x00,  // No prefix matched.
+  eMatchV2Prefix = 0x01,  // Set when a non-V4 lookup cache reports a match.
+  eMatchV4Prefix = 0x02,  // Set when a LookupCacheV4 reports a match.
+  eMatchBoth = eMatchV2Prefix | eMatchV4Prefix  // Both flags set.
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(PrefixMatch)  // Needed for |= on the enum.
+
 nsresult
 Classifier::Check(const nsACString& aSpec,
                   const nsACString& aTables,
                   uint32_t aFreshnessGuarantee,
                   LookupResultArray& aResults)
 {
   Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_CL_CHECK_TIME> timer;
 
@@ -500,31 +511,49 @@ Classifier::Check(const nsACString& aSpe
     LookupCache *cache = GetLookupCache(activeTables[i]);
     if (cache) {
       cacheArray.AppendElement(cache);
     } else {
       return NS_ERROR_FAILURE;
     }
   }
 
+  PrefixMatch matchingStatistics = PrefixMatch::eNoMatch;
+
   // Now check each lookup fragment against the entries in the DB.
   for (uint32_t i = 0; i < fragments.Length(); i++) {
     Completion lookupHash;
     lookupHash.FromPlaintext(fragments[i], mCryptoHash);
 
     if (LOG_ENABLED()) {
       nsAutoCString checking;
       lookupHash.ToHexString(checking);
       LOG(("Checking fragment %s, hash %s (%X)", fragments[i].get(),
            checking.get(), lookupHash.ToUint32()));
     }
 
     for (uint32_t i = 0; i < cacheArray.Length(); i++) {
       LookupCache *cache = cacheArray[i];
       bool has, complete;
+
+      if (LookupCache::Cast<LookupCacheV4>(cache)) {
+        // TODO Bug 1312339 Return length in LookupCache.Has and support
+        // VariableLengthPrefix in LookupResultArray
+        rv = cache->Has(lookupHash, &has, &complete);
+        if (NS_FAILED(rv)) {
+          LOG(("Failed to lookup fragment %s V4", fragments[i].get()));
+        }
+        if (has) {
+          matchingStatistics |= PrefixMatch::eMatchV4Prefix;
+          // TODO: Bug 1311935 - Implement Safe Browsing v4 caching
+          // Should check cache expired
+        }
+        continue;
+      }
+
       rv = cache->Has(lookupHash, &has, &complete);
       NS_ENSURE_SUCCESS(rv, rv);
       if (has) {
         LookupResult *result = aResults.AppendElement();
         if (!result)
           return NS_ERROR_OUT_OF_MEMORY;
 
         int64_t age;
@@ -540,19 +569,23 @@ Classifier::Check(const nsACString& aSpe
              cache->TableName().get(),
              complete ? "complete." : "Not complete.",
              age));
 
         result->hash.complete = lookupHash;
         result->mComplete = complete;
         result->mFresh = (age < aFreshnessGuarantee);
         result->mTableName.Assign(cache->TableName());
+
+        matchingStatistics |= PrefixMatch::eMatchV2Prefix;
       }
     }
 
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PREFIX_MATCH,
+                          static_cast<uint8_t>(matchingStatistics));
   }
 
   return NS_OK;
 }
 
 nsresult
 Classifier::ApplyUpdates(nsTArray<TableUpdate*>* aUpdates)
 {
--- a/toolkit/components/url-classifier/LookupCacheV4.cpp
+++ b/toolkit/components/url-classifier/LookupCacheV4.cpp
@@ -73,22 +73,38 @@ LookupCacheV4::Init()
 {
   mVLPrefixSet = new VariableLengthPrefixSet();
   nsresult rv = mVLPrefixSet->Init(mTableName);
   NS_ENSURE_SUCCESS(rv, rv);
 
   return NS_OK;
 }
 
-// TODO : Bug 1298257, Implement url matching for variable-length prefix set
 nsresult
 LookupCacheV4::Has(const Completion& aCompletion,
                    bool* aHas, bool* aComplete)
 {
   *aHas = false;
+
+  uint32_t length = 0;
+  nsDependentCSubstring fullhash;
+  fullhash.Rebind((const char *)aCompletion.buf, COMPLETE_SIZE);
+
+  nsresult rv = mVLPrefixSet->Matches(fullhash, &length);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  *aHas = length >= PREFIX_SIZE;
+  *aComplete = length == COMPLETE_SIZE;
+
+  if (LOG_ENABLED()) {
+    uint32_t prefix = aCompletion.ToUint32();
+    LOG(("Probe in V4 %s: %X, found %d, complete %d", mTableName.get(),
+          prefix, *aHas, *aComplete));
+  }
+
   return NS_OK;
 }
 
 nsresult
 LookupCacheV4::Build(PrefixStringMap& aPrefixMap)
 {
   return mVLPrefixSet->SetPrefixes(aPrefixMap);
 }
@@ -146,16 +162,27 @@ AppendPrefixToMap(PrefixStringMap& prefi
   if (!prefix.Length()) {
     return;
   }
 
   nsCString* prefixString = prefixes.LookupOrAdd(prefix.Length());
   prefixString->Append(prefix.BeginReading(), prefix.Length());
 }
 
+// Feed the bytes of |aPrefix| into |aCrypto|, the running hash that keeps
+// track of the checksum. |aCrypto| must not be null.
+static void
+UpdateChecksum(nsICryptoHash* aCrypto, const nsACString& aPrefix)
+{
+  MOZ_ASSERT(aCrypto);
+  // Update() takes a const buffer, so the data is passed without const_cast.
+  aCrypto->Update(reinterpret_cast<const uint8_t*>(aPrefix.BeginReading()),
+                  aPrefix.Length());
+}
+
 // Please see https://bug1287058.bmoattachments.org/attachment.cgi?id=8795366
 // for detail about partial update algorithm.
 nsresult
 LookupCacheV4::ApplyUpdate(TableUpdateV4* aTableUpdate,
                            PrefixStringMap& aInputMap,
                            PrefixStringMap& aOutputMap)
 {
   MOZ_ASSERT(aOutputMap.IsEmpty());
@@ -227,28 +254,22 @@ LookupCacheV4::ApplyUpdate(TableUpdateV4
 
       // If the number of picks from old map matches the removalIndex, then this prefix
       // will be removed by not merging it to new map.
       if (removalIndex < removalArray.Length() &&
           numOldPrefixPicked == removalArray[removalIndex]) {
         removalIndex++;
       } else {
         AppendPrefixToMap(aOutputMap, smallestOldPrefix);
-
-        crypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
-                       smallestOldPrefix.BeginReading())),
-                       smallestOldPrefix.Length());
+        UpdateChecksum(crypto, smallestOldPrefix);
       }
       smallestOldPrefix.SetLength(0);
     } else {
       AppendPrefixToMap(aOutputMap, smallestAddPrefix);
-
-      crypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
-                     smallestAddPrefix.BeginReading())),
-                     smallestAddPrefix.Length());
+      UpdateChecksum(crypto, smallestAddPrefix);
 
       smallestAddPrefix.SetLength(0);
     }
   }
 
   // We expect index will be greater to 0 because max number of runs will be
   // the number of original prefix plus add prefix.
   if (index <= 0) {
@@ -292,17 +313,17 @@ LookupCacheV4::InitCrypto(nsCOMPtr<nsICr
 {
   nsresult rv;
   aCrypto = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
   if (NS_WARN_IF(NS_FAILED(rv))) {
     return rv;
   }
 
   rv = aCrypto->Init(nsICryptoHash::SHA256);
-  Unused << NS_WARN_IF(NS_FAILED(rv));
+  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "InitCrypto failed");
 
   return rv;
 }
 
 nsresult
 LookupCacheV4::VerifyChecksum(const nsACString& aChecksum)
 {
   nsCOMPtr<nsICryptoHash> crypto;
@@ -316,19 +337,17 @@ LookupCacheV4::VerifyChecksum(const nsAC
 
   VLPrefixSet loadPSet(map);
   uint32_t index = loadPSet.Count() + 1;
   for(;index > 0; index--) {
     nsDependentCSubstring prefix;
     if (!loadPSet.GetSmallestPrefix(prefix)) {
       break;
     }
-    crypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
-                   prefix.BeginReading())),
-                   prefix.Length());
+    UpdateChecksum(crypto, prefix);
   }
 
   nsAutoCString checksum;
   crypto->Finish(false, checksum);
 
   if (checksum != aChecksum) {
     LOG(("Checksum mismatch when loading prefixes from file."));
     return NS_ERROR_FILE_CORRUPTED;
--- a/toolkit/components/url-classifier/tests/gtest/Common.cpp
+++ b/toolkit/components/url-classifier/tests/gtest/Common.cpp
@@ -46,8 +46,22 @@ void ApplyUpdate(nsTArray<TableUpdate*>&
   });
 }
 
 void ApplyUpdate(TableUpdate* update)
 {
   nsTArray<TableUpdate*> updates = { update };
   ApplyUpdate(updates);
 }
+
+// Buckets every prefix of |prefixArray| into |out|, keyed by prefix length.
+void
+PrefixArrayToPrefixStringMap(const nsTArray<nsCString>& prefixArray,
+                             PrefixStringMap& out)
+{
+  out.Clear();
+
+  for (const nsCString& entry : prefixArray) {
+    const uint32_t size = entry.Length();
+    out.LookupOrAdd(size)->Append(entry.BeginReading(), size);
+  }
+}
+
--- a/toolkit/components/url-classifier/tests/gtest/Common.h
+++ b/toolkit/components/url-classifier/tests/gtest/Common.h
@@ -14,8 +14,13 @@ void RunTestInNewThread(Function&& aFunc
 already_AddRefed<nsIFile>
 GetFile(const nsTArray<nsString>& path);
 
 // ApplyUpdate will call |ApplyUpdates| of Classifier within a new thread
 void ApplyUpdate(nsTArray<TableUpdate*>& updates);
 
 void ApplyUpdate(TableUpdate* update);
 
+// This function converts lexicographically sorted prefixes to a hashtable
+// keyed by prefix size, whose values are the concatenated prefix strings.
+void PrefixArrayToPrefixStringMap(const nsTArray<nsCString>& prefixArray,
+                                  PrefixStringMap& out);
+
new file mode 100644
--- /dev/null
+++ b/toolkit/components/url-classifier/tests/gtest/TestLookupCacheV4.cpp
@@ -0,0 +1,88 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LookupCacheV4.h"
+#include "Common.h"
+
+#define GTEST_SAFEBROWSING_DIR NS_LITERAL_CSTRING("safebrowsing")
+#define GTEST_TABLE NS_LITERAL_CSTRING("gtest-malware-proto")
+
+typedef nsCString _Fragment;
+typedef nsTArray<nsCString> _PrefixArray;
+
+// Generate a hash prefix from string: hash |aFragment| with
+// Completion::FromPlaintext and return the first |aLength| bytes of the
+// result.
+static const nsCString
+GeneratePrefix(const _Fragment& aFragment, uint8_t aLength)
+{
+  // A full hash is COMPLETE_SIZE bytes; asking for more would read past
+  // |complete.buf|.
+  MOZ_ASSERT(aLength <= COMPLETE_SIZE);
+
+  Completion complete;
+  nsCOMPtr<nsICryptoHash> cryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID);
+  complete.FromPlaintext(aFragment, cryptoHash);
+
+  nsCString hash;
+  hash.Assign(reinterpret_cast<const char*>(complete.buf), aLength);
+  return hash;
+}
+
+// Create a LookupCacheV4 for GTEST_TABLE, constructed with the
+// "safebrowsing" directory inside the user profile, and populate it with
+// |prefixArray|. Every step's result is checked with EXPECT_EQ.
+static UniquePtr<LookupCacheV4>
+SetupLookupCacheV4(const _PrefixArray& prefixArray)
+{
+  nsCOMPtr<nsIFile> file;
+  nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR,
+                                       getter_AddRefs(file));
+  // Report a missing profile directory instead of silently ignoring it.
+  EXPECT_EQ(rv, NS_OK);
+
+  rv = file->AppendNative(GTEST_SAFEBROWSING_DIR);
+  EXPECT_EQ(rv, NS_OK);
+
+  UniquePtr<LookupCacheV4> cache = MakeUnique<LookupCacheV4>(GTEST_TABLE, file);
+  rv = cache->Init();
+  EXPECT_EQ(rv, NS_OK);
+
+  PrefixStringMap map;
+  PrefixArrayToPrefixStringMap(prefixArray, map);
+  rv = cache->Build(map);
+  EXPECT_EQ(rv, NS_OK);
+
+  // A local returned by name is moved automatically; an explicit Move() is
+  // redundant and prevents copy elision.
+  return cache;
+}
+
+// Builds a LookupCacheV4 holding four prefixes of lengths 32, 8, 5 and 4,
+// looks up the hash of |aFragment| through RunTestInNewThread and compares
+// the reported has/complete flags with the expected ones.
+void
+TestHasPrefix(const _Fragment& aFragment, bool aExpectedHas, bool aExpectedComplete)
+{
+  _PrefixArray prefixes = {
+    GeneratePrefix(_Fragment("bravo.com/"), 32),
+    GeneratePrefix(_Fragment("browsing.com/"), 8),
+    GeneratePrefix(_Fragment("gound.com/"), 5),
+    GeneratePrefix(_Fragment("small.com/"), 4)
+  };
+
+  RunTestInNewThread([&] () -> void {
+    UniquePtr<LookupCache> lookupCache = SetupLookupCacheV4(prefixes);
+
+    // Hash the fragment the same way the stored prefixes were produced.
+    Completion fragmentHash;
+    nsCOMPtr<nsICryptoHash> hasher = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID);
+    fragmentHash.FromPlaintext(aFragment, hasher);
+
+    bool found;
+    bool isComplete;
+    nsresult status = lookupCache->Has(fragmentHash, &found, &isComplete);
+
+    EXPECT_EQ(status, NS_OK);
+    EXPECT_EQ(found, aExpectedHas);
+    EXPECT_EQ(isComplete, aExpectedComplete);
+
+    // Drop everything the cache holds before it goes away.
+    lookupCache->ClearAll();
+  });
+}
+
+TEST(LookupCacheV4, HasComplete)
+{
+  TestHasPrefix(_Fragment("bravo.com/"), true, true);  // stored with 32 bytes
+}
+
+TEST(LookupCacheV4, HasPrefix)
+{
+  TestHasPrefix(_Fragment("browsing.com/"), true, false);  // stored with 8 bytes
+}
+
+TEST(LookupCacheV4, Nomatch)
+{
+  TestHasPrefix(_Fragment("nomatch.com/"), false, false);  // not in the set
+}
--- a/toolkit/components/url-classifier/tests/gtest/TestPerProviderDirectory.cpp
+++ b/toolkit/components/url-classifier/tests/gtest/TestPerProviderDirectory.cpp
@@ -1,15 +1,13 @@
 #include "LookupCache.h"
 #include "LookupCacheV4.h"
 #include "HashStore.h"
 #include "gtest/gtest.h"
-#include "nsIThread.h"
 #include "nsAppDirectoryServiceDefs.h"
-#include "nsThreadUtils.h"
 
 namespace mozilla {
 namespace safebrowsing {
 
 class PerProviderDirectoryTestUtils {
 public:
   template<typename T>
   static nsIFile* InspectStoreDirectory(const T& aT)
--- a/toolkit/components/url-classifier/tests/gtest/TestUrlClassifierTableUpdateV4.cpp
+++ b/toolkit/components/url-classifier/tests/gtest/TestUrlClassifierTableUpdateV4.cpp
@@ -1,11 +1,12 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
+#include "Common.h"
 #include "Classifier.h"
 #include "HashStore.h"
 #include "nsAppDirectoryServiceDefs.h"
 #include "nsIFile.h"
 #include "nsIThread.h"
 #include "string.h"
 #include "gtest/gtest.h"
 #include "nsThreadUtils.h"
@@ -50,31 +51,16 @@ MergeAndSortArray(const _PrefixArray& ar
                   _PrefixArray& output)
 {
   output.Clear();
   output.AppendElements(array1);
   output.AppendElements(array2);
   output.Sort();
 }
 
-// This function converts lexigraphic-sorted prefixes to a hashtable
-// which key is prefix size and value is concatenated prefix string.
-static void
-PrefixArrayToPrefixStringMap(const _PrefixArray& prefixArray,
-                             PrefixStringMap& outMap)
-{
-  outMap.Clear();
-
-  for (uint32_t i = 0; i < prefixArray.Length(); i++) {
-    const _Prefix& prefix = prefixArray[i];
-    nsCString* prefixString = outMap.LookupOrAdd(prefix.Length());
-    prefixString->Append(prefix.BeginReading(), prefix.Length());
-  }
-}
-
 static void
 CalculateCheckSum(_PrefixArray& prefixArray, nsCString& checksum)
 {
   prefixArray.Sort();
 
   nsresult rv;
   nsCOMPtr<nsICryptoHash> cryptoHash =
     do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
--- a/toolkit/components/url-classifier/tests/gtest/moz.build
+++ b/toolkit/components/url-classifier/tests/gtest/moz.build
@@ -7,16 +7,17 @@
 LOCAL_INCLUDES += [
     '../..',
 ]
 
 UNIFIED_SOURCES += [
     'Common.cpp',
     'TestChunkSet.cpp',
     'TestFailUpdate.cpp',
+    'TestLookupCacheV4.cpp',
     'TestPerProviderDirectory.cpp',
     'TestProtocolParser.cpp',
     'TestRiceDeltaDecoder.cpp',
     'TestSafebrowsingHash.cpp',
     'TestSafeBrowsingProtobuf.cpp',
     'TestTable.cpp',
     'TestUrlClassifierTableUpdateV4.cpp',
     'TestUrlClassifierUtils.cpp',