Bug 1358324 - The URLCLASSIFIER_MATCH_THREAT_TYPE_RESULT probe doesn't seem to be working correctly. r?francois draft
authorDimiL <dlee@mozilla.com>
Fri, 21 Apr 2017 15:09:44 +0800
changeset 569197 2b78a2c9113a6542bf0ce831bb0bdecfdb784445
parent 566156 8b854986038cf3f3f240697e27ef48ea65914c13
child 626131 04c6b5d443b8183fb4ca77b0d2825473d9c219f0
push id56091
push userbmo:dlee@mozilla.com
push dateThu, 27 Apr 2017 04:39:24 +0000
reviewersfrancois
bugs1358324
milestone55.0a1
Bug 1358324 - The URLCLASSIFIER_MATCH_THREAT_TYPE_RESULT probe doesn't seem to be working correctly. r?francois mResults is lookupResultArray which is created when we find matched prefix in the database. mCacheResults stores response of gethash request. We record threat type match telemetry only when completion is found in both V2 and V4, that is we got one confirmed result for V2 and one for V4. And then we could record threat types by iterating through mCacheResultsArray. But if one of lookupResult is from cache, for example, completion is found in 'goog-malware-proto', then we won't trigger a gethash request for it because it is in the cache. In this scenario, mCacheResults will not have results from V4 so when we try to record threat types we won't find V4 ones. In this patch we only record telemetry when gethash is sent for both V2 and V4. This may limit the usefulness of that probe a little bit, but we shouldn't make the code less efficient just to be able to measure telemetry better. MozReview-Commit-ID: Ib8SGUaxb4c
toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
--- a/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
@@ -1360,43 +1360,55 @@ nsUrlClassifierLookupCallback::HandleRes
     }
   }
 
   // Only record threat type telemetry when completion is found in V2 & V4.
   if (matchResult == MatchResult::eAll && mCacheResults) {
     MatchThreatType types = MatchThreatType::eIdentical;
 
     // Check all the results because there may be multiple matches being returned.
+    bool foundV2Result = false, foundV4Result = false;
     for (uint32_t i = 0; i < mCacheResults->Length(); i++) {
       CacheResult* c = mCacheResults->ElementAt(i).get();
+      bool isV2 = CacheResult::V2 == c->Ver();
+      if (isV2) {
+        foundV2Result = true;
+      } else {
+        foundV4Result = true;
+      }
       for (LookupResult& l : *(mResults.get())) {
-        if (l.hash.fixedLengthPrefix != c->prefix) {
+        if (l.mProtocolV2 != isV2 || l.hash.fixedLengthPrefix != c->prefix) {
           continue;
         }
 
         // Ignore unconfirmed results.
         if (l.Confirmed()) {
           types |= TableNameToThreatType(CacheResult::V2 == c->Ver(), c->table);
         }
         break;
       }
     }
 
-    auto fnIsMatchSameThreatType = [&](const MatchThreatType& aTypeMask) {
-      uint8_t val = static_cast<uint8_t>(types & aTypeMask);
-      return val == 0 || val == static_cast<uint8_t>(aTypeMask);
-    };
-    if (fnIsMatchSameThreatType(MatchThreatType::ePhishingMask) &&
-        fnIsMatchSameThreatType(MatchThreatType::eMalwareMask) &&
-        fnIsMatchSameThreatType(MatchThreatType::eUnwantedMask)) {
-      types = MatchThreatType::eIdentical;
+    // We don't want to record telemetry when one of the results is from cache
+    // because finding an unexpired cache entry will prevent us from doing gethash
+    // requests that would otherwise be required.
+    if (foundV2Result && foundV4Result) {
+      auto fnIsMatchSameThreatType = [&](const MatchThreatType& aTypeMask) {
+        uint8_t val = static_cast<uint8_t>(types & aTypeMask);
+        return val == 0 || val == static_cast<uint8_t>(aTypeMask);
+      };
+      if (fnIsMatchSameThreatType(MatchThreatType::ePhishingMask) &&
+          fnIsMatchSameThreatType(MatchThreatType::eMalwareMask) &&
+          fnIsMatchSameThreatType(MatchThreatType::eUnwantedMask)) {
+        types = MatchThreatType::eIdentical;
+      }
+
+      Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_THREAT_TYPE_RESULT,
+                            static_cast<uint8_t>(types));
     }
-
-    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_THREAT_TYPE_RESULT,
-                          static_cast<uint8_t>(types));
   }
 
   if (matchResult != MatchResult::eTelemetryDisabled) {
     Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_RESULT,
                           MatchResultToUint(matchResult));
   }
 
   // TODO: Bug 1333328, Refactor cache miss mechanism for v2.