Bug 1378967 - Ignore highlights items with opaque URIs or invalid hosts r=sebastian draft
authorGrigory Kruglov <gkruglov@mozilla.com>
Thu, 13 Jul 2017 18:07:04 -0400
changeset 608597 17fa1d3506dc47b3353e3e884678fcec68759b9f
parent 608596 2347b4fde0bc1e9f520a8a4485f25172ed57a70d
child 637365 e91238b470e272dc2f3002fa681445f02dd5cc58
push id68346
push userbmo:gkruglov@mozilla.com
push dateThu, 13 Jul 2017 22:12:51 +0000
reviewerssebastian
bugs1378967
milestone56.0a1
Bug 1378967 - Ignore highlights items with opaque URIs or invalid hosts r=sebastian MozReview-Commit-ID: DMkeqGH79dj
mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
--- a/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
+++ b/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
@@ -39,17 +39,17 @@ import java.util.Map;
     public @interface Feature {}
 
     @VisibleForTesting final Map<String, Double> features;
     private Highlight highlight;
     private @Nullable String imageUrl;
     private String host;
     private double score;
 
-    public static HighlightCandidate fromCursor(Cursor cursor) {
+    public static HighlightCandidate fromCursor(Cursor cursor) throws InvalidHighlightCandidateException {
         final HighlightCandidate candidate = new HighlightCandidate();
 
         extractHighlight(candidate, cursor);
         extractFeatures(candidate, cursor);
 
         return candidate;
     }
 
@@ -58,17 +58,17 @@ import java.util.Map;
      */
     private static void extractHighlight(HighlightCandidate candidate, Cursor cursor) {
         candidate.highlight = Highlight.fromCursor(cursor);
     }
 
     /**
      * Extract and assign features that will be used for ranking.
      */
-    private static void extractFeatures(HighlightCandidate candidate, Cursor cursor) {
+    private static void extractFeatures(HighlightCandidate candidate, Cursor cursor) throws InvalidHighlightCandidateException {
         candidate.features.put(
                 FEATURE_AGE_IN_DAYS,
                 (System.currentTimeMillis() - cursor.getDouble(cursor.getColumnIndexOrThrow(BrowserContract.History.DATE_LAST_VISITED)))
                         / (1000 * 3600 * 24));
 
         candidate.features.put(
                 FEATURE_VISITS_COUNT,
                 cursor.getDouble(cursor.getColumnIndexOrThrow(BrowserContract.History.VISITS)));
@@ -120,31 +120,33 @@ import java.util.Map;
         }
 
         candidate.features.put(
                 FEATURE_DESCRIPTION_LENGTH,
                 (double) candidate.highlight.getMetadata().getDescriptionLength());
 
         final Uri uri = Uri.parse(candidate.highlight.getUrl());
 
+        // We don't support opaque URIs (such as mailto:...), or URIs which do not have a valid host.
+        // The latter might simply be URIs with invalid characters in them (such as underscore...).
+        // See Bug 1363521 and Bug 1378967.
+        if (!uri.isHierarchical() || uri.getHost() == null) {
+            throw new InvalidHighlightCandidateException();
+        }
+
         candidate.host = uri.getHost();
 
         candidate.features.put(
                 FEATURE_PATH_LENGTH,
                 (double) uri.getPathSegments().size());
 
-        if (uri.isHierarchical()) {
-            candidate.features.put(
-                    FEATURE_QUERY_LENGTH,
-                    (double) uri.getQueryParameterNames().size());
-
-        // Opaque URIs do not support getQueryParameterNames.
-        } else {
-            candidate.features.put(FEATURE_QUERY_LENGTH, 0d);
-        }
+        // Only hierarchical URIs support getQueryParameterNames.
+        candidate.features.put(
+                FEATURE_QUERY_LENGTH,
+                (double) uri.getQueryParameterNames().size());
     }
 
     @VisibleForTesting HighlightCandidate() {
         features = new HashMap<>();
     }
 
     /* package-private */ double getScore() {
         return score;
@@ -189,9 +191,13 @@ import java.util.Map;
         for (Map.Entry<String, Double> entry : features.entrySet()) {
             if (filter.call(entry.getKey())) {
                 filteredFeatures.put(entry.getKey(), entry.getValue());
             }
         }
 
         return filteredFeatures;
     }
+
+    /* package-private */ static class InvalidHighlightCandidateException extends Exception {
+        private static final long serialVersionUID = 949263104621445850L;
+    }
 }
--- a/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
+++ b/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
@@ -2,16 +2,17 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 package org.mozilla.gecko.activitystream.ranking;
 
 import android.database.Cursor;
 import android.support.annotation.VisibleForTesting;
+import android.util.Log;
 
 import org.mozilla.gecko.activitystream.homepanel.model.Highlight;
 
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -36,16 +37,18 @@ import static org.mozilla.gecko.activity
  * of good highlights. The result set is likely smaller than the cursor size.
  *
  * - First we calculate an initial score based on how frequent we visit the URL and domain.
  * - Then we multiply some (normalized) feature values with weights to calculate:
  *      initialScore * e ^ -(sum of weighted features)
  * - Finally we adjust the score with some custom rules.
  */
 public class HighlightsRanking {
+    private static final String LOG_TAG = "HighlightsRanking";
+
     private static final Map<String, Double> HIGHLIGHT_WEIGHTS = new HashMap<>();
     static {
         // TODO: Needs confirmation from the desktop team that this is the correct weight mapping (Bug 1336037)
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_VISITS_COUNT, -0.1);
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_DESCRIPTION_LENGTH, -0.1);
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_PATH_LENGTH, -0.1);
 
         HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_QUERY_LENGTH, 0.4);
@@ -92,17 +95,22 @@ public class HighlightsRanking {
     /**
      * Extract features for every candidate. The heavy lifting is done in
      * HighlightCandidate.fromCursor().
      */
     @VisibleForTesting static List<HighlightCandidate> extractFeatures(Cursor cursor) {
         return looselyMapCursor(cursor, new Func1<Cursor, HighlightCandidate>() {
             @Override
             public HighlightCandidate call(Cursor cursor) {
-                return HighlightCandidate.fromCursor(cursor);
+                try {
+                    return HighlightCandidate.fromCursor(cursor);
+                } catch (HighlightCandidate.InvalidHighlightCandidateException e) {
+                    Log.w(LOG_TAG, "Skipping invalid highlight item", e);
+                    return null;
+                }
             }
         });
     }
 
     /**
      * Normalize the values of all features listed in NORMALIZATION_FEATURES. Normalization will map
      * the values into the interval of [0,1] based on the min/max values for the features.
      */