Bug 1378967 - Ignore highlights items with opaque URIs or invalid hosts r=sebastian
MozReview-Commit-ID: DMkeqGH79dj
--- a/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
+++ b/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightCandidate.java
@@ -39,17 +39,17 @@ import java.util.Map;
public @interface Feature {}
@VisibleForTesting final Map<String, Double> features;
private Highlight highlight;
private @Nullable String imageUrl;
private String host;
private double score;
- public static HighlightCandidate fromCursor(Cursor cursor) {
+ public static HighlightCandidate fromCursor(Cursor cursor) throws InvalidHighlightCandidateException {
final HighlightCandidate candidate = new HighlightCandidate();
extractHighlight(candidate, cursor);
extractFeatures(candidate, cursor);
return candidate;
}
@@ -58,17 +58,17 @@ import java.util.Map;
*/
private static void extractHighlight(HighlightCandidate candidate, Cursor cursor) {
candidate.highlight = Highlight.fromCursor(cursor);
}
/**
* Extract and assign features that will be used for ranking.
*/
- private static void extractFeatures(HighlightCandidate candidate, Cursor cursor) {
+ private static void extractFeatures(HighlightCandidate candidate, Cursor cursor) throws InvalidHighlightCandidateException {
candidate.features.put(
FEATURE_AGE_IN_DAYS,
(System.currentTimeMillis() - cursor.getDouble(cursor.getColumnIndexOrThrow(BrowserContract.History.DATE_LAST_VISITED)))
/ (1000 * 3600 * 24));
candidate.features.put(
FEATURE_VISITS_COUNT,
cursor.getDouble(cursor.getColumnIndexOrThrow(BrowserContract.History.VISITS)));
@@ -120,31 +120,33 @@ import java.util.Map;
}
candidate.features.put(
FEATURE_DESCRIPTION_LENGTH,
(double) candidate.highlight.getMetadata().getDescriptionLength());
final Uri uri = Uri.parse(candidate.highlight.getUrl());
+ // We don't support opaque URIs (such as mailto:...), or URIs which do not have a valid host.
+ // The latter might simply be URIs with invalid characters in them (such as underscore...).
+ // See Bug 1363521 and Bug 1378967.
+ if (!uri.isHierarchical() || uri.getHost() == null) {
+ throw new InvalidHighlightCandidateException();
+ }
+
candidate.host = uri.getHost();
candidate.features.put(
FEATURE_PATH_LENGTH,
(double) uri.getPathSegments().size());
- if (uri.isHierarchical()) {
- candidate.features.put(
- FEATURE_QUERY_LENGTH,
- (double) uri.getQueryParameterNames().size());
-
- // Opaque URIs do not support getQueryParameterNames.
- } else {
- candidate.features.put(FEATURE_QUERY_LENGTH, 0d);
- }
+ // Only hierarchical URIs support getQueryParameterNames.
+ candidate.features.put(
+ FEATURE_QUERY_LENGTH,
+ (double) uri.getQueryParameterNames().size());
}
@VisibleForTesting HighlightCandidate() {
features = new HashMap<>();
}
/* package-private */ double getScore() {
return score;
@@ -189,9 +191,13 @@ import java.util.Map;
for (Map.Entry<String, Double> entry : features.entrySet()) {
if (filter.call(entry.getKey())) {
filteredFeatures.put(entry.getKey(), entry.getValue());
}
}
return filteredFeatures;
}
+
+ /* package-private */ static class InvalidHighlightCandidateException extends Exception {
+ private static final long serialVersionUID = 949263104621445850L;
+ }
}
--- a/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
+++ b/mobile/android/base/java/org/mozilla/gecko/activitystream/ranking/HighlightsRanking.java
@@ -2,16 +2,17 @@
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
package org.mozilla.gecko.activitystream.ranking;
import android.database.Cursor;
import android.support.annotation.VisibleForTesting;
+import android.util.Log;
import org.mozilla.gecko.activitystream.homepanel.model.Highlight;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -36,16 +37,18 @@ import static org.mozilla.gecko.activity
* of good highlights. The result set is likely smaller than the cursor size.
*
* - First we calculate an initial score based on how frequent we visit the URL and domain.
* - Then we multiply some (normalized) feature values with weights to calculate:
* initialScore * e ^ -(sum of weighted features)
* - Finally we adjust the score with some custom rules.
*/
public class HighlightsRanking {
+ private static final String LOG_TAG = "HighlightsRanking";
+
private static final Map<String, Double> HIGHLIGHT_WEIGHTS = new HashMap<>();
static {
// TODO: Needs confirmation from the desktop team that this is the correct weight mapping (Bug 1336037)
HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_VISITS_COUNT, -0.1);
HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_DESCRIPTION_LENGTH, -0.1);
HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_PATH_LENGTH, -0.1);
HIGHLIGHT_WEIGHTS.put(HighlightCandidate.FEATURE_QUERY_LENGTH, 0.4);
@@ -92,17 +95,22 @@ public class HighlightsRanking {
/**
* Extract features for every candidate. The heavy lifting is done in
* HighlightCandidate.fromCursor().
*/
@VisibleForTesting static List<HighlightCandidate> extractFeatures(Cursor cursor) {
return looselyMapCursor(cursor, new Func1<Cursor, HighlightCandidate>() {
@Override
public HighlightCandidate call(Cursor cursor) {
- return HighlightCandidate.fromCursor(cursor);
+ try {
+ return HighlightCandidate.fromCursor(cursor);
+ } catch (HighlightCandidate.InvalidHighlightCandidateException e) {
+ Log.w(LOG_TAG, "Skipping invalid highlight item", e);
+ return null;
+ }
}
});
}
/**
* Normalize the values of all features listed in NORMALIZATION_FEATURES. Normalization will map
* the values into the interval of [0,1] based on the min/max values for the features.
*/