Bug 1265525 - Part 2: Frecency calculation and top sites query updates r=sebastian
MozReview-Commit-ID: 7tqr4IT9635
--- a/mobile/android/base/java/org/mozilla/gecko/db/BrowserContract.java
+++ b/mobile/android/base/java/org/mozilla/gecko/db/BrowserContract.java
@@ -3,16 +3,18 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
package org.mozilla.gecko.db;
import org.mozilla.gecko.AppConstants;
import android.net.Uri;
+import android.support.annotation.NonNull;
+
import org.mozilla.gecko.annotation.RobocopTarget;
@RobocopTarget
public class BrowserContract {
public static final String AUTHORITY = AppConstants.ANDROID_PACKAGE_NAME + ".db.browser";
public static final Uri AUTHORITY_URI = Uri.parse("content://" + AUTHORITY);
public static final String PASSWORDS_AUTHORITY = AppConstants.ANDROID_PACKAGE_NAME + ".db.passwords";
@@ -52,29 +54,85 @@ public class BrowserContract {
public static final String PARAM_DATASET_ID = "dataset_id";
public static final String PARAM_GROUP_BY = "group_by";
static public enum ExpirePriority {
NORMAL,
AGGRESSIVE
}
- static public String getFrecencySortOrder(boolean includesBookmarks, boolean asc) {
- final String age = "(" + Combined.DATE_LAST_VISITED + " - " + System.currentTimeMillis() + ") / 86400000";
-
- StringBuilder order = new StringBuilder(Combined.VISITS + " * MAX(1, 100 * 225 / (" + age + "*" + age + " + 225)) ");
+ /**
+ * Produces a SQL expression used for sorting results of the "combined" view by frecency.
+ * Combines remote and local frecency calculations, weighting local visits much heavier.
+ *
+ * @param includesBookmarks When URL is bookmarked, should we give it bonus frecency points?
+ * @param ascending Indicates if sorting order ascending
+ * @return Combined frecency sorting expression
+ */
+ static public String getCombinedFrecencySortOrder(boolean includesBookmarks, boolean ascending) {
+ final long now = System.currentTimeMillis();
+ StringBuilder order = new StringBuilder(getRemoteFrecencySQL(now) + " + " + getLocalFrecencySQL(now));
if (includesBookmarks) {
order.insert(0, "(CASE WHEN " + Combined.BOOKMARK_ID + " > -1 THEN 100 ELSE 0 END) + ");
}
- order.append(asc ? " ASC" : " DESC");
+ order.append(ascending ? " ASC" : " DESC");
return order.toString();
}
+ /**
+ * See Bug 1265525 for details (explanation + graphs) on how Remote frecency compares to Local frecency for different
+ * combinations of visits count and age.
+ *
+ * @param now Base time in milliseconds for age calculation
+ * @return remote frecency SQL calculation
+ */
+ static public String getRemoteFrecencySQL(final long now) {
+ return getFrecencyCalculation(now, 1, 110, Combined.REMOTE_VISITS_COUNT, Combined.REMOTE_DATE_LAST_VISITED);
+ }
+
+ /**
+ * Local frecency SQL calculation. Note higher scale factor and squared visit count which achieve
+ * visits generated locally being much preferred over remote visits.
+ * See Bug 1265525 for details (explanation + comparison graphs).
+ *
+ * @param now Base time in milliseconds for age calculation
+ * @return local frecency SQL calculation
+ */
+ static public String getLocalFrecencySQL(final long now) {
+ String visitCountExpr = "(" + Combined.LOCAL_VISITS_COUNT + " + 2)";
+ visitCountExpr = visitCountExpr + " * " + visitCountExpr;
+
+ return getFrecencyCalculation(now, 2, 225, visitCountExpr, Combined.LOCAL_DATE_LAST_VISITED);
+ }
+
+ /**
+ * Our version of frecency is computed by scaling the number of visits by a multiplier
+ * that approximates Gaussian decay, based on how long ago the entry was last visited.
+ * Since we're limited by the math we can do with sqlite, we're calculating this
+ * approximation using the Cauchy distribution: multiplier = scale_const / (age^2 + scale_const).
+ * For example, with 15 as our scale parameter, we get a scale constant 15^2 = 225. Then:
+ * frecencyScore = numVisits * max(1, 100 * 225 / (age*age + 225)). (See bug 704977)
+ *
+ * @param now Base time in milliseconds for age calculation
+ * @param minFrecency Minimum allowed frecency value
+ * @param multiplier Scale constant
+ * @param visitCountExpr Expression which will produce a visit count
+ * @param lastVisitExpr Expression which will produce "last-visited" timestamp
+ * @return Frecency SQL calculation
+ */
+ static public String getFrecencyCalculation(final long now, final int minFrecency, final int multiplier, @NonNull final String visitCountExpr, @NonNull final String lastVisitExpr) {
+ final long nowInMicroseconds = now * 1000;
+ final long microsecondsPerDay = 86400000000L;
+ final String ageExpr = "(" + nowInMicroseconds + " - " + lastVisitExpr + ") / " + microsecondsPerDay;
+
+ return visitCountExpr + " * MAX(" + minFrecency + ", 100 * " + multiplier + " / (" + ageExpr + " * " + ageExpr + " + " + multiplier + "))";
+ }
+
@RobocopTarget
public interface CommonColumns {
public static final String _ID = "_id";
}
@RobocopTarget
public interface DateSyncColumns {
public static final String DATE_CREATED = "created";
@@ -240,16 +298,22 @@ public class BrowserContract {
public static final String VIEW_NAME = "combined";
public static final String VIEW_WITH_FAVICONS = "combined_with_favicons";
public static final Uri CONTENT_URI = Uri.withAppendedPath(AUTHORITY_URI, "combined");
public static final String BOOKMARK_ID = "bookmark_id";
public static final String HISTORY_ID = "history_id";
+
+ public static final String REMOTE_VISITS_COUNT = "remoteVisitCount";
+ public static final String REMOTE_DATE_LAST_VISITED = "remoteDateLastVisited";
+
+ public static final String LOCAL_VISITS_COUNT = "localVisitCount";
+ public static final String LOCAL_DATE_LAST_VISITED = "localDateLastVisited";
}
public static final class Schema {
private Schema() {}
public static final Uri CONTENT_URI = Uri.withAppendedPath(AUTHORITY_URI, "schema");
public static final String VERSION = "version";
}
--- a/mobile/android/base/java/org/mozilla/gecko/db/BrowserProvider.java
+++ b/mobile/android/base/java/org/mozilla/gecko/db/BrowserProvider.java
@@ -309,59 +309,58 @@ public class BrowserProvider extends Sha
}
/**
* Remove enough history items to bring the database count below <code>retain</code>,
* removing no items with a modified time after <code>keepAfter</code>.
*
* Provide <code>keepAfter</code> less than or equal to zero to skip that check.
*
- * Items will be removed according to an approximate frecency calculation.
+ * Items will be removed according to last visited date.
*/
private void expireHistory(final SQLiteDatabase db, final int retain, final long keepAfter) {
Log.d(LOGTAG, "Expiring history.");
final long rows = DatabaseUtils.queryNumEntries(db, TABLE_HISTORY);
if (retain >= rows) {
debug("Not expiring history: only have " + rows + " rows.");
return;
}
- final String sortOrder = BrowserContract.getFrecencySortOrder(false, true);
final long toRemove = rows - retain;
debug("Expiring at most " + toRemove + " rows earlier than " + keepAfter + ".");
final String sql;
if (keepAfter > 0) {
sql = "DELETE FROM " + TABLE_HISTORY + " " +
"WHERE MAX(" + History.DATE_LAST_VISITED + ", " + History.DATE_MODIFIED + ") < " + keepAfter + " " +
" AND " + History._ID + " IN ( SELECT " +
History._ID + " FROM " + TABLE_HISTORY + " " +
- "ORDER BY " + sortOrder + " LIMIT " + toRemove +
+ "ORDER BY " + History.DATE_LAST_VISITED + " ASC LIMIT " + toRemove +
")";
} else {
sql = "DELETE FROM " + TABLE_HISTORY + " WHERE " + History._ID + " " +
"IN ( SELECT " + History._ID + " FROM " + TABLE_HISTORY + " " +
- "ORDER BY " + sortOrder + " LIMIT " + toRemove + ")";
+ "ORDER BY " + History.DATE_LAST_VISITED + " ASC LIMIT " + toRemove + ")";
}
trace("Deleting using query: " + sql);
beginWrite(db);
db.execSQL(sql);
}
/**
* Remove any thumbnails that for sites that aren't likely to be ever shown.
* Items will be removed according to a frecency calculation and only if they are not pinned
*
* Call this method within a transaction.
*/
private void expireThumbnails(final SQLiteDatabase db) {
Log.d(LOGTAG, "Expiring thumbnails.");
- final String sortOrder = BrowserContract.getFrecencySortOrder(true, false);
+ final String sortOrder = BrowserContract.getCombinedFrecencySortOrder(true, false);
final String sql = "DELETE FROM " + TABLE_THUMBNAILS +
" WHERE " + Thumbnails.URL + " NOT IN ( " +
" SELECT " + Combined.URL +
" FROM " + Combined.VIEW_NAME +
" ORDER BY " + sortOrder +
" LIMIT " + DEFAULT_EXPIRY_THUMBNAIL_COUNT +
") AND " + Thumbnails.URL + " NOT IN ( " +
" SELECT " + Bookmarks.URL +
@@ -897,17 +896,17 @@ public class BrowserProvider extends Sha
Combined.BOOKMARK_ID + ", " +
Combined.HISTORY_ID + ", " +
Bookmarks.URL + ", " +
Bookmarks.TITLE + ", " +
Combined.HISTORY_ID + ", " +
TopSites.TYPE_TOP + " AS " + TopSites.TYPE +
" FROM " + Combined.VIEW_NAME +
" WHERE " + ignoreForTopSitesWhereClause +
- " ORDER BY " + BrowserContract.getFrecencySortOrder(true, false) +
+ " ORDER BY " + BrowserContract.getCombinedFrecencySortOrder(true, false) +
" LIMIT " + totalLimit,
ignoreForTopSitesArgs);
if (hasProcessedAnySuggestedSites) {
db.execSQL("INSERT INTO " + TABLE_TOPSITES +
// We need to LIMIT _after_ selecting the relevant suggested sites, which requires us to
// use an additional internal subquery, since we cannot LIMIT a subquery that is part of UNION ALL.
--- a/mobile/android/base/java/org/mozilla/gecko/db/LocalBrowserDB.java
+++ b/mobile/android/base/java/org/mozilla/gecko/db/LocalBrowserDB.java
@@ -564,24 +564,20 @@ public class LocalBrowserDB implements B
}
}
if (urlFilter != null) {
selection = DBUtils.concatenateWhere(selection, "(" + Combined.URL + " NOT LIKE ?)");
selectionArgs = DBUtils.appendSelectionArgs(selectionArgs, new String[] { urlFilter.toString() });
}
- // Our version of frecency is computed by scaling the number of visits by a multiplier
- // that approximates Gaussian decay, based on how long ago the entry was last visited.
- // Since we're limited by the math we can do with sqlite, we're calculating this
- // approximation using the Cauchy distribution: multiplier = 15^2 / (age^2 + 15^2).
- // Using 15 as our scale parameter, we get a constant 15^2 = 225. Following this math,
- // frecencyScore = numVisits * max(1, 100 * 225 / (age*age + 225)). (See bug 704977)
- // We also give bookmarks an extra bonus boost by adding 100 points to their frecency score.
- final String sortOrder = BrowserContract.getFrecencySortOrder(true, false);
+ // Order by combined remote+local frecency score.
+ // Local visits are preferred, so they will by far outweigh remote visits.
+ // Bookmarked history items get extra frecency points.
+ final String sortOrder = BrowserContract.getCombinedFrecencySortOrder(true, false);
return cr.query(combinedUriWithLimit(limit),
projection,
selection,
selectionArgs,
sortOrder);
}
new file mode 100644
--- /dev/null
+++ b/mobile/android/tests/background/junit4/src/org/mozilla/gecko/db/BrowserContractTest.java
@@ -0,0 +1,67 @@
+/* Any copyright is dedicated to the Public Domain.
+ http://creativecommons.org/publicdomain/zero/1.0/ */
+
+package org.mozilla.gecko.db;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mozilla.gecko.background.testhelpers.TestRunner;
+
+import static org.junit.Assert.*;
+
+@RunWith(TestRunner.class)
+public class BrowserContractTest {
+ @Test
+ /**
+ * Test that bookmark and sorting order clauses are set correctly
+ */
+ public void testGetCombinedFrecencySortOrder() throws Exception {
+ String sqlNoBookmarksDesc = BrowserContract.getCombinedFrecencySortOrder(false, false);
+ String sqlNoBookmarksAsc = BrowserContract.getCombinedFrecencySortOrder(false, true);
+ String sqlBookmarksDesc = BrowserContract.getCombinedFrecencySortOrder(true, false);
+ String sqlBookmarksAsc = BrowserContract.getCombinedFrecencySortOrder(true, true);
+
+ assertTrue(sqlBookmarksAsc.endsWith(" ASC"));
+ assertTrue(sqlBookmarksDesc.endsWith(" DESC"));
+ assertTrue(sqlNoBookmarksAsc.endsWith(" ASC"));
+ assertTrue(sqlNoBookmarksDesc.endsWith(" DESC"));
+
+ assertTrue(sqlBookmarksAsc.startsWith("(CASE WHEN bookmark_id > -1 THEN 100 ELSE 0 END) + "));
+ assertTrue(sqlBookmarksDesc.startsWith("(CASE WHEN bookmark_id > -1 THEN 100 ELSE 0 END) + "));
+ }
+
+ @Test
+ /**
+ * Test that calculation string is correct for remote visits
+ * maxFrecency=1, scaleConst=110, correct sql params for visit count and last date
+ * and that time is converted to microseconds.
+ */
+ public void testGetRemoteFrecencySQL() throws Exception {
+ long now = 1;
+ String sql = BrowserContract.getRemoteFrecencySQL(now);
+ String ageExpr = "(" + now * 1000 + " - remoteDateLastVisited) / 86400000000";
+
+ assertEquals(
+ "remoteVisitCount * MAX(1, 100 * 110 / (" + ageExpr + " * " + ageExpr + " + 110))",
+ sql
+ );
+ }
+
+ @Test
+ /**
+ * Test that calculation string is correct for remote visits
+ * maxFrecency=2, scaleConst=225, correct sql params for visit count and last date
+ * and that time is converted to microseconds.
+ */
+ public void testGetLocalFrecencySQL() throws Exception {
+ long now = 1;
+ String sql = BrowserContract.getLocalFrecencySQL(now);
+ String ageExpr = "(" + now * 1000 + " - localDateLastVisited) / 86400000000";
+ String visitCountExpr = "(localVisitCount + 2) * (localVisitCount + 2)";
+
+ assertEquals(
+ visitCountExpr + " * MAX(2, 100 * 225 / (" + ageExpr + " * " + ageExpr + " + 225))",
+ sql
+ );
+ }
+}
\ No newline at end of file