Bug 1305563 - Expose `PlacesUtils.history.hashURL`. r?mak draft
author Kit Cambridge <kit@yakshaving.ninja>
Wed, 16 Aug 2017 16:01:40 -0700
changeset 680371 c702d82aabd0679870ad3132b9327fb303cef60d
parent 680263 56b5c1a87dcb2c0391e7f642f99e6638dcf235c0
child 680372 f4cc89ccd2b28b42962647ff8cb8ee015c1fc2c0
push id 84483
push user bmo:kit@mozilla.com
push date Fri, 13 Oct 2017 22:55:10 +0000
reviewers mak
bugs 1305563
milestone 58.0a1
Bug 1305563 - Expose `PlacesUtils.history.hashURL`. r?mak This patch exposes the Places `hash()` SQL function, so that the Sync bookmark buffer can store hashed URLs for remote bookmarks, and join with `moz_places`. MozReview-Commit-ID: C4Zj4FyMZpq
toolkit/components/places/Helpers.cpp
toolkit/components/places/Helpers.h
toolkit/components/places/SQLFunctions.cpp
toolkit/components/places/nsINavHistoryService.idl
toolkit/components/places/nsNavHistory.cpp
--- a/toolkit/components/places/Helpers.cpp
+++ b/toolkit/components/places/Helpers.cpp
@@ -4,21 +4,29 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "Helpers.h"
 #include "mozIStorageError.h"
 #include "prio.h"
 #include "nsString.h"
 #include "nsNavHistory.h"
 #include "mozilla/Base64.h"
+#include "mozilla/HashFunctions.h"
+#include <algorithm>
 #include "mozilla/Services.h"
 
 // The length of guids that are used by history and bookmarks.
 #define GUID_LENGTH 12
 
+// Maximum number of chars to use for calculating hashes. This value has been
+// picked to ensure low hash collisions on a real world common places.sqlite.
+// While collisions are not a big deal for functionality, a low ratio allows
+// for slightly more efficient SELECTs.
+#define MAX_CHARS_TO_HASH 1500U
+
 namespace mozilla {
 namespace places {
 
 ////////////////////////////////////////////////////////////////////////////////
 //// AsyncStatementCallback
 
 NS_IMPL_ISUPPORTS(
   AsyncStatementCallback
@@ -306,16 +314,63 @@ RoundToMilliseconds(PRTime aTime) {
   return aTime - (aTime % PR_USEC_PER_MSEC);
 }
 
 PRTime
 RoundedPRNow() {
   return RoundToMilliseconds(PR_Now());
 }
 
+nsresult
+HashURL(const nsACString& aSpec, const nsACString& aMode, uint64_t *_hash)
+{
+  NS_ENSURE_ARG_POINTER(_hash);
+
+  // Reject unknown modes up front, leaving *_hash untouched.
+  const bool fullHash = aMode.IsEmpty();
+  const bool lowerBound = !fullHash && aMode.EqualsLiteral("prefix_lo");
+  const bool upperBound =
+    !fullHash && !lowerBound && aMode.EqualsLiteral("prefix_hi");
+  if (!fullHash && !lowerBound && !upperBound) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // HashString reads exactly as many chars as it is told to, so never ask for
+  // more than the string holds; MAX_CHARS_TO_HASH caps how many are hashed.
+  const uint32_t specLength = static_cast<uint32_t>(aSpec.Length());
+  const uint32_t hashedLength = std::min(specLength, MAX_CHARS_TO_HASH);
+  const uint32_t specHash = HashString(aSpec.BeginReading(), hashedLength);
+
+  if (!fullHash) {
+    // Prefix bounds: the low 16 bits of the hash moved to bits 32-47. The
+    // upper bound additionally has all of the lowest 32 bits set.
+    const uint64_t lowest = static_cast<uint64_t>(specHash & 0x0000FFFF) << 32;
+    *_hash = upperBound ? lowest + 0xFFFFFFFF : lowest;
+    return NS_OK;
+  }
+
+  // URI-like strings (a prefix followed by a colon) get a 48 bit hash: 16 bits
+  // of prefix hash above the 32 bit string hash. 16 bits were chosen because
+  // all of the IANA known schemes, plus "places", hash without collisions.
+  // The longest known IANA scheme is 30 chars, so only the first 50 chars
+  // are searched for the colon.
+  const nsDependentCSubstring& head = StringHead(aSpec, 50);
+  nsACString::const_iterator schemeStart, colon, headEnd;
+  head.BeginReading(schemeStart);
+  colon = schemeStart;
+  head.EndReading(headEnd);
+  uint64_t combined = specHash;
+  if (FindCharInReadable(':', colon, headEnd)) {
+    const uint32_t schemeHash = HashString(Substring(schemeStart, colon));
+    combined += static_cast<uint64_t>(schemeHash & 0x0000FFFF) << 32;
+  }
+  *_hash = combined;
+  return NS_OK;
+}
+
 bool
 GetHiddenState(bool aIsRedirect,
                uint32_t aTransitionType)
 {
   return aTransitionType == nsINavHistoryService::TRANSITION_FRAMED_LINK ||
          aTransitionType == nsINavHistoryService::TRANSITION_EMBED ||
          aIsRedirect;
 }
--- a/toolkit/components/places/Helpers.h
+++ b/toolkit/components/places/Helpers.h
@@ -169,16 +169,19 @@ PRTime RoundToMilliseconds(PRTime aTime)
 
 /**
  * Round down PR_Now() to milliseconds precision.
  *
  * @return @see PR_Now, RoundToMilliseconds.
  */
 PRTime RoundedPRNow();
 
+nsresult HashURL(const nsACString& aSpec, const nsACString& aMode,
+                 uint64_t *_hash);  // same hash as the SQL hash() function
+
 /**
  * Used to finalize a statementCache on a specified thread.
  */
 template<typename StatementType>
 class FinalizeStatementCacheProxy : public Runnable
 {
 public:
   /**
--- a/toolkit/components/places/SQLFunctions.cpp
+++ b/toolkit/components/places/SQLFunctions.cpp
@@ -13,29 +13,21 @@
 #include "nsMathUtils.h"
 #include "nsUnicodeProperties.h"
 #include "nsUTF8Utils.h"
 #include "nsINavHistoryService.h"
 #include "nsPrintfCString.h"
 #include "nsNavHistory.h"
 #include "mozilla/Likely.h"
 #include "nsVariant.h"
-#include "mozilla/HashFunctions.h"
-#include <algorithm>
 
 // Maximum number of chars to search through.
 // MatchAutoCompleteFunction won't look for matches over this threshold.
 #define MAX_CHARS_TO_SEARCH_THROUGH 255
 
-// Maximum number of chars to use for calculating hashes. This value has been
-// picked to ensure low hash collisions on a real world common places.sqlite.
-// While collisions are not a big deal for functionality, a low ratio allows
-// for slightly more efficient SELECTs.
-#define MAX_CHARS_TO_HASH 1500U
-
 using namespace mozilla::storage;
 
 ////////////////////////////////////////////////////////////////////////////////
 //// Anonymous Helpers
 
 namespace {
 
   typedef nsACString::const_char_iterator const_char_iterator;
@@ -1046,56 +1038,21 @@ namespace places {
     NS_ENSURE_TRUE(numEntries >= 1  && numEntries <= 2, NS_ERROR_FAILURE);
 
     nsDependentCString str = getSharedUTF8String(aArguments, 0);
     nsAutoCString mode;
     if (numEntries > 1) {
       aArguments->GetUTF8String(1, mode);
     }
 
-    // HashString doesn't stop at the string boundaries if a length is passed to
-    // it, so ensure to pass a proper value.
-    const uint32_t maxLenToHash = std::min(static_cast<uint32_t>(str.Length()),
-                                           MAX_CHARS_TO_HASH);
     RefPtr<nsVariant> result = new nsVariant();
-    if (mode.IsEmpty()) {
-      // URI-like strings (having a prefix before a colon), are handled specially,
-      // as a 48 bit hash, where first 16 bits are the prefix hash, while the
-      // other 32 are the string hash.
-      // The 16 bits have been decided based on the fact hashing all of the IANA
-      // known schemes, plus "places", does not generate collisions.
-      // Since we only care about schemes, we just search in the first 50 chars.
-      // The longest known IANA scheme, at this time, is 30 chars.
-      const nsDependentCSubstring& strHead = StringHead(str, 50);
-      nsACString::const_iterator start, tip, end;
-      strHead.BeginReading(tip);
-      start = tip;
-      strHead.EndReading(end);
-      uint32_t strHash = HashString(str.get(), maxLenToHash);
-      if (FindCharInReadable(':', tip, end)) {
-        const nsDependentCSubstring& prefix = Substring(start, tip);
-        uint64_t prefixHash = static_cast<uint64_t>(HashString(prefix) & 0x0000FFFF);
-        // The second half of the url is more likely to be unique, so we add it.
-        uint64_t hash = (prefixHash << 32) + strHash;
-        result->SetAsInt64(hash);
-      } else {
-        result->SetAsInt64(strHash);
-      }
-    } else if (mode.EqualsLiteral("prefix_lo")) {
-      // Keep only 16 bits.
-      uint64_t hash = static_cast<uint64_t>(HashString(str.get(), maxLenToHash) & 0x0000FFFF) << 32;
-      result->SetAsInt64(hash);
-    } else if (mode.EqualsLiteral("prefix_hi")) {
-      // Keep only 16 bits.
-      uint64_t hash = static_cast<uint64_t>(HashString(str.get(), maxLenToHash) & 0x0000FFFF) << 32;
-      // Make this a prefix upper bound by filling the lowest 32 bits.
-      hash +=  0xFFFFFFFF;
-      result->SetAsInt64(hash);
-    } else {
-      return NS_ERROR_FAILURE;
-    }
+    uint64_t hash;
+    rv = HashURL(str, mode, &hash);
+    NS_ENSURE_SUCCESS(rv, rv);
+    rv = result->SetAsInt64(hash);
+    NS_ENSURE_SUCCESS(rv, rv);
 
     result.forget(_result);
     return NS_OK;
   }
 
 } // namespace places
 } // namespace mozilla
--- a/toolkit/components/places/nsINavHistoryService.idl
+++ b/toolkit/components/places/nsINavHistoryService.idl
@@ -1431,16 +1431,18 @@ interface nsINavHistoryService : nsISupp
   void clearEmbedVisits();
 
   /**
    * Generate a guid.
    * Guids can be used for any places purposes (history, bookmarks, etc.)
    * Returns null if the generation of the guid failed.
    */
   ACString makeGuid();
+
+  uint64_t hashURL(in ACString aSpec, [optional] in ACString aMode);
 };
 
 /**
  * @see runInBatchMode of nsINavHistoryService/nsINavBookmarksService
  */
 [scriptable, function, uuid(5a5a9154-95ac-4e3d-90df-558816297407)]
 interface nsINavHistoryBatchCallback : nsISupports {
   void runBatched(in nsISupports aUserData);
--- a/toolkit/components/places/nsNavHistory.cpp
+++ b/toolkit/components/places/nsNavHistory.cpp
@@ -3679,16 +3679,22 @@ NS_IMETHODIMP
 nsNavHistory::MakeGuid(nsACString& aGuid) {
   if (NS_FAILED(GenerateGUID(aGuid))) {
     MOZ_ASSERT(false, "Shouldn't fail to create a guid!");
     aGuid.SetIsVoid(true);
   }
   return NS_OK;
 }
 
+NS_IMETHODIMP
+nsNavHistory::HashURL(const nsACString& aSpec, const nsACString& aMode,
+                      uint64_t* aHash) {
+  return places::HashURL(aSpec, aMode, aHash);  // forwards to Helpers.cpp
+}
+
 // nsNavHistory::CheckIsRecentEvent
 //
 //    Sees if this URL happened "recently."
 //
 //    It is always removed from our recent list no matter what. It only counts
 //    as "recent" if the event happened more recently than our event
 //    threshold ago.