Bug 1393924 - Collect description and preview image and store it into moz_places draft
author Ursula Sarracini
Thu, 07 Sep 2017 16:51:02 -0400
changeset 660906 472b9572253cbe776fde4a52ea28b84eb42a1b44
parent 660738 37b95547f0d27565452136d16b2df2857be840f6
child 730430 d47aa2bfdf21a2d4627faa736c9338c9a1c6fc7e
push id 78606
push user usarracini@mozilla.com
push date Thu, 07 Sep 2017 20:51:30 +0000
bugs 1393924
milestone 57.0a1
Bug 1393924 - Collect description and preview image and store it into moz_places MozReview-Commit-ID: 4ZPGMpz21S9
browser/base/content/browser.js
browser/base/content/content.js
browser/base/content/tabbrowser.xml
browser/base/content/test/metaTags/.eslintrc.js
browser/base/content/test/metaTags/browser.ini
browser/base/content/test/metaTags/browser_meta_tags.js
browser/base/content/test/metaTags/meta_tags.html
browser/base/moz.build
browser/modules/ContentMetaHandler.jsm
browser/modules/moz.build
--- a/browser/base/content/browser.js
+++ b/browser/base/content/browser.js
@@ -1285,17 +1285,17 @@ var gBrowserInit = {
     Services.obs.addObserver(gPluginHandler.NPAPIPluginCrashed, "plugin-crashed");
 
     window.addEventListener("AppCommand", HandleAppCommandEvent, true);
 
     // These routines add message listeners. They must run before
     // loading the frame script to ensure that we don't miss any
     // message sent between when the frame script is loaded and when
     // the listener is registered.
-    DOMLinkHandler.init();
+    DOMEventHandler.init();
     gPageStyleMenu.init();
     LanguageDetectionListener.init();
     BrowserOnClick.init();
     FeedHandler.init();
     CompactTheme.init();
     AboutPrivateBrowsingListener.init();
     TrackingProtection.init();
     CaptivePortalWatcher.init();
@@ -3699,40 +3699,50 @@ var newWindowButtonObserver = {
       if (link.url) {
         let data = await getShortcutOrURIAndPostData(link.url);
         // Allow third-party services to fixup this URL.
         openNewWindowWith(data.url, null, data.postData, true);
       }
     }
   }
 }
-
-const DOMLinkHandler = {
+const DOMEventHandler = {
   init() {
     let mm = window.messageManager;
     mm.addMessageListener("Link:AddFeed", this);
     mm.addMessageListener("Link:SetIcon", this);
     mm.addMessageListener("Link:AddSearch", this);
+    mm.addMessageListener("Meta:SetPageInfo", this);
   },
 
   receiveMessage(aMsg) {
     switch (aMsg.name) {
       case "Link:AddFeed":
         let link = {type: aMsg.data.type, href: aMsg.data.href, title: aMsg.data.title};
         FeedHandler.addFeed(link, aMsg.target);
         break;
 
       case "Link:SetIcon":
         this.setIcon(aMsg.target, aMsg.data.url, aMsg.data.loadingPrincipal);
         break;
 
       case "Link:AddSearch":
         this.addSearch(aMsg.target, aMsg.data.engine, aMsg.data.url);
         break;
-    }
+
+      case "Meta:SetPageInfo":
+        this.setPageInfo(aMsg.data);
+        break;
+    }
+  },
+
+  setPageInfo(aData) {
+    // Hand the fields of the Meta:SetPageInfo payload over to tabbrowser.
+    gBrowser.setPageInfo(aData.url, aData.description, aData.previewImageURL);
+    return true;
+  },
 
   setIcon(aBrowser, aURL, aLoadingPrincipal) {
     if (gBrowser.isFailedIcon(aURL))
       return false;
 
     let tab = gBrowser.getTabForBrowser(aBrowser);
     if (!tab)
--- a/browser/base/content/content.js
+++ b/browser/base/content/content.js
@@ -12,16 +12,17 @@ var {classes: Cc, interfaces: Ci, utils:
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://gre/modules/Services.jsm");
 
 XPCOMUtils.defineLazyModuleGetters(this, {
   E10SUtils: "resource:///modules/E10SUtils.jsm",
   BrowserUtils: "resource://gre/modules/BrowserUtils.jsm",
   ContentLinkHandler: "resource:///modules/ContentLinkHandler.jsm",
+  ContentMetaHandler: "resource:///modules/ContentMetaHandler.jsm",
   ContentWebRTC: "resource:///modules/ContentWebRTC.jsm",
   SpellCheckHelper: "resource://gre/modules/InlineSpellChecker.jsm",
   InlineSpellCheckerContent: "resource://gre/modules/InlineSpellCheckerContent.jsm",
   LoginManagerContent: "resource://gre/modules/LoginManagerContent.jsm",
   LoginFormFactory: "resource://gre/modules/LoginManagerContent.jsm",
   InsecurePasswordUtils: "resource://gre/modules/InsecurePasswordUtils.jsm",
   PluginContent: "resource:///modules/PluginContent.jsm",
   PrivateBrowsingUtils: "resource://gre/modules/PrivateBrowsingUtils.jsm",
@@ -764,16 +765,17 @@ var ClickEventHandler = {
     // Note: makeURI() will throw if aUri is not a valid URI.
     return [href ? Services.io.newURI(href, null, baseURI).spec : null, null,
             node && node.ownerDocument.nodePrincipal];
   }
 };
 ClickEventHandler.init();
 
 ContentLinkHandler.init(this);
+ContentMetaHandler.init(this);
 
 // TODO: Load this lazily so the JSM is run only if a relevant event/message fires.
 var pluginContent = new PluginContent(global);
 
 addEventListener("DOMWindowFocus", function(event) {
   sendAsyncMessage("DOMWindowFocus", {});
 }, false);
 
--- a/browser/base/content/tabbrowser.xml
+++ b/browser/base/content/tabbrowser.xml
@@ -1017,16 +1017,30 @@
         <body>
           <![CDATA[
             let browser = aTab ? this.getBrowserForTab(aTab) : this.selectedBrowser;
             return browser.mIconURL;
           ]]>
         </body>
       </method>
 
+      <method name="setPageInfo">
+        <parameter name="aURL"/>
+        <parameter name="aDescription"/>
+        <parameter name="aPreviewImage"/>
+        <body>
+          <![CDATA[
+            if (aURL) { // the update is keyed by URL; without one there is nothing to do
+              let pageInfo = {url: aURL, description: aDescription, previewImageURL: aPreviewImage};
+              PlacesUtils.history.update(pageInfo).catch(Components.utils.reportError);
+            }
+          ]]>
+        </body>
+      </method>
+
       <method name="shouldLoadFavIcon">
         <parameter name="aURI"/>
         <body>
           <![CDATA[
             return (aURI &&
                     Services.prefs.getBoolPref("browser.chrome.site_icons") &&
                     Services.prefs.getBoolPref("browser.chrome.favicons") &&
                     ("schemeIs" in aURI) && (aURI.schemeIs("http") || aURI.schemeIs("https")));
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/.eslintrc.js
@@ -0,0 +1,7 @@
+"use strict";
+
+module.exports = {
+  "extends": [
+    "plugin:mozilla/browser-test",
+  ]
+};
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/browser.ini
@@ -0,0 +1,4 @@
+[DEFAULT]
+support-files =
+  meta_tags.html
+[browser_meta_tags.js]
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/browser_meta_tags.js
@@ -0,0 +1,30 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+/* globals gBrowser */
+/* This tests that with the page meta_tags.html, ContentMetaHandler.jsm parses out
+ * the meta tags available and only stores the best one for description and one for
+ * preview image url. In the case of this test, the best defined meta tags are
+ * "og:description" and "og:image:url". The list of meta tags and their order of
+ * preference is found in ContentMetaHandler.jsm. Because there is debounce logic
+ * in ContentMetaHandler.jsm to only make one single SQL update, we have to wait
+ * for some time before checking that the page info was stored correctly.
+ */
+add_task(async function test() {
+  Components.utils.import("resource://gre/modules/PlacesUtils.jsm");
+  const URL = "https://example.com/browser/browser/base/content/test/metaTags/meta_tags.html";
+  let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, URL);
+
+  // Poll until Places has stored both fields. fetch() may resolve to null while
+  // the page is not in history yet, so treat that as "not ready", not an error.
+  let pageInfo;
+  await BrowserTestUtils.waitForCondition(async () => {
+    pageInfo = await PlacesUtils.history.fetch(URL, {"includeMeta": true});
+    return !!pageInfo && !!pageInfo.previewImageURL && !!pageInfo.description;
+  }, "Timed out waiting for the page info to be stored");
+  is(pageInfo.description, "og:description", "got the correct description");
+  is(pageInfo.previewImageURL.href, "og:image:url", "got the correct preview image");
+  await BrowserTestUtils.removeTab(tab);
+});
+
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/meta_tags.html
@@ -0,0 +1,18 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <meta charset="UTF-8" />
+    <title>MetaTags</title>
+    <meta property="twitter:description" content="twitter:description" />
+    <meta property="og:description" content="og:description" />
+    <meta name="description" content="description" />
+    <meta name="unknown:tag" content="unknown:tag" />
+    <meta property="og:image" content="og:image" />
+    <meta property="twitter:image" content="twitter:image" />
+    <meta property="og:image:url" content="og:image:url" />
+    <meta name="thumbnail" content="thumbnail" />
+  </head>
+  <body>
+  </body>
+</html>
+
--- a/browser/base/moz.build
+++ b/browser/base/moz.build
@@ -19,16 +19,17 @@ MOCHITEST_CHROME_MANIFESTS += [
 
 BROWSER_CHROME_MANIFESTS += [
     'content/test/about/browser.ini',
     'content/test/alerts/browser.ini',
     'content/test/captivePortal/browser.ini',
     'content/test/contextMenu/browser.ini',
     'content/test/forms/browser.ini',
     'content/test/general/browser.ini',
+    'content/test/metaTags/browser.ini',
     'content/test/newtab/browser.ini',
     'content/test/pageinfo/browser.ini',
     'content/test/performance/browser.ini',
     'content/test/permissions/browser.ini',
     'content/test/plugins/browser.ini',
     'content/test/popupNotifications/browser.ini',
     'content/test/popups/browser.ini',
     'content/test/referrer/browser.ini',
new file mode 100644
--- /dev/null
+++ b/browser/modules/ContentMetaHandler.jsm
@@ -0,0 +1,168 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+const {utils: Cu, interfaces: Ci, classes: Cc} = Components;
+Cu.importGlobalProperties(["URL"]);
+
+// Debounce time in milliseconds - this should be long enough to account for
+// sync script tags that could appear between desired meta tags
+const TIMEOUT_DELAY = 1000;
+
+// Possible description tags, listed in order from least favourable to most favourable
+const DESCRIPTION_RULES = [
+  "twitter:description",
+  "description",
+  "og:description"
+];
+
+// Possible image tags, listed in order from least favourable to most favourable
+const PREVIEW_IMAGE_RULES = [
+  "thumbnail",
+  "twitter:image",
+  "og:image",
+  "og:image:url",
+  "og:image:secure_url"
+];
+
+/*
+ * Checks if the incoming meta tag has a greater score than the current best
+ * score by checking the index of the meta tag in the list of rules provided.
+ * Tags that are not in the list score -1 and therefore never win.
+ *
+ * @param {Array} aRules
+ *          The list of rules for a given type of meta tag
+ * @param {String} aTag
+ *          The name or property of the incoming meta tag
+ * @param {Object} aEntry
+ *          The current best entry for the given meta tag; its currMaxScore
+ *          is the rule index of the best tag seen so far (-1 if none)
+ *
+ * @returns {Boolean} true if the incoming meta tag is better than the current
+ *                    best meta tag of that same kind, false otherwise
+ */
+function shouldExtractMetadata(aRules, aTag, aEntry) {
+  return aRules.indexOf(aTag) > aEntry.currMaxScore;
+}
+
+this.EXPORTED_SYMBOLS = [ "ContentMetaHandler" ];
+
+/*
+ * This listens to DOMMetaAdded events and collects relevant metadata about the
+ * meta tag received. Then, it sends the metadata gathered from the meta tags
+ * and the url of the page as its payload to be inserted into moz_places.
+ */
+
+this.ContentMetaHandler = {
+  /*
+   * Starts listening for DOMMetaAdded events on the given global.
+   *
+   * @param {Object} chromeGlobal
+   *          The global to listen on; also used to send Meta:SetPageInfo
+   */
+  init(chromeGlobal) {
+    // Stores a mapping of the best description and preview image collected so far
+    // for a given URL. Created only once so that a later init() call does not
+    // drop entries that are still waiting for their debounce timer.
+    if (!this._metaTags) {
+      this._metaTags = new Map();
+    }
+
+    chromeGlobal.addEventListener("DOMMetaAdded", event => {
+      const metaTag = event.originalTarget;
+
+      // If there's no meta tag, ignore this. This has to be checked before
+      // anything is read off the tag.
+      if (!metaTag || !metaTag.ownerDocument) {
+        return;
+      }
+
+      // Only the top-level document describes the page; ignore sub-frames
+      const window = metaTag.ownerGlobal;
+      if (!window || window != window.top) {
+        return;
+      }
+      this.handleMetaTag(metaTag, chromeGlobal);
+    });
+  },
+
+  /*
+   * Records the given meta tag if it is the best description or preview image
+   * seen so far for its page, and (re)arms the debounce timer that eventually
+   * sends the best values to the parent in a Meta:SetPageInfo message.
+   *
+   * @param {Element} metaTag
+   *          The meta element that was added to the document
+   * @param {Object} chromeGlobal
+   *          The global used to send the Meta:SetPageInfo message
+   */
+  handleMetaTag(metaTag, chromeGlobal) {
+    const url = metaTag.ownerDocument.documentURI;
+
+    let name = metaTag.name;
+    let prop = metaTag.getAttributeNS(null, "property");
+    if (!name && !prop) {
+      return;
+    }
+
+    let tag = name || prop;
+
+    // A tag without content has nothing to contribute, so it must neither
+    // create an entry nor restart the debounce timer.
+    const value = metaTag.getAttributeNS(null, "content");
+    if (!value) {
+      return;
+    }
+
+    const entry = this._metaTags.get(url) || {
+      description: {value: null, currMaxScore: -1},
+      image: {value: null, currMaxScore: -1},
+      timeout: null
+    };
+
+    if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
+      // Extract the description
+      entry.description.value = value;
+      entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
+    } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
+      // Extract the preview image. The content is page-controlled and the URL
+      // constructor throws on input it cannot parse, so skip such tags.
+      let imageURL;
+      try {
+        imageURL = new URL(value, url).href;
+      } catch (ex) {
+        return;
+      }
+      entry.image.value = imageURL;
+      entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
+    } else {
+      // We don't care about other meta tags
+      return;
+    }
+
+    if (!this._metaTags.has(url)) {
+      this._metaTags.set(url, entry);
+    }
+
+    if (entry.timeout) {
+      // Another tag arrived while waiting: re-set the delay to push the send back
+      entry.timeout.delay = TIMEOUT_DELAY;
+    } else {
+      // We want to debounce incoming meta tags until we're certain we have the
+      // best one for description and preview image, and only store that one
+      entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
+      entry.timeout.initWithCallback(() => {
+        entry.timeout = null;
+
+        // Save description and preview image to moz_places
+        chromeGlobal.sendAsyncMessage("Meta:SetPageInfo", {
+          url,
+          description: entry.description.value,
+          previewImageURL: entry.image.value
+        });
+        this._metaTags.delete(url);
+      }, TIMEOUT_DELAY, Ci.nsITimer.TYPE_ONE_SHOT);
+    }
+  }
+};
--- a/browser/modules/moz.build
+++ b/browser/modules/moz.build
@@ -127,16 +127,17 @@ EXTRA_JS_MODULES += [
     'AboutNewTab.jsm',
     'AttributionCode.jsm',
     'BrowserUITelemetry.jsm',
     'BrowserUsageTelemetry.jsm',
     'CastingApps.jsm',
     'ContentClick.jsm',
     'ContentCrashHandlers.jsm',
     'ContentLinkHandler.jsm',
+    'ContentMetaHandler.jsm',
     'ContentObservers.js',
     'ContentSearch.jsm',
     'ContentWebRTC.jsm',
     'DirectoryLinksProvider.jsm',
     'E10SUtils.jsm',
     'ExtensionsUI.jsm',
     'Feeds.jsm',
     'FormSubmitObserver.jsm',