Bug 1248499: [webext] Implement tabs.detectLanguage. r?billm draft
author Kris Maglione <maglione.k@gmail.com>
Fri, 26 Feb 2016 13:20:28 -0800
changeset 335070 a3d36e34e27ba002fc686698db7012047fd9fc5e
parent 335069 36d6bc68fe0f21d87b306c7712e939d8ae537b88
child 515061 c7049bc88ec1968deea48acb7a057490840e71c4
push id 11707
push user maglione.k@gmail.com
push date Fri, 26 Feb 2016 21:21:54 +0000
reviewers billm
bugs 1248499
milestone 47.0a1
Bug 1248499: [webext] Implement tabs.detectLanguage. r?billm MozReview-Commit-ID: F4GpSesj2ho
browser/components/extensions/ext-tabs.js
browser/components/extensions/test/browser/browser.ini
browser/components/extensions/test/browser/browser_ext_tabs_detectLanguage.js
browser/components/extensions/test/browser/file_language_fr_en.html
browser/components/extensions/test/browser/file_language_ja.html
toolkit/components/extensions/ExtensionContent.jsm
toolkit/components/extensions/ExtensionUtils.jsm
--- a/browser/components/extensions/ext-tabs.js
+++ b/browser/components/extensions/ext-tabs.js
@@ -638,16 +638,29 @@ extensions.registerSchemaAPI("tabs", nul
           width: browser.clientWidth,
           height: browser.clientHeight,
         };
 
         return context.sendMessage(browser.messageManager, "Extension:Capture",
                                    message, recipient);
       },
 
+      // Asks the tab's content to detect its language; a null tabId means the active tab.
+      detectLanguage: function(tabId) {
+        let tab = tabId === null ? TabManager.activeTab : TabManager.getTab(tabId);
+        if (!tab) {
+          return Promise.reject({message: `Invalid tab ID: ${tabId}`});
+        }
+
+        // Address the message to the tab's current inner window.
+        let {innerWindowID, messageManager} = tab.linkedBrowser;
+        return context.sendMessage(messageManager, "Extension:DetectLanguage",
+                                   {}, {innerWindowID});
+      },
+
       _execute: function(tabId, details, kind, method) {
         let tab = tabId !== null ? TabManager.getTab(tabId) : TabManager.activeTab;
         let mm = tab.linkedBrowser.messageManager;
 
         let options = {
           js: [],
           css: [],
         };
--- a/browser/components/extensions/test/browser/browser.ini
+++ b/browser/components/extensions/test/browser/browser.ini
@@ -4,16 +4,18 @@ support-files =
   context.html
   ctxmenu-image.png
   context_tabs_onUpdated_page.html
   context_tabs_onUpdated_iframe.html
   file_popup_api_injection_a.html
   file_popup_api_injection_b.html
   file_iframe_document.html
   file_iframe_document.sjs
+  file_language_fr_en.html
+  file_language_ja.html
 
 [browser_ext_simple.js]
 [browser_ext_commands.js]
 [browser_ext_currentWindow.js]
 [browser_ext_browserAction_simple.js]
 [browser_ext_browserAction_pageAction_icon.js]
 [browser_ext_browserAction_context.js]
 [browser_ext_browserAction_disabled.js]
@@ -22,16 +24,17 @@ support-files =
 [browser_ext_browserAction_popup.js]
 [browser_ext_popup_api_injection.js]
 [browser_ext_contextMenus.js]
 [browser_ext_getViews.js]
 [browser_ext_lastError.js]
 [browser_ext_runtime_setUninstallURL.js]
 [browser_ext_tabs_audio.js]
 [browser_ext_tabs_captureVisibleTab.js]
+[browser_ext_tabs_detectLanguage.js]
 [browser_ext_tabs_events.js]
 [browser_ext_tabs_executeScript.js]
 [browser_ext_tabs_executeScript_good.js]
 [browser_ext_tabs_executeScript_bad.js]
 [browser_ext_tabs_executeScript_runAt.js]
 [browser_ext_tabs_insertCSS.js]
 [browser_ext_tabs_query.js]
 [browser_ext_tabs_getCurrent.js]
new file mode 100644
--- /dev/null
+++ b/browser/components/extensions/test/browser/browser_ext_tabs_detectLanguage.js
@@ -0,0 +1,57 @@
+/* -*- Mode: indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* vim: set sts=2 sw=2 et tw=80: */
+"use strict";
+
+// Checks that tabs.detectLanguage reports the expected language for two fixture pages.
+add_task(function* testDetectLanguage() {
+  let extension = ExtensionTestUtils.loadExtension({
+    manifest: {
+      "permissions": ["tabs"],
+    },
+    background() {
+      const BASE_PATH = "browser/browser/components/extensions/test/browser";
+      // Opens url in a new tab; resolves with the tab once onUpdated reports a URL change for it.
+      function loadTab(url) {
+        let newTabId;
+        let urlChanged = new Promise(resolve => {
+          browser.tabs.onUpdated.addListener(function listener(id, changeInfo, tab) {
+            if (id !== newTabId || !changeInfo.url) {
+              return;
+            }
+            browser.tabs.onUpdated.removeListener(listener);
+            resolve(tab);
+          });
+        });
+        return browser.tabs.create({url}).then(({id}) => {
+          newTabId = id;
+          return urlChanged;
+        });
+      }
+
+      // Loads url, asserts on its detected language, then closes the tab again.
+      function checkLanguage(url, expected, message) {
+        return loadTab(url).then(tab => {
+          return browser.tabs.detectLanguage(tab.id).then(lang => {
+            browser.test.assertEq(expected, lang, message);
+            return browser.tabs.remove(tab.id);
+          });
+        });
+      }
+
+      checkLanguage(`http://example.co.jp/${BASE_PATH}/file_language_ja.html`, "ja",
+                    "Japanese document should be detected as Japanese").then(() => {
+        return checkLanguage(`http://example.co.jp/${BASE_PATH}/file_language_fr_en.html`, "fr",
+                             "French/English document should be detected as primarily French");
+      }).then(() => {
+        browser.test.notifyPass("detectLanguage");
+      }).catch(error => {
+        browser.test.fail(`Error: ${error} :: ${error.stack}`);
+        browser.test.notifyFail("detectLanguage");
+      });
+    },
+  });
+
+  yield extension.startup();
+  yield extension.awaitFinish("detectLanguage");
+  yield extension.unload();
+});
new file mode 100644
--- /dev/null
+++ b/browser/components/extensions/test/browser/file_language_fr_en.html
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html lang="fr">
+<head>
+  <meta charset="UTF-8">
+  <title></title>
+</head>
+<body>
+  France is the largest country in Western Europe and the third-largest in Europe as a whole.
+  A accès aux chiens et aux frontaux qui lui ont été il peut consulter et modifier ses collections et exporter
+  Cet article concerne le pays européen aujourd’hui appelé République française. Pour d’autres usages du nom France,
+  Pour une aide rapide et effective, veuiller trouver votre aide dans le menu ci-dessus.
+  Motoring events began soon after the construction of the first successful gasoline-fueled automobiles. The quick brown fox jumps over the lazy dog.
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/browser/components/extensions/test/browser/file_language_ja.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html lang="ja">
+<head>
+  <meta charset="UTF-8">
+  <title></title>
+</head>
+<body>
+  このペ ジでは アカウントに指定された予算の履歴を一覧にしています それぞれの項目には 予算額と特定期間のステ タスが表示されます 現在または今後の予算を設定するには
+</body>
+</html>
--- a/toolkit/components/extensions/ExtensionContent.jsm
+++ b/toolkit/components/extensions/ExtensionContent.jsm
@@ -20,38 +20,40 @@ const Cu = Components.utils;
 const Cr = Components.results;
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://gre/modules/Services.jsm");
 Cu.import("resource://gre/modules/AppConstants.jsm");
 
 XPCOMUtils.defineLazyModuleGetter(this, "ExtensionManagement",
                                   "resource://gre/modules/ExtensionManagement.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "LanguageDetector",
+                                  "resource:///modules/translation/LanguageDetector.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "MatchPattern",
                                   "resource://gre/modules/MatchPattern.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "PrivateBrowsingUtils",
                                   "resource://gre/modules/PrivateBrowsingUtils.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "PromiseUtils",
                                   "resource://gre/modules/PromiseUtils.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "MessageChannel",
                                   "resource://gre/modules/MessageChannel.jsm");
-
 XPCOMUtils.defineLazyModuleGetter(this, "WebNavigationFrames",
                                   "resource://gre/modules/WebNavigationFrames.jsm");
 
 Cu.import("resource://gre/modules/ExtensionUtils.jsm");
 var {
   runSafeSyncWithoutClone,
   BaseContext,
   LocaleData,
   MessageBroker,
   Messenger,
   injectAPI,
   flushJarCache,
   detectLanguage,
+  promiseDocumentReady,
 } = ExtensionUtils;
 
 function isWhenBeforeOrSame(when1, when2) {
   let table = {"document_start": 0,
                "document_end": 1,
                "document_idle": 2};
   return table[when1] <= table[when2];
 }
@@ -727,16 +729,17 @@ ExtensionManager = {
   },
 };
 
 class ExtensionGlobal {
   constructor(global) {
     this.global = global;
 
     MessageChannel.addListener(global, "Extension:Capture", this);
+    MessageChannel.addListener(global, "Extension:DetectLanguage", this);
     MessageChannel.addListener(global, "Extension:Execute", this);
     MessageChannel.addListener(global, "WebNavigation:GetFrame", this);
     MessageChannel.addListener(global, "WebNavigation:GetAllFrames", this);
 
     this.broker = new MessageBroker([global]);
 
     this.windowId = global.content
                           .QueryInterface(Ci.nsIInterfaceRequestor)
@@ -755,16 +758,18 @@ class ExtensionGlobal {
       innerWindowID: windowId(this.global.content),
     };
   }
 
   receiveMessage({target, messageName, recipient, data}) {
     switch (messageName) {
       case "Extension:Capture":
         return this.handleExtensionCapture(data.width, data.height, data.options);
+      case "Extension:DetectLanguage":
+        return this.handleDetectLanguage(target);
       case "Extension:Execute":
         return this.handleExtensionExecute(target, recipient.extensionId, data.options);
       case "WebNavigation:GetFrame":
         return this.handleWebNavigationGetFrame(data.options);
       case "WebNavigation:GetAllFrames":
         return this.handleWebNavigationGetAllFrames();
     }
   }
@@ -785,16 +790,46 @@ class ExtensionGlobal {
     // settings like full zoom come into play.
     ctx.scale(canvas.width / win.innerWidth, canvas.height / win.innerHeight);
 
     ctx.drawWindow(win, win.scrollX, win.scrollY, win.innerWidth, win.innerHeight, "#fff");
 
     return canvas.toDataURL(`image/${options.format}`, options.quality / 100);
   }
 
+  /**
+   * Detects the primary language of the document in `target` once its DOM is ready.
+   * @param {object} target The message target whose `content` document is analyzed.
+   * @returns {Promise<string>} The detected language code, or "und" if unknown.
+   */
+  handleDetectLanguage(target) {
+    let doc = target.content.document;
+
+    return promiseDocumentReady(doc).then(() => {
+      let elem = doc.documentElement;
+      let language = (elem.getAttribute("xml:lang") || elem.getAttribute("lang") ||
+                      doc.contentLanguage || null);
+
+      // We only want the last element of the TLD here. Only country codes
+      // have any effect on the results, but other values cause no harm.
+      let tld = doc.location.hostname.match(/[a-z]*$/)[0];
+
+      // CLD2 can analyze raw HTML, but that takes much more memory, and its
+      // emscripten heap can never shrink again, so send plain text instead.
+      // Cap the text so that a huge page cannot grow that heap without bound.
+      let encoder = Cc["@mozilla.org/layout/documentEncoder;1?type=text/plain"].createInstance(Ci.nsIDocumentEncoder);
+      encoder.init(doc, "text/plain", encoder.SkipInvisibleContent);
+      let text = encoder.encodeToStringWithMaxLength(60 * 1024);
+
+      // LanguageDetector reports an undetermined language as "un"; extensions expect "und".
+      return LanguageDetector.detectLanguage({language, tld, text, encoding: doc.characterSet})
+                             .then(result => (result.language === "un" ? "und" : result.language));
+    });
+  }
+
   handleExtensionExecute(target, extensionId, options) {
     return DocumentManager.executeScript(target, extensionId, options).then(result => {
       try {
         // Make sure we can structured-clone the result value before
         // we try to send it back over the message manager.
         Cu.cloneInto(result, target);
       } catch (e) {
         return Promise.reject({message: "Script returned non-structured-clonable data"});
--- a/toolkit/components/extensions/ExtensionUtils.jsm
+++ b/toolkit/components/extensions/ExtensionUtils.jsm
@@ -628,16 +628,38 @@ function injectAPI(source, dest) {
       let obj = Cu.createObjectIn(dest, {defineAs: prop});
       injectAPI(desc.value, obj);
     } else {
       Object.defineProperty(dest, prop, desc);
     }
   }
 }
 
+/**
+ * Returns a Promise which resolves once the given document has been parsed: at once if its
+ * readyState is "interactive" or "complete", else on the DOMContentLoaded event targeting `doc`.
+ * @param {Document} doc The document to wait for.
+ * @returns {Promise<Document>} Resolves with `doc` itself.
+ */
+function promiseDocumentReady(doc) {
+  if (["interactive", "complete"].includes(doc.readyState)) {
+    return Promise.resolve(doc);
+  }
+  return new Promise(resolve => {
+    let listener = event => {
+      if (event.target !== event.currentTarget) {
+        return;
+      }
+      doc.removeEventListener("DOMContentLoaded", listener, true);
+      resolve(doc);
+    };
+    doc.addEventListener("DOMContentLoaded", listener, true);
+  });
+}
+
 /*
  * Messaging primitives.
  */
 
 var nextBrokerId = 1;
 
 var MESSAGES = [
   "Extension:Message",
@@ -1003,9 +1025,10 @@ this.ExtensionUtils = {
   MessageBroker,
   Messenger,
   PlatformInfo,
   SpreadArgs,
   extend,
   flushJarCache,
   instanceOf,
   detectLanguage,
+  promiseDocumentReady,
 };