Bug 1173823 - Always use document.baseURI as the uri parameter of _readerParse method, r=Gijs draft
author Evan Tseng <evan@tseng.io>
Mon, 07 Nov 2016 17:31:23 +0800
changeset 435116 80b27a4ba1dfd2e491dad17f1ea9a2d3b744b25b
parent 434636 908557c762f798605a2f96e4c943791cbada1b50
child 536227 2f51ffde3e7001fc835403aa2eb0f86fca15a327
push id 34948
push user bmo:evan@tseng.io
push date Tue, 08 Nov 2016 03:53:50 +0000
reviewers Gijs
bugs 1173823
milestone 52.0a1
Bug 1173823 - Always use document.baseURI as the uri parameter of _readerParse method, r=Gijs MozReview-Commit-ID: RnICyZALNb
toolkit/components/reader/ReaderMode.jsm
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -191,35 +191,41 @@ this.ReaderMode = {
    * Gets an article from a loaded browser's document. This method will not attempt
    * to parse certain URIs (e.g. about: URIs).
    *
    * @param doc A document to parse.
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
   parseDocument: Task.async(function* (doc) {
-    let uri = Services.io.newURI(doc.documentURI, null, null);
-    if (!this._shouldCheckUri(uri)) {
+    let documentURI = Services.io.newURI(doc.documentURI, null, null);
+    let baseURI = Services.io.newURI(doc.baseURI, null, null);
+    if (!this._shouldCheckUri(documentURI) || !this._shouldCheckUri(baseURI, true)) {
       this.log("Reader mode disabled for URI");
       return null;
     }
 
-    return yield this._readerParse(uri, doc);
+    return yield this._readerParse(baseURI, doc);
   }),
 
   /**
    * Downloads and parses a document from a URL.
    *
    * @param url URL to download and parse.
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
   downloadAndParseDocument: Task.async(function* (url) {
-    let uri = Services.io.newURI(url, null, null);
     let doc = yield this._downloadDocument(url);
+    let uri = Services.io.newURI(doc.baseURI, null, null);
+    if (!this._shouldCheckUri(uri, true)) {
+      this.log("Reader mode disabled for URI");
+      return null;
+    }
+
     return yield this._readerParse(uri, doc);
   }),
 
   _downloadDocument: function (url) {
     let histogram = Services.telemetry.getHistogramById("READER_MODE_DOWNLOAD_RESULT");
     return new Promise((resolve, reject) => {
       let xhr = new XMLHttpRequest();
       xhr.open("GET", url, true);
@@ -362,47 +368,47 @@ this.ReaderMode = {
     "mail.google.com",
     "github.com",
     "pinterest.com",
     "reddit.com",
     "twitter.com",
     "youtube.com",
   ],
 
-  _shouldCheckUri: function (uri) {
+  _shouldCheckUri: function (uri, isBaseUri = false) {
     if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
       this.log("Not parsing URI scheme: " + uri.scheme);
       return false;
     }
 
     try {
       uri.QueryInterface(Ci.nsIURL);
     } catch (ex) {
       // If this doesn't work, presumably the URL is not well-formed or something
       return false;
     }
     // Sadly, some high-profile pages have false positives, so bail early for those:
     let asciiHost = uri.asciiHost;
-    if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
+    if (!isBaseUri && this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
       return false;
     }
 
-    if (!uri.filePath || uri.filePath == "/") {
+    if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) {
       this.log("Not parsing home page: " + uri.spec);
       return false;
     }
 
     return true;
   },
 
   /**
    * Attempts to parse a document into an article. Heavy lifting happens
    * in readerWorker.js.
    *
-   * @param uri The article URI.
+   * @param uri The base URI of the article.
    * @param doc The document to parse.
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
   _readerParse: Task.async(function* (uri, doc) {
     let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
     if (this.parseNodeLimit) {
       let numTags = doc.getElementsByTagName("*").length;