Bug 1173823 - Always use document.baseURI as the uri parameter of the _readerParse method, r=Gijs
MozReview-Commit-ID: RnICyZALNb
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -191,35 +191,41 @@ this.ReaderMode = {
* Gets an article from a loaded browser's document. This method will not attempt
* to parse certain URIs (e.g. about: URIs).
*
* @param doc A document to parse.
* @return {Promise}
* @resolves JS object representing the article, or null if no article is found.
*/
parseDocument: Task.async(function* (doc) {
- let uri = Services.io.newURI(doc.documentURI, null, null);
- if (!this._shouldCheckUri(uri)) {
+ let documentURI = Services.io.newURI(doc.documentURI, null, null);
+ let baseURI = Services.io.newURI(doc.baseURI, null, null);
+ if (!this._shouldCheckUri(documentURI) || !this._shouldCheckUri(baseURI, true)) {
this.log("Reader mode disabled for URI");
return null;
}
- return yield this._readerParse(uri, doc);
+ return yield this._readerParse(baseURI, doc);
}),
/**
* Downloads and parses a document from a URL.
*
* @param url URL to download and parse.
* @return {Promise}
* @resolves JS object representing the article, or null if no article is found.
*/
downloadAndParseDocument: Task.async(function* (url) {
- let uri = Services.io.newURI(url, null, null);
let doc = yield this._downloadDocument(url);
+ let uri = Services.io.newURI(doc.baseURI, null, null);
+ if (!this._shouldCheckUri(uri, true)) {
+ this.log("Reader mode disabled for URI");
+ return null;
+ }
+
return yield this._readerParse(uri, doc);
}),
_downloadDocument: function (url) {
let histogram = Services.telemetry.getHistogramById("READER_MODE_DOWNLOAD_RESULT");
return new Promise((resolve, reject) => {
let xhr = new XMLHttpRequest();
xhr.open("GET", url, true);
@@ -362,47 +368,47 @@ this.ReaderMode = {
"mail.google.com",
"github.com",
"pinterest.com",
"reddit.com",
"twitter.com",
"youtube.com",
],
- _shouldCheckUri: function (uri) {
+ _shouldCheckUri: function (uri, isBaseUri = false) {
if (!(uri.schemeIs("http") || uri.schemeIs("https"))) {
this.log("Not parsing URI scheme: " + uri.scheme);
return false;
}
try {
uri.QueryInterface(Ci.nsIURL);
} catch (ex) {
// If this doesn't work, presumably the URL is not well-formed or something
return false;
}
// Sadly, some high-profile pages have false positives, so bail early for those:
let asciiHost = uri.asciiHost;
- if (this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
+ if (!isBaseUri && this._blockedHosts.some(blockedHost => asciiHost.endsWith(blockedHost))) {
return false;
}
- if (!uri.filePath || uri.filePath == "/") {
+ if (!isBaseUri && (!uri.filePath || uri.filePath == "/")) {
this.log("Not parsing home page: " + uri.spec);
return false;
}
return true;
},
/**
* Attempts to parse a document into an article. Heavy lifting happens
* in readerWorker.js.
*
- * @param uri The article URI.
+ * @param uri The base URI of the document being parsed (callers pass doc.baseURI, not doc.documentURI).
* @param doc The document to parse.
* @return {Promise}
* @resolves JS object representing the article, or null if no article is found.
*/
_readerParse: Task.async(function* (uri, doc) {
let histogram = Services.telemetry.getHistogramById("READER_MODE_PARSE_RESULT");
if (this.parseNodeLimit) {
let numTags = doc.getElementsByTagName("*").length;