--- a/mobile/android/tests/browser/robocop/testReadingListCache.js
+++ b/mobile/android/tests/browser/robocop/testReadingListCache.js
@@ -26,17 +26,17 @@ var TEST_PAGES = [
},
{
url: URL_PREFIX + "not_an_article.html",
expected: null
},
{
url: URL_PREFIX + "developer.mozilla.org/en/XULRunner/Build_Instructions.html",
expected: {
- title: "Building XULRunner",
+ title: "Building XULRunner | MDN",
byline: null,
excerpt: "XULRunner is built using basically the same process as Firefox or other applications. Please read and follow the general Build Documentation for instructions on how to get sources and set up build prerequisites.",
}
},
];
add_task(function* test_article_not_found() {
let article = yield ReaderMode.getArticleFromCache(TEST_PAGES[0].url);
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -114,17 +114,17 @@ Readability.prototype = {
DEFAULT_MAX_PAGES: 5,
// Element tags to score by default.
DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),
// All of the regular expressions in use within readability.
// Defined up here so we don't instantiate them repeatedly in loops.
REGEXPS: {
- unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
+ unlikelyCandidates: /banner|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup|yom-remote/i,
okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
byline: /byline|author|dateline|writtenby|p-author/i,
replaceFonts: /<(\/?)font[^>]*>/gi,
normalize: /\s{2,}/g,
videos: /\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
@@ -150,45 +150,58 @@ Readability.prototype = {
},
/**
* Iterates over a NodeList, calls `filterFn` for each node and removes node
* if function returned `true`.
*
* If function is not passed, removes all the nodes in node list.
*
- * @param NodeList nodeList The no
- * @param Function filterFn
+ * @param NodeList nodeList The nodes to operate on
+ * @param Function filterFn the function to use as a filter
* @return void
*/
_removeNodes: function(nodeList, filterFn) {
for (var i = nodeList.length - 1; i >= 0; i--) {
var node = nodeList[i];
var parentNode = node.parentNode;
if (parentNode) {
if (!filterFn || filterFn.call(this, node, i, nodeList)) {
parentNode.removeChild(node);
}
}
}
},
/**
+ * Iterates over a NodeList, and calls _setNodeTag for each node.
+ *
+ * @param NodeList nodeList The nodes to operate on
+ * @param String newTagName the new tag name to use
+ * @return void
+ */
+ _replaceNodeTags: function(nodeList, newTagName) {
+ for (var i = nodeList.length - 1; i >= 0; i--) {
+ var node = nodeList[i];
+ this._setNodeTag(node, newTagName);
+ }
+ },
+
+ /**
* Iterate over a NodeList, which doesn't natively fully implement the Array
* interface.
*
* For convenience, the current object context is applied to the provided
* iterate function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
- * @param Boolean backward Whether to use backward iteration.
* @return void
*/
- _forEachNode: function(nodeList, fn, backward) {
+ _forEachNode: function(nodeList, fn) {
Array.prototype.forEach.call(nodeList, fn, this);
},
/**
* Iterate over a NodeList, return true if any of the provided iterate
* function calls returns true, false otherwise.
*
* For convenience, the current object context is applied to the
@@ -357,19 +370,17 @@ Readability.prototype = {
// Remove all style tags in head
this._removeNodes(doc.getElementsByTagName("style"));
if (doc.body) {
this._replaceBrs(doc.body);
}
- this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
- this._setNodeTag(fontNode, "SPAN");
- });
+ this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
},
/**
* Finds the next element, starting from the given node, and ignoring
* whitespace in between. If the given node is an element, the same node is
* returned.
*/
_nextElement: function (node) {
@@ -1057,22 +1068,25 @@ Readability.prototype = {
} else if ("og:description" in values) {
// Use facebook open graph description.
metadata.excerpt = values["og:description"];
} else if ("twitter:description" in values) {
// Use twitter cards description.
metadata.excerpt = values["twitter:description"];
}
- if ("og:title" in values) {
- // Use facebook open graph title.
- metadata.title = values["og:title"];
- } else if ("twitter:title" in values) {
- // Use twitter cards title.
- metadata.title = values["twitter:title"];
+ metadata.title = this._getArticleTitle();
+ if (!metadata.title) {
+ if ("og:title" in values) {
+ // Use facebook open graph title.
+ metadata.title = values["og:title"];
+ } else if ("twitter:title" in values) {
+ // Use twitter cards title.
+ metadata.title = values["twitter:title"];
+ }
}
return metadata;
},
/**
* Removes script tags from the document.
*
@@ -1852,17 +1866,17 @@ Readability.prototype = {
// this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
// Pull out any possible next page link first.
// var nextPageLink = this._findNextPageLink(doc.body);
this._prepDocument();
var metadata = this._getArticleMetadata();
- var articleTitle = metadata.title || this._getArticleTitle();
+ var articleTitle = metadata.title;
var articleContent = this._grabArticle();
if (!articleContent)
return null;
this.log("Grabbed: " + articleContent.innerHTML);
this._postProcessContent(articleContent);