No bug - Update readability from github repo, includes fix for Bug 1142312 and Bug 1285543, r=Gijs draft
authorEvan Tseng <evan@tseng.io>
Thu, 15 Dec 2016 12:03:53 +0800
changeset 450255 9c15ccde6cadc4df3d7751ca90a53dc4c0d021c3
parent 449801 7652a58efa46f1c57c94bba26efc5d53b6184e83
child 539717 a06283ffd3aa3a7b70f84810e5965ecbaa51f4a8
push id38816
push userbmo:evan@tseng.io
push dateFri, 16 Dec 2016 08:36:20 +0000
reviewersGijs
bugs1142312, 1285543
milestone53.0a1
No bug - Update readability from github repo, includes fix for Bug 1142312 and Bug 1285543, r=Gijs MozReview-Commit-ID: 5hi1iuDO3XE
mobile/android/tests/browser/robocop/testReadingListCache.js
toolkit/components/reader/Readability.js
--- a/mobile/android/tests/browser/robocop/testReadingListCache.js
+++ b/mobile/android/tests/browser/robocop/testReadingListCache.js
@@ -26,17 +26,17 @@ var TEST_PAGES = [
   },
   {
     url: URL_PREFIX + "not_an_article.html",
     expected: null
   },
   {
     url: URL_PREFIX + "developer.mozilla.org/en/XULRunner/Build_Instructions.html",
     expected: {
-      title: "Building XULRunner",
+      title: "Building XULRunner | MDN",
       byline: null,
       excerpt: "XULRunner is built using basically the same process as Firefox or other applications. Please read and follow the general Build Documentation for instructions on how to get sources and set up build prerequisites.",
     }
   },
 ];
 
 add_task(function* test_article_not_found() {
   let article = yield ReaderMode.getArticleFromCache(TEST_PAGES[0].url);
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -114,17 +114,17 @@ Readability.prototype = {
   DEFAULT_MAX_PAGES: 5,
 
   // Element tags to score by default.
   DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),
 
   // All of the regular expressions in use within readability.
   // Defined up here so we don't instantiate them repeatedly in loops.
   REGEXPS: {
-    unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
+    unlikelyCandidates: /banner|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup|yom-remote/i,
     okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
     positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
     negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
     extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
     byline: /byline|author|dateline|writtenby|p-author/i,
     replaceFonts: /<(\/?)font[^>]*>/gi,
     normalize: /\s{2,}/g,
     videos: /\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
@@ -150,45 +150,58 @@ Readability.prototype = {
   },
 
   /**
    * Iterates over a NodeList, calls `filterFn` for each node and removes node
    * if function returned `true`.
    *
    * If function is not passed, removes all the nodes in node list.
    *
-   * @param NodeList nodeList The no
-   * @param Function filterFn
+   * @param NodeList nodeList The nodes to operate on
+   * @param Function filterFn the function to use as a filter
    * @return void
    */
   _removeNodes: function(nodeList, filterFn) {
     for (var i = nodeList.length - 1; i >= 0; i--) {
       var node = nodeList[i];
       var parentNode = node.parentNode;
       if (parentNode) {
         if (!filterFn || filterFn.call(this, node, i, nodeList)) {
           parentNode.removeChild(node);
         }
       }
     }
   },
 
   /**
+   * Iterates over a NodeList, and calls _setNodeTag for each node.
+   *
+   * @param NodeList nodeList The nodes to operate on
+   * @param String newTagName the new tag name to use
+   * @return void
+   */
+  _replaceNodeTags: function(nodeList, newTagName) {
+    for (var i = nodeList.length - 1; i >= 0; i--) {
+      var node = nodeList[i];
+      this._setNodeTag(node, newTagName);
+    }
+  },
+
+  /**
    * Iterate over a NodeList, which doesn't natively fully implement the Array
    * interface.
    *
    * For convenience, the current object context is applied to the provided
    * iterate function.
    *
    * @param  NodeList nodeList The NodeList.
    * @param  Function fn       The iterate function.
-   * @param  Boolean  backward Whether to use backward iteration.
    * @return void
    */
-  _forEachNode: function(nodeList, fn, backward) {
+  _forEachNode: function(nodeList, fn) {
     Array.prototype.forEach.call(nodeList, fn, this);
   },
 
   /**
    * Iterate over a NodeList, return true if any of the provided iterate
    * function calls returns true, false otherwise.
    *
    * For convenience, the current object context is applied to the
@@ -357,19 +370,17 @@ Readability.prototype = {
 
     // Remove all style tags in head
     this._removeNodes(doc.getElementsByTagName("style"));
 
     if (doc.body) {
       this._replaceBrs(doc.body);
     }
 
-    this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
-      this._setNodeTag(fontNode, "SPAN");
-    });
+    this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
   },
 
   /**
    * Finds the next element, starting from the given node, and ignoring
    * whitespace in between. If the given node is an element, the same node is
    * returned.
    */
   _nextElement: function (node) {
@@ -1057,22 +1068,25 @@ Readability.prototype = {
     } else if ("og:description" in values) {
       // Use facebook open graph description.
       metadata.excerpt = values["og:description"];
     } else if ("twitter:description" in values) {
       // Use twitter cards description.
       metadata.excerpt = values["twitter:description"];
     }
 
-    if ("og:title" in values) {
-      // Use facebook open graph title.
-      metadata.title = values["og:title"];
-    } else if ("twitter:title" in values) {
-      // Use twitter cards title.
-      metadata.title = values["twitter:title"];
+    metadata.title = this._getArticleTitle();
+    if (!metadata.title) {
+      if ("og:title" in values) {
+        // Use facebook open graph title.
+        metadata.title = values["og:title"];
+      } else if ("twitter:title" in values) {
+        // Use twitter cards title.
+        metadata.title = values["twitter:title"];
+      }
     }
 
     return metadata;
   },
 
   /**
    * Removes script tags from the document.
    *
@@ -1852,17 +1866,17 @@ Readability.prototype = {
     // this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
 
     // Pull out any possible next page link first.
     // var nextPageLink = this._findNextPageLink(doc.body);
 
     this._prepDocument();
 
     var metadata = this._getArticleMetadata();
-    var articleTitle = metadata.title || this._getArticleTitle();
+    var articleTitle = metadata.title;
 
     var articleContent = this._grabArticle();
     if (!articleContent)
       return null;
 
     this.log("Grabbed: " + articleContent.innerHTML);
 
     this._postProcessContent(articleContent);