author Evan Tseng <evan@tseng.io>

Thu, 15 Dec 2016 12:03:53 +0800

changeset 450255 9c15ccde6cadc4df3d7751ca90a53dc4c0d021c3

parent 449801 7652a58efa46f1c57c94bba26efc5d53b6184e83

child 539717 a06283ffd3aa3a7b70f84810e5965ecbaa51f4a8

push id 38816

push user bmo:evan@tseng.io

push date Fri, 16 Dec 2016 08:36:20 +0000

reviewers Gijs

bugs 1142312, 1285543

milestone 53.0a1

mobile/android/tests/browser/robocop/testReadingListCache.js file | annotate | diff | comparison | revisions

toolkit/components/reader/Readability.js file | annotate | diff | comparison | revisions
--- a/mobile/android/tests/browser/robocop/testReadingListCache.js
+++ b/mobile/android/tests/browser/robocop/testReadingListCache.js
@@ -26,17 +26,17 @@ var TEST_PAGES = [
   },
   {
     url: URL_PREFIX + "not_an_article.html",
     expected: null
   },
   {
     url: URL_PREFIX + "developer.mozilla.org/en/XULRunner/Build_Instructions.html",
     expected: {
-      title: "Building XULRunner",
+      title: "Building XULRunner | MDN",
       byline: null,
       excerpt: "XULRunner is built using basically the same process as Firefox or other applications. Please read and follow the general Build Documentation for instructions on how to get sources and set up build prerequisites.",
     }
   },
 ];
 
 add_task(function* test_article_not_found() {
   let article = yield ReaderMode.getArticleFromCache(TEST_PAGES[0].url);
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -114,17 +114,17 @@ Readability.prototype = {
   DEFAULT_MAX_PAGES: 5,
 
   // Element tags to score by default.
   DEFAULT_TAGS_TO_SCORE: "section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),
 
   // All of the regular expressions in use within readability.
   // Defined up here so we don't instantiate them repeatedly in loops.
   REGEXPS: {
-    unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
+    unlikelyCandidates: /banner|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup|yom-remote/i,
     okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
     positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
     negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
     extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
     byline: /byline|author|dateline|writtenby|p-author/i,
     replaceFonts: /<(\/?)font[^>]*>/gi,
     normalize: /\s{2,}/g,
     videos: /\/\/(www\.)?(dailymotion|youtube|youtube-nocookie|player\.vimeo)\.com/i,
@@ -150,45 +150,58 @@ Readability.prototype = {
   },
 
   /**
    * Iterates over a NodeList, calls `filterFn` for each node and removes node
    * if function returned `true`.
    *
    * If function is not passed, removes all the nodes in node list.
    *
-   * @param NodeList nodeList The no
-   * @param Function filterFn
+   * @param NodeList nodeList The nodes to operate on
+   * @param Function filterFn the function to use as a filter
    * @return void
    */
   _removeNodes: function(nodeList, filterFn) {
     for (var i = nodeList.length - 1; i >= 0; i--) {
       var node = nodeList[i];
       var parentNode = node.parentNode;
       if (parentNode) {
         if (!filterFn || filterFn.call(this, node, i, nodeList)) {
           parentNode.removeChild(node);
         }
       }
     }
   },
 
   /**
+   * Iterates over a NodeList, and calls _setNodeTag for each node.
+   *
+   * @param NodeList nodeList The nodes to operate on
+   * @param String newTagName the new tag name to use
+   * @return void
+   */
+  _replaceNodeTags: function(nodeList, newTagName) {
+    for (var i = nodeList.length - 1; i >= 0; i--) {
+      var node = nodeList[i];
+      this._setNodeTag(node, newTagName);
+    }
+  },
+
+  /**
    * Iterate over a NodeList, which doesn't natively fully implement the Array
    * interface.
    *
    * For convenience, the current object context is applied to the provided
    * iterate function.
    *
    * @param  NodeList nodeList The NodeList.
    * @param  Function fn       The iterate function.
-   * @param  Boolean  backward Whether to use backward iteration.
    * @return void
    */
-  _forEachNode: function(nodeList, fn, backward) {
+  _forEachNode: function(nodeList, fn) {
     Array.prototype.forEach.call(nodeList, fn, this);
   },
 
   /**
    * Iterate over a NodeList, return true if any of the provided iterate
    * function calls returns true, false otherwise.
    *
    * For convenience, the current object context is applied to the
@@ -357,19 +370,17 @@ Readability.prototype = {
 
     // Remove all style tags in head
     this._removeNodes(doc.getElementsByTagName("style"));
 
     if (doc.body) {
       this._replaceBrs(doc.body);
     }
 
-    this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
-      this._setNodeTag(fontNode, "SPAN");
-    });
+    this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
   },
 
   /**
    * Finds the next element, starting from the given node, and ignoring
    * whitespace in between. If the given node is an element, the same node is
    * returned.
    */
   _nextElement: function (node) {
@@ -1057,22 +1068,25 @@ Readability.prototype = {
     } else if ("og:description" in values) {
       // Use facebook open graph description.
       metadata.excerpt = values["og:description"];
     } else if ("twitter:description" in values) {
       // Use twitter cards description.
       metadata.excerpt = values["twitter:description"];
     }
 
-    if ("og:title" in values) {
-      // Use facebook open graph title.
-      metadata.title = values["og:title"];
-    } else if ("twitter:title" in values) {
-      // Use twitter cards title.
-      metadata.title = values["twitter:title"];
+    metadata.title = this._getArticleTitle();
+    if (!metadata.title) {
+      if ("og:title" in values) {
+        // Use facebook open graph title.
+        metadata.title = values["og:title"];
+      } else if ("twitter:title" in values) {
+        // Use twitter cards title.
+        metadata.title = values["twitter:title"];
+      }
     }
 
     return metadata;
   },
 
   /**
    * Removes script tags from the document.
    *
@@ -1852,17 +1866,17 @@ Readability.prototype = {
     // this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
 
     // Pull out any possible next page link first.
     // var nextPageLink = this._findNextPageLink(doc.body);
 
     this._prepDocument();
 
     var metadata = this._getArticleMetadata();
-    var articleTitle = metadata.title || this._getArticleTitle();
+    var articleTitle = metadata.title;
 
     var articleContent = this._grabArticle();
     if (!articleContent)
       return null;
 
     this.log("Grabbed: " + articleContent.innerHTML);
 
     this._postProcessContent(articleContent);
author	Evan Tseng <evan@tseng.io>
	Thu, 15 Dec 2016 12:03:53 +0800
changeset 450255	9c15ccde6cadc4df3d7751ca90a53dc4c0d021c3
parent 449801	7652a58efa46f1c57c94bba26efc5d53b6184e83
child 539717	a06283ffd3aa3a7b70f84810e5965ecbaa51f4a8
push id	38816
push user	bmo:evan@tseng.io
push date	Fri, 16 Dec 2016 08:36:20 +0000
reviewers	Gijs
bugs	1142312, 1285543
milestone	53.0a1
mobile/android/tests/browser/robocop/testReadingListCache.js		file \| annotate \| diff \| comparison \| revisions
toolkit/components/reader/Readability.js		file \| annotate \| diff \| comparison \| revisions