Bug 1371010 - Stop percent-encoding href attributes when serializing documents. r?hsivonen draft
authorMasatoshi Kimura <VYV03354@nifty.ne.jp>
Fri, 09 Jun 2017 11:26:52 +0900
changeset 592062 e1b938c477863c49f09f7788f2c75d838fae4d1f
parent 591406 f4262773c4331d4ae139be536ce278ea9aad3436
child 592063 94990584fb38a098cc45b2575b22d34f34430394
child 592567 da03874b43c4ce27300d6a7b416d9d4d60e574f6
push id63272
push userVYV03354@nifty.ne.jp
push dateSat, 10 Jun 2017 03:03:07 +0000
reviewershsivonen
bugs1371010
milestone55.0a1
Bug 1371010 - Stop percent-encoding href attributes when serializing documents. r?hsivonen MozReview-Commit-ID: 6nBFtzk0K4T
dom/base/nsHTMLContentSerializer.cpp
dom/base/nsXHTMLContentSerializer.cpp
dom/base/nsXHTMLContentSerializer.h
parser/xml/test/unit/results.js
--- a/dom/base/nsHTMLContentSerializer.cpp
+++ b/dom/base/nsHTMLContentSerializer.cpp
@@ -20,17 +20,16 @@
 #include "nsUnicharUtils.h"
 #include "nsXPIDLString.h"
 #include "nsIServiceManager.h"
 #include "nsIDocumentEncoder.h"
 #include "nsGkAtoms.h"
 #include "nsIURI.h"
 #include "nsNetUtil.h"
 #include "nsEscape.h"
-#include "nsITextToSubURI.h"
 #include "nsCRT.h"
 #include "nsIParserService.h"
 #include "nsContentUtils.h"
 #include "nsLWBrkCIID.h"
 #include "nsIScriptElement.h"
 #include "nsAttrName.h"
 #include "nsIDocShell.h"
 #include "nsIEditor.h"
@@ -127,20 +126,16 @@ nsHTMLContentSerializer::SerializeHTMLAt
         if (uri) {
           nsAutoString absURI;
           rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
           if (NS_SUCCEEDED(rv)) {
             valueStr = absURI;
           }
         }
       }
-      // Need to escape URI.
-      nsAutoString tempURI(valueStr);
-      if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
-        valueStr = tempURI;
     }
 
     if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
         aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
         && namespaceID == kNameSpaceID_None) {
       // If we're serializing a <meta http-equiv="content-type">,
       // use the proper value, rather than what's in the document.
       nsAutoString header;
--- a/dom/base/nsXHTMLContentSerializer.cpp
+++ b/dom/base/nsXHTMLContentSerializer.cpp
@@ -20,17 +20,16 @@
 #include "nsUnicharUtils.h"
 #include "nsXPIDLString.h"
 #include "nsIServiceManager.h"
 #include "nsIDocumentEncoder.h"
 #include "nsGkAtoms.h"
 #include "nsIURI.h"
 #include "nsNetUtil.h"
 #include "nsEscape.h"
-#include "nsITextToSubURI.h"
 #include "nsCRT.h"
 #include "nsIParserService.h"
 #include "nsContentUtils.h"
 #include "nsLWBrkCIID.h"
 #include "nsIScriptElement.h"
 #include "nsStubMutationObserver.h"
 #include "nsAttrName.h"
 #include "nsComputedDOMStyle.h"
@@ -152,80 +151,16 @@ nsXHTMLContentSerializer::AppendText(nsI
     else {
       NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY);
     }
   }
 
   return NS_OK;
 }
 
-nsresult
-nsXHTMLContentSerializer::EscapeURI(nsIContent* aContent, const nsAString& aURI, nsAString& aEscapedURI)
-{
-  // URL escape %xx cannot be used in JS.
-  // No escaping if the scheme is 'javascript'.
-  if (IsJavaScript(aContent, nsGkAtoms::href, kNameSpaceID_None, aURI)) {
-    aEscapedURI = aURI;
-    return NS_OK;
-  }
-
-  // nsITextToSubURI does charset convert plus uri escape
-  // This is needed to convert to a document charset which is needed to support existing browsers.
-  // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
-  // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
-  nsCOMPtr<nsITextToSubURI> textToSubURI;
-  nsAutoString uri(aURI); // in order to use FindCharInSet()
-  nsresult rv = NS_OK;
-
-  if (!mCharset.IsEmpty() && !IsASCII(uri)) {
-    textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
-    NS_ENSURE_SUCCESS(rv, rv);
-  }
-
-  int32_t start = 0;
-  int32_t end;
-  nsAutoString part;
-  nsXPIDLCString escapedURI;
-  aEscapedURI.Truncate(0);
-
-  // Loop and escape parts by avoiding escaping reserved characters
-  // (and '%', '#', as well as '[' and ']' for IPv6 address literals).
-  while ((end = uri.FindCharInSet("%#;/?:@&=+$,[]", start)) != -1) {
-    part = Substring(aURI, start, (end-start));
-    if (textToSubURI && !IsASCII(part)) {
-      rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
-      NS_ENSURE_SUCCESS(rv, rv);
-    } else if (NS_WARN_IF(!NS_Escape(NS_ConvertUTF16toUTF8(part), escapedURI,
-                                     url_Path))) {
-      return NS_ERROR_OUT_OF_MEMORY;
-    }
-    AppendASCIItoUTF16(escapedURI, aEscapedURI);
-
-    // Append a reserved character without escaping.
-    part = Substring(aURI, end, 1);
-    aEscapedURI.Append(part);
-    start = end + 1;
-  }
-
-  if (start < (int32_t) aURI.Length()) {
-    // Escape the remaining part.
-    part = Substring(aURI, start, aURI.Length()-start);
-    if (textToSubURI) {
-      rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
-      NS_ENSURE_SUCCESS(rv, rv);
-    } else if (NS_WARN_IF(!NS_Escape(NS_ConvertUTF16toUTF8(part), escapedURI,
-                                     url_Path))) {
-      return NS_ERROR_OUT_OF_MEMORY;
-    }
-    AppendASCIItoUTF16(escapedURI, aEscapedURI);
-  }
-
-  return rv;
-}
-
 bool
 nsXHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
                                               nsIContent *aOriginalElement,
                                               nsAString& aTagPrefix,
                                               const nsAString& aTagNamespaceURI,
                                               nsIAtom* aTagName,
                                               nsAString& aStr,
                                               uint32_t aSkipAttr,
@@ -368,20 +303,16 @@ nsXHTMLContentSerializer::SerializeAttri
           if (uri) {
             nsAutoString absURI;
             rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
             if (NS_SUCCEEDED(rv)) {
               valueStr = absURI;
             }
           }
         }
-        // Need to escape URI.
-        nsAutoString tempURI(valueStr);
-        if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
-          valueStr = tempURI;
       }
 
       if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
           attrName == nsGkAtoms::content) {
         // If we're serializing a <meta http-equiv="content-type">,
         // use the proper value, rather than what's in the document.
         nsAutoString header;
         aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
--- a/dom/base/nsXHTMLContentSerializer.h
+++ b/dom/base/nsXHTMLContentSerializer.h
@@ -87,20 +87,16 @@ class nsXHTMLContentSerializer : public 
                                  nsAString& aStr);
   bool IsShorthandAttr(const nsIAtom* aAttrName,
                          const nsIAtom* aElementName);
 
   MOZ_MUST_USE
   virtual bool AppendAndTranslateEntities(const nsAString& aStr,
                                           nsAString& aOutputStr) override;
 
-  nsresult EscapeURI(nsIContent* aContent,
-                     const nsAString& aURI,
-                     nsAString& aEscapedURI);
-
 private:
   bool IsElementPreformatted(nsIContent* aNode);
 
 protected:
   nsCOMPtr<nsIEntityConverter> mEntityConverter;
 
   /*
    * isHTMLParser should be set to true by the HTML parser which inherits from
--- a/parser/xml/test/unit/results.js
+++ b/parser/xml/test/unit/results.js
@@ -426,17 +426,17 @@ var vectors = [
     "sanitized": "<html><head></head><body><img></body></html>"
   },
   {
     "data": "<title onpropertychange=alert(1)></title><title title=></title>",
     "sanitized": "<html><head><title></title><title title=\"\"></title></head><body></body></html>"
   },
   {
     "data": "<!-- IE 5-8 standards mode -->\r\n<a href=http://foo.bar/#x=`y></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\r\n\r\n<!-- IE 5-9 standards mode -->\r\n<!a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(2)//\">\r\n<?a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(3)//\">",
-    "sanitized": "<html><head></head><body><a href=\"http://foo.bar/#x=%60y\"></a><img alt=\"`&gt;&lt;img src=xx:x onerror=alert(1)&gt;&lt;/a&gt;\">\n\n\n<img alt=\"`&gt;&lt;img src=xx:x onerror=alert(2)//\">\n<img alt=\"`&gt;&lt;img src=xx:x onerror=alert(3)//\"></body></html>"
+    "sanitized": "<html><head></head><body><a href=\"http://foo.bar/#x=`y\"></a><img alt=\"`&gt;&lt;img src=xx:x onerror=alert(1)&gt;&lt;/a&gt;\">\n\n\n<img alt=\"`&gt;&lt;img src=xx:x onerror=alert(2)//\">\n<img alt=\"`&gt;&lt;img src=xx:x onerror=alert(3)//\"></body></html>"
   },
   {
     "data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\n<a id=\"x\"><rect fill=\"white\" width=\"1000\" height=\"1000\"/></a>\n<rect  fill=\"white\" style=\"clip-path:url(test3.svg#a);fill:url(#b);filter:url(#c);marker:url(#d);mask:url(#e);stroke:url(#f);\"/>\n</svg>",
     "sanitized": "<html><head></head><body>\n\n\n</body></html>"
   },
   {
     "data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\r\n<path d=\"M0,0\" style=\"marker-start:url(test4.svg#a)\"/>\r\n</svg>",
     "sanitized": "<html><head></head><body>\n\n</body></html>"