Bug 1371010 - Stop percent-encoding href attributes when serializing documents. r?hsivonen
MozReview-Commit-ID: 6nBFtzk0K4T
--- a/dom/base/nsHTMLContentSerializer.cpp
+++ b/dom/base/nsHTMLContentSerializer.cpp
@@ -20,17 +20,16 @@
#include "nsUnicharUtils.h"
#include "nsXPIDLString.h"
#include "nsIServiceManager.h"
#include "nsIDocumentEncoder.h"
#include "nsGkAtoms.h"
#include "nsIURI.h"
#include "nsNetUtil.h"
#include "nsEscape.h"
-#include "nsITextToSubURI.h"
#include "nsCRT.h"
#include "nsIParserService.h"
#include "nsContentUtils.h"
#include "nsLWBrkCIID.h"
#include "nsIScriptElement.h"
#include "nsAttrName.h"
#include "nsIDocShell.h"
#include "nsIEditor.h"
@@ -127,20 +126,16 @@ nsHTMLContentSerializer::SerializeHTMLAt
if (uri) {
nsAutoString absURI;
rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
if (NS_SUCCEEDED(rv)) {
valueStr = absURI;
}
}
}
- // Need to escape URI.
- nsAutoString tempURI(valueStr);
- if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
- valueStr = tempURI;
}
if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
&& namespaceID == kNameSpaceID_None) {
// If we're serializing a <meta http-equiv="content-type">,
// use the proper value, rather than what's in the document.
nsAutoString header;
--- a/dom/base/nsXHTMLContentSerializer.cpp
+++ b/dom/base/nsXHTMLContentSerializer.cpp
@@ -20,17 +20,16 @@
#include "nsUnicharUtils.h"
#include "nsXPIDLString.h"
#include "nsIServiceManager.h"
#include "nsIDocumentEncoder.h"
#include "nsGkAtoms.h"
#include "nsIURI.h"
#include "nsNetUtil.h"
#include "nsEscape.h"
-#include "nsITextToSubURI.h"
#include "nsCRT.h"
#include "nsIParserService.h"
#include "nsContentUtils.h"
#include "nsLWBrkCIID.h"
#include "nsIScriptElement.h"
#include "nsStubMutationObserver.h"
#include "nsAttrName.h"
#include "nsComputedDOMStyle.h"
@@ -152,80 +151,16 @@ nsXHTMLContentSerializer::AppendText(nsI
else {
NS_ENSURE_TRUE(AppendToStringConvertLF(data, aStr), NS_ERROR_OUT_OF_MEMORY);
}
}
return NS_OK;
}
-nsresult
-nsXHTMLContentSerializer::EscapeURI(nsIContent* aContent, const nsAString& aURI, nsAString& aEscapedURI)
-{
- // URL escape %xx cannot be used in JS.
- // No escaping if the scheme is 'javascript'.
- if (IsJavaScript(aContent, nsGkAtoms::href, kNameSpaceID_None, aURI)) {
- aEscapedURI = aURI;
- return NS_OK;
- }
-
- // nsITextToSubURI does charset convert plus uri escape
- // This is needed to convert to a document charset which is needed to support existing browsers.
- // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
- // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
- nsCOMPtr<nsITextToSubURI> textToSubURI;
- nsAutoString uri(aURI); // in order to use FindCharInSet()
- nsresult rv = NS_OK;
-
- if (!mCharset.IsEmpty() && !IsASCII(uri)) {
- textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
- NS_ENSURE_SUCCESS(rv, rv);
- }
-
- int32_t start = 0;
- int32_t end;
- nsAutoString part;
- nsXPIDLCString escapedURI;
- aEscapedURI.Truncate(0);
-
- // Loop and escape parts by avoiding escaping reserved characters
- // (and '%', '#', as well as '[' and ']' for IPv6 address literals).
- while ((end = uri.FindCharInSet("%#;/?:@&=+$,[]", start)) != -1) {
- part = Substring(aURI, start, (end-start));
- if (textToSubURI && !IsASCII(part)) {
- rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
- NS_ENSURE_SUCCESS(rv, rv);
- } else if (NS_WARN_IF(!NS_Escape(NS_ConvertUTF16toUTF8(part), escapedURI,
- url_Path))) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- AppendASCIItoUTF16(escapedURI, aEscapedURI);
-
- // Append a reserved character without escaping.
- part = Substring(aURI, end, 1);
- aEscapedURI.Append(part);
- start = end + 1;
- }
-
- if (start < (int32_t) aURI.Length()) {
- // Escape the remaining part.
- part = Substring(aURI, start, aURI.Length()-start);
- if (textToSubURI) {
- rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
- NS_ENSURE_SUCCESS(rv, rv);
- } else if (NS_WARN_IF(!NS_Escape(NS_ConvertUTF16toUTF8(part), escapedURI,
- url_Path))) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- AppendASCIItoUTF16(escapedURI, aEscapedURI);
- }
-
- return rv;
-}
-
bool
nsXHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
nsIContent *aOriginalElement,
nsAString& aTagPrefix,
const nsAString& aTagNamespaceURI,
nsIAtom* aTagName,
nsAString& aStr,
uint32_t aSkipAttr,
@@ -368,20 +303,16 @@ nsXHTMLContentSerializer::SerializeAttri
if (uri) {
nsAutoString absURI;
rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
if (NS_SUCCEEDED(rv)) {
valueStr = absURI;
}
}
}
- // Need to escape URI.
- nsAutoString tempURI(valueStr);
- if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
- valueStr = tempURI;
}
if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
attrName == nsGkAtoms::content) {
// If we're serializing a <meta http-equiv="content-type">,
// use the proper value, rather than what's in the document.
nsAutoString header;
aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
--- a/dom/base/nsXHTMLContentSerializer.h
+++ b/dom/base/nsXHTMLContentSerializer.h
@@ -87,20 +87,16 @@ class nsXHTMLContentSerializer : public
nsAString& aStr);
bool IsShorthandAttr(const nsIAtom* aAttrName,
const nsIAtom* aElementName);
MOZ_MUST_USE
virtual bool AppendAndTranslateEntities(const nsAString& aStr,
nsAString& aOutputStr) override;
- nsresult EscapeURI(nsIContent* aContent,
- const nsAString& aURI,
- nsAString& aEscapedURI);
-
private:
bool IsElementPreformatted(nsIContent* aNode);
protected:
nsCOMPtr<nsIEntityConverter> mEntityConverter;
/*
* isHTMLParser should be set to true by the HTML parser which inherits from
--- a/parser/xml/test/unit/results.js
+++ b/parser/xml/test/unit/results.js
@@ -426,17 +426,17 @@ var vectors = [
"sanitized": "<html><head></head><body><img></body></html>"
},
{
"data": "<title onpropertychange=alert(1)></title><title title=></title>",
"sanitized": "<html><head><title></title><title title=\"\"></title></head><body></body></html>"
},
{
"data": "<!-- IE 5-8 standards mode -->\r\n<a href=http://foo.bar/#x=`y></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\r\n\r\n<!-- IE 5-9 standards mode -->\r\n<!a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(2)//\">\r\n<?a foo=x=`y><img alt=\"`><img src=xx:x onerror=alert(3)//\">",
- "sanitized": "<html><head></head><body><a href=\"http://foo.bar/#x=%60y\"></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\n\n\n<img alt=\"`><img src=xx:x onerror=alert(2)//\">\n<img alt=\"`><img src=xx:x onerror=alert(3)//\"></body></html>"
+ "sanitized": "<html><head></head><body><a href=\"http://foo.bar/#x=`y\"></a><img alt=\"`><img src=xx:x onerror=alert(1)></a>\">\n\n\n<img alt=\"`><img src=xx:x onerror=alert(2)//\">\n<img alt=\"`><img src=xx:x onerror=alert(3)//\"></body></html>"
},
{
"data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\n<a id=\"x\"><rect fill=\"white\" width=\"1000\" height=\"1000\"/></a>\n<rect fill=\"white\" style=\"clip-path:url(test3.svg#a);fill:url(#b);filter:url(#c);marker:url(#d);mask:url(#e);stroke:url(#f);\"/>\n</svg>",
"sanitized": "<html><head></head><body>\n\n\n</body></html>"
},
{
"data": "<svg xmlns=\"http://www.w3.org/2000/svg\">\r\n<path d=\"M0,0\" style=\"marker-start:url(test4.svg#a)\"/>\r\n</svg>",
"sanitized": "<html><head></head><body>\n\n</body></html>"