Bug 741776 - Treat JSON, WebVTT and AppCache manifests as UTF-8 when loaded as plain text. r?ehsan
MozReview-Commit-ID: 5UvYqJVvX0r
--- a/dom/base/nsContentUtils.cpp
+++ b/dom/base/nsContentUtils.cpp
@@ -3753,16 +3753,26 @@ nsContentUtils::IsPlainTextType(const ns
return aContentType.EqualsLiteral(TEXT_PLAIN) ||
aContentType.EqualsLiteral(TEXT_CSS) ||
aContentType.EqualsLiteral(TEXT_CACHE_MANIFEST) ||
aContentType.EqualsLiteral(TEXT_VTT) ||
IsScriptType(aContentType);
}
bool
+nsContentUtils::IsUtf8OnlyPlainTextType(const nsACString& aContentType)
+{
+ // NOTE: Must stay a subset of IsPlainTextType(); the caller checks that first.
+ return aContentType.EqualsLiteral(TEXT_CACHE_MANIFEST) ||
+ aContentType.EqualsLiteral(APPLICATION_JSON) ||
+ aContentType.EqualsLiteral(TEXT_JSON) ||
+ aContentType.EqualsLiteral(TEXT_VTT);
+}
+
+bool
nsContentUtils::GetWrapperSafeScriptFilename(nsIDocument* aDocument,
nsIURI* aURI,
nsACString& aScriptURI,
nsresult* aRv)
{
MOZ_ASSERT(aRv);
bool scriptFileNameModified = false;
*aRv = NS_OK;
--- a/dom/base/nsContentUtils.h
+++ b/dom/base/nsContentUtils.h
@@ -1021,26 +1021,32 @@ public:
static bool IsChromeDoc(nsIDocument *aDocument);
/**
* Returns true if aDocument is in a docshell whose parent is the same type
*/
static bool IsChildOfSameType(nsIDocument* aDoc);
/**
- '* Returns true if the content-type is any of the supported script types.
+ * Returns true if the content-type is any of the supported script types.
*/
static bool IsScriptType(const nsACString& aContentType);
/**
- '* Returns true if the content-type will be rendered as plain-text.
+ * Returns true if the content-type will be rendered as plain-text.
*/
static bool IsPlainTextType(const nsACString& aContentType);
/**
+ * Returns true iff the type is rendered as plain text and doesn't support
+ * non-UTF-8 encodings.
+ */
+ static bool IsUtf8OnlyPlainTextType(const nsACString& aContentType);
+
+ /**
* Get the script file name to use when compiling the script
* referenced by aURI. In cases where there's no need for any extra
* security wrapper automation the script file name that's returned
* will be the spec in aURI, else it will be the spec in aDocument's
* URI followed by aURI's spec, separated by " -> ". Returns true
* if the script file name was modified, false if it's aURI's
* spec.
*/
--- a/dom/html/nsHTMLDocument.cpp
+++ b/dom/html/nsHTMLDocument.cpp
@@ -545,16 +545,19 @@ nsHTMLDocument::StartDocumentLoad(const
bool html = contentType.EqualsLiteral(TEXT_HTML);
bool xhtml = !html && (contentType.EqualsLiteral(APPLICATION_XHTML_XML) || contentType.EqualsLiteral(APPLICATION_WAPXHTML_XML));
bool plainText = !html && !xhtml && nsContentUtils::IsPlainTextType(contentType);
if (!(html || xhtml || plainText || viewSource)) {
MOZ_ASSERT(false, "Channel with bad content type.");
return NS_ERROR_INVALID_ARG;
}
+ bool forceUtf8 = plainText &&
+ nsContentUtils::IsUtf8OnlyPlainTextType(contentType);
+
bool loadAsHtml5 = true;
if (!viewSource && xhtml) {
// We're parsing XHTML as XML, remember that.
mType = eXHTML;
mCompatMode = eCompatibility_FullStandards;
loadAsHtml5 = false;
}
@@ -664,17 +667,22 @@ nsHTMLDocument::StartDocumentLoad(const
executor = static_cast<nsHtml5TreeOpExecutor*> (mParser->GetContentSink());
if (mReferrerPolicySet) {
// CSP may have set the referrer policy, so a speculative parser should
// start with the new referrer policy.
executor->SetSpeculationReferrerPolicy(static_cast<ReferrerPolicy>(mReferrerPolicy));
}
}
- if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
+ if (forceUtf8) {
+ charsetSource = kCharsetFromUtf8OnlyMime;
+ charset.AssignLiteral("UTF-8");
+ parserCharsetSource = charsetSource;
+ parserCharset = charset;
+ } else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
charsetSource = IsHTMLDocument() ? kCharsetFromFallback
: kCharsetFromDocTypeDefault;
charset.AssignLiteral("UTF-8");
TryChannelCharset(aChannel, charsetSource, charset, executor);
parserCharsetSource = charsetSource;
parserCharset = charset;
} else {
NS_ASSERTION(docShell, "Unexpected null value");
@@ -3613,17 +3621,17 @@ nsHTMLDocument::DocAddSizeOfExcludingThi
bool
nsHTMLDocument::WillIgnoreCharsetOverride()
{
if (mType != eHTML) {
MOZ_ASSERT(mType == eXHTML);
return true;
}
- if (mCharacterSetSource == kCharsetFromByteOrderMark) {
+ if (mCharacterSetSource >= kCharsetFromByteOrderMark) {
return true;
}
if (!EncodingUtils::IsAsciiCompatible(mCharacterSet)) {
return true;
}
nsCOMPtr<nsIWyciwygChannel> wyciwyg = do_QueryInterface(mChannel);
if (wyciwyg) {
return true;
new file mode 100644
--- /dev/null
+++ b/dom/html/reftests/741776-1-ref.html
@@ -0,0 +1,1 @@
+<meta charset=utf-8><pre>ää
new file mode 100644
--- /dev/null
+++ b/dom/html/reftests/741776-1.vtt
@@ -0,0 +1,1 @@
+ää
--- a/dom/html/reftests/reftest.list
+++ b/dom/html/reftests/reftest.list
@@ -22,16 +22,17 @@ include toblob-todataurl/reftest.list
== 573322-no-quirks.html 573322-no-quirks-ref.html
== 596455-1a.html 596455-ref-1.html
== 596455-1b.html 596455-ref-1.html
== 596455-2a.html 596455-ref-2.html
== 596455-2b.html 596455-ref-2.html
== 610935.html 610935-ref.html
== 649134-1.html 649134-ref.html
skip-if(Android) == 649134-2.html 649134-2-ref.html
+== 741776-1.vtt 741776-1-ref.html
== bug448564-1_malformed.html bug448564-1_well-formed.html
== bug448564-1_malformed.html bug448564-1_ideal.html
== bug448564-4a.html bug448564-4b.html
== bug502168-1_malformed.html bug502168-1_well-formed.html
== responsive-image-load-shortcircuit.html responsive-image-load-shortcircuit-ref.html
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -976,23 +976,25 @@ nsHtml5StreamParser::OnStartRequest(nsIR
mInitialEncodingWasFromParentFrame = true;
}
if (mCharsetSource >= kCharsetFromAutoDetection) {
mFeedChardet = false;
}
nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
- if (!wyciwygChannel) {
+ if (mCharsetSource < kCharsetFromUtf8OnlyMime && !wyciwygChannel) {
// we aren't ready to commit to an encoding yet
// leave converter uninstantiated for now
return NS_OK;
}
- // We are reloading a document.open()ed doc.
+ // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
+ // a browsing context. In the latter case, there's no need to remove the
+ // BOM manually here, because the UTF-8 decoder removes it.
mReparseForbidden = true;
mFeedChardet = false;
// Instantiate the converter here to avoid BOM sniffing.
mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
return NS_OK;
}
--- a/parser/nsCharsetSource.h
+++ b/parser/nsCharsetSource.h
@@ -17,10 +17,11 @@
#define kCharsetFromMetaPrescan 8 // this one and smaller: HTML5 Tentative
#define kCharsetFromMetaTag 9 // this one and greater: HTML5 Confident
#define kCharsetFromIrreversibleAutoDetection 10
#define kCharsetFromChannel 11
#define kCharsetFromOtherComponent 12
#define kCharsetFromParentForced 13 // propagates to child frames
#define kCharsetFromUserForced 14 // propagates to child frames
#define kCharsetFromByteOrderMark 15
+#define kCharsetFromUtf8OnlyMime 16 // For JSON, WebVTT and such
#endif /* nsCharsetSource_h_ */
--- a/uriloader/exthandler/nsExternalHelperAppService.cpp
+++ b/uriloader/exthandler/nsExternalHelperAppService.cpp
@@ -586,16 +586,19 @@ static const nsExtraMimeTypeEntry extraM
{ IMAGE_JPEG, "jpeg,jpg,jfif,pjpeg,pjp", "JPEG Image" },
{ IMAGE_PNG, "png", "PNG Image" },
{ IMAGE_APNG, "apng", "APNG Image" },
{ IMAGE_TIFF, "tiff,tif", "TIFF Image" },
{ IMAGE_XBM, "xbm", "XBM Image" },
{ IMAGE_SVG_XML, "svg", "Scalable Vector Graphics" },
{ MESSAGE_RFC822, "eml", "RFC-822 data" },
{ TEXT_PLAIN, "txt,text", "Text File" },
+ { APPLICATION_JSON, "json", "JavaScript Object Notation" },
+ { TEXT_VTT, "vtt", "Web Video Text Tracks" },
+ { TEXT_CACHE_MANIFEST, "appcache", "Application Cache Manifest" },
{ TEXT_HTML, "html,htm,shtml,ehtml", "HyperText Markup Language" },
{ "application/xhtml+xml", "xhtml,xht", "Extensible HyperText Markup Language" },
{ APPLICATION_MATHML_XML, "mml", "Mathematical Markup Language" },
{ APPLICATION_RDF, "rdf", "Resource Description Framework" },
{ TEXT_XUL, "xul", "XML-Based User Interface Language" },
{ TEXT_XML, "xml,xsl,xbl", "Extensible Markup Language" },
{ TEXT_CSS, "css", "Style Sheet" },
{ TEXT_VCARD, "vcf,vcard", "Contact Information" },