Bug 1301854 - detect BOM when determining resource encoding; r?pbro
MozReview-Commit-ID: 6akivZSRwVN
--- a/devtools/client/styleeditor/test/browser.ini
+++ b/devtools/client/styleeditor/test/browser.ini
@@ -45,26 +45,28 @@ support-files =
sourcemaps-watching.html
test_private.css
test_private.html
doc_long.css
doc_uncached.css
doc_uncached.html
doc_xulpage.xul
sync.html
+ utf-16.css
!/devtools/client/commandline/test/helpers.js
!/devtools/client/framework/test/shared-head.js
!/devtools/client/inspector/shared/test/head.js
!/devtools/client/inspector/test/head.js
!/devtools/client/inspector/test/shared-head.js
!/devtools/client/shared/test/test-actor-registry.js
!/devtools/client/shared/test/test-actor.js
[browser_styleeditor_autocomplete.js]
[browser_styleeditor_autocomplete-disabled.js]
+[browser_styleeditor_bom.js]
[browser_styleeditor_bug_740541_iframes.js]
[browser_styleeditor_bug_851132_middle_click.js]
[browser_styleeditor_bug_870339.js]
[browser_styleeditor_cmd_edit.js]
[browser_styleeditor_enabled.js]
[browser_styleeditor_fetch-from-cache.js]
[browser_styleeditor_filesave.js]
[browser_styleeditor_highlight-selector.js]
new file mode 100644
--- /dev/null
+++ b/devtools/client/styleeditor/test/browser_styleeditor_bom.js
@@ -0,0 +1,34 @@
+/* vim: set ts=2 et sw=2 tw=80: */
+/* Any copyright is dedicated to the Public Domain.
+ http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const BOM_CSS = TEST_BASE_HTTPS + "utf-16.css";
+const DOCUMENT = "data:text/html;charset=UTF-8," +
+ encodeURIComponent(
+ ["<!DOCTYPE html>",
+ "<html>",
+ " <head>",
+ " <title>Bug 1301854</title>",
+ ' <link rel="stylesheet" type="text/css" href="' + BOM_CSS + '">',
+ " </head>",
+ " <body>",
+ " </body>",
+ "</html>"
+ ].join("\n"));
+
+const CONTENTS = "// Note that this file must be utf-16 with a " +
+ "BOM for the test to make sense.\n";
+
+add_task(function* () {
+ let {ui} = yield openStyleEditorForURL(DOCUMENT);
+
+ is(ui.editors.length, 1, "correct number of editors");
+
+ let editor = ui.editors[0];
+ yield editor.getSourceEditor();
+
+ let text = editor.sourceEditor.getText();
+ is(text, CONTENTS, "editor contains expected text");
+});
new file mode 100644
index 0000000000000000000000000000000000000000..92ff5eac5303cd990264d2e45fb949230abd9593
GIT binary patch
literal 156
zc$_VbO%6a%41~Y6Q@jfxHV$BqGmsGCFX4&XV_r#<>9o_C-VdqDQL*9a#5@Cy@M9x~
zSg$^tx^QX})Yj%xyeB3j*L3MFxSSFk1vRC)H}*g8`_TOvUP{Ppm`pi@t6@U)0^une
AhyVZp
--- a/devtools/server/actors/styleeditor.js
+++ b/devtools/server/actors/styleeditor.js
@@ -203,28 +203,20 @@ var OldStyleSheetActor = protocol.ActorC
this.text = content;
return content;
});
},
/**
* Get the charset of the stylesheet according to the character set rules
* defined in <http://www.w3.org/TR/CSS2/syndata.html#charset>.
- *
- * @param string channelCharset
- * Charset of the source string if set by the HTTP channel.
+ * Note that some of the algorithm is implemented in DevToolsUtils.fetch.
*/
- _getCSSCharset: function (channelCharset)
+ _getCSSCharset: function ()
{
- // StyleSheet's charset can be specified from multiple sources
- if (channelCharset && channelCharset.length > 0) {
- // step 1 of syndata.html: charset given in HTTP header.
- return channelCharset;
- }
-
let sheet = this.rawSheet;
if (sheet) {
// Do we have a @charset rule in the stylesheet?
// step 2 of syndata.html (without the BOM check).
if (sheet.cssRules) {
let rules = sheet.cssRules;
if (rules.length
&& rules.item(0).type == Ci.nsIDOMCSSRule.CHARSET_RULE) {
--- a/devtools/server/actors/stylesheets.js
+++ b/devtools/server/actors/stylesheets.js
@@ -660,28 +660,20 @@ var StyleSheetActor = protocol.ActorClas
}
return mediaRules;
});
},
/**
* Get the charset of the stylesheet according to the character set rules
* defined in <http://www.w3.org/TR/CSS2/syndata.html#charset>.
- *
- * @param string channelCharset
- * Charset of the source string if set by the HTTP channel.
+ * Note that some of the algorithm is implemented in DevToolsUtils.fetch.
*/
- _getCSSCharset: function (channelCharset)
+ _getCSSCharset: function ()
{
- // StyleSheet's charset can be specified from multiple sources
- if (channelCharset && channelCharset.length > 0) {
- // step 1 of syndata.html: charset given in HTTP header.
- return channelCharset;
- }
-
let sheet = this.rawSheet;
if (sheet) {
// Do we have a @charset rule in the stylesheet?
// step 2 of syndata.html (without the BOM check).
if (sheet.cssRules) {
let rules = sheet.cssRules;
if (rules.length
&& rules.item(0).type == Ci.nsIDOMCSSRule.CHARSET_RULE) {
--- a/devtools/shared/DevToolsUtils.js
+++ b/devtools/shared/DevToolsUtils.js
@@ -443,23 +443,41 @@ function mainThreadFetch(aURL, aOptions
// might fail and we lose the stream data. This means we can't fall back
// to using the locale default encoding (bug 1181345).
// Read and decode the data according to the locale default encoding.
let available = stream.available();
let source = NetUtil.readInputStreamToString(stream, available);
stream.close();
+ // We do our own BOM sniffing here because there's no convenient
+ // implementation of the "decode" algorithm
+ // (https://encoding.spec.whatwg.org/#decode) exposed to JS.
+ let bomCharset = null;
+ if (available >= 3 && source.codePointAt(0) == 0xef &&
+ source.codePointAt(1) == 0xbb && source.codePointAt(2) == 0xbf) {
+ bomCharset = "UTF-8";
+ source = source.slice(3);
+ } else if (available >= 2 && source.codePointAt(0) == 0xfe &&
+ source.codePointAt(1) == 0xff) {
+ bomCharset = "UTF-16BE";
+ source = source.slice(2);
+ } else if (available >= 2 && source.codePointAt(0) == 0xff &&
+ source.codePointAt(1) == 0xfe) {
+ bomCharset = "UTF-16LE";
+ source = source.slice(2);
+ }
+
// If the channel or the caller has correct charset information, the
// content will be decoded correctly. If we have to fall back to UTF-8 and
// the guess is wrong, the conversion fails and convertToUnicode returns
// the input unmodified. Essentially we try to decode the data as UTF-8
// and if that fails, we use the locale specific default encoding. This is
// the best we can do if the source does not provide charset info.
- let charset = channel.contentCharset || aOptions.charset || "UTF-8";
+ let charset = bomCharset || channel.contentCharset || aOptions.charset || "UTF-8";
let unicodeSource = NetworkHelper.convertToUnicode(source, charset);
deferred.resolve({
content: unicodeSource,
contentType: request.contentType
});
} catch (ex) {
let uri = request.originalURI;
new file mode 100644
--- /dev/null
+++ b/devtools/shared/tests/unit/test_fetch-bom.js
@@ -0,0 +1,76 @@
+/* vim: set ft=javascript ts=2 et sw=2 tw=80: */
+/* Any copyright is dedicated to the Public Domain.
+ http://creativecommons.org/publicdomain/zero/1.0/ */
+"use strict";
+
+// Tests for DevToolsUtils.fetch BOM detection.
+
+const CC = Components.Constructor;
+
+const { HttpServer } = Cu.import("resource://testing-common/httpd.js", {});
+const BinaryOutputStream = CC("@mozilla.org/binaryoutputstream;1",
+ "nsIBinaryOutputStream", "setOutputStream");
+
+function write8(bos) {
+ bos.write8(0xef);
+ bos.write8(0xbb);
+ bos.write8(0xbf);
+ bos.write8(0x68);
+ bos.write8(0xc4);
+ bos.write8(0xb1);
+}
+
+function write16be(bos) {
+ bos.write8(0xfe);
+ bos.write8(0xff);
+ bos.write8(0x00);
+ bos.write8(0x68);
+ bos.write8(0x01);
+ bos.write8(0x31);
+}
+
+function write16le(bos) {
+ bos.write8(0xff);
+ bos.write8(0xfe);
+ bos.write8(0x68);
+ bos.write8(0x00);
+ bos.write8(0x31);
+ bos.write8(0x01);
+}
+
+function getHandler(writer) {
+ return function (request, response) {
+ response.setStatusLine(request.httpVersion, 200, "OK");
+
+ let bos = new BinaryOutputStream(response.bodyOutputStream);
+ writer(bos);
+ };
+}
+
+const server = new HttpServer();
+server.registerDirectory("/", do_get_cwd());
+server.registerPathHandler("/u8", getHandler(write8));
+server.registerPathHandler("/u16be", getHandler(write16be));
+server.registerPathHandler("/u16le", getHandler(write16le));
+server.start(-1);
+
+const port = server.identity.primaryPort;
+const serverURL = "http://localhost:" + port;
+
+do_register_cleanup(() => {
+ return new Promise(resolve => server.stop(resolve));
+});
+
+add_task(function* () {
+ yield test_one(serverURL + "/u8", "UTF-8");
+ yield test_one(serverURL + "/u16be", "UTF-16BE");
+ yield test_one(serverURL + "/u16le", "UTF-16LE");
+});
+
+function* test_one(url, encoding) {
+ // Be sure to set the encoding to something that will yield an
+ // invalid result if BOM sniffing is not done.
+ yield DevToolsUtils.fetch(url, { charset: "ISO-8859-1" }).then(({content}) => {
+ do_check_eq(content, "hı", "The content looks correct for " + encoding);
+ });
+}
--- a/devtools/shared/tests/unit/xpcshell.ini
+++ b/devtools/shared/tests/unit/xpcshell.ini
@@ -5,16 +5,17 @@ tail =
firefox-appdir = browser
skip-if = toolkit == 'android' || toolkit == 'gonk'
support-files =
exposeLoader.js
[test_assert.js]
[test_csslexer.js]
[test_css-properties-db.js]
+[test_fetch-bom.js]
[test_fetch-chrome.js]
[test_fetch-file.js]
[test_fetch-http.js]
[test_fetch-resource.js]
[test_flatten.js]
[test_indentation.js]
[test_independent_loaders.js]
[test_invisible_loader.js]