Bug 1396286 - Support UTF-16 in JSON Viewer. draft
author Oriol Brufau <oriol-bugzilla@hotmail.com>
Sat, 23 Sep 2017 20:10:04 +0200
changeset 675600 4aa9e0a67dff574f0ae2e9fba9cd0c6c6678554c
parent 675194 c3b7759671deae73e40ebca01d7f23a326a4b8c2
child 734651 31fd865a38dec8ee1e4c6089d36a17d663989f75
push id 83181
push user bmo:oriol-bugzilla@hotmail.com
push date Thu, 05 Oct 2017 15:58:30 +0000
bugs 1396286
milestone 58.0a1
Bug 1396286 - Support UTF-16 in JSON Viewer. MozReview-Commit-ID: Dy7474tyVyc
devtools/client/jsonview/converter-child.js
devtools/client/jsonview/test/browser.ini
devtools/client/jsonview/test/browser_jsonview_encoding.js
devtools/client/jsonview/test/browser_jsonview_ignore_charset.js
devtools/client/jsonview/test/browser_jsonview_utf8.js
--- a/devtools/client/jsonview/converter-child.js
+++ b/devtools/client/jsonview/converter-child.js
@@ -1,30 +1,36 @@
 /* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
 /* vim: set ft=javascript ts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 "use strict";
 
-const {Cc, Ci, Cu} = require("chrome");
+const {Cc, Ci, Cu, CC} = require("chrome");
 const { XPCOMUtils } = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});
 const Services = require("Services");
 
 loader.lazyRequireGetter(this, "NetworkHelper",
                                "devtools/shared/webconsole/network-helper");
 loader.lazyGetter(this, "debug", function () {
   let {AppConstants} = require("resource://gre/modules/AppConstants.jsm");
   return !!(AppConstants.DEBUG || AppConstants.DEBUG_JS_MODULES);
 });
 
 const childProcessMessageManager =
   Cc["@mozilla.org/childprocessmessagemanager;1"]
     .getService(Ci.nsISyncMessageSender);
+const BinaryInput = CC("@mozilla.org/binaryinputstream;1",
+                       "nsIBinaryInputStream", "setInputStream");
+const BufferStream = CC("@mozilla.org/io/arraybuffer-input-stream;1",
+                       "nsIArrayBufferInputStream", "setData");
+const encodingLength = 2; // Bytes buffered before sniffing the encoding.
+const encoder = new TextEncoder(); // Always encodes to UTF-8.
 
 // Localization
 loader.lazyGetter(this, "jsonViewStrings", () => {
   return Services.strings.createBundle(
     "chrome://devtools/locale/jsonview.properties");
 });
 
 /**
@@ -47,72 +53,151 @@ Converter.prototype = {
     return this;
   },
 
   /**
    * This component works as such:
    * 1. asyncConvertData captures the listener
    * 2. onStartRequest fires, initializes stuff, modifies the listener
    *    to match our output type
-   * 3. onDataAvailable spits it back to the listener
-   * 4. onStopRequest spits it back to the listener
+   * 3. onDataAvailable converts to UTF-8 and spits back to the listener
+   * 4. onStopRequest flushes data and spits back to the listener
    * 5. convert does nothing, it's just the synchronous version
    *    of asyncConvertData
    */
   convert: function (fromStream, fromType, toType, ctx) {
     return fromStream;
   },
 
   asyncConvertData: function (fromType, toType, listener, ctx) {
     this.listener = listener;
   },
 
   onDataAvailable: function (request, context, inputStream, offset, count) {
-    this.listener.onDataAvailable(...arguments);
+    // Until the encoding is known, buffer the first encodingLength bytes to sniff it.
+    if (this.encodingArray) {
+      let desired = encodingLength - this.encodingArray.length;
+      let n = Math.min(desired, count);
+      let bytes = new BinaryInput(inputStream).readByteArray(n);
+      offset += n;
+      count -= n;
+      this.encodingArray.push(...bytes);
+      if (n < desired) {
+        // Wait until there is more data.
+        return;
+      }
+      this.determineEncoding(request, context);
+    }
+
+    // Spit back the data if the encoding is UTF-8, otherwise convert it first.
+    if (!this.decoder) {
+      this.listener.onDataAvailable(request, context, inputStream, offset, count);
+    } else {
+      let buffer = new ArrayBuffer(count);
+      new BinaryInput(inputStream).readArrayBuffer(count, buffer);
+      this.convertAndSendBuffer(request, context, buffer);
+    }
   },
 
   onStartRequest: function (request, context) {
     // Set the content type to HTML in order to parse the doctype, styles
     // and scripts, but later a <plaintext> element will switch the tokenizer
     // to the plaintext state in order to parse the JSON.
     request.QueryInterface(Ci.nsIChannel);
     request.contentType = "text/html";
 
-    // JSON enforces UTF-8 charset (see bug 741776).
+    // Don't honor the charset parameter and use UTF-8 (see bug 741776).
     request.contentCharset = "UTF-8";
 
     // Changing the content type breaks saving functionality. Fix it.
     fixSave(request);
 
     // Because content might still have a reference to this window,
     // force setting it to a null principal to avoid it being same-
     // origin with (other) content.
     request.loadInfo.resetPrincipalToInheritToNullPrincipal();
 
     // Start the request.
     this.listener.onStartRequest(request, context);
 
     // Initialize stuff.
     let win = NetworkHelper.getWindowForRequest(request);
-    exportData(win, request);
+    this.data = exportData(win, request);
     win.addEventListener("DOMContentLoaded", event => {
       win.addEventListener("contentMessage", onContentMessage, false, true);
     }, {once: true});
 
-    // Insert the initial HTML code.
-    let converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
-                      .createInstance(Ci.nsIScriptableUnicodeConverter);
-    converter.charset = "UTF-8";
-    let stream = converter.convertToInputStream(initialHTML(win.document));
-    this.listener.onDataAvailable(request, context, stream, 0, stream.available());
+    // Send the initial HTML code, encoded as UTF-8 by the TextEncoder.
+    let bytes = encoder.encode(initialHTML(win.document));
+    this.convertAndSendBuffer(request, context, bytes.buffer);
+
+    // Create an array to store data until the encoding is determined.
+    this.encodingArray = [];
   },
 
   onStopRequest: function (request, context, statusCode) {
+    // Flush: sniff the encoding if still undetermined, else send an empty final buffer.
+    if (this.encodingArray) {
+      this.determineEncoding(request, context, true);
+    } else {
+      this.convertAndSendBuffer(request, context, new ArrayBuffer(0), true);
+    }
+
+    // Stop the request.
     this.listener.onStopRequest(request, context, statusCode);
     this.listener = null;
+    this.decoder = null;
+    this.data = null;
+  },
+
+  // Sniffs the encoding from the bytes buffered in encodingArray, then sends them.
+  determineEncoding: function (request, context, flush = false) {
+    // Determine the encoding using the bytes in encodingArray, defaulting to UTF-8.
+    // An initial byte order mark character (U+FEFF) does the trick.
+    // If there is no BOM, since the first character of valid JSON will be ASCII,
+    // the pattern of nulls in the first two bytes can be used instead.
+    //  - UTF-16BE:  00 xx  or  FE FF
+    //  - UTF-16LE:  xx 00  or  FF FE
+    //  - UTF-8:  anything else.
+    let encoding = "UTF-8";
+    let bytes = this.encodingArray;
+    if (bytes.length >= 2) {
+      if (!bytes[0] && bytes[1] || bytes[0] == 0xFE && bytes[1] == 0xFF) {
+        encoding = "UTF-16BE";
+      } else if (bytes[0] && !bytes[1] || bytes[0] == 0xFF && bytes[1] == 0xFE) {
+        encoding = "UTF-16LE";
+      }
+    }
+
+    // Non-UTF-8 data needs a decoder; ignoreBOM keeps any BOM in the decoded output.
+    if (encoding !== "UTF-8") {
+      this.decoder = new TextDecoder(encoding, {ignoreBOM: true});
+    }
+
+    this.data.encoding = encoding;
+
+    // Send the buffered bytes (passing the flush flag along) and drop the array.
+    let buffer = new Uint8Array(bytes).buffer;
+    this.convertAndSendBuffer(request, context, buffer, flush);
+    this.encodingArray = null;
+  },
+
+  // Sends an ArrayBuffer to the listener, transcoding it to UTF-8 if a decoder is set.
+  convertAndSendBuffer: function (request, context, buffer, flush = false) {
+    // Non-UTF-8 input: decode it (in streaming mode unless flushing), re-encode as UTF-8.
+    if (this.decoder) {
+      let data = this.decoder.decode(buffer, {stream: !flush});
+      buffer = encoder.encode(data).buffer;
+    }
+
+    // Create an input stream that contains the bytes in the buffer.
+    let stream = new BufferStream(buffer, 0, buffer.byteLength);
+
+    // Send the input stream.
+    this.listener.onDataAvailable(request, context, stream, 0, stream.available());
   }
 };
 
 // Lets "save as" save the original JSON, not the viewer.
 // To save with the proper extension we need the original content type,
 // which has been replaced by application/vnd.mozilla.json.view
 function fixSave(request) {
   let originalType;
@@ -172,16 +257,18 @@ function exportData(win, request) {
     });
     request.visitRequestHeaders({
       visitHeader: function (name, value) {
         headers.request.push({name: name, value: value});
       }
     });
   }
   data.headers = Cu.cloneInto(headers, win);
+
+  return data;
 }
 
 // Serializes a qualifiedName and an optional set of attributes into an HTML
 // start tag. Be aware qualifiedName and attribute names are not validated.
 // Attribute values are escaped with escapingString algorithm in attribute mode
 // (https://html.spec.whatwg.org/multipage/syntax.html#escapingString).
 function startTag(qualifiedName, attributes = {}) {
   return Object.entries(attributes).reduce(function (prev, [attr, value]) {
--- a/devtools/client/jsonview/test/browser.ini
+++ b/devtools/client/jsonview/test/browser.ini
@@ -16,32 +16,33 @@ support-files =
   simple_json.json^headers^
   valid_json.json
   valid_json.json^headers^
   !/devtools/client/commandline/test/head.js
   !/devtools/client/framework/test/head.js
   !/devtools/client/framework/test/shared-head.js
 
 [browser_jsonview_bug_1380828.js]
+[browser_jsonview_ignore_charset.js]
 [browser_jsonview_copy_headers.js]
 subsuite = clipboard
 skip-if = (os == 'linux' && bits == 32 && debug) # bug 1328915, disable linux32 debug devtools for timeouts
 [browser_jsonview_copy_json.js]
 subsuite = clipboard
 skip-if = (os == 'linux' && bits == 32 && debug) # bug 1328915, disable linux32 debug devtools for timeouts
 [browser_jsonview_copy_rawdata.js]
 subsuite = clipboard
 skip-if = (os == 'linux' && bits == 32 && debug) # bug 1328915, disable linux32 debug devtools for timeouts
 [browser_jsonview_csp_json.js]
 [browser_jsonview_empty_object.js]
+[browser_jsonview_encoding.js]
 [browser_jsonview_filter.js]
 [browser_jsonview_invalid_json.js]
 [browser_jsonview_manifest.js]
 [browser_jsonview_nojs.js]
 [browser_jsonview_nul.js]
 [browser_jsonview_object-type.js]
 [browser_jsonview_save_json.js]
 support-files =
   !/toolkit/content/tests/browser/common/mockTransfer.js
 [browser_jsonview_slash.js]
-[browser_jsonview_utf8.js]
 [browser_jsonview_valid_json.js]
 [browser_json_refresh.js]
rename from devtools/client/jsonview/test/browser_jsonview_utf8.js
rename to devtools/client/jsonview/test/browser_jsonview_encoding.js
--- a/devtools/client/jsonview/test/browser_jsonview_utf8.js
+++ b/devtools/client/jsonview/test/browser_jsonview_encoding.js
@@ -1,39 +1,70 @@
 /* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
 /* vim: set ts=2 et sw=2 tw=80: */
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
 "use strict";
 
-// In UTF-8 this is a heavy black heart.
-const encodedChar = "%E2%9D%A4";
+add_task(function* () {
+  info("Test JSON encoding started");
 
-add_task(function* () {
-  info("Test UTF-8 JSON started");
+  const text = Symbol("text");
 
-  info("Test 1: UTF-8 is used by default");
-  yield testUrl("data:application/json,[\"" + encodedChar + "\"]");
+  const tests = [
+    {
+      "UTF-8 with BOM": "",
+      "UTF-16BE with BOM": "",
+      "UTF-16LE with BOM": "",
+      [text]: ""
+    }, {
+      "UTF-8": "%30",
+      "UTF-16BE": "%00%30",
+      "UTF-16LE": "%30%00",
+      [text]: "0"
+    }, {
+      "UTF-8": "%30%FF",
+      "UTF-16BE": "%00%30%00",
+      "UTF-16LE": "%30%00%00",
+      [text]: "0\uFFFD" // 0�
+    }, {
+      "UTF-8": "%C3%A0",
+      "UTF-16BE": "%00%E0",
+      "UTF-16LE": "%E0%00",
+      [text]: "\u00E0" // à
+    }, {
+      "UTF-8 with BOM": "%E2%9D%A4",
+      "UTF-16BE with BOM": "%27%64",
+      "UTF-16LE with BOM": "%64%27",
+      [text]: "\u2764" // ❤
+    }, {
+      "UTF-8": "%30%F0%9F%9A%80",
+      "UTF-16BE": "%00%30%D8%3D%DE%80",
+      "UTF-16LE": "%30%00%3D%D8%80%DE",
+      [text]: "0\uD83D\uDE80" // 0🚀
+    }
+  ];
 
-  info("Test 2: The charset parameter is ignored");
-  yield testUrl("data:application/json;charset=ANSI,[\"" + encodedChar + "\"]");
+  const bom = {
+    "UTF-8": "%EF%BB%BF",
+    "UTF-16BE": "%FE%FF",
+    "UTF-16LE": "%FF%FE"
+  };
+
+  for (let test of tests) {
+    let result = test[text];
+    for (let [encoding, data] of Object.entries(test)) {
+      info("Testing " + JSON.stringify(result) + " encoded in " + encoding + ".");
 
-  info("Test 3: The UTF-8 BOM is tolerated.");
-  const bom = "%EF%BB%BF";
-  yield testUrl("data:application/json," + bom + "[\"" + encodedChar + "\"]");
+      if (encoding.endsWith("BOM")) {
+        data = bom[encoding.split(" ")[0]] + data;
+      }
+
+      yield addJsonViewTab("data:application/json," + data);
+      yield selectJsonViewContentTab("rawdata");
+
+      // Check displayed data.
+      let output = yield getElementText(".textPanelBox .data");
+      is(output, result, "The right data has been received.");
+    }
+  }
 });
-
-function* testUrl(TEST_JSON_URL) {
-  yield addJsonViewTab(TEST_JSON_URL);
-
-  let countBefore = yield getElementCount(".jsonPanelBox .treeTable .treeRow");
-  is(countBefore, 1, "There must be one row.");
-
-  let objectCellCount = yield getElementCount(
-    ".jsonPanelBox .treeTable .stringCell");
-  is(objectCellCount, 1, "There must be one string cell.");
-
-  let objectCellText = yield getElementText(
-    ".jsonPanelBox .treeTable .stringCell");
-  is(objectCellText, JSON.stringify(decodeURIComponent(encodedChar)),
-     "The source has been parsed as UTF-8, ignoring the charset parameter.");
-}
new file mode 100644
--- /dev/null
+++ b/devtools/client/jsonview/test/browser_jsonview_ignore_charset.js
@@ -0,0 +1,20 @@
+/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80: */
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+add_task(function* () {
+  info("Test ignored charset parameter started");
+
+  const encodedChar = "%E2%9D%A4"; // In UTF-8 this is a heavy black heart
+  const result = "\u2764"; // ❤
+  const TEST_JSON_URL = "data:application/json;charset=ANSI," + encodedChar;
+
+  yield addJsonViewTab(TEST_JSON_URL);
+  yield selectJsonViewContentTab("rawdata");
+
+  let text = yield getElementText(".textPanelBox .data");
+  is(text, result, "The charset parameter is ignored and UTF-8 is used.");
+});