Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. draft
author Henri Sivonen <hsivonen@hsivonen.fi>
Fri, 06 Jul 2018 10:44:43 +0300
changeset 827195 70e8b1fa104560990a0b7f25bea82cb880879791
parent 827194 afdf7d11181f3b90578f94fd3824f7064827a85a
child 827197 6378a57d5c9b04102600861ef1205af643be5df0
push id 118488
push user bmo:hsivonen@hsivonen.fi
push date Tue, 07 Aug 2018 12:28:14 +0000
bugs 1402247, 1472113
milestone 63.0a1
Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions.

Correctness improvements:

 * UTF errors are handled safely per spec instead of dangerously truncating strings.
 * There are fewer converter implementations.

Performance improvements:

 * The old code did exact buffer length math, which meant doing UTF math twice on each input string (once for length calculation and another time for conversion). Exact length math is more complicated when handling errors properly, which the old code didn't do. The new code does UTF math on the string content only once (when converting) but risks allocating more than once. There are heuristics in place to lower the probability of reallocation in cases where the double math avoidance isn't enough of a saving to absorb an allocation and memcpy.
 * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized but a single non-ASCII code point pessimized the rest of the string. The new code tries to get back on the fast ASCII path.
 * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range input to eliminate an operation from the inner loop on x86/x86_64.
 * When assigning to a pre-existing string, the new code tries to reuse the old buffer instead of first releasing the old buffer and then allocating a new one.
 * When reallocating from the new code, the memcpy covers only the data that is part of the logical length of the old string instead of memcpying the whole capacity. (For old callers old excess memcpy behavior is preserved due to bogus callers. See bug 1472113.)
 * UTF-8 strings in XPConnect that are in the Latin1 range are passed to SpiderMonkey as Latin1.

New features:

 * Conversion between UTF-8 and Latin1 is added in order to enable faster future interop between Rust code (or otherwise UTF-8-using code) and text node and SpiderMonkey code that uses Latin1.

MozReview-Commit-ID: JaJuExfILM9
Cargo.lock
intl/encoding_glue/src/lib.rs
js/xpconnect/src/XPCConvert.cpp
servo/ports/geckolib/glue.rs
servo/support/gecko/nsstring/Cargo.toml
servo/support/gecko/nsstring/src/conversions.rs
servo/support/gecko/nsstring/src/lib.rs
toolkit/xre/nsWindowsRestart.cpp
toolkit/xre/nsWindowsWMain.cpp
xpcom/base/nsAlgorithm.h
xpcom/ds/nsAtomTable.cpp
xpcom/string/moz.build
xpcom/string/nsReadableUtils.cpp
xpcom/string/nsReadableUtils.h
xpcom/string/nsSubstring.cpp
xpcom/string/nsTStringObsolete.cpp
xpcom/string/nsTSubstring.cpp
xpcom/string/nsTSubstring.h
xpcom/string/nsUTF8Utils.h
xpcom/tests/gtest/TestAtoms.cpp
xpcom/tests/gtest/TestStrings.cpp
xpcom/tests/gtest/TestTextFormatter.cpp
xpcom/tests/gtest/TestUTF.cpp
xpcom/tests/gtest/UTFStrings.h
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1425,16 +1425,17 @@ dependencies = [
  "nsstring 0.1.0",
 ]
 
 [[package]]
 name = "nsstring"
 version = "0.1.0"
 dependencies = [
  "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "nsstring-gtest"
 version = "0.1.0"
 dependencies = [
  "nsstring 0.1.0",
 ]
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -7,23 +7,23 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
 // Adapted from third_party/rust/encoding_rs/src/lib.rs, so the
 // "top-level directory" in the above notice refers to
 // third_party/rust/encoding_rs/.
 
 extern crate encoding_rs;
+extern crate nserror;
 extern crate nsstring;
-extern crate nserror;
 
+use encoding_rs::*;
+use nserror::*;
+use nsstring::*;
 use std::slice;
-use encoding_rs::*;
-use nsstring::*;
-use nserror::*;
 
 // nsStringBuffer's internal bookkeeping takes 8 bytes from
 // the allocation. Plus one for termination.
 const NS_CSTRING_OVERHEAD: usize = 9;
 
 /// Takes `Option<usize>`, the destination string and a value
 /// to return on failure and tries to set the length of the
 /// destination string to the `usize` wrapped in the first
@@ -585,8 +585,100 @@ fn checked_min(one: Option<usize>, other
 }
 
 // Bindings for encoding_rs::mem. These may move to a separate crate in the future.
 
 #[no_mangle]
 pub unsafe extern "C" fn encoding_mem_is_utf16_bidi(buffer: *const u16, len: usize) -> bool {
     encoding_rs::mem::is_utf16_bidi(::std::slice::from_raw_parts(buffer, len))
 }
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_ascii(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_ascii(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_basic_latin(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_basic_latin(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf8_latin1(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_utf8_latin1(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf16_latin1(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_utf16_latin1(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_str_latin1(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_str_latin1(::std::str::from_utf8_unchecked(
+        ::std::slice::from_raw_parts(buffer, len),
+    ))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_latin1_lossy(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) {
+    encoding_rs::mem::convert_utf16_to_latin1_lossy(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    );
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_latin1_lossy(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_latin1_lossy(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) {
+    encoding_rs::mem::convert_latin1_to_utf16(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    );
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf16_to_utf8(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_utf16(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
--- a/js/xpconnect/src/XPCConvert.cpp
+++ b/js/xpconnect/src/XPCConvert.cpp
@@ -277,46 +277,88 @@ XPCConvert::NativeData2JS(MutableHandleV
             return true;
         }
 
         if (utf8String->IsEmpty()) {
             d.set(JS_GetEmptyStringValue(cx));
             return true;
         }
 
-        const uint32_t len = CalcUTF8ToUnicodeLength(*utf8String);
-        // The cString is not empty at this point, but the calculated
-        // UTF-16 length is zero, meaning no valid conversion exists.
-        if (!len)
-            return false;
-
-        const size_t buffer_size = (len + 1) * sizeof(char16_t);
-        char16_t* buffer =
-            static_cast<char16_t*>(JS_malloc(cx, buffer_size));
-        if (!buffer)
-            return false;
-
-        uint32_t copied;
-        if (!UTF8ToUnicodeBuffer(*utf8String, buffer, &copied) ||
-            len != copied) {
-            // Copy or conversion during copy failed. Did not copy the
-            // whole string.
-            JS_free(cx, buffer);
+        uint32_t len = utf8String->Length();
+        auto allocLen = CheckedUint32(len) + 1;
+        if (!allocLen.isValid()) {
             return false;
         }
 
-        // JS_NewUCString takes ownership on success, i.e. a
+        // Usage of UTF-8 in XPConnect is mostly for things that are
+        // almost always ASCII, so the inexact allocations below
+        // should be fine.
+
+        if (IsUTF8Latin1(*utf8String)) {
+            char* buffer = static_cast<char*>(JS_malloc(cx, allocLen.value()));
+            if (!buffer) {
+                return false;
+            }
+            size_t written =
+                LossyConvertUTF8toLatin1(*utf8String, MakeSpan(buffer, len));
+            buffer[written] = 0;
+
+            // JS_NewLatin1String takes ownership on success, i.e. a
+            // successful call will make it the responsibility of the JS VM
+            // to free the buffer.
+            // written can never exceed len, so the truncation is OK.
+            JSString* str = JS_NewLatin1String(
+                cx, reinterpret_cast<JS::Latin1Char*>(buffer), written);
+            if (!str) {
+                JS_free(cx, buffer);
+                return false;
+            }
+            d.setString(str);
+            return true;
+        }
+
+        // 1-byte sequences decode to 1 UTF-16 code unit
+        // 2-byte sequences decode to 1 UTF-16 code unit
+        // 3-byte sequences decode to 1 UTF-16 code unit
+        // 4-byte sequences decode to 2 UTF-16 code units
+        // So the number of output code units never exceeds
+        // the number of input code units (but see the comment
+        // below). allocLen already takes the zero terminator
+        // into account.
+        allocLen *= sizeof(char16_t);
+        if (!allocLen.isValid()) {
+            return false;
+        }
+
+        char16_t* buffer =
+            static_cast<char16_t*>(JS_malloc(cx, allocLen.value()));
+        if (!buffer) {
+            return false;
+        }
+
+        // For its internal simplicity, ConvertUTF8toUTF16 requires the
+        // destination to be one code unit longer than the source, but
+        // it never actually writes more code units than the number of
+        // code units in the source. That's why it's OK to claim the
+        // output buffer has len + 1 space but then still expect to
+        // have space for the zero terminator.
+        size_t written =
+            ConvertUTF8toUTF16(*utf8String, MakeSpan(buffer, len + 1));
+        MOZ_RELEASE_ASSERT(written <= len);
+        buffer[written] = 0;
+
+        // JS_NewUCStringDontDeflate takes ownership on success, i.e. a
         // successful call will make it the responsiblity of the JS VM
         // to free the buffer.
-        JSString* str = JS_NewUCString(cx, buffer, len);
+        // written can never exceed len (asserted above), so the truncation is OK.
+        JSString* str = JS_NewUCStringDontDeflate(cx, buffer, written);
         if (!str) {
             JS_free(cx, buffer);
             return false;
         }
-
         d.setString(str);
         return true;
     }
     case nsXPTType::T_CSTRING:
     {
         const nsACString* cString = static_cast<const nsACString*>(s);
 
         if (!cString || cString->IsVoid()) {
@@ -1218,19 +1260,20 @@ JSErrorToXPCException(const char* toStri
                       Exception** exceptn)
 {
     AutoJSContext cx;
     nsresult rv = NS_ERROR_FAILURE;
     RefPtr<nsScriptError> data;
     if (report) {
         nsAutoString bestMessage;
         if (report && report->message()) {
-            CopyUTF8toUTF16(report->message().c_str(), bestMessage);
+            CopyUTF8toUTF16(mozilla::MakeStringSpan(report->message().c_str()),
+                            bestMessage);
         } else if (toStringResult) {
-            CopyUTF8toUTF16(toStringResult, bestMessage);
+            CopyUTF8toUTF16(mozilla::MakeStringSpan(toStringResult), bestMessage);
         } else {
             bestMessage.AssignLiteral("JavaScript Error");
         }
 
         const char16_t* linebuf = report->linebuf();
 
         data = new nsScriptError();
         data->InitWithWindowID(
--- a/servo/ports/geckolib/glue.rs
+++ b/servo/ports/geckolib/glue.rs
@@ -3515,17 +3515,17 @@ pub extern "C" fn Servo_DeclarationBlock
 pub extern "C" fn Servo_DeclarationBlock_GetNthProperty(
     declarations: RawServoDeclarationBlockBorrowed,
     index: u32,
     result: *mut nsAString,
 ) -> bool {
     read_locked_arc(declarations, |decls: &PropertyDeclarationBlock| {
         if let Some(decl) = decls.declarations().get(index as usize) {
             let result = unsafe { result.as_mut().unwrap() };
-            result.assign_utf8(&decl.id().name());
+            result.assign_str(&decl.id().name());
             true
         } else {
             false
         }
     })
 }
 
 macro_rules! get_property_id_from_property {
--- a/servo/support/gecko/nsstring/Cargo.toml
+++ b/servo/support/gecko/nsstring/Cargo.toml
@@ -5,9 +5,9 @@ authors = ["nobody@mozilla.com"]
 license = "MPL-2.0"
 description = "Rust bindings to xpcom string types"
 
 [features]
 gecko_debug = []
 
 [dependencies]
 bitflags = "1.0"
-
+encoding_rs = "0.8.0"
new file mode 100644
--- /dev/null
+++ b/servo/support/gecko/nsstring/src/conversions.rs
@@ -0,0 +1,712 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate encoding_rs;
+
+use std::slice;
+
+use super::nsACString;
+use super::nsAString;
+use super::nsCStringLike;
+use super::BulkWriteOk;
+use super::Gecko_FallibleAssignCString;
+use super::Latin1StringLike;
+
+use conversions::encoding_rs::mem::*;
+use conversions::encoding_rs::Encoding;
+
+/// Required math stated in the docs of
+/// `convert_utf16_to_utf8()`.
+#[inline(always)]
+fn times_three_plus_one(a: usize) -> Option<usize> {
+    a.checked_mul(3)?.checked_add(1)
+}
+
+#[inline(always)]
+fn identity(a: usize) -> Option<usize> {
+    Some(a)
+}
+
+#[inline(always)]
+fn plus_one(a: usize) -> Option<usize> {
+    a.checked_add(1)
+}
+
+/// Typical cache line size per
+/// https://stackoverflow.com/questions/14707803/line-size-of-l1-and-l2-caches
+///
+/// For consistent behavior, not trying to use 128 on aarch64
+/// or other fanciness like that.
+const CACHE_LINE: usize = 64;
+
+const CACHE_LINE_MASK: usize = CACHE_LINE - 1;
+
+#[inline(always)]
+fn starts_with_ascii(buffer: &[u8]) -> bool {
+    // We examine data only up to the end of the cache line
+    // to make this check minimally disruptive.
+    let bound = if buffer.len() <= CACHE_LINE {
+        buffer.len()
+    } else {
+        CACHE_LINE - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)
+    };
+    is_ascii(&buffer[..bound])
+}
+
+#[inline(always)]
+fn starts_with_basic_latin(buffer: &[u16]) -> bool {
+    // We look at two cache lines with code unit size of two. There is a need
+    // to look at more than one cache line in the UTF-16 case, because looking
+    // at just one cache line wouldn't catch non-ASCII Latin with high enough
+    // probability with Latin-script languages that have relatively infrequent
+    // non-ASCII characters.
+    let bound = if buffer.len() <= CACHE_LINE {
+        buffer.len()
+    } else {
+        (CACHE_LINE * 2 - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)) / 2
+    };
+    is_basic_latin(&buffer[..bound])
+}
+
+// Ignoring the copy avoidance complications of conversions between Latin1 and
+// UTF-8, a conversion function has the outward form of
+// `fn F(&mut self, other: &[T], old_len: usize) -> Result<BulkWriteOk, ()>`,
+// where `T` is either `u8` or `u16`. `other` is the slice whose converted
+// contents are to be appended to `self` and `old_len` indicates how many
+// code units of `self` are to be preserved (0 for the assignment case and
+// `self.len()` for the appending case).
+//
+// As implementation parameters a conversion function needs to know the
+// math for computing the worst case conversion length in code units given
+// the input length in code units. For a _constant conversion_ the number
+// of code units the conversion produces equals the number of code units
+// in the input. For a _shrinking conversion_ the maximum number of code
+// units the conversion can produce equals the number of code units in
+// the input, but the conversion can produce fewer code units. Still, due
+// to implementation details, the function might want _one_ unit more of
+// output space. For an _expanding conversion_ (no need for macro), the
+// minimum number of code units produced by the conversion is the number
+// of code units in the input, but the conversion can produce more.
+//
+// Copy avoidance conversions avoid copying a refcounted buffer when it's
+// ASCII-only.
+//
+// Internally, a conversion function needs to know the underlying
+// encoding_rs conversion function, the math for computing the required
+// output buffer size and, depending on the case, the underlying
+// encoding_rs ASCII prefix handling function.
+
+/// A conversion where the number of code units in the output is potentially
+/// smaller than the number of code units in the input.
+///
+/// Takes the name of the method to be generated, the name of the conversion
+/// function and the type of the input slice.
+///
+/// `$name` is the name of the function to generate
+/// `$convert` is the underlying `encoding_rs::mem` function to use
+/// `$other_ty` is the type of the input slice
+/// `$math` is the worst-case length math that `$convert` expects
+macro_rules! shrinking_conversion {
+    (name = $name:ident,
+     convert = $convert:ident,
+     other_ty = $other_ty:ty,
+     math = $math:ident) => (
+        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> {
+            let needed = $math(other.len()).ok_or(())?;
+            let mut handle = unsafe {
+                self.bulk_write(old_len.checked_add(needed).ok_or(())?, old_len, false)?
+            };
+            let written = $convert(other, &mut handle.as_mut_slice()[old_len..]);
+            Ok(handle.finish(old_len + written, true))
+        }
+     )
+}
+
+/// A conversion where the number of code units in the output is always equal
+/// to the number of code units in the input.
+///
+/// Takes the name of the method to be generated, the name of the conversion
+/// function and the type of the input slice.
+///
+/// `$name` is the name of the function to generate
+/// `$convert` is the underlying `encoding_rs::mem` function to use
+/// `$other_ty` is the type of the input slice
+macro_rules! constant_conversion {
+    (name = $name:ident,
+     convert = $convert:ident,
+     other_ty = $other_ty:ty) => (
+        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> {
+            let new_len = old_len.checked_add(other.len()).ok_or(())?;
+            let mut handle = unsafe {
+                self.bulk_write(new_len, old_len, true)?
+            };
+            $convert(other, &mut handle.as_mut_slice()[old_len..]);
+            Ok(handle.finish(new_len, false))
+        }
+     )
+}
+
+/// An intermediate check for avoiding a copy and having an `nsStringBuffer`
+/// refcount increment instead when both `self` and `other` are `nsACString`s,
+/// `other` is entirely ASCII and all old data in `self` is discarded.
+///
+/// `$name` is the name of the function to generate
+/// `$implementation` is the underlying conversion that takes a slice and that
+///         is used when we can't just adopt the incoming buffer as-is
+/// `$string_like` is the kind of input taken
+macro_rules! ascii_copy_avoidance {
+    (name = $name:ident,
+     implementation = $implementation:ident,
+     string_like = $string_like:ident) => (
+        fn $name<T: $string_like + ?Sized>(&mut self, other: &T, old_len: usize) -> Result<BulkWriteOk, ()> {
+            let adapter = other.adapt();
+            let other_slice = adapter.as_ref();
+            let num_ascii = if adapter.is_abstract() && old_len == 0 {
+                let up_to = Encoding::ascii_valid_up_to(other_slice);
+                if up_to == other_slice.len() {
+                    // Calling something whose argument can be obtained from
+                    // the adapter rather than an nsStringLike avoids a huge
+                    // lifetime mess by keeping nsStringLike and
+                    // Latin1StringLike free of lifetime interdependencies.
+                    if unsafe { Gecko_FallibleAssignCString(self, other.adapt().as_ptr()) } {
+                        return Ok(BulkWriteOk{});
+                    } else {
+                        return Err(());
+                    }
+                }
+                Some(up_to)
+            } else {
+                None
+            };
+            self.$implementation(other_slice, old_len, num_ascii)
+        }
+    )
+}
+
+impl nsAString {
+    // Valid UTF-8 to UTF-16
+
+    // Documentation says the destination buffer needs to have
+    // as many code units as the input.
+    shrinking_conversion!(
+        name = fallible_append_str_impl,
+        convert = convert_str_to_utf16,
+        other_ty = &str,
+        math = identity
+    );
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and replace the content
+    /// of this string with the conversion result.
+    pub fn assign_str(&mut self, other: &str) {
+        self.fallible_append_str_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and fallibly replace the
+    /// content of this string with the conversion result.
+    pub fn fallible_assign_str(&mut self, other: &str) -> Result<(), ()> {
+        self.fallible_append_str_impl(other, 0).map(|_| ())
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and append the conversion
+    /// to this string.
+    pub fn append_str(&mut self, other: &str) {
+        let len = self.len();
+        self.fallible_append_str_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and fallibly append the
+    /// conversion to this string.
+    pub fn fallible_append_str(&mut self, other: &str) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_str_impl(other, len).map(|_| ())
+    }
+
+    // Potentially-invalid UTF-8 to UTF-16
+
+    // Documentation says the destination buffer needs to have
+    // one more code unit than the input.
+    shrinking_conversion!(
+        name = fallible_append_utf8_impl,
+        convert = convert_utf8_to_utf16,
+        other_ty = &[u8],
+        math = plus_one
+    );
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// replace the content of this string with the conversion result.
+    pub fn assign_utf8(&mut self, other: &[u8]) {
+        self.fallible_append_utf8_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly replace the content of this string with the conversion result.
+    pub fn fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
+        self.fallible_append_utf8_impl(other, 0).map(|_| ())
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// append the conversion result to this string.
+    pub fn append_utf8(&mut self, other: &[u8]) {
+        let len = self.len();
+        self.fallible_append_utf8_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly append the conversion result to this string.
+    pub fn fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf8_impl(other, len).map(|_| ())
+    }
+
+    // Latin1 to UTF-16
+
+    constant_conversion!(
+        name = fallible_append_latin1_impl,
+        convert = convert_latin1_to_utf16,
+        other_ty = &[u8]
+    );
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-16 and replace the content of this string with the conversion result.
+    pub fn assign_latin1(&mut self, other: &[u8]) {
+        self.fallible_append_latin1_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-16 and fallibly replace the content of this string with the
+    /// conversion result.
+    pub fn fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
+        self.fallible_append_latin1_impl(other, 0).map(|_| ())
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-16 and append the conversion result to this string.
+    pub fn append_latin1(&mut self, other: &[u8]) {
+        let len = self.len();
+        self.fallible_append_latin1_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-16 and fallibly append the conversion result to this string.
+    pub fn fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_latin1_impl(other, len).map(|_| ())
+    }
+}
+
+impl nsACString {
+    // UTF-16 to UTF-8
+
+    fn fallible_append_utf16_to_utf8_impl(
+        &mut self,
+        other: &[u16],
+        old_len: usize,
+    ) -> Result<BulkWriteOk, ()> {
+        // We first size the buffer for ASCII if the first cache line is ASCII. If that turns out not to
+        // be enough, we size for the worst case given the length of the remaining input at that point.
+        let (filled, num_ascii, mut handle) = if starts_with_basic_latin(other) {
+            let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
+            let num_ascii = copy_basic_latin_to_ascii(other, &mut handle.as_mut_slice()[old_len..]);
+            let left = other.len() - num_ascii;
+            if left == 0 {
+                return Ok(handle.finish(old_len + num_ascii, true));
+            }
+            let filled = old_len + num_ascii;
+            let needed = times_three_plus_one(left).ok_or(())?;
+            let new_len = filled.checked_add(needed).ok_or(())?;
+            unsafe {
+                handle.restart_bulk_write(new_len, filled, false)?;
+            }
+            (filled, num_ascii, handle)
+        } else {
+            // Started with non-ASCII. Compute worst case
+            let needed = times_three_plus_one(other.len()).ok_or(())?;
+            let new_len = old_len.checked_add(needed).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+            (old_len, 0, handle)
+        };
+        let written =
+            convert_utf16_to_utf8(&other[num_ascii..], &mut handle.as_mut_slice()[filled..]);
+        Ok(handle.finish(filled + written, true))
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// replace the content of this string with the conversion result.
+    pub fn assign_utf16_to_utf8(&mut self, other: &[u16]) {
+        self.fallible_append_utf16_to_utf8_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly replace the content of this string with the conversion result.
+    pub fn fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
+        self.fallible_append_utf16_to_utf8_impl(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// append the conversion result to this string.
+    pub fn append_utf16_to_utf8(&mut self, other: &[u16]) {
+        let len = self.len();
+        self.fallible_append_utf16_to_utf8_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly append the conversion result to this string.
+    pub fn fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf16_to_utf8_impl(other, len)
+            .map(|_| ())
+    }
+
+    // UTF-16 to Latin1
+
+    constant_conversion!(
+        name = fallible_append_utf16_to_latin1_lossy_impl,
+        convert = convert_utf16_to_latin1_lossy,
+        other_ty = &[u16]
+    );
+
+    /// Convert a UTF-16 string whose code points are all below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// replace the content of this string with the conversion result.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-16 string whose code points are all below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// fallibly replace the content of this string with the conversion result.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a UTF-16 string whose code points are all below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// append the conversion result to this string.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn append_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
+        let len = self.len();
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-16 string, all of whose code points are below U+0100,
+    /// into a Latin1 (scalar value is byte value; not windows-1252!) string
+    /// and fallibly append the conversion result to this string.
+    ///
+    /// Returns `Err(())` if the underlying conversion fails.
+    ///
+    /// # Panics
+    ///
+    /// Input above U+00FF or invalid UTF-16 panics in debug mode and yields
+    /// memory-safe, CPU-dependent garbage (not to be relied on) in release builds.
+    pub fn fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, len)
+            .map(|_| ())
+    }
+
+    // UTF-8 to Latin1
+
+    ascii_copy_avoidance!(
+        name = fallible_append_utf8_to_latin1_lossy_check, // called by the public UTF-8 to Latin1 wrappers
+        implementation = fallible_append_utf8_to_latin1_lossy_impl, // defined right after this invocation
+        string_like = nsCStringLike // same bound the public wrappers put on their input type
+    );
+
+    fn fallible_append_utf8_to_latin1_lossy_impl(
+        &mut self,
+        other: &[u8], // UTF-8 input to convert
+        old_len: usize, // passed to bulk_write() as the number of existing units to preserve
+        maybe_num_ascii: Option<usize>, // length of a leading run copied verbatim; None counts as 0
+    ) -> Result<BulkWriteOk, ()> {
+        let new_len = old_len.checked_add(other.len()).ok_or(())?; // one output byte per input byte
+        let num_ascii = maybe_num_ascii.unwrap_or(0);
+        // Can't overflow as long as num_ascii <= other.len(): old_len + other.len() was checked above.
+        let old_len_plus_num_ascii = old_len + num_ascii;
+        let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+        let written = {
+            let buffer = handle.as_mut_slice();
+            if num_ascii != 0 {
+                (&mut buffer[old_len..old_len_plus_num_ascii]).copy_from_slice(&other[..num_ascii]);
+            }
+            convert_utf8_to_latin1_lossy(&other[num_ascii..], &mut buffer[old_len_plus_num_ascii..])
+        };
+        Ok(handle.finish(old_len_plus_num_ascii + written, true)) // true: allow shrinking to fit
+    }
+
+    /// Convert a UTF-8 string, all of whose code points are below U+0100,
+    /// into a Latin1 (scalar value is byte value; not windows-1252!) string
+    /// and replace the content of this string with the conversion result.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Out of memory" if the underlying fallible conversion
+    /// fails. If the input contains code points above U+00FF or is not valid
+    /// UTF-8, panics in debug mode and produces garbage in a memory-safe way
+    /// in release builds; the garbage is CPU-dependent and must not be relied on.
+    pub fn assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
+        self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-8 string, all of whose code points are below U+0100,
+    /// into a Latin1 (scalar value is byte value; not windows-1252!) string
+    /// and fallibly replace the content of this string with the result.
+    ///
+    /// Returns `Err(())` if the underlying conversion fails.
+    ///
+    /// # Panics
+    ///
+    /// Input above U+00FF or invalid UTF-8 panics in debug mode and yields
+    /// memory-safe, CPU-dependent garbage (not to be relied on) in release builds.
+    pub fn fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a UTF-8 string, all of whose code points are below U+0100,
+    /// into a Latin1 (scalar value is byte value; not windows-1252!) string
+    /// and append the conversion result to this string.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Out of memory" if the underlying fallible conversion
+    /// fails. If the input contains code points above U+00FF or is not valid
+    /// UTF-8, panics in debug mode and produces garbage in a memory-safe way
+    /// in release builds; the garbage is CPU-dependent and must not be relied on.
+    pub fn append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
+        let len = self.len();
+        self.fallible_append_utf8_to_latin1_lossy_check(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-8 string, all of whose code points are below U+0100,
+    /// into a Latin1 (scalar value is byte value; not windows-1252!) string
+    /// and fallibly append the conversion result to this string.
+    ///
+    /// Returns `Err(())` if the underlying conversion fails.
+    ///
+    /// # Panics
+    ///
+    /// Input above U+00FF or invalid UTF-8 panics in debug mode and yields
+    /// memory-safe, CPU-dependent garbage (not to be relied on) in release builds.
+    pub fn fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf8_to_latin1_lossy_check(other, len)
+            .map(|_| ())
+    }
+
+    // Latin1 to UTF-8 CString
+
+    ascii_copy_avoidance!(
+        name = fallible_append_latin1_to_utf8_check, // called by the public Latin1 to UTF-8 wrappers
+        implementation = fallible_append_latin1_to_utf8_impl, // defined right after this invocation
+        string_like = Latin1StringLike // same bound the public wrappers put on their input type
+    );
+
+    fn fallible_append_latin1_to_utf8_impl(
+        &mut self,
+        other: &[u8], // Latin1 input to convert
+        old_len: usize, // passed to bulk_write() as the number of existing units to preserve
+        maybe_num_ascii: Option<usize>, // Some(n): the wrapper found n leading ASCII bytes
+    ) -> Result<BulkWriteOk, ()> {
+        let (filled, num_ascii, mut handle) = if let Some(num_ascii) = maybe_num_ascii {
+            // Wrapper checked for ASCII: the first `num_ascii` bytes are copied verbatim.
+            let left = other.len() - num_ascii;
+            let filled = old_len.checked_add(num_ascii).ok_or(())?;
+            let needed = left.checked_mul(2).ok_or(())?;
+            let new_len = filled.checked_add(needed).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+            if num_ascii != 0 {
+                (&mut handle.as_mut_slice()[old_len..filled]).copy_from_slice(&other[..num_ascii]);
+            }
+            (filled, num_ascii, handle)
+        } else if starts_with_ascii(other) {
+            // Wrapper didn't check for ASCII, but `other` starts with ASCII,
+            // so let's first size the buffer with optimism that it's
+            // ASCII-only and grow it only if non-ASCII shows up.
+            let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
+            let num_ascii = copy_ascii_to_ascii(other, &mut handle.as_mut_slice()[old_len..]);
+            let left = other.len() - num_ascii;
+            let filled = old_len + num_ascii; // <= new_len_with_ascii, which was checked above
+            if left == 0 {
+                // `other` was all ASCII
+                return Ok(handle.finish(filled, true));
+            }
+            let needed = left.checked_mul(2).ok_or(())?;
+            let new_len = filled.checked_add(needed).ok_or(())?;
+            unsafe {
+                handle.restart_bulk_write(new_len, filled, false)?;
+            }
+            (filled, num_ascii, handle)
+        } else {
+            // Started with non-ASCII. Assume worst case: two UTF-8 bytes per Latin1 byte.
+            let needed = other.len().checked_mul(2).ok_or(())?;
+            let new_len = old_len.checked_add(needed).ok_or(())?;
+            let handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+            (old_len, 0, handle)
+        };
+        let written =
+            convert_latin1_to_utf8(&other[num_ascii..], &mut handle.as_mut_slice()[filled..]);
+        Ok(handle.finish(filled + written, true))
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-8 and replace the content of this string with the conversion result.
+    pub fn assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
+        self.fallible_append_latin1_to_utf8_check(other, 0)
+            .expect("Out of memory"); // panics if the fallible conversion returned Err
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-8 and fallibly replace the content of this string with
+    /// the conversion result. Returns `Err(())` if the conversion fails.
+    pub fn fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        self.fallible_append_latin1_to_utf8_check(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-8 and append the conversion result to this string.
+    pub fn append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
+        let len = self.len();
+        self.fallible_append_latin1_to_utf8_check(other, len)
+            .expect("Out of memory"); // panics if the fallible conversion returned Err
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-8 and fallibly append the conversion result to this string.
+    pub fn fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_latin1_to_utf8_check(other, len)
+            .map(|_| ()) // drop the success value; an Err(()) passes through unchanged
+    }
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nsstring_fallible_append_utf8_impl(
+    this: *mut nsAString, // destination string; dereferenced without a null check
+    other: *const u8, // start of the input; must be non-null even when other_len is 0
+    other_len: usize, // number of u8 units readable at `other`
+    old_len: usize, // forwarded unchanged to the Rust method
+) -> bool { // true iff the Rust method returned Ok
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_utf8_impl(other_slice, old_len)
+        .is_ok()
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nsstring_fallible_append_latin1_impl(
+    this: *mut nsAString, // destination string; dereferenced without a null check
+    other: *const u8, // start of the input; must be non-null even when other_len is 0
+    other_len: usize, // number of u8 units readable at `other`
+    old_len: usize, // forwarded unchanged to the Rust method
+) -> bool { // true iff the Rust method returned Ok
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_latin1_impl(other_slice, old_len)
+        .is_ok()
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_utf8_impl(
+    this: *mut nsACString, // destination string; dereferenced without a null check
+    other: *const u16, // start of the input; must be non-null and u16-aligned even when empty
+    other_len: usize, // number of u16 units readable at `other`
+    old_len: usize, // forwarded unchanged to the Rust method
+) -> bool { // true iff the Rust method returned Ok
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_utf16_to_utf8_impl(other_slice, old_len)
+        .is_ok()
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_latin1_lossy_impl(
+    this: *mut nsACString, // destination string; dereferenced without a null check
+    other: *const u16, // start of the input; must be non-null and u16-aligned even when empty
+    other_len: usize, // number of u16 units readable at `other`
+    old_len: usize, // forwarded unchanged to the Rust method
+) -> bool { // true iff the Rust method returned Ok
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_utf16_to_latin1_lossy_impl(other_slice, old_len)
+        .is_ok()
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_utf8_to_latin1_lossy_check(
+    this: *mut nsACString, // destination string; dereferenced without a null check
+    other: *const nsACString, // source string; dereferenced without a null check
+    old_len: usize, // forwarded unchanged to the Rust method
+) -> bool { // true iff the Rust method returned Ok
+    (*this)
+        .fallible_append_utf8_to_latin1_lossy_check(&*other, old_len) // NOTE(review): presumably `this` != `other`; confirm callers
+        .is_ok()
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_latin1_to_utf8_check(
+    this: *mut nsACString, // destination string; dereferenced without a null check
+    other: *const nsACString, // source string; dereferenced without a null check
+    old_len: usize, // forwarded unchanged to the Rust method
+) -> bool { // true iff the Rust method returned Ok
+    (*this)
+        .fallible_append_latin1_to_utf8_check(&*other, old_len) // NOTE(review): presumably `this` != `other`; confirm callers
+        .is_ok()
+}
--- a/servo/support/gecko/nsstring/src/lib.rs
+++ b/servo/support/gecko/nsstring/src/lib.rs
@@ -119,20 +119,36 @@ extern crate bitflags;
 
 use std::borrow;
 use std::cmp;
 use std::fmt;
 use std::marker::PhantomData;
 use std::mem;
 use std::ops::{Deref, DerefMut};
 use std::os::raw::c_void;
+use std::ptr;
 use std::slice;
 use std::str;
 use std::u32;
 
+mod conversions;
+
+pub use self::conversions::nscstring_fallible_append_latin1_to_utf8_check;
+pub use self::conversions::nscstring_fallible_append_utf16_to_latin1_lossy_impl;
+pub use self::conversions::nscstring_fallible_append_utf16_to_utf8_impl;
+pub use self::conversions::nscstring_fallible_append_utf8_to_latin1_lossy_check;
+pub use self::conversions::nsstring_fallible_append_latin1_impl;
+pub use self::conversions::nsstring_fallible_append_utf8_impl;
+
+/// A type for showing that `finish()` was called on a `BulkWriteHandle`.
+/// Instantiating this type from elsewhere is basically an assertion that
+/// there is no `BulkWriteHandle` around, so be very careful with instantiating
+/// this type!
+pub struct BulkWriteOk;
+
 ///////////////////////////////////
 // Internal Implementation Flags //
 ///////////////////////////////////
 
 mod data_flags {
     bitflags! {
         // While this has the same layout as u16, it cannot be passed
         // over FFI safely as a u16.
@@ -163,60 +179,203 @@ mod class_flags {
 
 use class_flags::ClassFlags;
 use data_flags::DataFlags;
 
 ////////////////////////////////////
 // Generic String Bindings Macros //
 ////////////////////////////////////
 
+macro_rules! string_like {
+    {
+        char_t = $char_t: ty;
+
+        AString = $AString: ident;
+        String = $String: ident;
+        Str = $Str: ident;
+
+        StringLike = $StringLike: ident;
+        StringAdapter = $StringAdapter: ident;
+    } => {
+        /// This trait is implemented on types which are `ns[C]String`-like, in
+        /// that they can at very low cost be converted to a borrowed
+        /// `&nsA[C]String`. Unfortunately, the intermediate type
+        /// `ns[C]StringAdapter` is required as well due to types like `&[u8]`
+        /// needing to be (cheaply) wrapped in a `nsCString` on the stack to
+        /// create the `&nsACString`.
+        ///
+        /// This trait is used to DWIM when calling the methods on
+        /// `nsA[C]String`.
+        pub trait $StringLike {
+            fn adapt(&self) -> $StringAdapter;
+        }
+
+        impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T {
+            fn adapt(&self) -> $StringAdapter {
+                <T as $StringLike>::adapt(*self)
+            }
+        }
+
+        impl<'a, T> $StringLike for borrow::Cow<'a, T>
+            where T: $StringLike + borrow::ToOwned + ?Sized {
+            fn adapt(&self) -> $StringAdapter {
+                <T as $StringLike>::adapt(self.as_ref())
+            }
+        }
+
+        impl $StringLike for $AString {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Abstract(self)
+            }
+        }
+
+        impl<'a> $StringLike for $Str<'a> {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Abstract(self)
+            }
+        }
+
+        impl $StringLike for $String {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Abstract(self)
+            }
+        }
+
+        impl $StringLike for [$char_t] {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Borrowed($Str::from(self))
+            }
+        }
+
+        impl $StringLike for Vec<$char_t> {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Borrowed($Str::from(&self[..]))
+            }
+        }
+
+        impl $StringLike for Box<[$char_t]> {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Borrowed($Str::from(&self[..]))
+            }
+        }
+    }
+}
+
+impl<'a> Drop for nsAStringBulkWriteHandle<'a> {
+    /// This only runs in error cases. In success cases, `finish()`
+    /// calls `forget(self)`.
+    fn drop(&mut self) {
+        if self.capacity == 0 {
+            // If capacity is 0, the string is a zero-length
+            // string, so we have nothing to do.
+            return;
+        }
+        // The old zero terminator may be gone by now, so we need
+        // to write a new one somewhere and make length match.
+        // We can use a length between 1 and self.capacity.
+        // Seems prudent to overwrite the uninitialized memory.
+        // Using the length 1 leaves the shortest memory to overwrite.
+        // U+FFFD is the safest placeholder. Merely truncating the
+        // string to a zero-length string might be dangerous in some
+        // scenarios. See
+        // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences
+        // for a closely related scenario.
+        unsafe {
+            let mut this = self.string.as_repr();
+            this.as_mut().length = 1u32;
+            *(this.as_mut().data.as_mut()) = 0xFFFDu16; // U+FFFD as a single UTF-16 code unit
+            *(this.as_mut().data.as_ptr().offset(1isize)) = 0; // new zero terminator
+        }
+    }
+}
+
+impl<'a> Drop for nsACStringBulkWriteHandle<'a> {
+    /// This only runs in error cases. In success cases, `finish()`
+    /// calls `forget(self)`.
+    fn drop(&mut self) {
+        if self.capacity == 0 {
+            // If capacity is 0, the string is a zero-length
+            // string, so we have nothing to do.
+            return;
+        }
+        // The old zero terminator may be gone by now, so we need
+        // to write a new one somewhere and make length match.
+        // We can use a length between 1 and self.capacity.
+        // Seems prudent to overwrite the uninitialized memory.
+        // Using the length 1 leaves the shortest memory to overwrite.
+        // U+FFFD is the safest placeholder, but when it doesn't fit,
+        // let's use the ASCII substitute character. Merely truncating the
+        // string to a zero-length string might be dangerous in some
+        // scenarios. See
+        // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences
+        // for a closely related scenario.
+        unsafe {
+            let mut this = self.string.as_repr();
+            if self.capacity >= 3 {
+                this.as_mut().length = 3u32; // U+FFFD takes three bytes in UTF-8: EF BF BD
+                *(this.as_mut().data.as_mut()) = 0xEFu8;
+                *(this.as_mut().data.as_ptr().offset(1isize)) = 0xBFu8;
+                *(this.as_mut().data.as_ptr().offset(2isize)) = 0xBDu8;
+                *(this.as_mut().data.as_ptr().offset(3isize)) = 0; // new zero terminator
+            } else {
+                this.as_mut().length = 1u32;
+                *(this.as_mut().data.as_mut()) = 0x1Au8; // ASCII SUB; U+FFFD doesn't fit
+                *(this.as_mut().data.as_ptr().offset(1isize)) = 0; // new zero terminator
+            }
+        }
+    }
+}
+
 macro_rules! define_string_types {
     {
         char_t = $char_t: ty;
 
         AString = $AString: ident;
         String = $String: ident;
         Str = $Str: ident;
 
         StringLike = $StringLike: ident;
         StringAdapter = $StringAdapter: ident;
 
         StringRepr = $StringRepr: ident;
 
+        BulkWriteHandle = $BulkWriteHandle: ident;
+
         drop = $drop: ident;
         assign = $assign: ident, $fallible_assign: ident;
         take_from = $take_from: ident, $fallible_take_from: ident;
         append = $append: ident, $fallible_append: ident;
         set_length = $set_length: ident, $fallible_set_length: ident;
         begin_writing = $begin_writing: ident, $fallible_begin_writing: ident;
+        start_bulk_write = $start_bulk_write: ident;
     } => {
         /// The representation of a ns[C]String type in C++. This type is
         /// used internally by our definition of ns[C]String to ensure layout
         /// compatibility with the C++ ns[C]String type.
         ///
         /// This type may also be used in place of a C++ ns[C]String inside of
         /// struct definitions which are shared with C++, as it has identical
         /// layout to our ns[C]String type.
         ///
         /// This struct will leak its data if dropped from rust. See the module
         /// documentation for more information on this type.
         #[repr(C)]
         #[derive(Debug)]
         pub struct $StringRepr {
-            data: *const $char_t,
+            data: ptr::NonNull<$char_t>,
             length: u32,
             dataflags: DataFlags,
             classflags: ClassFlags,
         }
 
         impl $StringRepr {
             fn new(classflags: ClassFlags) -> $StringRepr {
                 static NUL: $char_t = 0;
                 $StringRepr {
-                    data: &NUL,
+                    data: unsafe { ptr::NonNull::new_unchecked(&NUL as *const _ as *mut _) },
                     length: 0,
                     dataflags: DataFlags::TERMINATED | DataFlags::LITERAL,
                     classflags: classflags,
                 }
             }
         }
 
         impl Deref for $StringRepr {
@@ -231,16 +390,73 @@ macro_rules! define_string_types {
         impl DerefMut for $StringRepr {
             fn deref_mut(&mut self) -> &mut $AString {
                 unsafe {
                     mem::transmute(self)
                 }
             }
         }
 
+        pub struct $BulkWriteHandle<'a> {
+            string: &'a mut $AString, // string being written; mutably borrowed while the handle lives
+            capacity: usize, // number of code units exposed by as_mut_slice()
+        }
+
+        impl<'a> $BulkWriteHandle<'a> {
+            fn new(string: &'a mut $AString, capacity: usize) -> Self {
+                $BulkWriteHandle{ string: string, capacity: capacity }
+            }
+
+            pub unsafe fn restart_bulk_write(&mut self,
+                                             capacity: usize,
+                                             units_to_preserve: usize,
+                                             allow_shrinking: bool) -> Result<(), ()> {
+                self.capacity = // only updated on success; `?` returns early on Err
+                    self.string.start_bulk_write_impl(capacity,
+                                                      units_to_preserve,
+                                                      allow_shrinking)?;
+                Ok(())
+            }
+
+            pub fn finish(mut self, length: usize, allow_shrinking: bool) -> BulkWriteOk {
+                // NOTE: Drop is implemented outside the macro earlier in this file,
+                // because it needs to deal with different code unit representations
+                // for the REPLACEMENT CHARACTER in the UTF-16 and UTF-8 cases and
+                // needs to deal with a REPLACEMENT CHARACTER not fitting in the
+                // buffer in the UTF-8 case.
+                assert!(length <= self.capacity);
+                if length == 0 {
+                    // `truncate()` is OK even when the string
+                    // is in an invalid state.
+                    self.string.truncate();
+                    mem::forget(self); // Don't run the failure path in drop()
+                    return BulkWriteOk{};
+                }
+                if allow_shrinking {
+                    unsafe {
+                        let _ = self.restart_bulk_write(length, length, true); // NOTE(review): a failed shrink is ignored; confirm the buffer stays valid
+                    }
+                }
+                unsafe {
+                    let mut this = self.string.as_repr();
+                    this.as_mut().length = length as u32;
+                    *(this.as_mut().data.as_ptr().offset(length as isize)) = 0; // zero terminator after the content
+                }
+                mem::forget(self); // Don't run the failure path in drop()
+                BulkWriteOk{}
+            }
+
+            pub fn as_mut_slice(&mut self) -> &mut [$char_t] {
+                unsafe {
+                    let mut this = self.string.as_repr();
+                    slice::from_raw_parts_mut(this.as_mut().data.as_ptr(), self.capacity) // covers all `capacity` units
+                }
+            }
+        }
+
         /// This type is the abstract type which is used for interacting with
         /// strings in rust. Each string type can derefence to an instance of
         /// this type, which provides the useful operations on strings.
         ///
         /// NOTE: Rust thinks this type has a size of 0, because the data
         /// associated with it is not necessarially safe to move. It is not safe
         /// to construct a nsAString yourself, unless it is received by
         /// dereferencing one of these types.
@@ -340,65 +556,100 @@ macro_rules! define_string_types {
 
             /// Get a `&mut` reference to the backing data for this string.
             /// This method will allocate and copy if the current backing buffer
             /// is immutable or shared.
             pub fn to_mut(&mut self) -> &mut [$char_t] {
                 unsafe {
                     let len = self.len();
                     if len == 0 {
-                        // Use an arbitrary non-null value as the pointer
-                        slice::from_raw_parts_mut(0x1 as *mut $char_t, 0)
+                        // Use an arbitrary but aligned non-null value as the pointer
+                        slice::from_raw_parts_mut(ptr::NonNull::<$char_t>::dangling().as_ptr(), 0)
                     } else {
                         slice::from_raw_parts_mut($begin_writing(self), len)
                     }
                 }
             }
 
             /// Get a `&mut` reference to the backing data for this string.
             /// This method will allocate and copy if the current backing buffer
             /// is immutable or shared.
             ///
             /// Returns `Ok(&mut [T])` on success, and `Err(())` if the
             /// allocation failed.
             pub fn fallible_to_mut(&mut self) -> Result<&mut [$char_t], ()> {
                 unsafe {
                     let len = self.len();
                     if len == 0 {
-                        // Use an arbitrary non-null value as the pointer
-                        Ok(slice::from_raw_parts_mut(0x1 as *mut $char_t, 0))
+                        // Use an arbitrary but aligned non-null value as the pointer
+                        Ok(slice::from_raw_parts_mut(
+                            ptr::NonNull::<$char_t>::dangling().as_ptr() as *mut $char_t, 0))
                     } else {
                         let ptr = $fallible_begin_writing(self);
                         if ptr.is_null() {
                             Err(())
                         } else {
                             Ok(slice::from_raw_parts_mut(ptr, len))
                         }
                     }
                 }
             }
 
+            /// Unshares the buffer of the string and returns a handle
+            /// from which a writable slice whose length is the rounded-up
+            /// capacity can be obtained.
+            ///
+            /// Also fails if the requested capacity doesn't fit in 32 bits.
+            ///
+            /// # Safety
+            ///
+            /// Unsafe because of exposure of uninitialized memory.
+            pub unsafe fn bulk_write(&mut self,
+                                     capacity: usize,
+                                     units_to_preserve: usize,
+                                     allow_shrinking: bool) -> Result<$BulkWriteHandle, ()> {
+                let capacity = // shadows the request with the capacity actually reported
+                    self.start_bulk_write_impl(capacity, units_to_preserve, allow_shrinking)?;
+                Ok($BulkWriteHandle::new(self, capacity))
+            }
+
+            unsafe fn start_bulk_write_impl(&mut self,
+                                            capacity: usize,
+                                            units_to_preserve: usize,
+                                            allow_shrinking: bool) -> Result<usize, ()> {
+                if capacity > u32::max_value() as usize { // the call below takes u32 arguments
+                    Err(())
+                } else {
+                    let capacity32 = capacity as u32;
+                    let rounded = $start_bulk_write(self,
+                                                    capacity32,
+                                                    units_to_preserve as u32, // NOTE(review): narrowed without a range check
+                                                    allow_shrinking);
+                    if rounded == u32::max_value() { // u32::MAX is treated as the failure value
+                        return Err(())
+                    }
+                    Ok(rounded as usize)
+                }
+            }
+
+            fn as_repr(&mut self) -> ptr::NonNull<$StringRepr> { // reinterprets `self` as its repr struct
+                unsafe { ptr::NonNull::new_unchecked(self as *mut _ as *mut $StringRepr)} // derived from a reference, so non-null
+            }
         }
 
         impl Deref for $AString {
             type Target = [$char_t];
             fn deref(&self) -> &[$char_t] {
                 unsafe {
                     // All $AString values point to a struct prefix which is
                     // identical to $StringRepr, this we can transmute `self`
                     // into $StringRepr to get the reference to the underlying
                     // data.
                     let this: &$StringRepr = mem::transmute(self);
-                    if this.data.is_null() {
-                        debug_assert_eq!(this.length, 0);
-                        // Use an arbitrary non-null value as the pointer
-                        slice::from_raw_parts(0x1 as *const $char_t, 0)
-                    } else {
-                        slice::from_raw_parts(this.data, this.length as usize)
-                    }
+                    slice::from_raw_parts(this.data.as_ptr(), this.length as usize)
                 }
             }
         }
 
         impl AsRef<[$char_t]> for $AString {
             fn as_ref(&self) -> &[$char_t] {
                 self
             }
@@ -473,17 +724,17 @@ macro_rules! define_string_types {
         impl<'a> From<&'a [$char_t]> for $Str<'a> {
             fn from(s: &'a [$char_t]) -> $Str<'a> {
                 assert!(s.len() < (u32::MAX as usize));
                 if s.is_empty() {
                     return $Str::new();
                 }
                 $Str {
                     hdr: $StringRepr {
-                        data: s.as_ptr(),
+                        data: unsafe { ptr::NonNull::new_unchecked(s.as_ptr() as *mut _) },
                         length: s.len() as u32,
                         dataflags: DataFlags::empty(),
                         classflags: ClassFlags::empty(),
                     },
                     _marker: PhantomData,
                 }
             }
         }
@@ -633,24 +884,24 @@ macro_rules! define_string_types {
                 let length = s.len() as u32;
                 s.push(0); // null terminator
 
                 // SAFETY NOTE: This method produces an data_flags::OWNED
                 // ns[C]String from a Box<[$char_t]>. this is only safe
                 // because in the Gecko tree, we use the same allocator for
                 // Rust code as for C++ code, meaning that our box can be
                 // legally freed with libc::free().
-                let ptr = s.as_ptr();
+                let ptr = s.as_mut_ptr();
                 mem::forget(s);
                 unsafe {
                     Gecko_IncrementStringAdoptCount(ptr as *mut _);
                 }
                 $String {
                     hdr: $StringRepr {
-                        data: ptr,
+                        data: unsafe { ptr::NonNull::new_unchecked(ptr) },
                         length: length,
                         dataflags: DataFlags::OWNED | DataFlags::TERMINATED,
                         classflags: ClassFlags::NULL_TERMINATED,
                     }
                 }
             }
         }
 
@@ -722,76 +973,35 @@ macro_rules! define_string_types {
             fn deref(&self) -> &$AString {
                 match *self {
                     $StringAdapter::Borrowed(ref s) => s,
                     $StringAdapter::Abstract(ref s) => s,
                 }
             }
         }
 
-        /// This trait is implemented on types which are `ns[C]String`-like, in
-        /// that they can at very low cost be converted to a borrowed
-        /// `&nsA[C]String`. Unfortunately, the intermediate type
-        /// `ns[C]StringAdapter` is required as well due to types like `&[u8]`
-        /// needing to be (cheaply) wrapped in a `nsCString` on the stack to
-        /// create the `&nsACString`.
-        ///
-        /// This trait is used to DWIM when calling the methods on
-        /// `nsA[C]String`.
-        pub trait $StringLike {
-            fn adapt(&self) -> $StringAdapter;
-        }
-
-        impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T {
-            fn adapt(&self) -> $StringAdapter {
-                <T as $StringLike>::adapt(*self)
-            }
-        }
-
-        impl<'a, T> $StringLike for borrow::Cow<'a, T>
-            where T: $StringLike + borrow::ToOwned + ?Sized {
-            fn adapt(&self) -> $StringAdapter {
-                <T as $StringLike>::adapt(self.as_ref())
+        impl<'a> $StringAdapter<'a> {
+            #[allow(dead_code)]
+            fn is_abstract(&self) -> bool {
+                match *self {
+                    $StringAdapter::Borrowed(_) => false,
+                    $StringAdapter::Abstract(_) => true,
+                }
             }
         }
 
-        impl $StringLike for $AString {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Abstract(self)
-            }
-        }
-
-        impl<'a> $StringLike for $Str<'a> {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Abstract(self)
-            }
-        }
+        string_like! {
+            char_t = $char_t;
 
-        impl $StringLike for $String {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Abstract(self)
-            }
-        }
+            AString = $AString;
+            String = $String;
+            Str = $Str;
 
-        impl $StringLike for [$char_t] {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Borrowed($Str::from(self))
-            }
-        }
-
-        impl $StringLike for Vec<$char_t> {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Borrowed($Str::from(&self[..]))
-            }
-        }
-
-        impl $StringLike for Box<[$char_t]> {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Borrowed($Str::from(&self[..]))
-            }
+            StringLike = $StringLike;
+            StringAdapter = $StringAdapter;
         }
     }
 }
 
 ///////////////////////////////////////////
 // Bindings for nsCString (u8 char type) //
 ///////////////////////////////////////////
 
@@ -802,49 +1012,28 @@ define_string_types! {
     String = nsCString;
     Str = nsCStr;
 
     StringLike = nsCStringLike;
     StringAdapter = nsCStringAdapter;
 
     StringRepr = nsCStringRepr;
 
+    BulkWriteHandle = nsACStringBulkWriteHandle;
+
     drop = Gecko_FinalizeCString;
     assign = Gecko_AssignCString, Gecko_FallibleAssignCString;
     take_from = Gecko_TakeFromCString, Gecko_FallibleTakeFromCString;
     append = Gecko_AppendCString, Gecko_FallibleAppendCString;
     set_length = Gecko_SetLengthCString, Gecko_FallibleSetLengthCString;
     begin_writing = Gecko_BeginWritingCString, Gecko_FallibleBeginWritingCString;
+    start_bulk_write = Gecko_StartBulkWriteCString;
 }
 
 impl nsACString {
-    pub fn assign_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) {
-        self.truncate();
-        self.append_utf16(other);
-    }
-
-    pub fn fallible_assign_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        self.truncate();
-        self.fallible_append_utf16(other)
-    }
-
-    pub fn append_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) {
-        unsafe {
-            Gecko_AppendUTF16toCString(self, other.adapt().as_ptr());
-        }
-    }
-
-    pub fn fallible_append_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        if unsafe { Gecko_FallibleAppendUTF16toCString(self, other.adapt().as_ptr()) } {
-            Ok(())
-        } else {
-            Err(())
-        }
-    }
-
     pub unsafe fn as_str_unchecked(&self) -> &str {
         str::from_utf8_unchecked(self)
     }
 }
 
 impl<'a> From<&'a str> for nsCStr<'a> {
     fn from(s: &'a str) -> nsCStr<'a> {
         s.as_bytes().into()
@@ -920,64 +1109,58 @@ impl nsCStringLike for String {
 }
 
 impl nsCStringLike for Box<str> {
     fn adapt(&self) -> nsCStringAdapter {
         nsCStringAdapter::Borrowed(nsCStr::from(&self[..]))
     }
 }
 
+/// This trait is implemented on types which are Latin1 `nsCString`-like,
+/// in that they can at very low cost be converted to a borrowed
+/// `&nsACString` and do not denote UTF-8ness in the Rust type system.
+///
+/// This trait is used to DWIM when calling the methods on
+/// `nsACString`.
+string_like! {
+    char_t = u8;
+
+    AString = nsACString;
+    String = nsCString;
+    Str = nsCStr;
+
+    StringLike = Latin1StringLike;
+    StringAdapter = nsCStringAdapter;
+}
+
 ///////////////////////////////////////////
 // Bindings for nsString (u16 char type) //
 ///////////////////////////////////////////
 
 define_string_types! {
     char_t = u16;
 
     AString = nsAString;
     String = nsString;
     Str = nsStr;
 
     StringLike = nsStringLike;
     StringAdapter = nsStringAdapter;
 
     StringRepr = nsStringRepr;
 
+    BulkWriteHandle = nsAStringBulkWriteHandle;
+
     drop = Gecko_FinalizeString;
     assign = Gecko_AssignString, Gecko_FallibleAssignString;
     take_from = Gecko_TakeFromString, Gecko_FallibleTakeFromString;
     append = Gecko_AppendString, Gecko_FallibleAppendString;
     set_length = Gecko_SetLengthString, Gecko_FallibleSetLengthString;
     begin_writing = Gecko_BeginWritingString, Gecko_FallibleBeginWritingString;
-}
-
-impl nsAString {
-    pub fn assign_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
-        self.truncate();
-        self.append_utf8(other);
-    }
-
-    pub fn fallible_assign_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        self.truncate();
-        self.fallible_append_utf8(other)
-    }
-
-    pub fn append_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
-        unsafe {
-            Gecko_AppendUTF8toString(self, other.adapt().as_ptr());
-        }
-    }
-
-    pub fn fallible_append_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        if unsafe { Gecko_FallibleAppendUTF8toString(self, other.adapt().as_ptr()) } {
-            Ok(())
-        } else {
-            Err(())
-        }
-    }
+    start_bulk_write = Gecko_StartBulkWriteString;
 }
 
 // NOTE: The From impl for a string slice for nsString produces a <'static>
 // lifetime, as it allocates.
 impl<'a> From<&'a str> for nsString {
     fn from(s: &'a str) -> nsString {
         s.encode_utf16().collect::<Vec<u16>>().into()
     }
@@ -989,17 +1172,17 @@ impl<'a> From<&'a String> for nsString {
     }
 }
 
 // Support for the write!() macro for writing to nsStrings
 impl fmt::Write for nsAString {
     fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
         // Directly invoke gecko's routines for appending utf8 strings to
         // nsAString values, to avoid as much overhead as possible
-        self.append_utf8(s);
+        self.append_str(s);
         Ok(())
     }
 }
 
 impl fmt::Display for nsAString {
     fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
         fmt::Display::fmt(&String::from_utf16_lossy(&self[..]), f)
     }
@@ -1033,52 +1216,58 @@ extern "C" {
     fn Gecko_AppendCString(this: *mut nsACString, other: *const nsACString);
     fn Gecko_SetLengthCString(this: *mut nsACString, length: u32);
     fn Gecko_BeginWritingCString(this: *mut nsACString) -> *mut u8;
     fn Gecko_FallibleAssignCString(this: *mut nsACString, other: *const nsACString) -> bool;
     fn Gecko_FallibleTakeFromCString(this: *mut nsACString, other: *mut nsACString) -> bool;
     fn Gecko_FallibleAppendCString(this: *mut nsACString, other: *const nsACString) -> bool;
     fn Gecko_FallibleSetLengthCString(this: *mut nsACString, length: u32) -> bool;
     fn Gecko_FallibleBeginWritingCString(this: *mut nsACString) -> *mut u8;
+    fn Gecko_StartBulkWriteCString(
+        this: *mut nsACString,
+        capacity: u32,
+        units_to_preserve: u32,
+        allow_shrinking: bool,
+    ) -> u32;
 
     fn Gecko_FinalizeString(this: *mut nsAString);
 
     fn Gecko_AssignString(this: *mut nsAString, other: *const nsAString);
     fn Gecko_TakeFromString(this: *mut nsAString, other: *mut nsAString);
     fn Gecko_AppendString(this: *mut nsAString, other: *const nsAString);
     fn Gecko_SetLengthString(this: *mut nsAString, length: u32);
     fn Gecko_BeginWritingString(this: *mut nsAString) -> *mut u16;
     fn Gecko_FallibleAssignString(this: *mut nsAString, other: *const nsAString) -> bool;
     fn Gecko_FallibleTakeFromString(this: *mut nsAString, other: *mut nsAString) -> bool;
     fn Gecko_FallibleAppendString(this: *mut nsAString, other: *const nsAString) -> bool;
     fn Gecko_FallibleSetLengthString(this: *mut nsAString, length: u32) -> bool;
     fn Gecko_FallibleBeginWritingString(this: *mut nsAString) -> *mut u16;
-
-    // Gecko implementation in nsReadableUtils.cpp
-    fn Gecko_AppendUTF16toCString(this: *mut nsACString, other: *const nsAString);
-    fn Gecko_AppendUTF8toString(this: *mut nsAString, other: *const nsACString);
-    fn Gecko_FallibleAppendUTF16toCString(this: *mut nsACString, other: *const nsAString) -> bool;
-    fn Gecko_FallibleAppendUTF8toString(this: *mut nsAString, other: *const nsACString) -> bool;
+    fn Gecko_StartBulkWriteString(
+        this: *mut nsAString,
+        capacity: u32,
+        units_to_preserve: u32,
+        allow_shrinking: bool,
+    ) -> u32;
 }
 
 //////////////////////////////////////
 // Repr Validation Helper Functions //
 //////////////////////////////////////
 
 pub mod test_helpers {
     //! This module only exists to help with ensuring that the layout of the
     //! structs inside of rust and C++ are identical.
     //!
     //! It is public to ensure that these testing functions are available to
     //! gtest code.
 
-    use std::mem;
-    use super::{ClassFlags, DataFlags};
     use super::{nsCStr, nsCString, nsCStringRepr};
     use super::{nsStr, nsString, nsStringRepr};
+    use super::{ClassFlags, DataFlags};
+    use std::mem;
 
     /// Generates an #[no_mangle] extern "C" function which returns the size and
     /// alignment of the given type with the given name.
     macro_rules! size_align_check {
         ($T:ty, $fname:ident) => {
             #[no_mangle]
             #[allow(non_snake_case)]
             pub extern fn $fname(size: *mut usize, align: *mut usize) {
--- a/toolkit/xre/nsWindowsRestart.cpp
+++ b/toolkit/xre/nsWindowsRestart.cpp
@@ -23,24 +23,25 @@
 /**
  * Convert UTF8 to UTF16 without using the normal XPCOM goop, which we
  * can't link to updater.exe.
  */
 static char16_t*
 AllocConvertUTF8toUTF16(const char *arg)
 {
   // UTF16 can't be longer in units than UTF8
-  int len = strlen(arg);
+  size_t len = strlen(arg);
   char16_t *s = new char16_t[(len + 1) * sizeof(char16_t)];
   if (!s)
     return nullptr;
 
-  ConvertUTF8toUTF16 convert(s);
-  convert.write(arg, len);
-  convert.write_terminator();
+  size_t dstLen = ::MultiByteToWideChar(
+    CP_UTF8, 0, arg, len, reinterpret_cast<wchar_t*>(s), len);
+  s[dstLen] = 0;
+
   return s;
 }
 
 static void
 FreeAllocStrings(int argc, wchar_t **argv)
 {
   while (argc) {
     --argc;
--- a/toolkit/xre/nsWindowsWMain.cpp
+++ b/toolkit/xre/nsWindowsWMain.cpp
@@ -72,24 +72,27 @@ SanitizeEnvironmentVariables()
     delete[] originalPath;
   }
 }
 
 static char*
 AllocConvertUTF16toUTF8(char16ptr_t arg)
 {
   // be generous... UTF16 units can expand up to 3 UTF8 units
-  int len = wcslen(arg);
-  char *s = new char[len * 3 + 1];
+  size_t len = wcslen(arg);
+  // ConvertUTF16toUTF8 requires +1. Let's do that here, too, lacking
+  // knowledge of Windows internals.
+  size_t dstLen = len * 3 + 1;
+  char* s = new char[dstLen + 1]; // Another +1 for zero terminator
   if (!s)
     return nullptr;
 
-  ConvertUTF16toUTF8 convert(s);
-  convert.write(arg, len);
-  convert.write_terminator();
+  int written =
+    ::WideCharToMultiByte(CP_UTF8, 0, arg, len, s, dstLen, nullptr, nullptr);
+  s[written] = 0;
   return s;
 }
 
 static void
 FreeAllocStrings(int argc, char **argv)
 {
   while (argc) {
     --argc;
--- a/xpcom/base/nsAlgorithm.h
+++ b/xpcom/base/nsAlgorithm.h
@@ -54,22 +54,9 @@ NS_COUNT(InputIterator& aFirst, const In
   uint32_t result = 0;
   for (; aFirst != aLast; ++aFirst)
     if (*aFirst == aValue) {
       ++result;
     }
   return result;
 }
 
-template <class InputIterator, class OutputIterator>
-inline OutputIterator&
-copy_string(const InputIterator& aFirst, const InputIterator& aLast,
-            OutputIterator& aResult)
-{
-  typedef nsCharSourceTraits<InputIterator> source_traits;
-  typedef nsCharSinkTraits<OutputIterator>  sink_traits;
-
-  sink_traits::write(aResult, source_traits::read(aFirst),
-                     source_traits::readable_distance(aFirst, aLast));
-  return aResult;
-}
-
 #endif // !defined(nsAlgorithm_h___)
--- a/xpcom/ds/nsAtomTable.cpp
+++ b/xpcom/ds/nsAtomTable.cpp
@@ -191,28 +191,25 @@ struct AtomTableKey
     : mUTF16String(aUTF16String)
     , mUTF8String(nullptr)
     , mLength(aLength)
   {
     mHash = HashString(mUTF16String, mLength);
     *aHashOut = mHash;
   }
 
-  AtomTableKey(const char* aUTF8String, uint32_t aLength, uint32_t* aHashOut)
+  AtomTableKey(const char* aUTF8String,
+               uint32_t aLength,
+               uint32_t* aHashOut,
+               bool* aErr)
     : mUTF16String(nullptr)
     , mUTF8String(aUTF8String)
     , mLength(aLength)
   {
-    bool err;
-    mHash = HashUTF8AsUTF16(mUTF8String, mLength, &err);
-    if (err) {
-      mUTF8String = nullptr;
-      mLength = 0;
-      mHash = 0;
-    }
+    mHash = HashUTF8AsUTF16(mUTF8String, mLength, aErr);
     *aHashOut = mHash;
   }
 
   const char16_t* mUTF16String;
   const char* mUTF8String;
   uint32_t mLength;
   uint32_t mHash;
 };
@@ -328,20 +325,22 @@ AtomTableGetHash(const void* aKey)
 
 static bool
 AtomTableMatchKey(const PLDHashEntryHdr* aEntry, const void* aKey)
 {
   const AtomTableEntry* he = static_cast<const AtomTableEntry*>(aEntry);
   const AtomTableKey* k = static_cast<const AtomTableKey*>(aKey);
 
   if (k->mUTF8String) {
-    return
-      CompareUTF8toUTF16(nsDependentCSubstring(k->mUTF8String,
-                                               k->mUTF8String + k->mLength),
-                         nsDependentAtomString(he->mAtom)) == 0;
+    bool err = false;
+    return (CompareUTF8toUTF16(nsDependentCSubstring(
+                                 k->mUTF8String, k->mUTF8String + k->mLength),
+                               nsDependentAtomString(he->mAtom),
+                               &err) == 0) &&
+           !err;
   }
 
   return he->mAtom->Equals(k->mUTF16String, k->mLength);
 }
 
 void
 nsAtomTable::AtomTableClearEntry(PLDHashTable* aTable, PLDHashEntryHdr* aEntry)
 {
@@ -682,17 +681,26 @@ NS_Atomize(const char* aUTF8String)
   MOZ_ASSERT(gAtomTable);
   return gAtomTable->Atomize(nsDependentCString(aUTF8String));
 }
 
 already_AddRefed<nsAtom>
 nsAtomTable::Atomize(const nsACString& aUTF8String)
 {
   uint32_t hash;
-  AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &hash);
+  bool err;
+  AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &hash, &err);
+  if (MOZ_UNLIKELY(err)) {
+    MOZ_ASSERT_UNREACHABLE("Tried to atomize invalid UTF-8.");
+    // The input was invalid UTF-8. Let's replace the errors with U+FFFD
+    // and atomize the result.
+    nsString str;
+    CopyUTF8toUTF16(aUTF8String, str);
+    return Atomize(str);
+  }
   nsAtomSubTable& table = SelectSubTable(key);
   MutexAutoLock lock(table.mLock);
   AtomTableEntry* he = table.Add(key);
 
   if (he->mAtom) {
     RefPtr<nsAtom> atom = he->mAtom;
 
     return atom.forget();
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -44,21 +44,9 @@ UNIFIED_SOURCES += [
     'nsStringComparator.cpp',
     'nsStringObsolete.cpp',
     'nsSubstring.cpp',
     'nsTextFormatter.cpp',
     'nsTSubstringTuple.cpp',
     'precompiled_templates.cpp',
 ]
 
-# Are we targeting x86 or x86-64?  If so, compile the SSE2 functions for
-# nsUTF8Utils.cpp and nsReadableUtils.cpp.
-if CONFIG['INTEL_ARCHITECTURE']:
-    SOURCES += ['nsUTF8UtilsSSE2.cpp']
-    SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
-    SOURCES += ['nsReadableUtilsSSE2.cpp']
-    SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
-
-if CONFIG['HAVE_ARM_NEON'] or CONFIG['CPU_ARCH'] == 'aarch64':
-    SOURCES += ['nsUTF8UtilsNEON.cpp']
-    SOURCES['nsUTF8UtilsNEON.cpp'].flags += CONFIG['NEON_FLAGS']
-
 FINAL_LIBRARY = 'xul'
--- a/xpcom/string/nsReadableUtils.cpp
+++ b/xpcom/string/nsReadableUtils.cpp
@@ -1,793 +1,246 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsReadableUtils.h"
-#include "nsReadableUtilsImpl.h"
 
 #include <algorithm>
 
 #include "mozilla/CheckedInt.h"
 
 #include "nscore.h"
 #include "nsMemory.h"
 #include "nsString.h"
 #include "nsTArray.h"
 #include "nsUTF8Utils.h"
 
-using mozilla::IsASCII;
-
-/**
- * Fallback implementation for finding the first non-ASCII character in a
- * UTF-16 string.
- */
-static inline int32_t
-FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
-{
-  typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
-  const size_t kMask = p::mask();
-  const uintptr_t kAlignMask = p::alignMask();
-  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
-
-  const char16_t* idx = aBegin;
-
-  // Align ourselves to a word boundary.
-  for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  // Check one word at a time.
-  const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
-  for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
-    const size_t word = *reinterpret_cast<const size_t*>(idx);
-    if (word & kMask) {
-      return idx - aBegin;
-    }
-  }
-
-  // Take care of the remainder one character at a time.
-  for (; idx != aEnd; idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  return -1;
-}
-
-/*
- * This function returns -1 if all characters in str are ASCII characters.
- * Otherwise, it returns a value less than or equal to the index of the first
- * ASCII character in str. For example, if first non-ASCII character is at
- * position 25, it may return 25, 24, or 16. But it guarantees
- * there are only ASCII characters before returned value.
- */
-static inline int32_t
-FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
-{
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-  if (mozilla::supports_sse2()) {
-    return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
-  }
-#endif
-
-  return FirstNonASCIIUnvectorized(aBegin, aEnd);
-}
-
-void
-LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
-{
-  aDest.Truncate();
-  LossyAppendUTF16toASCII(aSource, aDest);
-}
-
-void
-CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  if (!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible)) {
-    // Note that this may wildly underestimate the allocation that failed, as
-    // we report the length of aSource as UTF-16 instead of UTF-8.
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
-                 const mozilla::fallible_t& aFallible)
-{
-  aDest.Truncate();
-  return AppendASCIItoUTF16(aSource, aDest, aFallible);
-}
-
-void
-LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
-{
-  aDest.Truncate();
-  if (aSource) {
-    LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
-  }
-}
-
-void
-CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
-{
-  aDest.Truncate();
-  if (aSource) {
-    AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
-  }
-}
-
-void
-CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
-{
-  if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
-    // Note that this may wildly underestimate the allocation that failed, as
-    // we report the length of aSource as UTF-16 instead of UTF-8.
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
-                const mozilla::fallible_t& aFallible)
-{
-  aDest.Truncate();
-  if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) {
-    return false;
-  }
-  return true;
-}
-
-void
-CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  aDest.Truncate();
-  AppendUTF8toUTF16(aSource, aDest);
-}
-
-void
-CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
-{
-  aDest.Truncate();
-  AppendUTF16toUTF8(aSource, aDest);
-}
-
-void
-CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
-{
-  aDest.Truncate();
-  AppendUTF8toUTF16(aSource, aDest);
-}
-
-void
-LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
-{
-  uint32_t old_dest_length = aDest.Length();
-  aDest.SetLength(old_dest_length + aSource.Length());
-
-  nsAString::const_iterator fromBegin, fromEnd;
-
-  nsACString::iterator dest;
-  aDest.BeginWriting(dest);
-
-  dest.advance(old_dest_length);
-
-  // right now, this won't work on multi-fragment destinations
-  LossyConvertEncoding16to8 converter(dest.get());
-
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
-}
-
-void
-AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) {
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
-                   const mozilla::fallible_t& aFallible)
-{
-  uint32_t old_dest_length = aDest.Length();
-  if (!aDest.SetLength(old_dest_length + aSource.Length(),
-                       aFallible)) {
-    return false;
-  }
-
-  nsACString::const_iterator fromBegin, fromEnd;
-
-  nsAString::iterator dest;
-  aDest.BeginWriting(dest);
-
-  dest.advance(old_dest_length);
-
-  // right now, this won't work on multi-fragment destinations
-  LossyConvertEncoding8to16 converter(dest.get());
-
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
-  return true;
-}
-
-void
-LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
-{
-  if (aSource) {
-    LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
-  }
-}
-
-bool
-AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible)
-{
-  if (aSource) {
-    return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible);
-  }
-
-  return true;
-}
-
-void
-AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
-{
-  if (aSource) {
-    AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
-  }
-}
-
-void
-AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
-{
-  if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
-    // Note that this may wildly underestimate the allocation that failed, as
-    // we report the length of aSource as UTF-16 instead of UTF-8.
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
-                  const mozilla::fallible_t& aFallible)
-{
-  // At 16 characters analysis showed better performance of both the all ASCII
-  // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
-  // that length.
-  const nsAString::size_type kFastPathMinLength = 16;
-
-  int32_t firstNonASCII = 0;
-  if (aSource.Length() >= kFastPathMinLength) {
-    firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
-  }
-
-  if (firstNonASCII == -1) {
-    // This is all ASCII, we can use the more efficient lossy append.
-    mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
-    new_length += aDest.Length();
-
-    if (!new_length.isValid() ||
-        !aDest.SetCapacity(new_length.value(), aFallible)) {
-      return false;
-    }
-
-    LossyAppendUTF16toASCII(aSource, aDest);
-    return true;
-  }
-
-  nsAString::const_iterator source_start, source_end;
-  CalculateUTF8Size calculator;
-  aSource.BeginReading(source_start);
-  aSource.EndReading(source_end);
-
-  // Skip the characters that we know are single byte.
-  source_start.advance(firstNonASCII);
-
-  copy_string(source_start,
-              source_end, calculator);
-
-  // Include the ASCII characters that were skipped in the count.
-  size_t count = calculator.Size() + firstNonASCII;
-
-  if (count) {
-    auto old_dest_length = aDest.Length();
-    // Grow the buffer if we need to.
-    mozilla::CheckedInt<nsACString::size_type> new_length(count);
-    new_length += old_dest_length;
-
-    if (!new_length.isValid() ||
-        !aDest.SetLength(new_length.value(), aFallible)) {
-      return false;
-    }
-
-    // All ready? Time to convert
-
-    nsAString::const_iterator ascii_end;
-    aSource.BeginReading(ascii_end);
-
-    if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
-      // Use the more efficient lossy converter for the ASCII portion.
-      LossyConvertEncoding16to8 lossy_converter(
-          aDest.BeginWriting() + old_dest_length);
-      nsAString::const_iterator ascii_start;
-      aSource.BeginReading(ascii_start);
-      ascii_end.advance(firstNonASCII);
-
-      copy_string(ascii_start, ascii_end, lossy_converter);
-    } else {
-      // Not using the lossy shortcut, we need to include the leading ASCII
-      // chars.
-      firstNonASCII = 0;
-    }
-
-    ConvertUTF16toUTF8 converter(
-        aDest.BeginWriting() + old_dest_length + firstNonASCII);
-    copy_string(ascii_end,
-                aSource.EndReading(source_end), converter);
-
-    NS_ASSERTION(converter.Size() == count - firstNonASCII,
-                 "Unexpected disparity between CalculateUTF8Size and "
-                 "ConvertUTF16toUTF8");
-  }
-
-  return true;
-}
-
-void
-AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) {
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
-                  const mozilla::fallible_t& aFallible)
-{
-  nsACString::const_iterator source_start, source_end;
-  CalculateUTF8Length calculator;
-  copy_string(aSource.BeginReading(source_start),
-              aSource.EndReading(source_end), calculator);
-
-  uint32_t count = calculator.Length();
-
-  // Avoid making the string mutable if we're appending an empty string
-  if (count) {
-    uint32_t old_dest_length = aDest.Length();
-
-    // Grow the buffer if we need to.
-    if (!aDest.SetLength(old_dest_length + count, aFallible)) {
-      return false;
-    }
-
-    // All ready? Time to convert
-
-    ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
-    copy_string(aSource.BeginReading(source_start),
-                aSource.EndReading(source_end), converter);
-
-    NS_ASSERTION(converter.ErrorEncountered() ||
-                 converter.Length() == count,
-                 "CalculateUTF8Length produced the wrong length");
-
-    if (converter.ErrorEncountered()) {
-      NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
-      aDest.SetLength(old_dest_length);
-    }
-  }
-
-  return true;
-}
-
-void
-AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
-{
-  if (aSource) {
-    AppendUTF16toUTF8(nsDependentString(aSource), aDest);
-  }
-}
-
-void
-AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
-{
-  if (aSource) {
-    AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
-  }
-}
-
+using mozilla::MakeSpan;
 
 /**
  * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
  *
  * @param aSource a string you will eventually be making a copy of
  * @return a new buffer (of the type specified by the second parameter) which you must free with |free|.
  *
  */
 template <class FromStringT, class ToCharT>
 inline
 ToCharT*
 AllocateStringCopy(const FromStringT& aSource, ToCharT*)
 {
-  return static_cast<ToCharT*>(moz_xmalloc(
-    (aSource.Length() + 1) * sizeof(ToCharT)));
+  // Can't overflow due to the definition of nsTSubstring<T>::kMaxCapacity
+  return static_cast<ToCharT*>(
+    moz_xmalloc((size_t(aSource.Length()) + 1) * sizeof(ToCharT)));
 }
 
 
 char*
 ToNewCString(const nsAString& aSource)
 {
-  char* result = AllocateStringCopy(aSource, (char*)0);
-  if (!result) {
+  char* dest = AllocateStringCopy(aSource, (char*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsAString::const_iterator fromBegin, fromEnd;
-  LossyConvertEncoding16to8 converter(result);
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter).write_terminator();
-  return result;
+  auto len = aSource.Length();
+  LossyConvertUTF16toLatin1(aSource, MakeSpan(dest, len));
+  dest[len] = 0;
+  return dest;
 }
 
 char*
 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
 {
-  nsAString::const_iterator start, end;
-  CalculateUTF8Size calculator;
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              calculator);
-
-  if (aUTF8Count) {
-    *aUTF8Count = calculator.Size();
+  auto len = aSource.Length();
+  // The uses of this function seem temporary enough that it's not
+  // worthwhile to be fancy about the allocation size. Let's just use
+  // the worst case.
+  // Times 3 plus 2, because ConvertUTF16toUTF8 requires times 3 plus 1 and
+  // then we have the terminator.
+  mozilla::CheckedInt<size_t> destLen(len);
+  destLen *= 3;
+  destLen += 2;
+  if (!destLen.isValid()) {
+    return nullptr;
   }
-
-  char* result = static_cast<char*>
-                 (moz_xmalloc(calculator.Size() + 1));
-  if (!result) {
+  size_t destLenVal = destLen.value();
+  if (destLenVal > UINT32_MAX) {
+    return nullptr;
+  }
+  char* dest = static_cast<char*>(moz_xmalloc(destLenVal));
+  if (!dest) {
     return nullptr;
   }
 
-  ConvertUTF16toUTF8 converter(result);
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              converter).write_terminator();
-  NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
+  size_t written = ConvertUTF16toUTF8(aSource, MakeSpan(dest, destLenVal));
+  dest[written] = 0;
 
-  return result;
+  if (aUTF8Count) {
+    *aUTF8Count = written;
+  }
+
+  return dest;
 }
 
 char*
 ToNewCString(const nsACString& aSource)
 {
   // no conversion needed, just allocate a buffer of the correct length and copy into it
 
-  char* result = AllocateStringCopy(aSource, (char*)0);
-  if (!result) {
+  char* dest = AllocateStringCopy(aSource, (char*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsACString::const_iterator fromBegin, fromEnd;
-  char* toBegin = result;
-  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-               toBegin) = char(0);
-  return result;
+  auto len = aSource.Length();
+  memcpy(dest, aSource.BeginReading(), len * sizeof(char));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 ToNewUnicode(const nsAString& aSource)
 {
   // no conversion needed, just allocate a buffer of the correct length and copy into it
 
-  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
-  if (!result) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsAString::const_iterator fromBegin, fromEnd;
-  char16_t* toBegin = result;
-  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-               toBegin) = char16_t(0);
-  return result;
+  auto len = aSource.Length();
+  memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 ToNewUnicode(const nsACString& aSource)
 {
-  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
-  if (!result) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsACString::const_iterator fromBegin, fromEnd;
-  LossyConvertEncoding8to16 converter(result);
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter).write_terminator();
-  return result;
-}
-
-uint32_t
-CalcUTF8ToUnicodeLength(const nsACString& aSource)
-{
-  nsACString::const_iterator start, end;
-  CalculateUTF8Length calculator;
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              calculator);
-  return calculator.Length();
-}
-
-char16_t*
-UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
-                    uint32_t* aUTF16Count)
-{
-  nsACString::const_iterator start, end;
-  ConvertUTF8toUTF16 converter(aBuffer);
-  copy_string(aSource.BeginReading(start),
-              aSource.EndReading(end),
-              converter).write_terminator();
-  if (aUTF16Count) {
-    *aUTF16Count = converter.Length();
-  }
-  return aBuffer;
+  auto len = aSource.Length();
+  ConvertLatin1toUTF16(aSource, MakeSpan(dest, len));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
 {
-  const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
-  const size_t buffer_size = (length + 1) * sizeof(char16_t);
-  char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
-  if (!buffer) {
+  // Compute length plus one as required by ConvertUTF8toUTF16
+  uint32_t lengthPlusOne = aSource.Length() + 1; // Can't overflow
+
+  mozilla::CheckedInt<size_t> allocLength(lengthPlusOne);
+  // Add space for zero-termination
+  allocLength += 1;
+  // We need UTF-16 units
+  allocLength *= sizeof(char16_t);
+
+  if (!allocLength.isValid()) {
     return nullptr;
   }
 
-  uint32_t copied;
-  UTF8ToUnicodeBuffer(aSource, buffer, &copied);
-  NS_ASSERTION(length == copied, "length mismatch");
+  char16_t* dest = (char16_t*)moz_xmalloc(allocLength.value());
+  if (!dest) {
+    return nullptr;
+  }
+
+  size_t written = ConvertUTF8toUTF16(aSource, MakeSpan(dest, lengthPlusOne));
+  dest[written] = 0;
 
   if (aUTF16Count) {
-    *aUTF16Count = copied;
+    *aUTF16Count = written;
   }
-  return buffer;
+
+  return dest;
 }
 
 char16_t*
 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
               uint32_t aLength)
 {
-  nsAString::const_iterator fromBegin, fromEnd;
-  char16_t* toBegin = aDest;
-  copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
-              aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
-              toBegin);
+  MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length());
+  memcpy(aDest,
+         aSource.BeginReading() + aSrcOffset,
+         size_t(aLength) * sizeof(char16_t));
   return aDest;
 }
 
 void
-CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
-              const nsAString::const_iterator& aSrcEnd,
-              nsAString& aDest)
-{
-  aDest.SetLength(Distance(aSrcStart, aSrcEnd));
-
-  nsAString::char_iterator dest = aDest.BeginWriting();
-  nsAString::const_iterator fromBegin(aSrcStart);
-
-  copy_string(fromBegin, aSrcEnd, dest);
-}
-
-void
-AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
-                const nsAString::const_iterator& aSrcEnd,
-                nsAString& aDest)
-{
-  uint32_t oldLength = aDest.Length();
-  aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
-
-  nsAString::char_iterator dest = aDest.BeginWriting() + oldLength;
-  nsAString::const_iterator fromBegin(aSrcStart);
-
-  copy_string(fromBegin, aSrcEnd, dest);
-}
-
-bool
-IsASCII(const nsAString& aString)
-{
-  static const char16_t NOT_ASCII = char16_t(~0x007F);
-
-
-  // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
-
-  nsAString::const_iterator iter, done_reading;
-  aString.BeginReading(iter);
-  aString.EndReading(done_reading);
-
-  const char16_t* c = iter.get();
-  const char16_t* end = done_reading.get();
-
-  while (c < end) {
-    if (*c++ & NOT_ASCII) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-/**
- * A character sink for in-place case conversion.
- */
-class ConvertToUpperCase
-{
-public:
-  typedef char value_type;
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    char* cp = const_cast<char*>(aSource);
-    const char* end = aSource + aSourceLength;
-    while (cp != end) {
-      char ch = *cp;
-      if (ch >= 'a' && ch <= 'z') {
-        *cp = ch - ('a' - 'A');
-      }
-      ++cp;
-    }
-    return aSourceLength;
-  }
-};
-
-void
 ToUpperCase(nsACString& aCString)
 {
-  ConvertToUpperCase converter;
-  char* start;
-  converter.write(aCString.BeginWriting(start), aCString.Length());
-}
-
-/**
- * A character sink for copying with case conversion.
- */
-class CopyToUpperCase
-{
-public:
-  typedef char value_type;
-
-  explicit CopyToUpperCase(nsACString::iterator& aDestIter,
-                           const nsACString::iterator& aEndIter)
-    : mIter(aDestIter)
-    , mEnd(aEndIter)
-  {
+  char* cp = aCString.BeginWriting();
+  char* end = cp + aCString.Length();
+  while (cp != end) {
+    char ch = *cp;
+    if (ch >= 'a' && ch <= 'z') {
+      *cp = ch - ('a' - 'A');
+    }
+    ++cp;
   }
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
-    char* cp = mIter.get();
-    const char* end = aSource + len;
-    while (aSource != end) {
-      char ch = *aSource;
-      if ((ch >= 'a') && (ch <= 'z')) {
-        *cp = ch - ('a' - 'A');
-      } else {
-        *cp = ch;
-      }
-      ++aSource;
-      ++cp;
-    }
-    mIter.advance(len);
-    return len;
-  }
-
-protected:
-  nsACString::iterator& mIter;
-  const nsACString::iterator& mEnd;
-};
+}
 
 void
 ToUpperCase(const nsACString& aSource, nsACString& aDest)
 {
-  nsACString::const_iterator fromBegin, fromEnd;
-  nsACString::iterator toBegin, toEnd;
   aDest.SetLength(aSource.Length());
-
-  CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
+  const char* src = aSource.BeginReading();
+  const char* end = src + aSource.Length();
+  char* dst = aDest.BeginWriting();
+  while (src != end) {
+    char ch = *src;
+    if (ch >= 'a' && ch <= 'z') {
+      *dst = ch - ('a' - 'A');
+    } else {
+      *dst = ch;
+    }
+    ++src;
+    ++dst;
+  }
 }
 
-/**
- * A character sink for case conversion.
- */
-class ConvertToLowerCase
-{
-public:
-  typedef char value_type;
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    char* cp = const_cast<char*>(aSource);
-    const char* end = aSource + aSourceLength;
-    while (cp != end) {
-      char ch = *cp;
-      if ((ch >= 'A') && (ch <= 'Z')) {
-        *cp = ch + ('a' - 'A');
-      }
-      ++cp;
-    }
-    return aSourceLength;
-  }
-};
-
 void
 ToLowerCase(nsACString& aCString)
 {
-  ConvertToLowerCase converter;
-  char* start;
-  converter.write(aCString.BeginWriting(start), aCString.Length());
-}
-
-/**
- * A character sink for copying with case conversion.
- */
-class CopyToLowerCase
-{
-public:
-  typedef char value_type;
-
-  explicit CopyToLowerCase(nsACString::iterator& aDestIter,
-                           const nsACString::iterator& aEndIter)
-    : mIter(aDestIter)
-    , mEnd(aEndIter)
-  {
+  char* cp = aCString.BeginWriting();
+  char* end = cp + aCString.Length();
+  while (cp != end) {
+    char ch = *cp;
+    if (ch >= 'A' && ch <= 'Z') {
+      *cp = ch + ('a' - 'A');
+    }
+    ++cp;
   }
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
-    char* cp = mIter.get();
-    const char* end = aSource + len;
-    while (aSource != end) {
-      char ch = *aSource;
-      if ((ch >= 'A') && (ch <= 'Z')) {
-        *cp = ch + ('a' - 'A');
-      } else {
-        *cp = ch;
-      }
-      ++aSource;
-      ++cp;
-    }
-    mIter.advance(len);
-    return len;
-  }
-
-protected:
-  nsACString::iterator& mIter;
-  const nsACString::iterator& mEnd;
-};
+}
 
 void
 ToLowerCase(const nsACString& aSource, nsACString& aDest)
 {
-  nsACString::const_iterator fromBegin, fromEnd;
-  nsACString::iterator toBegin, toEnd;
   aDest.SetLength(aSource.Length());
-
-  CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
+  const char* src = aSource.BeginReading();
+  const char* end = src + aSource.Length();
+  char* dst = aDest.BeginWriting();
+  while (src != end) {
+    char ch = *src;
+    if (ch >= 'A' && ch <= 'Z') {
+      *dst = ch + ('a' - 'A');
+    } else {
+      *dst = ch;
+    }
+    ++src;
+    ++dst;
+  }
 }
 
 bool
 ParseString(const nsACString& aSource, char aDelimiter,
             nsTArray<nsCString>& aArray)
 {
   nsACString::const_iterator start, end;
   aSource.BeginReading(start);
@@ -1178,117 +631,56 @@ VoidCString()
 {
   static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED);
 
   return sNull;
 }
 
 int32_t
 CompareUTF8toUTF16(const nsACString& aUTF8String,
-                   const nsAString& aUTF16String)
+                   const nsAString& aUTF16String,
+                   bool* aErr)
 {
-  static const uint32_t NOT_ASCII = uint32_t(~0x7F);
-
   const char* u8;
   const char* u8end;
   aUTF8String.BeginReading(u8);
   aUTF8String.EndReading(u8end);
 
   const char16_t* u16;
   const char16_t* u16end;
   aUTF16String.BeginReading(u16);
   aUTF16String.EndReading(u16end);
 
-  while (u8 != u8end && u16 != u16end) {
-    // Cast away the signedness of *u8 to prevent signextension when
-    // converting to uint32_t
-    uint32_t c8_32 = (uint8_t)*u8;
-
-    if (c8_32 & NOT_ASCII) {
-      bool err;
-      c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
-      if (err) {
-        return INT32_MIN;
+  for (;;) {
+    if (u8 == u8end) {
+      if (u16 == u16end) {
+        return 0;
       }
-
-      uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
-      // The above UTF16CharEnumerator::NextChar() calls can
-      // fail, but if it does for anything other than no data to
-      // look at (which can't happen here), it returns the
-      // Unicode replacement character 0xFFFD for the invalid
-      // data they were fed. Ignore that error and treat invalid
-      // UTF16 as 0xFFFD.
-      //
-      // This matches what our UTF16 to UTF8 conversion code
-      // does, and thus a UTF8 string that came from an invalid
-      // UTF16 string will compare equal to the invalid UTF16
-      // string it came from. Same is true for any other UTF16
-      // string differs only in the invalid part of the string.
-
-      if (c8_32 != c16_32) {
-        return c8_32 < c16_32 ? -1 : 1;
-      }
-    } else {
-      if (c8_32 != *u16) {
-        return c8_32 > *u16 ? 1 : -1;
-      }
-
-      ++u8;
-      ++u16;
+      return -1;
+    }
+    if (u16 == u16end) {
+      return 1;
     }
-  }
-
-  if (u8 != u8end) {
-    // We get to the end of the UTF16 string, but no to the end of
-    // the UTF8 string. The UTF8 string is longer than the UTF16
-    // string
-
+    // No need for ASCII optimization, since both NextChar()
+    // calls get inlined.
+    uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr);
+    uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr);
+    if (scalar16 == scalar8) {
+      continue;
+    }
+    if (scalar8 < scalar16) {
+      return -1;
+    }
     return 1;
   }
-
-  if (u16 != u16end) {
-    // We get to the end of the UTF8 string, but no to the end of
-    // the UTF16 string. The UTF16 string is longer than the UTF8
-    // string
-
-    return -1;
-  }
-
-  // The two strings match.
-
-  return 0;
 }
 
 void
 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
 {
   NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
   if (IS_IN_BMP(aSource)) {
     aDest.Append(char16_t(aSource));
   } else {
     aDest.Append(H_SURROGATE(aSource));
     aDest.Append(L_SURROGATE(aSource));
   }
 }
-
-extern "C" {
-
-void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
-{
-  AppendUTF16toUTF8(*aOther, *aThis);
-}
-
-void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
-{
-  AppendUTF8toUTF16(*aOther, *aThis);
-}
-
-bool Gecko_FallibleAppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
-{
-  return AppendUTF16toUTF8(*aOther, *aThis, mozilla::fallible);
-}
-
-bool Gecko_FallibleAppendUTF8toString(nsAString* aThis, const nsACString* aOther)
-{
-  return AppendUTF8toUTF16(*aOther, *aThis, mozilla::fallible);
-}
-
-}
--- a/xpcom/string/nsReadableUtils.h
+++ b/xpcom/string/nsReadableUtils.h
@@ -15,107 +15,391 @@
 
 #include "mozilla/Assertions.h"
 #include "nsAString.h"
 
 #include "nsTArrayForwardDeclare.h"
 
 // Can't include mozilla/Encoding.h here
 extern "C" {
-  size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
-  size_t encoding_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+  size_t
+  encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_ascii(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_basic_latin(char16_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_utf8_latin1(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_str_latin1(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_utf16_latin1(char16_t const* buffer, size_t buffer_len);
+
+  void
+  encoding_mem_convert_utf16_to_latin1_lossy(const char16_t* src,
+                                             size_t src_len,
+                                             char* dst,
+                                             size_t dst_len);
+
+  size_t
+  encoding_mem_convert_utf8_to_latin1_lossy(const char* src,
+                                            size_t src_len,
+                                            char* dst,
+                                            size_t dst_len);
+
+  void
+  encoding_mem_convert_latin1_to_utf16(const char* src,
+                                       size_t src_len,
+                                       char16_t* dst,
+                                       size_t dst_len);
+
+  size_t
+  encoding_mem_convert_utf16_to_utf8(const char16_t* src,
+                                     size_t src_len,
+                                     char* dst,
+                                     size_t dst_len);
+
+  size_t
+  encoding_mem_convert_utf8_to_utf16(const char* src,
+                                     size_t src_len,
+                                     char16_t* dst,
+                                     size_t dst_len);
+}
+
+// From the nsstring crate
+extern "C" {
+  bool
+  nsstring_fallible_append_utf8_impl(nsAString* aThis,
+                                     const char* aOther,
+                                     size_t aOtherLen,
+                                     size_t aOldLen);
+
+  bool
+  nsstring_fallible_append_latin1_impl(nsAString* aThis,
+                                       const char* aOther,
+                                       size_t aOtherLen,
+                                       size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis,
+                                               const char16_t*,
+                                               size_t aOtherLen,
+                                               size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis,
+                                                       const char16_t*,
+                                                       size_t aOtherLen,
+                                                       size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_utf8_to_latin1_lossy_check(nsACString* aThis,
+                                                       const nsACString* aOther,
+                                                       size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis,
+                                                 const nsACString* aOther,
+                                                 size_t aOldLen);
+}
+
+/**
+ * If all the code points in the input are below U+0100, converts to Latin1,
+ * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
+ * there are code points above U+00FF, asserts in debug builds and produces
+ * garbage in release builds. The nature of the garbage depends on the CPU
+ * architecture and must not be relied upon.
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline void
+LossyConvertUTF16toLatin1(mozilla::Span<const char16_t> aSource,
+                          mozilla::Span<char> aDest)
+{
+  encoding_mem_convert_utf16_to_latin1_lossy(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * If all the code points in the input are below U+0100, converts to Latin1,
+ * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
+ * there are code points above U+00FF, asserts in debug builds and produces
+ * garbage in release builds. The nature of the garbage may depend on the CPU
+ * architecture and must not be relied upon.
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline size_t
+LossyConvertUTF8toLatin1(mozilla::Span<const char> aSource,
+                         mozilla::Span<char> aDest)
+{
+  return encoding_mem_convert_utf8_to_latin1_lossy(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Interprets unsigned byte value as Unicode scalar value (i.e. not
+ * windows-1252!).
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline void
+ConvertLatin1toUTF16(mozilla::Span<const char> aSource,
+                     mozilla::Span<char16_t> aDest)
+{
+  encoding_mem_convert_latin1_to_utf16(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Lone surrogates are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The length of aDest must be at least the length of aSource times three
+ * _plus one_.
+ *
+ * Returns the number of code units written.
+ */
+inline size_t
+ConvertUTF16toUTF8(mozilla::Span<const char16_t> aSource,
+                   mozilla::Span<char> aDest)
+{
+  return encoding_mem_convert_utf16_to_utf8(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The length of aDest must be at least one greater than the length of aSource.
+ *
+ * Returns the number of code units written.
+ */
+inline size_t
+ConvertUTF8toUTF16(mozilla::Span<const char> aSource,
+                   mozilla::Span<char16_t> aDest)
+{
+  return encoding_mem_convert_utf8_to_utf16(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 }
 
 inline size_t
 Distance(const nsReadingIterator<char16_t>& aStart,
          const nsReadingIterator<char16_t>& aEnd)
 {
   MOZ_ASSERT(aStart.get() <= aEnd.get());
   return static_cast<size_t>(aEnd.get() - aStart.get());
 }
+
 inline size_t
 Distance(const nsReadingIterator<char>& aStart,
          const nsReadingIterator<char>& aEnd)
 {
   MOZ_ASSERT(aStart.get() <= aEnd.get());
   return static_cast<size_t>(aEnd.get() - aStart.get());
 }
 
-void LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest);
-void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
-MOZ_MUST_USE bool CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
-                                   const mozilla::fallible_t&);
+// UTF-8 to UTF-16
+// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
+
+inline MOZ_MUST_USE bool
+CopyUTF8toUTF16(mozilla::Span<const char> aSource,
+                nsAString& aDest,
+                const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest);
-void CopyASCIItoUTF16(const char* aSource, nsAString& aDest);
+inline void
+CopyUTF8toUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+AppendUTF8toUTF16(mozilla::Span<const char> aSource,
+                  nsAString& aDest,
+                  const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+AppendUTF8toUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
-void CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
-MOZ_MUST_USE bool CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
-                                  const mozilla::fallible_t&);
-void CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
+// Latin1 to UTF-16
+// Interpret each incoming unsigned byte value as a Unicode scalar value (not
+// windows-1252!). The function names say "ASCII" instead of "Latin1" for
+// legacy reasons.
+
+inline MOZ_MUST_USE bool
+CopyASCIItoUTF16(mozilla::Span<const char> aSource,
+                 nsAString& aDest,
+                 const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_latin1_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest);
-void CopyUTF8toUTF16(const char* aSource, nsAString& aDest);
+inline void
+CopyASCIItoUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+AppendASCIItoUTF16(mozilla::Span<const char> aSource,
+                   nsAString& aDest,
+                   const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_latin1_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+AppendASCIItoUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
-void LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest);
-void AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
-MOZ_MUST_USE bool AppendASCIItoUTF16(const nsACString& aSource,
-                                     nsAString& aDest,
-                                     const mozilla::fallible_t&);
+// UTF-16 to UTF-8
+// Unpaired surrogates are replaced with the REPLACEMENT CHARACTER.
+
+inline MOZ_MUST_USE bool
+CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource,
+                nsACString& aDest,
+                const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest);
-MOZ_MUST_USE bool AppendASCIItoUTF16(const char* aSource,
-                                     nsAString& aDest,
-                                     const mozilla::fallible_t&);
-void AppendASCIItoUTF16(const char* aSource, nsAString& aDest);
+inline void
+CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource,
+                  nsACString& aDest,
+                  const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource, nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
-void AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
-MOZ_MUST_USE bool AppendUTF16toUTF8(const nsAString& aSource,
-                                    nsACString& aDest,
-                                    const mozilla::fallible_t&);
-void AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
-MOZ_MUST_USE bool AppendUTF8toUTF16(const nsACString& aSource,
-                                    nsAString& aDest,
-                                    const mozilla::fallible_t&);
+// UTF-16 to Latin1
+// If all code points in the input are below U+0100, represents each scalar
+// value as an unsigned byte. (This is not windows-1252!) If there are code
+// points above U+00FF, asserts in debug builds and memory-safely produces
+// garbage in release builds. The nature of the garbage may differ based on
+// CPU architecture and must not be relied upon. The names say "ASCII" instead
+// of "Latin1" for legacy reasons.
+
+inline MOZ_MUST_USE bool
+LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource,
+                      nsACString& aDest,
+                      const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest);
-void AppendUTF8toUTF16(const char* aSource, nsAString& aDest);
+inline void
+LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource, nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
+                        nsACString& aDest,
+                        const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
+                        nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(
+        !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
 /**
  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with |free|.
- * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
- * This conversion is not well defined; but it reproduces legacy string behavior.
- * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
+ * Performs a conversion with LossyConvertUTF16toLatin1() writing into the
+ * newly-allocated buffer.
+ *
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
  *
  * @param aSource a 16-bit wide string
  * @return a new |char| buffer you must free with |free|.
  */
 char* ToNewCString(const nsAString& aSource);
 
-
 /**
  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with |free|.
- * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
+ *
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
  *
  * @param aSource an 8-bit wide string
  * @return a new |char| buffer you must free with |free|.
  */
 char* ToNewCString(const nsACString& aSource);
 
 /**
  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with
  * |free|.
- * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
- * copying |aSource| to your new buffer.
+ * Performs an encoding conversion from UTF-16 to UTF-8 while copying
+ * |aSource| to your new buffer; unpaired surrogates are replaced with the
+ * REPLACEMENT CHARACTER.
+ *
  * The new buffer is zero-terminated, but that may not help you if |aSource|
  * contains embedded nulls.
  *
  * @param aSource a UTF-16 string (made of char16_t's)
  * @param aUTF8Count the number of 8-bit units that was returned
  * @return a new |char| buffer you must free with |free|.
  */
 
@@ -123,79 +407,56 @@ char* ToNewUTF8String(const nsAString& a
 
 
 /**
  * Returns a new |char16_t| buffer containing a zero-terminated copy of
  * |aSource|.
  *
  * Allocates and returns a new |char16_t| buffer which you must free with
  * |free|.
+ *
  * The new buffer is zero-terminated, but that may not help you if |aSource|
  * contains embedded nulls.
  *
  * @param aSource a UTF-16 string
  * @return a new |char16_t| buffer you must free with |free|.
  */
 char16_t* ToNewUnicode(const nsAString& aSource);
 
 
 /**
- * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
+ * Returns a new |char16_t| buffer containing a zero-terminated copy of
+ * |aSource|.
+ *
+ * Allocates and returns a new |char16_t| buffer which you must free with
+ * |free|.
  *
- * Allocates and returns a new |char16_t| buffer which you must free with |free|.
- * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
- * This conversion is not well defined; but it reproduces legacy string behavior.
- * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
+ * Performs an encoding conversion by 0-padding 8-bit wide characters up to
+ * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource|
+ * to your new buffer.
  *
- * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
+ *
+ * @param aSource a Latin1 string
  * @return a new |char16_t| buffer you must free with |free|.
  */
 char16_t* ToNewUnicode(const nsACString& aSource);
 
 /**
- * Returns the required length for a char16_t buffer holding
- * a copy of aSource, using UTF-8 to UTF-16 conversion.
- * The length does NOT include any space for zero-termination.
- *
- * @param aSource an 8-bit wide string, UTF-8 encoded
- * @return length of UTF-16 encoded string copy, not zero-terminated
- */
-uint32_t CalcUTF8ToUnicodeLength(const nsACString& aSource);
-
-/**
- * Copies the source string into the specified buffer, converting UTF-8 to
- * UTF-16 in the process. The conversion is well defined for valid UTF-8
- * strings.
- * The copied string will be zero-terminated! Any embedded nulls will be
- * copied nonetheless. It is the caller's responsiblity to ensure the buffer
- * is large enough to hold the string copy plus one char16_t for
- * zero-termination!
- *
- * @see CalcUTF8ToUnicodeLength( const nsACString& )
- * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
- *
- * @param aSource an 8-bit wide string, UTF-8 encoded
- * @param aBuffer the buffer holding the converted string copy
- * @param aUTF16Count receiving optionally the number of 16-bit units that
- *                    were copied
- * @return aBuffer pointer, for convenience
- */
-char16_t* UTF8ToUnicodeBuffer(const nsACString& aSource,
-                              char16_t* aBuffer,
-                              uint32_t* aUTF16Count = nullptr);
-
-/**
  * Returns a new |char16_t| buffer containing a zero-terminated copy
  * of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with
  * |free|.  Performs an encoding conversion from UTF-8 to UTF-16
- * while copying |aSource| to your new buffer.  This conversion is well defined
- * for a valid UTF-8 string.  The new buffer is zero-terminated, but that
- * may not help you if |aSource| contains embedded nulls.
+ * while copying |aSource| to your new buffer.  Malformed byte sequences
+ * are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
  *
  * @param aSource an 8-bit wide string, UTF-8 encoded
  * @param aUTF16Count the number of 16-bit units that was returned
  * @return a new |char16_t| buffer you must free with |free|.
  *         (UTF-16 encoded)
  */
 char16_t* UTF8ToNewUnicode(const nsACString& aSource,
                            uint32_t* aUTF16Count = nullptr);
@@ -212,99 +473,163 @@ char16_t* UTF8ToNewUnicode(const nsACStr
  * @param aLength the number of 16-bit code units to copy
  * @return pointer to destination buffer - identical to |aDest|
  */
 char16_t* CopyUnicodeTo(const nsAString& aSource,
                         uint32_t aSrcOffset,
                         char16_t* aDest,
                         uint32_t aLength);
 
-
 /**
- * Copies 16-bit characters between iterators |aSrcStart| and
- * |aSrcEnd| to the writable string |aDest|. Similar to the
- * |nsString::Mid| method.
- *
- * After this operation |aDest| is not null terminated.
- *
- * @param aSrcStart start source iterator
- * @param aSrcEnd end source iterator
- * @param aDest destination for the copy
- */
-void CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
-                   const nsAString::const_iterator& aSrcEnd,
-                   nsAString& aDest);
-
-/**
- * Appends 16-bit characters between iterators |aSrcStart| and
- * |aSrcEnd| to the writable string |aDest|.
- *
- * After this operation |aDest| is not null terminated.
- *
- * @param aSrcStart start source iterator
- * @param aSrcEnd end source iterator
- * @param aDest destination for the copy
- */
-void AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
-                     const nsAString::const_iterator& aSrcEnd,
-                     nsAString& aDest);
-
-/**
- * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
+ * Returns |true| if |aString| contains only ASCII characters, that is,
+ * characters in the inclusive range [0x00, 0x7F].
  *
  * @param aString a 16-bit wide string to scan
  */
-bool IsASCII(const nsAString& aString);
+inline bool
+IsASCII(mozilla::Span<const char16_t> aString)
+{
+  size_t length = aString.Length();
+  const char16_t* ptr = aString.Elements();
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    char16_t accu = 0;
+    for (size_t i = 0; i < length; i++) {
+      accu |= ptr[i];
+    }
+    return accu < 0x80U;
+  }
+  return encoding_mem_is_basic_latin(ptr, length);
+}
 
 /**
- * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
+ * Returns |true| if |aString| contains only ASCII characters, that is,
+ * characters in the inclusive range [0x00, 0x7F].
  *
  * @param aString a 8-bit wide string to scan
  */
-inline bool IsASCII(const nsACString& aString)
+inline bool
+IsASCII(mozilla::Span<const char> aString)
 {
   size_t length = aString.Length();
-  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.BeginReading());
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
   // For short strings, calling into Rust is a pessimization, and the SIMD
-  // code won't have a chance to kick in anyway. Additionally, handling the
-  // case of the empty string here makes null-checking ptr unnecessary.
-  // (Passing nullptr to Rust would technically be UB.)
+  // code won't have a chance to kick in anyway.
   if (length < 16) {
-    size_t accu = 0;
+    uint8_t accu = 0;
+    for (size_t i = 0; i < length; i++) {
+      accu |= ptr[i];
+    }
+    return accu < 0x80U;
+  }
+  return encoding_mem_is_ascii(ptr, length);
+}
+
+/**
+ * Returns |true| if |aString| contains only Latin1 characters, that is,
+ * characters in the inclusive range [U+0000, U+00FF].
+ *
+ * @param aString a potentially-invalid UTF-16 string to scan
+ */
+inline bool
+IsUTF16Latin1(mozilla::Span<const char16_t> aString)
+{
+  size_t length = aString.Length();
+  const char16_t* ptr = aString.Elements();
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    char16_t accu = 0;
     for (size_t i = 0; i < length; i++) {
       accu |= ptr[i];
     }
-    return accu < 0x80;
+    return accu < 0x100U;
+  }
+  return encoding_mem_is_utf16_latin1(ptr, length);
+}
+
+/**
+ * Returns |true| if |aString| contains only Latin1 characters, that is,
+ * characters in the inclusive range [U+0000, U+00FF].
+ *
+ * If you know that the argument is always absolutely guaranteed to be valid
+ * UTF-8, use the faster UnsafeIsValidUTF8Latin1() instead.
+ *
+ * @param aString potentially-invalid UTF-8 string to scan
+ */
+inline bool
+IsUTF8Latin1(mozilla::Span<const char> aString)
+{
+  size_t length = aString.Length();
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    for (size_t i = 0; i < length; i++) {
+      if (ptr[i] >= 0x80U) {
+        ptr += i;
+        length -= i;
+        goto end;
+      }
+    }
+    return true;
   }
-  // This is not quite optimal, because it's not fail-fast when the by-register
-  // check already finds non-ASCII. Also, input to this function is almost
-  // always ASCII, so even the by-register check wouldn't need to be fail-fast
-  // and could be more like the loop above.
-  return length == encoding_ascii_valid_up_to(ptr, length);
+end:
+  return encoding_mem_is_utf8_latin1(ptr, length);
+}
+
+/**
+ * Returns |true| if |aString| contains only Latin1 characters, that is,
+ * characters in the inclusive range [U+0000, U+00FF].
+ *
+ * The argument MUST be valid UTF-8. If you are at all unsure, use IsUTF8Latin1
+ * instead!
+ *
+ * @param aString known-valid UTF-8 string to scan
+ */
+inline bool
+UnsafeIsValidUTF8Latin1(mozilla::Span<const char> aString)
+{
+  size_t length = aString.Length();
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    for (size_t i = 0; i < length; i++) {
+      if (ptr[i] >= 0x80U) {
+        ptr += i;
+        length -= i;
+        goto end;
+      }
+    }
+    return true;
+  }
+end:
+  return encoding_mem_is_str_latin1(ptr, length);
 }
 
 /**
  * Returns |true| if |aString| is a valid UTF-8 string.
  *
  * Note that this doesn't check whether the string might look like a valid
  * string in another encoding, too, e.g. ISO-2022-JP.
  *
  * @param aString an 8-bit wide string to scan
  */
-inline bool IsUTF8(const nsACString& aString)
+inline bool
+IsUTF8(mozilla::Span<const char> aString)
 {
   size_t length = aString.Length();
-  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.BeginReading());
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
   // For short strings, calling into Rust is a pessimization, and the SIMD
-  // code won't have a chance to kick in anyway. Additionally, handling the
-  // case of the empty string here makes null-checking ptr unnecessary.
-  // (Passing nullptr to Rust would technically be UB.)
+  // code won't have a chance to kick in anyway.
   if (length < 16) {
     for (size_t i = 0; i < length; i++) {
-      if (ptr[i] >= 0x80) {
+      if (ptr[i] >= 0x80U) {
         ptr += i;
         length -= i;
         goto end;
       }
     }
     return true;
   }
   end:
@@ -328,22 +653,26 @@ void ToLowerCase(nsACString&);
 /**
  * Converts case from string aSource to aDest.
  */
 void ToUpperCase(const nsACString& aSource, nsACString& aDest);
 
 void ToLowerCase(const nsACString& aSource, nsACString& aDest);
 
 /**
- * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
+ * Finds the leftmost occurrence of |aPattern|, if any in the range
+ * |aSearchStart|..|aSearchEnd|.
  *
- * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
- * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
+ * Returns |true| if a match was found, and adjusts |aSearchStart| and
+ * |aSearchEnd| to point to the match.  If no match was found, returns |false|
+ * and makes |aSearchStart == aSearchEnd|.
  *
- * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
+ * Currently, this is equivalent to the O(m*n) implementation previously on
+ * |ns[C]String|.
+ *
  * If we need something faster, then we can implement that later.
  */
 
 bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
                     nsAString::const_iterator&,
                     const nsStringComparator& = nsDefaultStringComparator());
 bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
                     nsACString::const_iterator&,
@@ -373,19 +702,19 @@ FindInReadable(const nsACString& aPatter
 
 
 bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
                                    nsACString::const_iterator&,
                                    nsACString::const_iterator&);
 
 /**
  * Finds the rightmost occurrence of |aPattern|
- * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
- * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
- *
+ * Returns |true| if a match was found, and adjusts |aSearchStart| and
+ * |aSearchEnd| to point to the match.  If no match was found, returns |false|
+ * and makes |aSearchStart == aSearchEnd|.
  */
 bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
                      nsAString::const_iterator&,
                      const nsStringComparator& = nsDefaultStringComparator());
 bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
                      nsACString::const_iterator&,
                      const nsCStringComparator& = nsDefaultCStringComparator());
 
@@ -425,25 +754,28 @@ bool StringEndsWith(const nsACString& aS
 
 const nsString& EmptyString();
 const nsCString& EmptyCString();
 
 const nsString& VoidString();
 const nsCString& VoidCString();
 
 /**
-* Compare a UTF-8 string to an UTF-16 string.
-*
-* Returns 0 if the strings are equal, -1 if aUTF8String is less
-* than aUTF16Count, and 1 in the reverse case.  In case of fatal
-* error (eg the strings are not valid UTF8 and UTF16 respectively),
-* this method will return INT32_MIN.
-*/
-int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
-                           const nsAString& aUTF16String);
+ * Compare a UTF-8 string to an UTF-16 string.
+ *
+ * Returns 0 if the strings are equal, -1 if aUTF8String is less
+ * than aUTF16String, and 1 in the reverse case. Errors are replaced
+ * with U+FFFD and then the U+FFFD is compared as if it had occurred
+ * in the input. If aErr is not nullptr, *aErr is set to true if
+ * either string had malformed sequences.
+ */
+int32_t
+CompareUTF8toUTF16(const nsACString& aUTF8String,
+                   const nsAString& aUTF16String,
+                   bool* aErr = nullptr);
 
 void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
 
 template<class T>
 inline bool
 EnsureStringLength(T& aStr, uint32_t aLen)
 {
   aStr.SetLength(aLen);
--- a/xpcom/string/nsSubstring.cpp
+++ b/xpcom/string/nsSubstring.cpp
@@ -454,16 +454,24 @@ char* Gecko_BeginWritingCString(nsACStri
   return aThis->BeginWriting();
 }
 
 char* Gecko_FallibleBeginWritingCString(nsACString* aThis)
 {
   return aThis->BeginWriting(mozilla::fallible);
 }
 
+uint32_t
+Gecko_StartBulkWriteCString(nsACString* aThis,
+                            uint32_t aCapacity,
+                            uint32_t aUnitsToPreserve)
+{
+  return aThis->StartBulkWrite(aCapacity, aUnitsToPreserve);
+}
+
 void Gecko_FinalizeString(nsAString* aThis)
 {
   aThis->~nsAString();
 }
 
 void Gecko_AssignString(nsAString* aThis, const nsAString* aOther)
 {
   aThis->Assign(*aOther);
@@ -509,9 +517,17 @@ char16_t* Gecko_BeginWritingString(nsASt
   return aThis->BeginWriting();
 }
 
 char16_t* Gecko_FallibleBeginWritingString(nsAString* aThis)
 {
   return aThis->BeginWriting(mozilla::fallible);
 }
 
+uint32_t
+Gecko_StartBulkWriteString(nsAString* aThis,
+                           uint32_t aCapacity,
+                           uint32_t aUnitsToPreserve)
+{
+  return aThis->StartBulkWrite(aCapacity, aUnitsToPreserve);
+}
+
 } // extern "C"
--- a/xpcom/string/nsTStringObsolete.cpp
+++ b/xpcom/string/nsTStringObsolete.cpp
@@ -320,24 +320,21 @@ nsTString<T>::ReplaceSubstring(const sel
                "We should have the correct non-matching segment.");
     return true;
   }
 
   // Make sure that we can mutate our buffer.
   // Note that we always allocate at least an this->mLength sized buffer, because the
   // rest of the algorithm relies on having access to all of the original
   // string.  In other words, we over-allocate in the shrinking case.
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!this->MutatePrep(XPCOM_MAX(this->mLength, newLength.value()), &oldData, &oldFlags))
+  uint32_t oldLen = this->mLength;
+  uint32_t capacity =
+    this->StartBulkWrite(XPCOM_MAX(oldLen, newLength.value()), oldLen);
+  if (capacity == UINT32_MAX) {
     return false;
-  if (oldData) {
-    // Copy all of the old data to the new buffer.
-    char_traits::copy(this->mData, oldData, this->mLength);
-    ::ReleaseData(oldData, oldFlags);
   }
 
   if (aTarget.Length() >= aNewValue.Length()) {
     // In the shrinking case, start filling the buffer from the beginning.
     const uint32_t delta = (aTarget.Length() - aNewValue.Length());
     for (i = 1; i < nonMatching.Length(); ++i) {
       // When we move the i'th non-matching segment into position, we need to
       // account for the characters deleted by the previous |i| replacements by
@@ -365,18 +362,17 @@ nsTString<T>::ReplaceSubstring(const sel
       // Write the i'th replacement immediately before the new i'th non-matching
       // segment.
       char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
                         aNewValue.Data(), aNewValue.Length());
     }
   }
 
   // Adjust the length and make sure the string is null terminated.
-  this->mLength = newLength.value();
-  this->mData[this->mLength] = char_type(0);
+  this->FinishBulkWrite(newLength.value());
 
   return true;
 }
 
 /**
  * nsTString::Trim
  */
 
--- a/xpcom/string/nsTSubstring.cpp
+++ b/xpcom/string/nsTSubstring.cpp
@@ -42,55 +42,82 @@ nsTSubstring<T>::nsTSubstring(char_type*
  */
 template <typename T>
 inline const nsTAutoString<T>*
 AsAutoString(const nsTSubstring<T>* aStr)
 {
   return static_cast<const nsTAutoString<T>*>(aStr);
 }
 
-/**
- * this function is called to prepare mData for writing.  the given capacity
- * indicates the required minimum storage size for mData, in sizeof(char_type)
- * increments.  this function returns true if the operation succeeds.  it also
- * returns the old data and old flags members if mData is newly allocated.
- * the old data must be released by the caller.
- */
-template <typename T>
-bool
-nsTSubstring<T>::MutatePrep(size_type aCapacity, char_type** aOldData,
-                            DataFlags* aOldDataFlags)
+template<typename T>
+uint32_t
+nsTSubstring<T>::StartBulkWrite(size_type aCapacity,
+                                size_type aPrefixToPreserve,
+                                bool aAllowShrinking,
+                                size_type aSuffixLength,
+                                size_type aOldSuffixStart,
+                                size_type aNewSuffixStart)
 {
-  // initialize to no old data
-  *aOldData = nullptr;
-  *aOldDataFlags = DataFlags(0);
+  // Note! Capacity does not include room for the terminating null char.
+
+  MOZ_ASSERT(aPrefixToPreserve <= aCapacity,
+             "Requested preservation of an overlong prefix.");
+  MOZ_ASSERT(aNewSuffixStart + aSuffixLength <= aCapacity,
+             "Requesed move of suffix to out-of-bounds location.");
+  // Can't assert aOldSuffixStart, because mLength may not be valid anymore,
+  // since this method allows itself to be called more than once.
 
+  // If zero capacity is requested, set the string to the special empty
+  // string.
+  if (MOZ_UNLIKELY(!aCapacity)) {
+    ::ReleaseData(this->mData, this->mDataFlags);
+    SetToEmptyBuffer();
+    this->mDataFlags &= ~DataFlags::VOIDED; // mutation clears voided flag
+    return 0;
+  }
+
+  // Note! Capacity() returns 0 when the string is immutable.
   size_type curCapacity = Capacity();
 
-  // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be
-  // able to allocate it.  Just bail out in cases like that.  We don't want
-  // to be allocating 2GB+ strings anyway.
-  static_assert((sizeof(nsStringBuffer) & 0x1) == 0,
-                "bad size for nsStringBuffer");
-  if (!CheckCapacity(aCapacity)) {
-      return false;
-  }
-
+  // We've established that aCapacity > 0.
   // |curCapacity == 0| means that the buffer is immutable or 0-sized, so we
   // need to allocate a new buffer. We cannot use the existing buffer even
   // though it might be large enough.
 
-  if (curCapacity != 0) {
-    if (aCapacity <= curCapacity) {
-      this->mDataFlags &= ~DataFlags::VOIDED;  // mutation clears voided flag
-      return true;
-    }
+  if (!aAllowShrinking && aCapacity <= curCapacity) {
+    char_traits::move(this->mData + aNewSuffixStart,
+                      this->mData + aOldSuffixStart,
+                      aSuffixLength);
+    return curCapacity;
   }
 
-  if (curCapacity < aCapacity) {
+  char_type* oldData = this->mData;
+  DataFlags oldFlags = this->mDataFlags;
+
+  char_type* newData;
+  DataFlags newDataFlags;
+  size_type newCapacity;
+
+  // If this is an nsTAutoStringN, it's possible that we can use the inline
+  // buffer.
+  if ((this->mClassFlags & ClassFlags::INLINE) &&
+      (aCapacity <= AsAutoString(this)->mInlineCapacity)) {
+    newCapacity = AsAutoString(this)->mInlineCapacity;
+    newData = (char_type*)AsAutoString(this)->mStorage;
+    newDataFlags = DataFlags::TERMINATED | DataFlags::INLINE;
+  } else {
+    // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be
+    // able to allocate it.  Just bail out in cases like that.  We don't want
+    // to be allocating 2GB+ strings anyway.
+    static_assert((sizeof(nsStringBuffer) & 0x1) == 0,
+                  "bad size for nsStringBuffer");
+    if (MOZ_UNLIKELY(!CheckCapacity(aCapacity))) {
+      return UINT32_MAX;
+    }
+
     // We increase our capacity so that the allocated buffer grows
     // exponentially, which gives us amortized O(1) appending. Below the
     // threshold, we use powers-of-two. Above the threshold, we grow by at
     // least 1.125, rounding up to the nearest MiB.
     const size_type slowGrowthThreshold = 8 * 1024 * 1024;
 
     // nsStringBuffer allocates sizeof(nsStringBuffer) + passed size, and
     // storageSize below wants extra 1 * sizeof(char_type).
@@ -108,87 +135,70 @@ nsTSubstring<T>::MutatePrep(size_type aC
       const size_t MiB = 1 << 20;
       temp = (MiB * ((temp + MiB - 1) / MiB)) - neededExtraSpace;
     } else {
       // Round up to the next power of two.
       temp =
         mozilla::RoundUpPow2(aCapacity + neededExtraSpace) - neededExtraSpace;
     }
 
-    MOZ_ASSERT(XPCOM_MIN(temp, kMaxCapacity) >= aCapacity,
+    newCapacity = XPCOM_MIN(temp, kMaxCapacity);
+    MOZ_ASSERT(newCapacity >= aCapacity,
                "should have hit the early return at the top");
-    aCapacity = XPCOM_MIN(temp, kMaxCapacity);
-  }
-
-  //
-  // several cases:
-  //
-  //  (1) we have a refcounted shareable buffer (this->mDataFlags &
-  //      DataFlags::REFCOUNTED)
-  //  (2) we have an owned buffer (this->mDataFlags & DataFlags::OWNED)
-  //  (3) we have an inline buffer (this->mDataFlags & DataFlags::INLINE)
-  //  (4) we have a readonly buffer
-  //
-  // requiring that we in some cases preserve the data before creating
-  // a new buffer complicates things just a bit ;-)
-  //
-
-  size_type storageSize = (aCapacity + 1) * sizeof(char_type);
-
-  // case #1
-  if (this->mDataFlags & DataFlags::REFCOUNTED) {
-    nsStringBuffer* hdr = nsStringBuffer::FromData(this->mData);
-    if (!hdr->IsReadonly()) {
-      nsStringBuffer* newHdr = nsStringBuffer::Realloc(hdr, storageSize);
+    // Avoid shrinking if the new buffer is within 300 of the old. Note that
+    // unsigned underflow is defined behavior.
+    if ((curCapacity - newCapacity) <= 300 &&
+        (this->mDataFlags & DataFlags::REFCOUNTED)) {
+      MOZ_ASSERT(aAllowShrinking, "How come we didn't return earlier?");
+      // We're already close enough to the right size.
+      newData = oldData;
+    } else {
+      size_type storageSize = (newCapacity + 1) * sizeof(char_type);
+      // Since we allocate only if we need a different jemalloc bucket
+      // size, it's not useful to use realloc, which may spend time
+      // uselessly copying too much.
+      nsStringBuffer* newHdr = nsStringBuffer::Alloc(storageSize).take();
       if (!newHdr) {
-        return false;  // out-of-memory (original header left intact)
+        return UINT32_MAX; // we are still in a consistent state
       }
 
-      hdr = newHdr;
-      this->mData = (char_type*)hdr->Data();
-      this->mDataFlags &= ~DataFlags::VOIDED;  // mutation clears voided flag
-      return true;
+      newData = (char_type*)newHdr->Data();
     }
-  }
-
-  char_type* newData;
-  DataFlags newDataFlags;
-
-  // If this is an nsTAutoStringN whose inline buffer is sufficiently large,
-  // then use it. This helps avoid heap allocations.
-  if ((this->mClassFlags & ClassFlags::INLINE) &&
-      (aCapacity < AsAutoString(this)->mInlineCapacity)) {
-    newData = (char_type*)AsAutoString(this)->mStorage;
-    newDataFlags = DataFlags::TERMINATED | DataFlags::INLINE;
-  } else {
-    // if we reach here then, we must allocate a new buffer.  we cannot
-    // make use of our DataFlags::OWNED or DataFlags::INLINE buffers because
-    // they are not large enough.
-
-    nsStringBuffer* newHdr =
-      nsStringBuffer::Alloc(storageSize).take();
-    if (!newHdr) {
-      return false;  // we are still in a consistent state
-    }
-
-    newData = (char_type*)newHdr->Data();
     newDataFlags = DataFlags::TERMINATED | DataFlags::REFCOUNTED;
   }
 
-  // save old data and flags
-  *aOldData = this->mData;
-  *aOldDataFlags = this->mDataFlags;
+  this->mData = newData;
+  this->mDataFlags = newDataFlags;
+
+  if (oldData == newData) {
+    char_traits::move(
+      newData + aNewSuffixStart, oldData + aOldSuffixStart, aSuffixLength);
+  } else {
+    char_traits::copy(newData, oldData, aPrefixToPreserve);
+    char_traits::copy(
+      newData + aNewSuffixStart, oldData + aOldSuffixStart, aSuffixLength);
+    ::ReleaseData(oldData, oldFlags);
+  }
 
-  // this->mLength does not change
-  SetData(newData, this->mLength, newDataFlags);
+  return newCapacity;
+}
 
-  // though we are not necessarily terminated at the moment, now is probably
-  // still the best time to set DataFlags::TERMINATED.
-
-  return true;
+template<typename T>
+void
+nsTSubstring<T>::FinishBulkWrite(size_type aLength)
+{
+  MOZ_ASSERT(aLength != UINT32_MAX, "OOM magic value passed as length.");
+  if (aLength) {
+    this->mData[aLength] = char_type(0);
+    this->mLength = aLength;
+  } else {
+    ::ReleaseData(this->mData, this->mDataFlags);
+    SetToEmptyBuffer();
+  }
+  AssertValid();
 }
 
 template <typename T>
 void
 nsTSubstring<T>::Finalize()
 {
   ::ReleaseData(this->mData, this->mDataFlags);
   // this->mData, this->mLength, and this->mDataFlags are purposefully left dangling
@@ -220,58 +230,26 @@ nsTSubstring<T>::ReplacePrep(index_type 
                              newTotalLen.value());
 }
 
 template <typename T>
 bool
 nsTSubstring<T>::ReplacePrepInternal(index_type aCutStart, size_type aCutLen,
                                      size_type aFragLen, size_type aNewLen)
 {
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!MutatePrep(aNewLen, &oldData, &oldFlags)) {
-    return false;  // out-of-memory
-  }
-
-  if (oldData) {
-    // determine whether or not we need to copy part of the old string
-    // over to the new string.
-
-    if (aCutStart > 0) {
-      // copy prefix from old string
-      char_traits::copy(this->mData, oldData, aCutStart);
-    }
+  size_type newSuffixStart = aCutStart + aFragLen;
+  size_type oldSuffixStart = aCutStart + aCutLen;
+  size_type suffixLength = this->mLength - oldSuffixStart;
 
-    if (aCutStart + aCutLen < this->mLength) {
-      // copy suffix from old string to new offset
-      size_type from = aCutStart + aCutLen;
-      size_type fromLen = this->mLength - from;
-      uint32_t to = aCutStart + aFragLen;
-      char_traits::copy(this->mData + to, oldData + from, fromLen);
-    }
-
-    ::ReleaseData(oldData, oldFlags);
-  } else {
-    // original data remains intact
-
-    // determine whether or not we need to move part of the existing string
-    // to make room for the requested hole.
-    if (aFragLen != aCutLen && aCutStart + aCutLen < this->mLength) {
-      uint32_t from = aCutStart + aCutLen;
-      uint32_t fromLen = this->mLength - from;
-      uint32_t to = aCutStart + aFragLen;
-      char_traits::move(this->mData + to, this->mData + from, fromLen);
-    }
+  size_type capacity = StartBulkWrite(
+    aNewLen, aCutStart, true, suffixLength, oldSuffixStart, newSuffixStart);
+  if (capacity == UINT32_MAX) {
+    return false;
   }
-
-  // add null terminator (mutable this->mData always has room for the null-
-  // terminator).
-  this->mData[aNewLen] = char_type(0);
-  this->mLength = aNewLen;
-
+  FinishBulkWrite(aNewLen);
   return true;
 }
 
 template <typename T>
 typename nsTSubstring<T>::size_type
 nsTSubstring<T>::Capacity() const
 {
   // return 0 to indicate an immutable or 0-sized buffer
@@ -556,30 +534,24 @@ nsTSubstring<T>::Assign(const substring_
 {
   if (aTuple.IsDependentOn(this->mData, this->mData + this->mLength)) {
     // take advantage of sharing here...
     return Assign(string_type(aTuple), aFallible);
   }
 
   size_type length = aTuple.Length();
 
-  // don't use ReplacePrep here because it changes the length
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!MutatePrep(length, &oldData, &oldFlags)) {
+  size_type capacity = StartBulkWrite(length);
+  if (capacity == UINT32_MAX) {
     return false;
   }
 
-  if (oldData) {
-    ::ReleaseData(oldData, oldFlags);
-  }
+  aTuple.WriteTo(this->mData, length);
 
-  aTuple.WriteTo(this->mData, length);
-  this->mData[length] = 0;
-  this->mLength = length;
+  FinishBulkWrite(length);
   return true;
 }
 
 template <typename T>
 void
 nsTSubstring<T>::Adopt(char_type* aData, size_type aLength)
 {
   if (aData) {
@@ -765,50 +737,57 @@ nsTSubstring<T>::SetCapacity(size_type a
 }
 
 template <typename T>
 bool
 nsTSubstring<T>::SetCapacity(size_type aCapacity, const fallible_t&)
 {
   // capacity does not include room for the terminating null char
 
-  // if our capacity is reduced to zero, then free our buffer.
-  if (aCapacity == 0) {
-    ::ReleaseData(this->mData, this->mDataFlags);
-    SetToEmptyBuffer();
-    return true;
+  // Sadly, existing callers assume that it's valid to
+  // first call SetCapacity(), then write past mLength
+  // and then call SetLength() with the assumption that
+  // SetLength still preserves the written data past
+  // mLength!!!
+
+  size_type preserve;
+  if (this->mDataFlags & DataFlags::REFCOUNTED) {
+    nsStringBuffer* hdr = nsStringBuffer::FromData(this->mData);
+    preserve = (hdr->StorageSize() / sizeof(char_type)) - 1;
+  } else if (this->mDataFlags & DataFlags::INLINE) {
+    preserve = AsAutoString(this)->mInlineCapacity;
+  } else {
+    preserve = this->mLength;
   }
 
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!MutatePrep(aCapacity, &oldData, &oldFlags)) {
-    return false;  // out-of-memory
+  if (preserve > aCapacity) {
+    preserve = aCapacity;
   }
 
-  // compute new string length
-  size_type newLen = XPCOM_MIN(this->mLength, aCapacity);
-
-  if (oldData) {
-    // preserve old data
-    if (this->mLength > 0) {
-      char_traits::copy(this->mData, oldData, newLen);
-    }
-
-    ::ReleaseData(oldData, oldFlags);
+  size_type capacity = StartBulkWrite(aCapacity, preserve);
+  if (capacity == UINT32_MAX) {
+    return false;
   }
+  if (capacity) {
+    // In the zero case StartBulkWrite already put the string
+    // in a valid state.
 
-  // adjust this->mLength if our buffer shrunk down in size
-  if (newLen < this->mLength) {
-    this->mLength = newLen;
+    // Otherwise, instead of calling FinishBulkWrite,
+    // intentionally leave the string in the weird state
+    // required by the legacy semantics of this method.
+    if (aCapacity < this->mLength) {
+      // aCapacity not capacity for legacy reasons;
+      // maybe capacity would work, too.
+      this->mLength = aCapacity;
+    }
+    // Note that we can't write a terminator at
+    // mData[mLength], because doing so would overwrite
+    // data when this method is called from SetLength.
+    this->mData[aCapacity] = char_type(0);
   }
-
-  // always null-terminate here, even if the buffer got longer.  this is
-  // for backwards compat with the old string implementation.
-  this->mData[aCapacity] = char_type(0);
-
   return true;
 }
 
 template <typename T>
 void
 nsTSubstring<T>::SetLength(size_type aLength)
 {
   SetCapacity(aLength);
--- a/xpcom/string/nsTSubstring.h
+++ b/xpcom/string/nsTSubstring.h
@@ -895,38 +895,74 @@ protected:
 
   /**
    * this function releases mData and does not change the value of
    * any of its member variables.  in other words, this function acts
    * like a destructor.
    */
   void NS_FASTCALL Finalize();
 
+public:
   /**
-   * this function prepares mData to be mutated.
+   * Prepares mData to be mutated such that the capacity of the string
+   * (not counting the zero-terminator) is at least aCapacity.
+   * Returns the actual capacity, which may be larger than what was
+   * requested or UINT32_MAX on allocation failure.
+   *
+   * mLength is ignored by this method. If the buffer is reallocated,
+   * aPrefixToPreserve specifies how many code units to copy over to
+   * the new buffer. The old buffer is freed if applicable.
    *
-   * @param aCapacity    specifies the required capacity of mData
-   * @param aOldData     returns null or the old value of mData
-   * @param aOldFlags    returns 0 or the old value of mDataFlags
+   * Unless the return value is UINT32_MAX to signal failure or 0 to
+   * signal that the string has been set to the special empty state,
+   * this method leaves the string in an invalid state! The caller is
+   * responsible for calling FinishBulkWrite() (or in Rust calling
+   * nsA[C]StringBulkWriteHandle::finish()), which puts the string
+   * into a valid state by setting mLength and zero-terminating.
+   * This method sets the flag to claim that the string is
+   * zero-terminated before it actually is.
+   *
+   * Once this method has been called and before FinishBulkWrite()
+   * has been called, only calls to Data() or this method again
+   * are valid. Do not call any other methods between calling this
+   * method and FinishBulkWrite().
    *
-   * if mData is already mutable and of sufficient capacity, then this
-   * function will return immediately.  otherwise, it will either resize
-   * mData or allocate a new shared buffer.  if it needs to allocate a
-   * new buffer, then it will return the old buffer and the corresponding
-   * flags.  this allows the caller to decide when to free the old data.
+   * @param aCapacity The requested capacity. The return value
+   *                  will be greater than or equal to this value.
+   * @param aPrefixToPreserve The number of code units at the start
+   *                          of the old buffer to copy into the
+   *                          new buffer.
+   * @param aAllowShrinking If true, an allocation may be performed
+   *                        if the requested capacity is smaller
+   *                        than the current capacity.
+   * @param aSuffixLength The length, in code units, of a suffix
+   *                      to move.
+   * @param aOldSuffixStart The old start index of the suffix to
+   *                        move.
+   * @param aNewSuffixStart The new start index of the suffix to
+   *                        move.
    *
-   * this function returns false if is unable to allocate sufficient
-   * memory.
-   *
-   * XXX we should expose a way for subclasses to free old_data.
    */
-  bool NS_FASTCALL MutatePrep(size_type aCapacity,
-                              char_type** aOldData, DataFlags* aOldDataFlags);
+  uint32_t NS_FASTCALL StartBulkWrite(size_type aCapacity,
+                                      size_type aPrefixToPreserve = 0,
+                                      bool aAllowShrinking = true,
+                                      size_type aSuffixLength = 0,
+                                      size_type aOldSuffixStart = 0,
+                                      size_type aNewSuffixStart = 0);
 
   /**
+   * Restores the string to a valid state after a call to StartBulkWrite()
+   * that returned a non-UINT32_MAX value. The argument to this method
+   * must be less than or equal to the non-UINT32_MAX value returned by
+   * the most recent StartBulkWrite() call.
+   */
+  void NS_FASTCALL FinishBulkWrite(size_type aLength);
+
+protected:
+  /**
    * this function prepares a section of mData to be modified.  if
    * necessary, this function will reallocate mData and possibly move
    * existing data to open up the specified section.
    *
    * @param aCutStart    specifies the starting offset of the section
    * @param aCutLength   specifies the length of the section to be replaced
    * @param aNewLength   specifies the length of the new section
    *
--- a/xpcom/string/nsUTF8Utils.h
+++ b/xpcom/string/nsUTF8Utils.h
@@ -6,20 +6,18 @@
 #ifndef nsUTF8Utils_h_
 #define nsUTF8Utils_h_
 
 // This file may be used in two ways: if MOZILLA_INTERNAL_API is defined, this
 // file will provide signatures for the Mozilla abstract string types. It will
 // use XPCOM assertion/debugging macros, etc.
 
 #include "nscore.h"
-#include "mozilla/arm.h"
 #include "mozilla/Assertions.h"
 #include "mozilla/EndianUtils.h"
-#include "mozilla/SSE.h"
 #include "mozilla/TypeTraits.h"
 
 #include "nsCharTraits.h"
 
 #ifdef MOZILLA_INTERNAL_API
 #define UTF8UTILS_WARNING(msg) NS_WARNING(msg)
 #else
 #define UTF8UTILS_WARNING(msg)
@@ -66,721 +64,196 @@ public:
       return 2;
     }
     if (is3byte(aChar)) {
       return 3;
     }
     if (is4byte(aChar)) {
       return 4;
     }
-    if (is5byte(aChar)) {
-      return 5;
-    }
-    if (is6byte(aChar)) {
-      return 6;
-    }
     MOZ_ASSERT_UNREACHABLE("should not be used for in-sequence characters");
     return 1;
   }
 };
 
 /**
- * Extract the next UCS-4 character from the buffer and return it.  The
+ * Extract the next Unicode scalar value from the buffer and return it. The
  * pointer passed in is advanced to the start of the next character in the
- * buffer.  If non-null, the parameters err and overlong are filled in to
- * indicate that the character was represented by an overlong sequence, or
- * that an error occurred.
+ * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced
+ * over the maximal valid prefix and *aErr is set to true (if aErr is not
+ * null).
+ *
+ * Note: This method never sets *aErr to false to allow error accumulation
+ * across multiple calls.
+ *
+ * Precondition: *aBuffer < aEnd
  */
-
 class UTF8CharEnumerator
 {
 public:
-  static uint32_t NextChar(const char** aBuffer, const char* aEnd, bool* aErr)
+  static inline char32_t NextChar(const char** aBuffer,
+                                  const char* aEnd,
+                                  bool* aErr = nullptr)
   {
-    NS_ASSERTION(aBuffer && *aBuffer, "null buffer!");
-
-    const char* p = *aBuffer;
-    *aErr = false;
-
-    if (p >= aEnd) {
-      *aErr = true;
-
-      return 0;
-    }
-
-    char c = *p++;
-
-    if (UTF8traits::isASCII(c)) {
-      *aBuffer = p;
-      return c;
-    }
-
-    uint32_t ucs4;
-    uint32_t minUcs4;
-    int32_t state = 0;
+    MOZ_ASSERT(aBuffer, "null buffer pointer pointer");
+    MOZ_ASSERT(aEnd, "null end pointer");
 
-    if (!CalcState(c, ucs4, minUcs4, state)) {
-      NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
-      *aErr = true;
-
-      return 0;
-    }
+    const unsigned char* p = reinterpret_cast<const unsigned char*>(*aBuffer);
+    const unsigned char* end = reinterpret_cast<const unsigned char*>(aEnd);
 
-    while (state--) {
-      if (p == aEnd) {
-        *aErr = true;
-
-        return 0;
-      }
+    MOZ_ASSERT(p, "null buffer");
+    MOZ_ASSERT(p < end, "Bogus range");
 
-      c = *p++;
-
-      if (!AddByte(c, state, ucs4)) {
-        *aErr = true;
+    unsigned char first = *p++;
 
-        return 0;
-      }
-    }
-
-    if (ucs4 < minUcs4) {
-      // Overlong sequence
-      ucs4 = UCS2_REPLACEMENT_CHAR;
-    } else if (ucs4 >= 0xD800 &&
-               (ucs4 <= 0xDFFF || ucs4 >= UCS_END)) {
-      // Surrogates and code points outside the Unicode range.
-      ucs4 = UCS2_REPLACEMENT_CHAR;
+    if (MOZ_LIKELY(first < 0x80U)) {
+      *aBuffer = reinterpret_cast<const char*>(p);
+      return first;
     }
 
-    *aBuffer = p;
-    return ucs4;
-  }
-
-private:
-  static bool CalcState(char aChar, uint32_t& aUcs4, uint32_t& aMinUcs4,
-                        int32_t& aState)
-  {
-    if (UTF8traits::is2byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 6) & 0x000007C0L;
-      aState = 1;
-      aMinUcs4 = 0x00000080;
-    } else if (UTF8traits::is3byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 12) & 0x0000F000L;
-      aState = 2;
-      aMinUcs4 = 0x00000800;
-    } else if (UTF8traits::is4byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 18) & 0x001F0000L;
-      aState = 3;
-      aMinUcs4 = 0x00010000;
-    } else if (UTF8traits::is5byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 24) & 0x03000000L;
-      aState = 4;
-      aMinUcs4 = 0x00200000;
-    } else if (UTF8traits::is6byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 30) & 0x40000000L;
-      aState = 5;
-      aMinUcs4 = 0x04000000;
-    } else {
-      return false;
+    // Unsigned underflow is defined behavior
+    if (MOZ_UNLIKELY((p == end) || ((first - 0xC2U) >= (0xF5U - 0xC2U)))) {
+      *aBuffer = reinterpret_cast<const char*>(p);
+      if (aErr) {
+        *aErr = true;
+      }
+      return 0xFFFDU;
     }
 
-    return true;
-  }
-
-  static bool AddByte(char aChar, int32_t aState, uint32_t& aUcs4)
-  {
-    if (UTF8traits::isInSeq(aChar)) {
-      int32_t shift = aState * 6;
-      aUcs4 |= (uint32_t(aChar) & 0x3F) << shift;
-      return true;
-    }
-
-    return false;
-  }
-};
-
+    unsigned char second = *p;
 
-/**
- * Extract the next UCS-4 character from the buffer and return it.  The
- * pointer passed in is advanced to the start of the next character in the
- * buffer.  If non-null, the err parameter is filled in if an error occurs.
- *
- * If an error occurs that causes UCS2_REPLACEMENT_CHAR to be returned, then
- * the buffer will be updated to move only a single UCS-2 character.
- *
- * Any other error returns 0 and does not move the buffer position.
- */
-
-
-class UTF16CharEnumerator
-{
-public:
-  static uint32_t NextChar(const char16_t** aBuffer, const char16_t* aEnd,
-                           bool* aErr = nullptr)
-  {
-    NS_ASSERTION(aBuffer && *aBuffer, "null buffer!");
-
-    const char16_t* p = *aBuffer;
-
-    if (p >= aEnd) {
-      NS_ERROR("No input to work with");
+    if (first < 0xE0U) {
+      // Two-byte
+      if (MOZ_LIKELY((second & 0xC0U) == 0x80U)) {
+        *aBuffer = reinterpret_cast<const char*>(++p);
+        return ((uint32_t(first) & 0x1FU) << 6) | (uint32_t(second) & 0x3FU);
+      }
+      *aBuffer = reinterpret_cast<const char*>(p);
       if (aErr) {
         *aErr = true;
       }
-
-      return 0;
+      return 0xFFFDU;
     }
 
-    char16_t c = *p++;
-
-    if (!IS_SURROGATE(c)) { // U+0000 - U+D7FF,U+E000 - U+FFFF
-      if (aErr) {
-        *aErr = false;
-      }
-      *aBuffer = p;
-      return c;
-    } else if (NS_IS_HIGH_SURROGATE(c)) { // U+D800 - U+DBFF
-      if (p == aEnd) {
-        // Found a high surrogate at the end of the buffer. Flag this
-        // as an error and return the Unicode replacement
-        // character 0xFFFD.
-
-        UTF8UTILS_WARNING("Unexpected end of buffer after high surrogate");
-
-        if (aErr) {
-          *aErr = true;
-        }
-        *aBuffer = p;
-        return 0xFFFD;
+    if (MOZ_LIKELY(first < 0xF0U)) {
+      // Three-byte
+      unsigned char lower = 0x80U;
+      unsigned char upper = 0xBFU;
+      if (first == 0xE0U) {
+        lower = 0xA0U;
+      } else if (first == 0xEDU) {
+        upper = 0x9FU;
       }
-
-      // D800- DBFF - High Surrogate
-      char16_t h = c;
-
-      c = *p++;
-
-      if (NS_IS_LOW_SURROGATE(c)) {
-        // DC00- DFFF - Low Surrogate
-        // N = (H - D800) *400 + 10000 + (L - DC00)
-        uint32_t ucs4 = SURROGATE_TO_UCS4(h, c);
-        if (aErr) {
-          *aErr = false;
+      if (MOZ_LIKELY(second >= lower && second <= upper)) {
+        if (MOZ_LIKELY(++p != end)) { // step past second; third present?
+          unsigned char third = *p; // in bounds: p < end checked above
+          if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) {
+            *aBuffer = reinterpret_cast<const char*>(++p);
+            return ((uint32_t(first) & 0xFU) << 12) |
+                   ((uint32_t(second) & 0x3FU) << 6) |
+                   (uint32_t(third) & 0x3FU);
+          }
+        }
-        *aBuffer = p;
-        return ucs4;
-      } else {
-        // Found a high surrogate followed by something other than
-        // a low surrogate. Flag this as an error and return the
-        // Unicode replacement character 0xFFFD.  Note that the
-        // pointer to the next character points to the second 16-bit
-        // value, not beyond it, as per Unicode 5.0.0 Chapter 3 C10,
-        // only the first code unit of an illegal sequence must be
-        // treated as an illegally terminated code unit sequence
-        // (also Chapter 3 D91, "isolated [not paired and ill-formed]
-        // UTF-16 code units in the range D800..DFFF are ill-formed").
-        UTF8UTILS_WARNING("got a High Surrogate but no low surrogate");
-
-        if (aErr) {
-          *aErr = true;
-        }
-        *aBuffer = p - 1;
-        return 0xFFFD;
       }
-    } else { // U+DC00 - U+DFFF
-      // DC00- DFFF - Low Surrogate
-
-      // Found a low surrogate w/o a preceding high surrogate. Flag
-      // this as an error and return the Unicode replacement
-      // character 0xFFFD.
-
-      UTF8UTILS_WARNING("got a low Surrogate but no high surrogate");
+      *aBuffer = reinterpret_cast<const char*>(p);
       if (aErr) {
         *aErr = true;
       }
-      *aBuffer = p;
-      return 0xFFFD;
+      return 0xFFFDU;
     }
 
-    MOZ_ASSERT_UNREACHABLE("Impossible UCS-2 character value.");
-  }
-};
-
-
-/**
- * A character sink (see |copy_string| in nsAlgorithm.h) for converting
- * UTF-8 to UTF-16
- */
-class ConvertUTF8toUTF16
-{
-public:
-  typedef char value_type;
-  typedef char16_t buffer_type;
-
-  explicit ConvertUTF8toUTF16(buffer_type* aBuffer)
-    : mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(false)
-  {
-  }
-
-  size_t Length() const
-  {
-    return mBuffer - mStart;
-  }
-
-  bool ErrorEncountered() const
-  {
-    return mErrorEncountered;
-  }
-
-  void write(const value_type* aStart, uint32_t aN)
-  {
-    if (mErrorEncountered) {
-      return;
+    // Four-byte
+    unsigned char lower = 0x80U;
+    unsigned char upper = 0xBFU;
+    if (first == 0xF0U) {
+      lower = 0x90U;
+    } else if (first == 0xF4U) {
+      upper = 0x8FU;
     }
-
-    // algorithm assumes utf8 units won't
-    // be spread across fragments
-    const value_type* p = aStart;
-    const value_type* end = aStart + aN;
-    buffer_type* out = mBuffer;
-    for (; p != end /* && *p */;) {
-      bool err;
-      uint32_t ucs4 = UTF8CharEnumerator::NextChar(&p, end, &err);
-
-      if (err) {
-        mErrorEncountered = true;
-        mBuffer = out;
-        return;
-      }
-
-      if (ucs4 >= PLANE1_BASE) {
-        *out++ = (buffer_type)H_SURROGATE(ucs4);
-        *out++ = (buffer_type)L_SURROGATE(ucs4);
-      } else {
-        *out++ = ucs4;
+    if (MOZ_LIKELY(second >= lower && second <= upper)) {
+      if (MOZ_LIKELY(++p != end)) { // step past second; third present?
+        unsigned char third = *p; // in bounds: p < end checked above
+        if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) {
+          if (MOZ_LIKELY(++p != end)) { // step past third; fourth present?
+            unsigned char fourth = *p; // in bounds: p < end checked above
+            if (MOZ_LIKELY((fourth & 0xC0U) == 0x80U)) {
+              *aBuffer = reinterpret_cast<const char*>(++p);
+              return ((uint32_t(first) & 0x7U) << 18) |
+                     ((uint32_t(second) & 0x3FU) << 12) |
+                     ((uint32_t(third) & 0x3FU) << 6) |
+                     (uint32_t(fourth) & 0x3FU);
+            }
+          }
+        }
       }
     }
-    mBuffer = out;
+    *aBuffer = reinterpret_cast<const char*>(p);
+    if (aErr) {
+      *aErr = true;
+    }
+    return 0xFFFDU;
   }
-
-  void write_terminator()
-  {
-    *mBuffer = buffer_type(0);
-  }
-
-private:
-  buffer_type* const mStart;
-  buffer_type* mBuffer;
-  bool mErrorEncountered;
 };
 
 /**
- * A character sink (see |copy_string| in nsAlgorithm.h) for computing
- * the length of the UTF-16 string equivalent to a UTF-8 string.
+ * Extract the next Unicode scalar value from the buffer and return it. The
+ * pointer passed in is advanced to the start of the next character in the
+ * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced over
+ * the unpaired surrogate and *aErr is set to true (if aErr is not null).
+ *
+ * Note: This method never sets *aErr to false to allow error accumulation
+ * across multiple calls.
+ *
+ * Precondition: *aBuffer < aEnd
  */
-class CalculateUTF8Length
+class UTF16CharEnumerator
 {
 public:
-  typedef char value_type;
-
-  CalculateUTF8Length()
-    : mLength(0), mErrorEncountered(false)
-  {
-  }
-
-  size_t Length() const
-  {
-    return mLength;
-  }
-
-  void write(const value_type* aStart, uint32_t aN)
+  static inline char32_t NextChar(const char16_t** aBuffer,
+                                  const char16_t* aEnd,
+                                  bool* aErr = nullptr)
   {
-    // ignore any further requests
-    if (mErrorEncountered) {
-      return;
-    }
-
-    // algorithm assumes utf8 units won't
-    // be spread across fragments
-    const value_type* p = aStart;
-    const value_type* end = aStart + aN;
-    for (; p < end /* && *p */; ++mLength) {
-      if (UTF8traits::isASCII(*p)) {
-        p += 1;
-      } else if (UTF8traits::is2byte(*p)) {
-        p += 2;
-      } else if (UTF8traits::is3byte(*p)) {
-        p += 3;
-      } else if (UTF8traits::is4byte(*p)) {
-        // Because a UTF-8 sequence of 4 bytes represents a codepoint
-        // greater than 0xFFFF, it will become a surrogate pair in the
-        // UTF-16 string, so add 1 more to mLength.
-        // This doesn't happen with is5byte and is6byte because they
-        // are illegal UTF-8 sequences (greater than 0x10FFFF) so get
-        // converted to a single replacement character.
-
-        // However, there is one case when a 4 byte UTF-8 sequence will
-        // only generate 2 UTF-16 bytes. If we have a properly encoded
-        // sequence, but with an invalid value (too small or too big),
-        // that will result in a replacement character being written
-        // This replacement character is encoded as just 1 single
-        // UTF-16 character, which is 2 bytes.
+    MOZ_ASSERT(aBuffer, "null buffer pointer pointer");
+    MOZ_ASSERT(aEnd, "null end pointer");
 
-        // The below code therefore only adds 1 to mLength if the UTF8
-        // data will produce a decoded character which is greater than
-        // or equal to 0x010000 and less than 0x0110000.
-
-        // A 4byte UTF8 character is encoded as
-        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-        // Bit 1-3 on the first byte, and bit 5-6 on the second byte,
-        // map to bit 17-21 in the final result. If these bits are
-        // between 0x01 and 0x11, that means that the final result is
-        // between 0x010000 and 0x110000. The below code reads these
-        // bits out and assigns them to c, but shifted up 4 bits to
-        // avoid having to shift twice.
-
-        // It doesn't matter what to do in the case where p + 4 > end
-        // since no UTF16 characters will be written in that case by
-        // ConvertUTF8toUTF16. Likewise it doesn't matter what we do if
-        // any of the surrogate bits are wrong since no UTF16
-        // characters will be written in that case either.
+    const char16_t* p = *aBuffer;
 
-        if (p + 4 <= end) {
-          uint32_t c = ((uint32_t)(p[0] & 0x07)) << 6 |
-                       ((uint32_t)(p[1] & 0x30));
-          if (c >= 0x010 && c < 0x110) {
-            ++mLength;
-          }
-        }
+    MOZ_ASSERT(p, "null buffer");
+    MOZ_ASSERT(p < aEnd, "Bogus range");
 
-        p += 4;
-      } else if (UTF8traits::is5byte(*p)) {
-        p += 5;
-      } else if (UTF8traits::is6byte(*p)) {
-        p += 6;
-      } else { // error
-        ++mLength; // to account for the decrement below
-        break;
-      }
-    }
-    if (p != end) {
-      NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
-      --mLength; // The last multi-byte char wasn't complete, discard it.
-      mErrorEncountered = true;
-    }
-  }
-
-private:
-  size_t mLength;
-  bool mErrorEncountered;
-};
+    char16_t c = *p++;
 
-/**
- * A character sink (see |copy_string| in nsAlgorithm.h) for
- * converting UTF-16 to UTF-8. Treats invalid UTF-16 data as 0xFFFD
- * (0xEFBFBD in UTF-8).
- */
-class ConvertUTF16toUTF8
-{
-public:
-  typedef char16_t value_type;
-  typedef char buffer_type;
-
-  // The error handling here is more lenient than that in
-  // |ConvertUTF8toUTF16|, but it's that way for backwards
-  // compatibility.
-
-  explicit ConvertUTF16toUTF8(buffer_type* aBuffer)
-    : mStart(aBuffer), mBuffer(aBuffer)
-  {
-  }
-
-  size_t Size() const
-  {
-    return mBuffer - mStart;
-  }
-
-  void write(const value_type* aStart, uint32_t aN)
-  {
-    buffer_type* out = mBuffer; // gcc isn't smart enough to do this!
-
-    for (const value_type* p = aStart, *end = aStart + aN; p < end; ++p) {
-      value_type c = *p;
-      if (!(c & 0xFF80)) { // U+0000 - U+007F
-        *out++ = (char)c;
-      } else if (!(c & 0xF800)) { // U+0100 - U+07FF
-        *out++ = 0xC0 | (char)(c >> 6);
-        *out++ = 0x80 | (char)(0x003F & c);
-      } else if (!IS_SURROGATE(c)) { // U+0800 - U+D7FF,U+E000 - U+FFFF
-        *out++ = 0xE0 | (char)(c >> 12);
-        *out++ = 0x80 | (char)(0x003F & (c >> 6));
-        *out++ = 0x80 | (char)(0x003F & c);
-      } else if (NS_IS_HIGH_SURROGATE(c)) { // U+D800 - U+DBFF
-        // D800- DBFF - High Surrogate
-        value_type h = c;
-
-        ++p;
-        if (p == end) {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          *out++ = '\xEF';
-          *out++ = '\xBF';
-          *out++ = '\xBD';
-
-          UTF8UTILS_WARNING("String ending in half a surrogate pair!");
-
-          break;
+    // Let's use encoding_rs-style code golf here.
+    // Unsigned underflow is defined behavior
+    char16_t cMinusSurrogateStart = c - 0xD800U;
+    if (MOZ_LIKELY(cMinusSurrogateStart > (0xDFFFU - 0xD800U))) {
+      *aBuffer = p;
+      return c;
+    }
+    if (MOZ_LIKELY(cMinusSurrogateStart <= (0xDBFFU - 0xD800U))) {
+      // High surrogate
+      if (MOZ_LIKELY(p != aEnd)) {
+        char16_t second = *p;
+        // Unsigned underflow is defined behavior
+        if (MOZ_LIKELY((second - 0xDC00U) <= (0xDFFFU - 0xDC00U))) {
+          *aBuffer = ++p;
+          return (uint32_t(c) << 10) + uint32_t(second) -
+                 (((0xD800U << 10) - 0x10000U) + 0xDC00U);
         }
-        c = *p;
-
-        if (NS_IS_LOW_SURROGATE(c)) {
-          // DC00- DFFF - Low Surrogate
-          // N = (H - D800) *400 + 10000 + ( L - DC00 )
-          uint32_t ucs4 = SURROGATE_TO_UCS4(h, c);
-
-          // 0001 0000-001F FFFF
-          *out++ = 0xF0 | (char)(ucs4 >> 18);
-          *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
-          *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
-          *out++ = 0x80 | (char)(0x003F & ucs4);
-        } else {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          *out++ = '\xEF';
-          *out++ = '\xBF';
-          *out++ = '\xBD';
-
-          // The pointer to the next character points to the second
-          // 16-bit value, not beyond it, as per Unicode 5.0.0
-          // Chapter 3 C10, only the first code unit of an illegal
-          // sequence must be treated as an illegally terminated
-          // code unit sequence (also Chapter 3 D91, "isolated [not
-          // paired and ill-formed] UTF-16 code units in the range
-          // D800..DFFF are ill-formed").
-          p--;
-
-          UTF8UTILS_WARNING("got a High Surrogate but no low surrogate");
-        }
-      } else { // U+DC00 - U+DFFF
-        // Treat broken characters as the Unicode replacement
-        // character 0xFFFD (0xEFBFBD in UTF-8)
-        *out++ = '\xEF';
-        *out++ = '\xBF';
-        *out++ = '\xBD';
-
-        // DC00- DFFF - Low Surrogate
-        UTF8UTILS_WARNING("got a low Surrogate but no high surrogate");
       }
     }
-
-    mBuffer = out;
-  }
-
-  void write_terminator()
-  {
-    *mBuffer = buffer_type(0);
-  }
-
-private:
-  buffer_type* const mStart;
-  buffer_type* mBuffer;
-};
-
-/**
- * A character sink (see |copy_string| in nsAlgorithm.h) for computing
- * the number of bytes a UTF-16 would occupy in UTF-8. Treats invalid
- * UTF-16 data as 0xFFFD (0xEFBFBD in UTF-8).
- */
-class CalculateUTF8Size
-{
-public:
-  typedef char16_t value_type;
-
-  CalculateUTF8Size()
-    : mSize(0)
-  {
-  }
-
-  size_t Size() const
-  {
-    return mSize;
+    // Unpaired surrogate
+    *aBuffer = p;
+    if (aErr) {
+      *aErr = true;
+    }
+    return 0xFFFDU;
   }
-
-  void write(const value_type* aStart, uint32_t aN)
-  {
-    // Assume UCS2 surrogate pairs won't be spread across fragments.
-    for (const value_type* p = aStart, *end = aStart + aN; p < end; ++p) {
-      value_type c = *p;
-      if (!(c & 0xFF80)) { // U+0000 - U+007F
-        mSize += 1;
-      } else if (!(c & 0xF800)) { // U+0100 - U+07FF
-        mSize += 2;
-      } else if (0xD800 != (0xF800 & c)) { // U+0800 - U+D7FF,U+E000 - U+FFFF
-        mSize += 3;
-      } else if (0xD800 == (0xFC00 & c)) { // U+D800 - U+DBFF
-        ++p;
-        if (p == end) {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          mSize += 3;
-
-          UTF8UTILS_WARNING("String ending in half a surrogate pair!");
-
-          break;
-        }
-        c = *p;
-
-        if (0xDC00 == (0xFC00 & c)) {
-          mSize += 4;
-        } else {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          mSize += 3;
-
-          // The next code unit is the second 16-bit value, not
-          // the one beyond it, as per Unicode 5.0.0 Chapter 3 C10,
-          // only the first code unit of an illegal sequence must
-          // be treated as an illegally terminated code unit
-          // sequence (also Chapter 3 D91, "isolated [not paired and
-          // ill-formed] UTF-16 code units in the range D800..DFFF
-          // are ill-formed").
-          p--;
-
-          UTF8UTILS_WARNING("got a high Surrogate but no low surrogate");
-        }
-      } else { // U+DC00 - U+DFFF
-        // Treat broken characters as the Unicode replacement
-        // character 0xFFFD (0xEFBFBD in UTF-8)
-        mSize += 3;
-
-        UTF8UTILS_WARNING("got a low Surrogate but no high surrogate");
-      }
-    }
-  }
-
-private:
-  size_t mSize;
 };
 
-#ifdef MOZILLA_INTERNAL_API
-/**
- * A character sink that performs a |reinterpret_cast|-style conversion
- * from char to char16_t.
- */
-class LossyConvertEncoding8to16
-{
-public:
-  typedef char value_type;
-  typedef char input_type;
-  typedef char16_t output_type;
-
-public:
-  explicit LossyConvertEncoding8to16(char16_t* aDestination) :
-    mDestination(aDestination)
-  {
-  }
-
-  void
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-    if (mozilla::supports_sse2()) {
-      write_sse2(aSource, aSourceLength);
-      return;
-    }
-#endif
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-    if (mozilla::supports_neon()) {
-      write_neon(aSource, aSourceLength);
-      return;
-    }
-#endif
-    const char* done_writing = aSource + aSourceLength;
-    while (aSource < done_writing) {
-      *mDestination++ = (char16_t)(unsigned char)(*aSource++);
-    }
-  }
-
-  void
-  write_sse2(const char* aSource, uint32_t aSourceLength);
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-  void
-  write_neon(const char* aSource, uint32_t aSourceLength);
-#endif
-
-  void
-  write_terminator()
-  {
-    *mDestination = (char16_t)(0);
-  }
-
-private:
-  char16_t* mDestination;
-};
-
-/**
- * A character sink that performs a |reinterpret_cast|-style conversion
- * from char16_t to char.
- */
-class LossyConvertEncoding16to8
-{
-public:
-  typedef char16_t value_type;
-  typedef char16_t input_type;
-  typedef char output_type;
-
-  explicit LossyConvertEncoding16to8(char* aDestination)
-    : mDestination(aDestination)
-  {
-  }
-
-  void
-  write(const char16_t* aSource, uint32_t aSourceLength)
-  {
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-    if (mozilla::supports_sse2()) {
-      write_sse2(aSource, aSourceLength);
-      return;
-    }
-#endif
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-    if (mozilla::supports_neon()) {
-      write_neon(aSource, aSourceLength);
-      return;
-    }
-#endif
-    const char16_t* done_writing = aSource + aSourceLength;
-    while (aSource < done_writing) {
-      *mDestination++ = (char)(*aSource++);
-    }
-  }
-
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-  void
-  write_sse2(const char16_t* aSource, uint32_t aSourceLength);
-#endif
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-  void
-  write_neon(const char16_t* aSource, uint32_t aSourceLength);
-#endif
-
-  void
-  write_terminator()
-  {
-    *mDestination = '\0';
-  }
-
-private:
-  char* mDestination;
-};
-#endif // MOZILLA_INTERNAL_API
-
-
 template<typename Char, typename UnsignedT>
 inline UnsignedT
 RewindToPriorUTF8Codepoint(const Char* utf8Chars, UnsignedT index)
 {
   static_assert(mozilla::IsSame<Char, char>::value ||
                 mozilla::IsSame<Char, unsigned char>::value ||
                 mozilla::IsSame<Char, signed char>::value,
                 "UTF-8 data must be in 8-bit units");
--- a/xpcom/tests/gtest/TestAtoms.cpp
+++ b/xpcom/tests/gtest/TestAtoms.cpp
@@ -77,39 +77,40 @@ TEST(Atoms, Invalid)
 
     {
       RefPtr<nsAtom> atom16 = NS_Atomize(Invalid16Strings[i].m16);
       EXPECT_TRUE(atom16->Equals(nsDependentString(Invalid16Strings[i].m16)));
     }
 
     EXPECT_EQ(count, NS_GetNumberOfAtoms());
   }
-
+#ifndef DEBUG
+// Don't run the remaining checks in debug builds, as invalid or malformed UTF-8 intentionally asserts there.
   for (unsigned int i = 0; i < ArrayLength(Invalid8Strings); ++i) {
     nsrefcnt count = NS_GetNumberOfAtoms();
 
     {
       RefPtr<nsAtom> atom8 = NS_Atomize(Invalid8Strings[i].m8);
       RefPtr<nsAtom> atom16 = NS_Atomize(Invalid8Strings[i].m16);
       EXPECT_EQ(atom16, atom8);
       EXPECT_TRUE(atom16->Equals(nsDependentString(Invalid8Strings[i].m16)));
     }
 
     EXPECT_EQ(count, NS_GetNumberOfAtoms());
   }
 
-// Don't run this test in debug builds as that intentionally asserts.
-#ifndef DEBUG
-  RefPtr<nsAtom> emptyAtom = NS_Atomize("");
-
   for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) {
     nsrefcnt count = NS_GetNumberOfAtoms();
 
-    RefPtr<nsAtom> atom8 = NS_Atomize(Malformed8Strings[i]);
-    EXPECT_EQ(atom8, emptyAtom);
+    {
+      RefPtr<nsAtom> atom8 = NS_Atomize(Malformed8Strings[i].m8);
+      RefPtr<nsAtom> atom16 = NS_Atomize(Malformed8Strings[i].m16);
+      EXPECT_EQ(atom8, atom16);
+    }
+
     EXPECT_EQ(count, NS_GetNumberOfAtoms());
   }
 #endif
 }
 
 #define FIRST_ATOM_STR "first static atom. Hello!"
 #define SECOND_ATOM_STR "second static atom. @World!"
 #define THIRD_ATOM_STR "third static atom?!"
--- a/xpcom/tests/gtest/TestStrings.cpp
+++ b/xpcom/tests/gtest/TestStrings.cpp
@@ -764,22 +764,20 @@ TEST_F(Strings, replace_substr)
 
   s.AssignLiteral("foofoofoo");
   s.ReplaceSubstring("of", "fo");
   EXPECT_STREQ(s.get(), "fofoofooo");
 }
 
 TEST_F(Strings, replace_substr_2)
 {
-  const char *oldName = nullptr;
   const char *newName = "user";
   nsString acctName; acctName.AssignLiteral("forums.foo.com");
   nsAutoString newAcctName, oldVal, newVal;
-  CopyASCIItoUTF16(oldName, oldVal);
-  CopyASCIItoUTF16(newName, newVal);
+  CopyASCIItoUTF16(mozilla::MakeStringSpan(newName), newVal);
   newAcctName.Assign(acctName);
 
   // here, oldVal is empty.  we are testing that this function
   // does not hang.  see bug 235355.
   newAcctName.ReplaceSubstring(oldVal, newVal);
 
   // we expect that newAcctName will be unchanged.
   EXPECT_TRUE(newAcctName.Equals(acctName));
@@ -1282,16 +1280,45 @@ TEST(String, strip_chars)
   test_strip_chars_helper(u"foo",
                           u"foo",
                           NS_LITERAL_STRING(""));
   test_strip_chars_helper(u" foo",
                           u" ",
                           NS_LITERAL_STRING("foo"));
 }
 
+TEST_F(Strings, append_with_capacity) // Checks that Append() keeps using the buffer obtained via SetCapacity(100).
+{
+  nsAutoString s;
+  const char16_t* origPtr = s.BeginReading(); // buffer pointer before SetCapacity
+  s.SetCapacity(100);
+  const char16_t* ptr = s.BeginReading(); // buffer pointer after SetCapacity
+  EXPECT_NE(origPtr, ptr); // the test expects SetCapacity(100) to have switched to a different buffer
+  for (int i = 0; i < 100; i++) { // 100 single-unit appends, i.e. exactly the requested capacity
+    s.Append(u'a');
+    EXPECT_EQ(s.BeginReading(), ptr); // the buffer pointer must be unchanged after every append
+  }
+}
+
+TEST_F(Strings, append_string_with_capacity) // Same check as append_with_capacity, but appending a two-unit string.
+{
+  nsAutoString aa; // the string that gets appended repeatedly; built below as "aa"
+  aa.Append(u'a');
+  aa.Append(u'a');
+  nsAutoString s;
+  const char16_t* origPtr = s.BeginReading(); // buffer pointer before SetCapacity
+  s.SetCapacity(200);
+  const char16_t* ptr = s.BeginReading(); // buffer pointer after SetCapacity
+  EXPECT_NE(origPtr, ptr); // the test expects SetCapacity(200) to have switched to a different buffer
+  for (int i = 0; i < 100; i++) { // 100 appends of 2 units each = 200 units, exactly the requested capacity
+    s.Append(aa);
+    EXPECT_EQ(s.BeginReading(), ptr); // the buffer pointer must be unchanged after every append
+  }
+}
+
 TEST_F(Strings, huge_capacity)
 {
   nsString a, b, c, d, e, f, g, h, i, j, k, l, m, n;
   nsCString n1;
 
   // Ignore the result if the address space is less than 64-bit because
   // some of the allocations above will exhaust the address space.
   if (sizeof(void*) >= 8) {
--- a/xpcom/tests/gtest/TestTextFormatter.cpp
+++ b/xpcom/tests/gtest/TestTextFormatter.cpp
@@ -12,17 +12,16 @@ TEST(TextFormatter, Tests)
   nsAutoString fmt(NS_LITERAL_STRING("%3$s %4$S %1$d %2$d %2$d %3$s"));
   char utf8[] = "Hello";
   char16_t ucs2[]={'W', 'o', 'r', 'l', 'd', 0x4e00, 0xAc00, 0xFF45, 0x0103, 0x00};
   int d=3;
 
   char16_t buf[256];
   nsTextFormatter::snprintf(buf, 256, fmt.get(), d, 333, utf8, ucs2);
   nsAutoString out(buf);
-  ASSERT_STREQ("Hello World", NS_LossyConvertUTF16toASCII(out).get());
 
   const char16_t *uout = out.get();
   const char16_t expected[] = {0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20,
                                 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x4E00,
                                 0xAC00, 0xFF45, 0x0103, 0x20, 0x33,
                                 0x20, 0x33, 0x33, 0x33, 0x20, 0x33,
                                 0x33, 0x33, 0x20, 0x48, 0x65, 0x6C,
                                 0x6C, 0x6F};
--- a/xpcom/tests/gtest/TestUTF.cpp
+++ b/xpcom/tests/gtest/TestUTF.cpp
@@ -9,16 +9,17 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "nsString.h"
 #include "nsStringBuffer.h"
 #include "nsReadableUtils.h"
 #include "UTFStrings.h"
 #include "nsUnicharUtils.h"
 #include "mozilla/HashFunctions.h"
+#include "nsUTF8Utils.h"
 
 #include "gtest/gtest.h"
 
 using namespace mozilla;
 
 namespace TestUTF {
 
 TEST(UTF, Valid)
@@ -72,59 +73,53 @@ TEST(UTF, Invalid8)
     EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16));
 
     EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0);
   }
 }
 
 TEST(UTF, Malformed8)
 {
-// Don't run this test in debug builds as that intentionally asserts.
-#ifndef DEBUG
   for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) {
-    nsDependentCString str8(Malformed8Strings[i]);
+    nsDependentString str16(Malformed8Strings[i].m16); // m16: the UTF-16 text the malformed input is expected to convert to
+    nsDependentCString str8(Malformed8Strings[i].m8); // m8: the malformed UTF-8 input
 
-    EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).IsEmpty());
+    EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).Equals(str16)); // conversion must now yield m16 rather than an empty string
 
-    nsString tmp16(NS_LITERAL_STRING("string"));
+    nsString tmp16(NS_LITERAL_STRING("string ")); // non-empty prefix, so the append path is exercised
     AppendUTF8toUTF16(str8, tmp16);
-    EXPECT_TRUE(tmp16.EqualsLiteral("string"));
+    EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16)); // appending must add m16 after the prefix
 
-    EXPECT_NE(CompareUTF8toUTF16(str8, EmptyString()), 0);
+    EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0); // the comparison must treat m8 and m16 as equal
   }
-#endif
 }
 
 TEST(UTF, Hash16)
 {
   for (unsigned int i = 0; i < ArrayLength(ValidStrings); ++i) {
     nsDependentCString str8(ValidStrings[i].m8);
     bool err;
     EXPECT_EQ(HashString(ValidStrings[i].m16),
               HashUTF8AsUTF16(str8.get(), str8.Length(), &err));
     EXPECT_FALSE(err);
   }
 
   for (unsigned int i = 0; i < ArrayLength(Invalid8Strings); ++i) {
     nsDependentCString str8(Invalid8Strings[i].m8);
     bool err;
-    EXPECT_EQ(HashString(Invalid8Strings[i].m16),
-              HashUTF8AsUTF16(str8.get(), str8.Length(), &err));
-    EXPECT_FALSE(err);
+    EXPECT_EQ(HashUTF8AsUTF16(str8.get(), str8.Length(), &err), 0u); // Invalid8Strings inputs are now expected to hash to 0
+    EXPECT_TRUE(err); // and to set the error flag, same as the Malformed8Strings loop below
   }
 
-// Don't run this test in debug builds as that intentionally asserts.
-#ifndef DEBUG
   for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) {
-    nsDependentCString str8(Malformed8Strings[i]);
+    nsDependentCString str8(Malformed8Strings[i].m8); // m8: the malformed UTF-8 input of the pair
     bool err;
     EXPECT_EQ(HashUTF8AsUTF16(str8.get(), str8.Length(), &err), 0u);
     EXPECT_TRUE(err);
   }
-#endif
 }
 
 /**
  * This tests the handling of a non-ascii character at various locations in a
  * UTF-16 string that is being converted to UTF-8.
  */
 void NonASCII16_helper(const size_t aStrSize)
 {
@@ -173,19 +168,81 @@ void NonASCII16_helper(const size_t aStr
 
     // And finish with the trailing ASCII chars.
     expected.Append(asciiCString.BeginReading() + i + 1, kTestSize - i - 1);
 
     EXPECT_STREQ(dest.BeginReading(), expected.BeginReading());
   }
 }
 
-TEST(UTF, NonASCII16)
+TEST(UTF, UTF8CharEnumerator) // Walks a mix of valid and invalid UTF-8 with NextChar; each comment names the bytes consumed.
 {
-  // Test with various string sizes to catch any special casing.
-  NonASCII16_helper(1);
-  NonASCII16_helper(8);
-  NonASCII16_helper(16);
-  NonASCII16_helper(32);
-  NonASCII16_helper(512);
+  const char* p = "\x61\xC0\xC2\xC2\x80\xE0\x80\x80\xE0\xA0\x80\xE1\x80\x80\xED\xBF\xBF\xED\x9F\xBF\xEE\x80\x80\xEE\x80\xFF\xF0\x90\x80\x80\xF0\x80\x80\x80\xF1\x80\x80\x80\xF4\x8F\xBF\xF4\x8F\xBF\xBF\xF4\xBF\xBF\xBF";
+  const char* end = p + 49; // the literal above holds 49 bytes, not counting the terminating NUL
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0061U); // 61: ASCII 'a'
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // C0: invalid lead byte
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // C2: lead byte followed by another lead byte (C2)
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0080U); // C2 80: U+0080
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // E0 80 80 is an overlong form: U+FFFD for E0
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the first 80
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the second 80
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0800U); // E0 A0 80: U+0800
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x1000U); // E1 80 80: U+1000
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ED BF BF would encode a surrogate: U+FFFD for ED
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the first BF
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the second BF
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xD7FFU); // ED 9F BF: U+D7FF
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xE000U); // EE 80 80: U+E000
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // EE 80: three-byte sequence cut short by FF, one U+FFFD for both bytes
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // FF: invalid byte
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x10000U); // F0 90 80 80: U+10000
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // F0 80 80 80 is an overlong form: U+FFFD for F0
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the first 80
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the second 80
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the third 80
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x40000U); // F1 80 80 80: U+40000
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // F4 8F BF: four-byte sequence cut short by the next lead byte, one U+FFFD for all three bytes
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x10FFFFU); // F4 8F BF BF: U+10FFFF
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // F4 BF BF BF would exceed U+10FFFF: U+FFFD for F4
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the first BF
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the second BF
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // ... for the third BF
+  EXPECT_EQ(p, end); // all 49 bytes must have been consumed
+  p = "\xC2"; // two-byte sequence truncated at end of input
+  end = p + 1;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // expected: a single U+FFFD
+  EXPECT_EQ(p, end); // with the cursor left at the end of input
+  p = "\xE1\x80"; // three-byte sequence truncated at end of input
+  end = p + 2;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // expected: a single U+FFFD for both bytes
+  EXPECT_EQ(p, end); // with the cursor left at the end of input
+  p = "\xF1\x80\x80"; // four-byte sequence truncated at end of input
+  end = p + 3;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); // expected: a single U+FFFD for all three bytes
+  EXPECT_EQ(p, end); // with the cursor left at the end of input
+}
+
+TEST(UTF, UTF16CharEnumerator) // Exercises UTF16CharEnumerator::NextChar on a surrogate pair and on lone surrogates.
+{
+  const char16_t* p = u"\u0061\U0001F4A9"; // 'a' followed by U+1F4A9, which takes two UTF-16 code units
+  const char16_t* end = p + 3; // 1 + 2 code units, not counting the terminating NUL
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x0061U);
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x1F4A9U); // the surrogate pair is combined into one code point
+  EXPECT_EQ(p, end);
+  const char16_t loneHigh = 0xD83D; // high surrogate with nothing after it
+  p = &loneHigh;
+  end = p + 1;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU); // expected: replaced by U+FFFD
+  EXPECT_EQ(p, end); // and the single unit is consumed
+  const char16_t loneLow = 0xDCA9; // low surrogate with no high surrogate before it
+  p = &loneLow;
+  end = p + 1;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU); // expected: replaced by U+FFFD
+  EXPECT_EQ(p, end); // and the single unit is consumed
+  const char16_t loneHighStr[] = { 0xD83D, 0x0061 }; // high surrogate followed by a non-surrogate unit
+  p = loneHighStr;
+  end = p + 2;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU); // only the unpaired surrogate is replaced
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x0061U); // the following 'a' is still returned on its own
+  EXPECT_EQ(p, end);
 }
 
 } // namespace TestUTF
--- a/xpcom/tests/gtest/UTFStrings.h
+++ b/xpcom/tests/gtest/UTFStrings.h
@@ -56,57 +56,73 @@ static const UTFStringsStringPair Invali
     { { 0xDC00, 0xD800, 0xDC00, 0xD800 },
       { char(0xEF), char(0xBF), char(0xBD), char(0xF0), char(0x90), char(0x80), char(0x80), char(0xEF), char(0xBF), char(0xBD) } },
     { { 0xDC00, 0xD800, 0xD800, 0xDC00 },
       { char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBD), char(0xF0), char(0x90), char(0x80), char(0x80) } },
   };
 
 static const UTFStringsStringPair Invalid8Strings[] =
   {
-    { { 'a', 0xFFFD, 'b' },
+    { { 'a', 0xFFFD, 0xFFFD, 'b' },
       { 'a', char(0xC0), char(0x80), 'b' } },
-    { { 0xFFFD, 0x80 },
+    { { 0xFFFD, 0xFFFD, 0x80 },
       { char(0xC1), char(0xBF), char(0xC2), char(0x80) } },
-    { { 0xFFFD },
+    { { 0xFFFD, 0xFFFD },
       { char(0xC1), char(0xBF) } },
-    { { 0xFFFD, 'x', 0x0800 },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0x0800 },
       { char(0xE0), char(0x80), char(0x80), 'x', char(0xE0), char(0xA0), char(0x80) } },
-    { { 0xFFFD, 'x', 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF0), char(0x80), char(0x80), char(0x80), 'x', char(0xF0), char(0x80), char(0x8F), char(0x80) } },
-    { { 0xFFFD, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF4), char(0x90), char(0x80), char(0x80), char(0xF7), char(0xBF), char(0xBF), char(0xBF) } },
-    { { 0xFFFD, 'x', 0xD800, 0xDC00, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xD800, 0xDC00, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF0), char(0x8F), char(0xBF), char(0xBF), 'x', char(0xF0), char(0x90), char(0x80), char(0x80), char(0xF0), char(0x8F), char(0xBF), char(0xBF) } },
-    { { 0xFFFD, 'x', 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF8), char(0x80), char(0x80), char(0x80), char(0x80), 'x', char(0xF8), char(0x88), char(0x80), char(0x80), char(0x80) } },
-    { { 0xFFFD, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xFB), char(0xBF), char(0xBF), char(0xBF), char(0xBF), char(0xFC), char(0xA0), char(0x80), char(0x80), char(0x80), char(0x80) } },
-    { { 0xFFFD, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xFC), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), char(0xFD), char(0xBF), char(0xBF), char(0xBF), char(0xBF), char(0xBF) } },
   };
 
-// Don't use this array in debug builds as that intentionally asserts.
-#ifndef DEBUG
-static const char Malformed8Strings[][16] =
+static const UTFStringsStringPair Malformed8Strings[] =
   {
-    { char(0x80) },
-    { 'a', char(0xC8), 'c' },
-    { 'a', char(0xC0) },
-    { 'a', char(0xE8), 'c' },
-    { 'a', char(0xE8), char(0x80), 'c' },
-    { 'a', char(0xE8), char(0x80) },
-    { char(0xE8), 0x7F, char(0x80) },
-    { 'a', char(0xE8), char(0xE8), char(0x80) },
-    { 'a', char(0xF4) },
-    { 'a', char(0xF4), char(0x80), char(0x80), 'c', 'c' },
-    { 'a', char(0xF4), char(0x80), 'x', char(0x80) },
-    { char(0xF4), char(0x80), char(0x80), char(0x80), char(0x80) },
-    { 'a', char(0xFA), 'c' },
-    { 'a', char(0xFA), char(0x80), char(0x80), 0x7F, char(0x80), 'c' },
-    { 'a', char(0xFA), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), 'c' },
-    { 'a', char(0xFD) },
-    { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), 'c' },
-    { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80) },
-    { 'a', char(0xFC), char(0x80), char(0x80), 0x40, char(0x80), char(0x80), 'c' },
+    { { 0xFFFD },
+      { char(0x80) } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xC8), 'c' } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xC8) } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xE8), 'c' } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xE8), char(0x80), 'c' } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xE8), char(0x80) } },
+    { { 0xFFFD, 0x7F, 0xFFFD },
+      { char(0xE8), 0x7F, char(0x80) } },
+    { { 'a', 0xFFFD, 0xFFFD },
+      { 'a', char(0xE8), char(0xE8), char(0x80) } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xF4) } },
+    { { 'a', 0xFFFD, 'c', 'c' },
+      { 'a', char(0xF4), char(0x80), char(0x80), 'c', 'c' } },
+    { { 'a', 0xFFFD, 'x', 0xFFFD },
+      { 'a', char(0xF4), char(0x80), 'x', char(0x80) } },
+    { { 0xDBC0, 0xDC00, 0xFFFD },
+      { char(0xF4), char(0x80), char(0x80), char(0x80), char(0x80) } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xFA), 'c' } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0x7F, 0xFFFD, 'c' },
+      { 'a', char(0xFA), char(0x80), char(0x80), 0x7F, char(0x80), 'c' } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'c' },
+      { 'a', char(0xFA), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), 'c' } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xFD) } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'c' },
+      { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), 'c' } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
+      { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80) } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0x40, 0xFFFD, 0xFFFD, 'c' },
+      { 'a', char(0xFD), char(0x80), char(0x80), 0x40, char(0x80), char(0x80), 'c' } },
   };
-#endif
 
 #endif