author Henri Sivonen <hsivonen@hsivonen.fi>

Fri, 06 Jul 2018 10:44:43 +0300

changeset 827195 70e8b1fa104560990a0b7f25bea82cb880879791

parent 827194 afdf7d11181f3b90578f94fd3824f7064827a85a

child 827197 6378a57d5c9b04102600861ef1205af643be5df0

push id 118488

push user bmo:hsivonen@hsivonen.fi

push date Tue, 07 Aug 2018 12:28:14 +0000

bugs 1402247, 1472113

milestone 63.0a1

Cargo.lock file | annotate | diff | comparison | revisions

intl/encoding_glue/src/lib.rs file | annotate | diff | comparison | revisions

js/xpconnect/src/XPCConvert.cpp file | annotate | diff | comparison | revisions

servo/ports/geckolib/glue.rs file | annotate | diff | comparison | revisions

servo/support/gecko/nsstring/Cargo.toml file | annotate | diff | comparison | revisions

servo/support/gecko/nsstring/src/conversions.rs file | annotate | diff | comparison | revisions

servo/support/gecko/nsstring/src/lib.rs file | annotate | diff | comparison | revisions

toolkit/xre/nsWindowsRestart.cpp file | annotate | diff | comparison | revisions

toolkit/xre/nsWindowsWMain.cpp file | annotate | diff | comparison | revisions

xpcom/base/nsAlgorithm.h file | annotate | diff | comparison | revisions

xpcom/ds/nsAtomTable.cpp file | annotate | diff | comparison | revisions

xpcom/string/moz.build file | annotate | diff | comparison | revisions

xpcom/string/nsReadableUtils.cpp file | annotate | diff | comparison | revisions

xpcom/string/nsReadableUtils.h file | annotate | diff | comparison | revisions

xpcom/string/nsSubstring.cpp file | annotate | diff | comparison | revisions

xpcom/string/nsTStringObsolete.cpp file | annotate | diff | comparison | revisions

xpcom/string/nsTSubstring.cpp file | annotate | diff | comparison | revisions

xpcom/string/nsTSubstring.h file | annotate | diff | comparison | revisions

xpcom/string/nsUTF8Utils.h file | annotate | diff | comparison | revisions

xpcom/tests/gtest/TestAtoms.cpp file | annotate | diff | comparison | revisions

xpcom/tests/gtest/TestStrings.cpp file | annotate | diff | comparison | revisions

xpcom/tests/gtest/TestTextFormatter.cpp file | annotate | diff | comparison | revisions

xpcom/tests/gtest/TestUTF.cpp file | annotate | diff | comparison | revisions

xpcom/tests/gtest/UTFStrings.h file | annotate | diff | comparison | revisions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1425,16 +1425,17 @@ dependencies = [
  "nsstring 0.1.0",
 ]
 
 [[package]]
 name = "nsstring"
 version = "0.1.0"
 dependencies = [
  "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "nsstring-gtest"
 version = "0.1.0"
 dependencies = [
  "nsstring 0.1.0",
 ]
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -7,23 +7,23 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
 // Adapted from third_party/rust/encoding_rs/src/lib.rs, so the
 // "top-level directory" in the above notice refers to
 // third_party/rust/encoding_rs/.
 
 extern crate encoding_rs;
+extern crate nserror;
 extern crate nsstring;
-extern crate nserror;
 
+use encoding_rs::*;
+use nserror::*;
+use nsstring::*;
 use std::slice;
-use encoding_rs::*;
-use nsstring::*;
-use nserror::*;
 
 // nsStringBuffer's internal bookkeeping takes 8 bytes from
 // the allocation. Plus one for termination.
 const NS_CSTRING_OVERHEAD: usize = 9;
 
 /// Takes `Option<usize>`, the destination string and a value
 /// to return on failure and tries to set the length of the
 /// destination string to the `usize` wrapped in the first
@@ -585,8 +585,100 @@ fn checked_min(one: Option<usize>, other
 }
 
 // Bindings for encoding_rs::mem. These may move to a separate crate in the future.
 
 #[no_mangle]
 pub unsafe extern "C" fn encoding_mem_is_utf16_bidi(buffer: *const u16, len: usize) -> bool {
     encoding_rs::mem::is_utf16_bidi(::std::slice::from_raw_parts(buffer, len))
 }
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_ascii(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_ascii(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_basic_latin(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_basic_latin(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf8_latin1(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_utf8_latin1(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_utf16_latin1(buffer: *const u16, len: usize) -> bool {
+    encoding_rs::mem::is_utf16_latin1(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_is_str_latin1(buffer: *const u8, len: usize) -> bool {
+    encoding_rs::mem::is_str_latin1(::std::str::from_utf8_unchecked(
+        ::std::slice::from_raw_parts(buffer, len),
+    ))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_latin1_lossy(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) {
+    encoding_rs::mem::convert_utf16_to_latin1_lossy(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    );
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_latin1_lossy(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_latin1_lossy(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) {
+    encoding_rs::mem::convert_latin1_to_utf16(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    );
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8(
+    src: *const u16,
+    src_len: usize,
+    dst: *mut u8,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf16_to_utf8(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16(
+    src: *const u8,
+    src_len: usize,
+    dst: *mut u16,
+    dst_len: usize,
+) -> usize {
+    encoding_rs::mem::convert_utf8_to_utf16(
+        ::std::slice::from_raw_parts(src, src_len),
+        ::std::slice::from_raw_parts_mut(dst, dst_len),
+    )
+}
--- a/js/xpconnect/src/XPCConvert.cpp
+++ b/js/xpconnect/src/XPCConvert.cpp
@@ -277,46 +277,88 @@ XPCConvert::NativeData2JS(MutableHandleV
             return true;
         }
 
         if (utf8String->IsEmpty()) {
             d.set(JS_GetEmptyStringValue(cx));
             return true;
         }
 
-        const uint32_t len = CalcUTF8ToUnicodeLength(*utf8String);
-        // The cString is not empty at this point, but the calculated
-        // UTF-16 length is zero, meaning no valid conversion exists.
-        if (!len)
-            return false;
-
-        const size_t buffer_size = (len + 1) * sizeof(char16_t);
-        char16_t* buffer =
-            static_cast<char16_t*>(JS_malloc(cx, buffer_size));
-        if (!buffer)
-            return false;
-
-        uint32_t copied;
-        if (!UTF8ToUnicodeBuffer(*utf8String, buffer, &copied) ||
-            len != copied) {
-            // Copy or conversion during copy failed. Did not copy the
-            // whole string.
-            JS_free(cx, buffer);
+        uint32_t len = utf8String->Length();
+        auto allocLen = CheckedUint32(len) + 1;
+        if (!allocLen.isValid()) {
             return false;
         }
 
-        // JS_NewUCString takes ownership on success, i.e. a
+        // Usage of UTF-8 in XPConnect is mostly for things that are
+        // almost always ASCII, so the inexact allocations below
+        // should be fine.
+
+        if (IsUTF8Latin1(*utf8String)) {
+            char* buffer = static_cast<char*>(JS_malloc(cx, allocLen.value()));
+            if (!buffer) {
+                return false;
+            }
+            size_t written =
+                LossyConvertUTF8toLatin1(*utf8String, MakeSpan(buffer, len));
+            buffer[written] = 0;
+
+            // JS_NewLatin1String takes ownership on success, i.e. a
+            // successful call will make it the responsibility of the JS VM
+            // to free the buffer.
+            // written can never exceed len, so the truncation is OK.
+            JSString* str = JS_NewLatin1String(
+                cx, reinterpret_cast<JS::Latin1Char*>(buffer), written);
+            if (!str) {
+                JS_free(cx, buffer);
+                return false;
+            }
+            d.setString(str);
+            return true;
+        }
+
+        // 1-byte sequences decode to 1 UTF-16 code unit
+        // 2-byte sequences decode to 1 UTF-16 code unit
+        // 3-byte sequences decode to 1 UTF-16 code unit
+        // 4-byte sequences decode to 2 UTF-16 code units
+        // So the number of output code units never exceeds
+        // the number of input code units (but see the comment
+        // below). allocLen already takes the zero terminator
+        // into account.
+        allocLen *= sizeof(char16_t);
+        if (!allocLen.isValid()) {
+            return false;
+        }
+
+        char16_t* buffer =
+            static_cast<char16_t*>(JS_malloc(cx, allocLen.value()));
+        if (!buffer) {
+            return false;
+        }
+
+        // For its internal simplicity, ConvertUTF8toUTF16 requires the
+        // destination to be one code unit longer than the source, but
+        // it never actually writes more code units than the number of
+        // code units in the source. That's why it's OK to claim the
+        // output buffer has len + 1 space but then still expect to
+        // have space for the zero terminator.
+        size_t written =
+            ConvertUTF8toUTF16(*utf8String, MakeSpan(buffer, len + 1));
+        MOZ_RELEASE_ASSERT(written <= len);
+        buffer[written] = 0;
+
+        // JS_NewUCStringDontDeflate takes ownership on success, i.e. a
         // successful call will make it the responsiblity of the JS VM
         // to free the buffer.
-        JSString* str = JS_NewUCString(cx, buffer, len);
+        // written can never exceed len + 1, so the truncation is OK.
+        JSString* str = JS_NewUCStringDontDeflate(cx, buffer, written);
         if (!str) {
             JS_free(cx, buffer);
             return false;
         }
-
         d.setString(str);
         return true;
     }
     case nsXPTType::T_CSTRING:
     {
         const nsACString* cString = static_cast<const nsACString*>(s);
 
         if (!cString || cString->IsVoid()) {
@@ -1218,19 +1260,20 @@ JSErrorToXPCException(const char* toStri
                       Exception** exceptn)
 {
     AutoJSContext cx;
     nsresult rv = NS_ERROR_FAILURE;
     RefPtr<nsScriptError> data;
     if (report) {
         nsAutoString bestMessage;
         if (report && report->message()) {
-            CopyUTF8toUTF16(report->message().c_str(), bestMessage);
+            CopyUTF8toUTF16(mozilla::MakeStringSpan(report->message().c_str()),
+                            bestMessage);
         } else if (toStringResult) {
-            CopyUTF8toUTF16(toStringResult, bestMessage);
+            CopyUTF8toUTF16(mozilla::MakeStringSpan(toStringResult), bestMessage);
         } else {
             bestMessage.AssignLiteral("JavaScript Error");
         }
 
         const char16_t* linebuf = report->linebuf();
 
         data = new nsScriptError();
         data->InitWithWindowID(
--- a/servo/ports/geckolib/glue.rs
+++ b/servo/ports/geckolib/glue.rs
@@ -3515,17 +3515,17 @@ pub extern "C" fn Servo_DeclarationBlock
 pub extern "C" fn Servo_DeclarationBlock_GetNthProperty(
     declarations: RawServoDeclarationBlockBorrowed,
     index: u32,
     result: *mut nsAString,
 ) -> bool {
     read_locked_arc(declarations, |decls: &PropertyDeclarationBlock| {
         if let Some(decl) = decls.declarations().get(index as usize) {
             let result = unsafe { result.as_mut().unwrap() };
-            result.assign_utf8(&decl.id().name());
+            result.assign_str(&decl.id().name());
             true
         } else {
             false
         }
     })
 }
 
 macro_rules! get_property_id_from_property {
--- a/servo/support/gecko/nsstring/Cargo.toml
+++ b/servo/support/gecko/nsstring/Cargo.toml
@@ -5,9 +5,9 @@ authors = ["nobody@mozilla.com"]
 license = "MPL-2.0"
 description = "Rust bindings to xpcom string types"
 
 [features]
 gecko_debug = []
 
 [dependencies]
 bitflags = "1.0"
-
+encoding_rs = "0.8.0"
new file mode 100644
--- /dev/null
+++ b/servo/support/gecko/nsstring/src/conversions.rs
@@ -0,0 +1,712 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate encoding_rs;
+
+use std::slice;
+
+use super::nsACString;
+use super::nsAString;
+use super::nsCStringLike;
+use super::BulkWriteOk;
+use super::Gecko_FallibleAssignCString;
+use super::Latin1StringLike;
+
+use conversions::encoding_rs::mem::*;
+use conversions::encoding_rs::Encoding;
+
+/// Required math stated in the docs of
+/// `convert_utf16_to_utf8()`.
+#[inline(always)]
+fn times_three_plus_one(a: usize) -> Option<usize> {
+    a.checked_mul(3)?.checked_add(1)
+}
+
+#[inline(always)]
+fn identity(a: usize) -> Option<usize> {
+    Some(a)
+}
+
+#[inline(always)]
+fn plus_one(a: usize) -> Option<usize> {
+    a.checked_add(1)
+}
+
+/// Typical cache line size per
+/// https://stackoverflow.com/questions/14707803/line-size-of-l1-and-l2-caches
+///
+/// For consistent behavior, not trying to use 128 on aarch64
+/// or other fanciness like that.
+const CACHE_LINE: usize = 64;
+
+const CACHE_LINE_MASK: usize = CACHE_LINE - 1;
+
+#[inline(always)]
+fn starts_with_ascii(buffer: &[u8]) -> bool {
+    // We examine data only up to the end of the cache line
+    // to make this check minimally disruptive.
+    let bound = if buffer.len() <= CACHE_LINE {
+        buffer.len()
+    } else {
+        CACHE_LINE - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)
+    };
+    is_ascii(&buffer[..bound])
+}
+
+#[inline(always)]
+fn starts_with_basic_latin(buffer: &[u16]) -> bool {
+    // We look at two cache lines with code unit size of two. There is a need
+    // to look at more than one cache line in the UTF-16 case, because looking
+    // at just one cache line wouldn't catch non-ASCII Latin with high enough
+    // probability with Latin-script languages that have relatively infrequent
+    // non-ASCII characters.
+    let bound = if buffer.len() <= CACHE_LINE {
+        buffer.len()
+    } else {
+        (CACHE_LINE * 2 - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)) / 2
+    };
+    is_basic_latin(&buffer[..bound])
+}
+
+// Ignoring the copy avoidance complications of conversions between Latin1 and
+// UTF-8, a conversion function has the outward form of
+// `fn F(&mut self, other: &[T], old_len: usize) -> Result<BulkWriteOk, ()>`,
+// where `T` is either `u8` or `u16`. `other` is the slice whose converted
+// contents are to be appended to `self` and `old_len` indicates how many
+// code units of `self` are to be preserved (0 for the assignment case and
+// `self.len()` for the appending case).
+//
+// As implementation parameters a conversion function needs to know the
+// math for computing the worst case conversion length in code units given
+// the input length in code units. For a _constant conversion_ the number
+// of code units the conversion produces equals the number of code units
+// in the input. For a _shrinking conversion_ the maximum number of code
+// units the conversion can produce equals the number of code units in
+// the input, but the conversion can produce fewer code units. Still, due
+// to implementation details, the function might want _one_ unit more of
+// output space. For an _expanding conversion_ (no need for a macro), the
+// minimum number of code units produced by the conversion is the number
+// of code units in the input, but the conversion can produce more.
+//
+// Copy avoidance conversions avoid copying a refcounted buffer when it's
+// ASCII-only.
+//
+// Internally, a conversion function needs to know the underlying
+// encoding_rs conversion function, the math for computing the required
+// output buffer size and, depending on the case, the underlying
+// encoding_rs ASCII prefix handling function.
+
+/// A conversion where the number of code units in the output is potentially
+/// smaller than the number of code units in the input.
+///
+/// Takes the name of the method to be generated, the name of the conversion
+/// function and the type of the input slice.
+///
+/// `$name` is the name of the function to generate
+/// `$convert` is the underlying `encoding_rs::mem` function to use
+/// `$other_ty` is the type of the input slice
+/// `$math` is the worst-case length math that `$convert` expects
+macro_rules! shrinking_conversion {
+    (name = $name:ident,
+     convert = $convert:ident,
+     other_ty = $other_ty:ty,
+     math = $math:ident) => (
+        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> {
+            let needed = $math(other.len()).ok_or(())?;
+            let mut handle = unsafe {
+                self.bulk_write(old_len.checked_add(needed).ok_or(())?, old_len, false)?
+            };
+            let written = $convert(other, &mut handle.as_mut_slice()[old_len..]);
+            Ok(handle.finish(old_len + written, true))
+        }
+     )
+}
+
+/// A conversion where the number of code units in the output is always equal
+/// to the number of code units in the input.
+///
+/// Takes the name of the method to be generated, the name of the conversion
+/// function and the type of the input slice.
+///
+/// `$name` is the name of the function to generate
+/// `$convert` is the underlying `encoding_rs::mem` function to use
+/// `$other_ty` is the type of the input slice
+macro_rules! constant_conversion {
+    (name = $name:ident,
+     convert = $convert:ident,
+     other_ty = $other_ty:ty) => (
+        fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> {
+            let new_len = old_len.checked_add(other.len()).ok_or(())?;
+            let mut handle = unsafe {
+                self.bulk_write(new_len, old_len, true)?
+            };
+            $convert(other, &mut handle.as_mut_slice()[old_len..]);
+            Ok(handle.finish(new_len, false))
+        }
+     )
+}
+
+/// An intermediate check for avoiding a copy and having an `nsStringBuffer`
+/// refcount increment instead when both `self` and `other` are `nsACString`s,
+/// `other` is entirely ASCII and all old data in `self` is discarded.
+///
+/// `$name` is the name of the function to generate
+/// `$implementation` is the underlying conversion that takes a slice and that is used
+///         when we can't just adopt the incoming buffer as-is
+/// `$string_like` is the kind of input taken
+macro_rules! ascii_copy_avoidance {
+    (name = $name:ident,
+     implementation = $implementation:ident,
+     string_like = $string_like:ident) => (
+        fn $name<T: $string_like + ?Sized>(&mut self, other: &T, old_len: usize) -> Result<BulkWriteOk, ()> {
+            let adapter = other.adapt();
+            let other_slice = adapter.as_ref();
+            let num_ascii = if adapter.is_abstract() && old_len == 0 {
+                let up_to = Encoding::ascii_valid_up_to(other_slice);
+                if up_to == other_slice.len() {
+                    // Calling something whose argument can be obtained from
+                    // the adapter rather than an nsStringLike avoids a huge
+                    // lifetime mess by keeping nsStringLike and
+                    // Latin1StringLike free of lifetime interdependencies.
+                    if unsafe { Gecko_FallibleAssignCString(self, other.adapt().as_ptr()) } {
+                        return Ok(BulkWriteOk{});
+                    } else {
+                        return Err(());
+                    }
+                }
+                Some(up_to)
+            } else {
+                None
+            };
+            self.$implementation(other_slice, old_len, num_ascii)
+        }
+    )
+}
+
+impl nsAString {
+    // Valid UTF-8 to UTF-16
+
+    // Documentation says the destination buffer needs to have
+    // as many code units as the input.
+    shrinking_conversion!(
+        name = fallible_append_str_impl,
+        convert = convert_str_to_utf16,
+        other_ty = &str,
+        math = identity
+    );
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and replace the content
+    /// of this string with the conversion result.
+    pub fn assign_str(&mut self, other: &str) {
+        self.fallible_append_str_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and fallibly replace the
+    /// content of this string with the conversion result.
+    pub fn fallible_assign_str(&mut self, other: &str) -> Result<(), ()> {
+        self.fallible_append_str_impl(other, 0).map(|_| ())
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and append the conversion
+    /// to this string.
+    pub fn append_str(&mut self, other: &str) {
+        let len = self.len();
+        self.fallible_append_str_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a valid UTF-8 string into valid UTF-16 and fallibly append the
+    /// conversion to this string.
+    pub fn fallible_append_str(&mut self, other: &str) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_str_impl(other, len).map(|_| ())
+    }
+
+    // Potentially-invalid UTF-8 to UTF-16
+
+    // Documentation says the destination buffer needs to have
+    // one more code unit than the input.
+    shrinking_conversion!(
+        name = fallible_append_utf8_impl,
+        convert = convert_utf8_to_utf16,
+        other_ty = &[u8],
+        math = plus_one
+    );
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// replace the content of this string with the conversion result.
+    pub fn assign_utf8(&mut self, other: &[u8]) {
+        self.fallible_append_utf8_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly replace the content of this string with the conversion result.
+    pub fn fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
+        self.fallible_append_utf8_impl(other, 0).map(|_| ())
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// append the conversion result to this string.
+    pub fn append_utf8(&mut self, other: &[u8]) {
+        let len = self.len();
+        self.fallible_append_utf8_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-8 string into valid UTF-16
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly append the conversion result to this string.
+    pub fn fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf8_impl(other, len).map(|_| ())
+    }
+
+    // Latin1 to UTF-16
+
+    constant_conversion!(
+        name = fallible_append_latin1_impl,
+        convert = convert_latin1_to_utf16,
+        other_ty = &[u8]
+    );
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-16 and replace the content of this string with the conversion result.
+    pub fn assign_latin1(&mut self, other: &[u8]) {
+        self.fallible_append_latin1_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-16 and fallibly replace the content of this string with the
+    /// conversion result.
+    pub fn fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
+        self.fallible_append_latin1_impl(other, 0).map(|_| ())
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-16 and append the conversion result to this string.
+    pub fn append_latin1(&mut self, other: &[u8]) {
+        let len = self.len();
+        self.fallible_append_latin1_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) string
+    /// into UTF-16 and fallibly append the conversion result to this string.
+    pub fn fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_latin1_impl(other, len).map(|_| ())
+    }
+}
+
+impl nsACString {
+    // UTF-16 to UTF-8
+
+    fn fallible_append_utf16_to_utf8_impl(
+        &mut self,
+        other: &[u16],
+        old_len: usize,
+    ) -> Result<BulkWriteOk, ()> {
+        // We first size the buffer for ASCII if the first cache line is ASCII. If that turns out not to
+        // be enough, we size for the worst case given the length of the remaining input at that point.
+        let (filled, num_ascii, mut handle) = if starts_with_basic_latin(other) {
+            let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
+            let num_ascii = copy_basic_latin_to_ascii(other, &mut handle.as_mut_slice()[old_len..]);
+            let left = other.len() - num_ascii;
+            if left == 0 {
+                return Ok(handle.finish(old_len + num_ascii, true));
+            }
+            let filled = old_len + num_ascii;
+            let needed = times_three_plus_one(left).ok_or(())?;
+            let new_len = filled.checked_add(needed).ok_or(())?;
+            unsafe {
+                handle.restart_bulk_write(new_len, filled, false)?;
+            }
+            (filled, num_ascii, handle)
+        } else {
+            // Started with non-ASCII. Compute worst case
+            let needed = times_three_plus_one(other.len()).ok_or(())?;
+            let new_len = old_len.checked_add(needed).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+            (old_len, 0, handle)
+        };
+        let written =
+            convert_utf16_to_utf8(&other[num_ascii..], &mut handle.as_mut_slice()[filled..]);
+        Ok(handle.finish(filled + written, true))
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// replace the content of this string with the conversion result.
+    pub fn assign_utf16_to_utf8(&mut self, other: &[u16]) {
+        self.fallible_append_utf16_to_utf8_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly replace the content of this string with the conversion result.
+    pub fn fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
+        self.fallible_append_utf16_to_utf8_impl(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// append the conversion result to this string.
+    pub fn append_utf16_to_utf8(&mut self, other: &[u16]) {
+        let len = self.len();
+        self.fallible_append_utf16_to_utf8_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a potentially-invalid UTF-16 string into valid UTF-8
+    /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and
+    /// fallibly append the conversion result to this string.
+    pub fn fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf16_to_utf8_impl(other, len)
+            .map(|_| ())
+    }
+
+    // UTF-16 to Latin1
+
+    constant_conversion!(
+        name = fallible_append_utf16_to_latin1_lossy_impl,
+        convert = convert_utf16_to_latin1_lossy,
+        other_ty = &[u16]
+    );
+
+    /// Convert a UTF-16 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// replace the content of this string with the conversion result.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-16 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// fallibly replace the content of this string with the conversion result.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a UTF-16 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// append the conversion result to this string.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn append_utf16_to_latin1_lossy(&mut self, other: &[u16]) {
+        let len = self.len();
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-16 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// fallibly append the conversion result to this string.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-16,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf16_to_latin1_lossy_impl(other, len)
+            .map(|_| ())
+    }
+
+    // UTF-8 to Latin1
+
+    ascii_copy_avoidance!(
+        name = fallible_append_utf8_to_latin1_lossy_check,
+        implementation = fallible_append_utf8_to_latin1_lossy_impl,
+        string_like = nsCStringLike
+    );
+
+    fn fallible_append_utf8_to_latin1_lossy_impl(
+        &mut self,
+        other: &[u8], // UTF-8 input
+        old_len: usize, // units of `self` to keep; output is written after them
+        maybe_num_ascii: Option<usize>, // leading units of `other` copied verbatim
+    ) -> Result<BulkWriteOk, ()> {
+        let new_len = old_len.checked_add(other.len()).ok_or(())?;
+        let num_ascii = maybe_num_ascii.unwrap_or(0);
+        // `old_len + other.len()` didn't overflow above; assumes `num_ascii <= other.len()`.
+        let old_len_plus_num_ascii = old_len + num_ascii;
+        let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+        let written = {
+            let buffer = handle.as_mut_slice();
+            if num_ascii != 0 {
+                (&mut buffer[old_len..old_len_plus_num_ascii]).copy_from_slice(&other[..num_ascii]);
+            }
+            convert_utf8_to_latin1_lossy(&other[num_ascii..], &mut buffer[old_len_plus_num_ascii..])
+        };
+        Ok(handle.finish(old_len_plus_num_ascii + written, true)) // `true`: allow shrinking
+    }
+
+    /// Convert a UTF-8 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// replace the content of this string with the conversion result.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Out of memory" if the fallible implementation returns an
+    /// error. If the input contains code points above U+00FF or is not valid
+    /// UTF-8, panics in debug mode and produces memory-safe garbage in release
+    /// builds; the garbage may vary by CPU architecture and must not be relied upon.
+    pub fn assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
+        self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-8 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and replace
+    /// the content of this string with the result, returning `Err(())` on failure.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-8,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        self.fallible_append_utf8_to_latin1_lossy_check(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a UTF-8 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// append the conversion result to this string.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Out of memory" if the fallible implementation returns an
+    /// error. If the input contains code points above U+00FF or is not valid
+    /// UTF-8, panics in debug mode and produces memory-safe garbage in release
+    /// builds; the garbage may vary by CPU architecture and must not be relied upon.
+    pub fn append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
+        let len = self.len();
+        self.fallible_append_utf8_to_latin1_lossy_check(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a UTF-8 string all of whose code points are below U+0100 into
+    /// a Latin1 (scalar value is byte value; not windows-1252!) string and
+    /// append the result to this string, returning `Err(())` on failure.
+    ///
+    /// # Panics
+    ///
+    /// If the input contains code points above U+00FF or is not valid UTF-8,
+    /// panics in debug mode and produces garbage in a memory-safe way in
+    /// release builds. The nature of the garbage may differ based on CPU
+    /// architecture and must not be relied upon.
+    pub fn fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_utf8_to_latin1_lossy_check(other, len)
+            .map(|_| ())
+    }
+
+    // Latin1 to UTF-8 CString
+
+    ascii_copy_avoidance!(
+        name = fallible_append_latin1_to_utf8_check,
+        implementation = fallible_append_latin1_to_utf8_impl,
+        string_like = Latin1StringLike
+    );
+
+    fn fallible_append_latin1_to_utf8_impl(
+        &mut self,
+        other: &[u8], // Latin1 input
+        old_len: usize, // units of `self` to keep; output is written after them
+        maybe_num_ascii: Option<usize>, // leading bytes copied verbatim, if known
+    ) -> Result<BulkWriteOk, ()> {
+        let (filled, num_ascii, mut handle) = if let Some(num_ascii) = maybe_num_ascii {
+            // Wrapper checked for ASCII; reserve 2 bytes per remaining input byte.
+            let left = other.len() - num_ascii;
+            let filled = old_len.checked_add(num_ascii).ok_or(())?;
+            let needed = left.checked_mul(2).ok_or(())?;
+            let new_len = filled.checked_add(needed).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+            if num_ascii != 0 {
+                (&mut handle.as_mut_slice()[old_len..filled]).copy_from_slice(&other[..num_ascii]);
+            }
+            (filled, num_ascii, handle)
+        } else if starts_with_ascii(other) {
+            // Wrapper didn't check for ASCII, but `other` starts with ASCII
+            // (tested in the condition above), so let's first size the buffer
+            // with optimism that it's ASCII-only.
+            let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?;
+            let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? };
+            let num_ascii = copy_ascii_to_ascii(other, &mut handle.as_mut_slice()[old_len..]);
+            let left = other.len() - num_ascii;
+            let filled = old_len + num_ascii;
+            if left == 0 {
+                // `other` was all ASCII
+                return Ok(handle.finish(filled, true));
+            }
+            let needed = left.checked_mul(2).ok_or(())?;
+            let new_len = filled.checked_add(needed).ok_or(())?;
+            unsafe {
+                handle.restart_bulk_write(new_len, filled, false)?; // keep the `filled` units
+            }
+            (filled, num_ascii, handle)
+        } else {
+            // Started with non-ASCII. Assume worst case: 2 bytes per input byte.
+            let needed = other.len().checked_mul(2).ok_or(())?;
+            let new_len = old_len.checked_add(needed).ok_or(())?;
+            let handle = unsafe { self.bulk_write(new_len, old_len, false)? };
+            (old_len, 0, handle)
+        };
+        let written =
+            convert_latin1_to_utf8(&other[num_ascii..], &mut handle.as_mut_slice()[filled..]);
+        Ok(handle.finish(filled + written, true)) // `true`: allow shrinking
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-8 and replace this string's content with it; panics on failure.
+    pub fn assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
+        self.fallible_append_latin1_to_utf8_check(other, 0)
+            .expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-8 and replace the content of this string with the
+    /// conversion result, returning `Err(())` on failure.
+    pub fn fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        self.fallible_append_latin1_to_utf8_check(other, 0)
+            .map(|_| ())
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-8 and append the result to this string; panics on failure.
+    pub fn append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) {
+        let len = self.len();
+        self.fallible_append_latin1_to_utf8_check(other, len)
+            .expect("Out of memory");
+    }
+
+    /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!)
+    /// string into UTF-8 and append it to this string; `Err(())` on failure.
+    pub fn fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(
+        &mut self,
+        other: &T,
+    ) -> Result<(), ()> {
+        let len = self.len();
+        self.fallible_append_latin1_to_utf8_check(other, len)
+            .map(|_| ())
+    }
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nsstring_fallible_append_utf8_impl(
+    this: *mut nsAString, // dereferenced below; must point to a live string
+    other: *const u8, // start of the input bytes
+    other_len: usize, // number of bytes readable from `other`
+    old_len: usize, // forwarded unchanged to the Rust implementation
+) -> bool {
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_utf8_impl(other_slice, old_len)
+        .is_ok() // `true` on success, `false` on failure
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nsstring_fallible_append_latin1_impl(
+    this: *mut nsAString, // dereferenced below; must point to a live string
+    other: *const u8, // start of the input bytes
+    other_len: usize, // number of bytes readable from `other`
+    old_len: usize, // forwarded unchanged to the Rust implementation
+) -> bool {
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_latin1_impl(other_slice, old_len)
+        .is_ok() // `true` on success, `false` on failure
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_utf8_impl(
+    this: *mut nsACString, // dereferenced below; must point to a live string
+    other: *const u16, // start of the input code units
+    other_len: usize, // number of `u16` units readable from `other`
+    old_len: usize, // forwarded unchanged to the Rust implementation
+) -> bool {
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_utf16_to_utf8_impl(other_slice, old_len)
+        .is_ok() // `true` on success, `false` on failure
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_latin1_lossy_impl(
+    this: *mut nsACString, // dereferenced below; must point to a live string
+    other: *const u16, // start of the input code units
+    other_len: usize, // number of `u16` units readable from `other`
+    old_len: usize, // forwarded unchanged to the Rust implementation
+) -> bool {
+    let other_slice = slice::from_raw_parts(other, other_len);
+    (*this)
+        .fallible_append_utf16_to_latin1_lossy_impl(other_slice, old_len)
+        .is_ok() // `true` on success, `false` on failure
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_utf8_to_latin1_lossy_check(
+    this: *mut nsACString, // dereferenced below; must point to a live string
+    other: *const nsACString, // dereferenced below; must point to a live string
+    old_len: usize, // forwarded unchanged to the Rust implementation
+) -> bool {
+    (*this)
+        .fallible_append_utf8_to_latin1_lossy_check(&*other, old_len)
+        .is_ok() // `true` on success, `false` on failure
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn nscstring_fallible_append_latin1_to_utf8_check(
+    this: *mut nsACString, // dereferenced below; must point to a live string
+    other: *const nsACString, // dereferenced below; must point to a live string
+    old_len: usize, // forwarded unchanged to the Rust implementation
+) -> bool {
+    (*this)
+        .fallible_append_latin1_to_utf8_check(&*other, old_len)
+        .is_ok() // `true` on success, `false` on failure
+}
--- a/servo/support/gecko/nsstring/src/lib.rs
+++ b/servo/support/gecko/nsstring/src/lib.rs
@@ -119,20 +119,36 @@ extern crate bitflags;
 
 use std::borrow;
 use std::cmp;
 use std::fmt;
 use std::marker::PhantomData;
 use std::mem;
 use std::ops::{Deref, DerefMut};
 use std::os::raw::c_void;
+use std::ptr;
 use std::slice;
 use std::str;
 use std::u32;
 
+mod conversions;
+
+pub use self::conversions::nscstring_fallible_append_latin1_to_utf8_check;
+pub use self::conversions::nscstring_fallible_append_utf16_to_latin1_lossy_impl;
+pub use self::conversions::nscstring_fallible_append_utf16_to_utf8_impl;
+pub use self::conversions::nscstring_fallible_append_utf8_to_latin1_lossy_check;
+pub use self::conversions::nsstring_fallible_append_latin1_impl;
+pub use self::conversions::nsstring_fallible_append_utf8_impl;
+
+/// A marker type showing that `finish()` was called on a `BulkWriteHandle`.
+/// Instantiating this type anywhere else is effectively an assertion that
+/// no `BulkWriteHandle` is still around, so be very careful when
+/// instantiating this type!
+pub struct BulkWriteOk;
+
 ///////////////////////////////////
 // Internal Implementation Flags //
 ///////////////////////////////////
 
 mod data_flags {
     bitflags! {
         // While this has the same layout as u16, it cannot be passed
         // over FFI safely as a u16.
@@ -163,60 +179,203 @@ mod class_flags {
 
 use class_flags::ClassFlags;
 use data_flags::DataFlags;
 
 ////////////////////////////////////
 // Generic String Bindings Macros //
 ////////////////////////////////////
 
+macro_rules! string_like {
+    {
+        char_t = $char_t: ty;
+
+        AString = $AString: ident;
+        String = $String: ident;
+        Str = $Str: ident;
+
+        StringLike = $StringLike: ident;
+        StringAdapter = $StringAdapter: ident;
+    } => {
+        /// This trait is implemented on types which are `ns[C]String`-like, in
+        /// that they can at very low cost be converted to a borrowed
+        /// `&nsA[C]String`. Unfortunately, the intermediate type
+        /// `ns[C]StringAdapter` is required as well due to types like `&[u8]`
+        /// needing to be (cheaply) wrapped in the `Str` type on the stack to
+        /// create the `&nsACString`.
+        ///
+        /// This trait lets callers pass any of these types to the methods on
+        /// `nsA[C]String` ("do what I mean").
+        pub trait $StringLike {
+            fn adapt(&self) -> $StringAdapter;
+        }
+
+        impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T {
+            fn adapt(&self) -> $StringAdapter {
+                <T as $StringLike>::adapt(*self)
+            }
+        }
+
+        impl<'a, T> $StringLike for borrow::Cow<'a, T>
+            where T: $StringLike + borrow::ToOwned + ?Sized {
+            fn adapt(&self) -> $StringAdapter {
+                <T as $StringLike>::adapt(self.as_ref())
+            }
+        }
+
+        impl $StringLike for $AString {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Abstract(self)
+            }
+        }
+
+        impl<'a> $StringLike for $Str<'a> {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Abstract(self)
+            }
+        }
+
+        impl $StringLike for $String {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Abstract(self)
+            }
+        }
+
+        impl $StringLike for [$char_t] {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Borrowed($Str::from(self))
+            }
+        }
+
+        impl $StringLike for Vec<$char_t> {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Borrowed($Str::from(&self[..]))
+            }
+        }
+
+        impl $StringLike for Box<[$char_t]> {
+            fn adapt(&self) -> $StringAdapter {
+                $StringAdapter::Borrowed($Str::from(&self[..]))
+            }
+        }
+    }
+}
+
+impl<'a> Drop for nsAStringBulkWriteHandle<'a> {
+    /// This only runs in error cases. In success cases, `finish()`
+    /// calls `forget(self)`.
+    fn drop(&mut self) {
+        if self.capacity == 0 {
+            // If capacity is 0, the string is a zero-length
+            // string, so we have nothing to do.
+            return;
+        }
+        // The old zero terminator may be gone by now, so we need
+        // to write a new one somewhere and make the length match.
+        // Any length between 1 and self.capacity would work; it
+        // seems prudent to overwrite the uninitialized memory, and
+        // length 1 leaves the least memory to overwrite.
+        // U+FFFD is the safest placeholder. Merely truncating the
+        // string to a zero-length string might be dangerous in some
+        // scenarios. See
+        // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences
+        // for a closely related scenario.
+        unsafe {
+            let mut this = self.string.as_repr();
+            this.as_mut().length = 1u32;
+            *(this.as_mut().data.as_mut()) = 0xFFFDu16; // U+FFFD as one UTF-16 unit
+            *(this.as_mut().data.as_ptr().offset(1isize)) = 0; // zero terminator
+        }
+    }
+}
+
+impl<'a> Drop for nsACStringBulkWriteHandle<'a> {
+    /// This only runs in error cases. In success cases, `finish()`
+    /// calls `forget(self)`.
+    fn drop(&mut self) {
+        if self.capacity == 0 {
+            // If capacity is 0, the string is a zero-length
+            // string, so we have nothing to do.
+            return;
+        }
+        // The old zero terminator may be gone by now, so we need
+        // to write a new one somewhere and make the length match.
+        // Any length between 1 and self.capacity would work; it
+        // seems prudent to overwrite the uninitialized memory, and
+        // a short length leaves the least memory to overwrite.
+        // U+FFFD (three bytes as UTF-8) is the safest placeholder, but
+        // when it doesn't fit, let's use the ASCII substitute character.
+        // Merely truncating the string to a zero-length string might be
+        // dangerous in some scenarios. See
+        // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences
+        // for a closely related scenario.
+        unsafe {
+            let mut this = self.string.as_repr();
+            if self.capacity >= 3 {
+                this.as_mut().length = 3u32;
+                *(this.as_mut().data.as_mut()) = 0xEFu8; // U+FFFD as UTF-8: EF BF BD
+                *(this.as_mut().data.as_ptr().offset(1isize)) = 0xBFu8;
+                *(this.as_mut().data.as_ptr().offset(2isize)) = 0xBDu8;
+                *(this.as_mut().data.as_ptr().offset(3isize)) = 0; // zero terminator
+            } else {
+                this.as_mut().length = 1u32;
+                *(this.as_mut().data.as_mut()) = 0x1Au8; // U+FFFD doesn't fit
+                *(this.as_mut().data.as_ptr().offset(1isize)) = 0; // zero terminator
+            }
+        }
+    }
+}
+
 macro_rules! define_string_types {
     {
         char_t = $char_t: ty;
 
         AString = $AString: ident;
         String = $String: ident;
         Str = $Str: ident;
 
         StringLike = $StringLike: ident;
         StringAdapter = $StringAdapter: ident;
 
         StringRepr = $StringRepr: ident;
 
+        BulkWriteHandle = $BulkWriteHandle: ident;
+
         drop = $drop: ident;
         assign = $assign: ident, $fallible_assign: ident;
         take_from = $take_from: ident, $fallible_take_from: ident;
         append = $append: ident, $fallible_append: ident;
         set_length = $set_length: ident, $fallible_set_length: ident;
         begin_writing = $begin_writing: ident, $fallible_begin_writing: ident;
+        start_bulk_write = $start_bulk_write: ident;
     } => {
         /// The representation of a ns[C]String type in C++. This type is
         /// used internally by our definition of ns[C]String to ensure layout
         /// compatibility with the C++ ns[C]String type.
         ///
         /// This type may also be used in place of a C++ ns[C]String inside of
         /// struct definitions which are shared with C++, as it has identical
         /// layout to our ns[C]String type.
         ///
         /// This struct will leak its data if dropped from rust. See the module
         /// documentation for more information on this type.
         #[repr(C)]
         #[derive(Debug)]
         pub struct $StringRepr {
-            data: *const $char_t,
+            data: ptr::NonNull<$char_t>,
             length: u32,
             dataflags: DataFlags,
             classflags: ClassFlags,
         }
 
         impl $StringRepr {
             fn new(classflags: ClassFlags) -> $StringRepr {
                 static NUL: $char_t = 0;
                 $StringRepr {
-                    data: &NUL,
+                    data: unsafe { ptr::NonNull::new_unchecked(&NUL as *const _ as *mut _) },
                     length: 0,
                     dataflags: DataFlags::TERMINATED | DataFlags::LITERAL,
                     classflags: classflags,
                 }
             }
         }
 
         impl Deref for $StringRepr {
@@ -231,16 +390,73 @@ macro_rules! define_string_types {
         impl DerefMut for $StringRepr {
             fn deref_mut(&mut self) -> &mut $AString {
                 unsafe {
                     mem::transmute(self)
                 }
             }
         }
 
+        pub struct $BulkWriteHandle<'a> {
+            string: &'a mut $AString, // the string being written to
+            capacity: usize, // length of the slice exposed by `as_mut_slice()`
+        }
+
+        impl<'a> $BulkWriteHandle<'a> {
+            fn new(string: &'a mut $AString, capacity: usize) -> Self {
+                $BulkWriteHandle{ string: string, capacity: capacity }
+            }
+
+            pub unsafe fn restart_bulk_write(&mut self,
+                                             capacity: usize,
+                                             units_to_preserve: usize,
+                                             allow_shrinking: bool) -> Result<(), ()> {
+                self.capacity = // on success, adopt the capacity the string reports
+                    self.string.start_bulk_write_impl(capacity,
+                                                      units_to_preserve,
+                                                      allow_shrinking)?;
+                Ok(())
+            }
+
+            pub fn finish(mut self, length: usize, allow_shrinking: bool) -> BulkWriteOk {
+                // NOTE: Drop is implemented outside the macro earlier in this file,
+                // because it needs to deal with different code unit representations
+                // for the REPLACEMENT CHARACTER in the UTF-16 and UTF-8 cases and
+                // needs to deal with a REPLACEMENT CHARACTER not fitting in the
+                // buffer in the UTF-8 case.
+                assert!(length <= self.capacity);
+                if length == 0 {
+                    // `truncate()` is OK even when the string
+                    // is in an invalid state.
+                    self.string.truncate();
+                    mem::forget(self); // Don't run the failure path in drop()
+                    return BulkWriteOk{};
+                }
+                if allow_shrinking {
+                    unsafe {
+                        let _ = self.restart_bulk_write(length, length, true); // result ignored
+                    }
+                }
+                unsafe {
+                    let mut this = self.string.as_repr();
+                    this.as_mut().length = length as u32;
+                    *(this.as_mut().data.as_ptr().offset(length as isize)) = 0; // terminator
+                }
+                mem::forget(self); // Don't run the failure path in drop()
+                BulkWriteOk{}
+            }
+
+            pub fn as_mut_slice(&mut self) -> &mut [$char_t] { // `self.capacity` units long
+                unsafe {
+                    let mut this = self.string.as_repr();
+                    slice::from_raw_parts_mut(this.as_mut().data.as_ptr(), self.capacity)
+                }
+            }
+        }
+
         /// This type is the abstract type which is used for interacting with
         /// strings in rust. Each string type can derefence to an instance of
         /// this type, which provides the useful operations on strings.
         ///
         /// NOTE: Rust thinks this type has a size of 0, because the data
         /// associated with it is not necessarially safe to move. It is not safe
         /// to construct a nsAString yourself, unless it is received by
         /// dereferencing one of these types.
@@ -340,65 +556,100 @@ macro_rules! define_string_types {
 
             /// Get a `&mut` reference to the backing data for this string.
             /// This method will allocate and copy if the current backing buffer
             /// is immutable or shared.
             pub fn to_mut(&mut self) -> &mut [$char_t] {
                 unsafe {
                     let len = self.len();
                     if len == 0 {
-                        // Use an arbitrary non-null value as the pointer
-                        slice::from_raw_parts_mut(0x1 as *mut $char_t, 0)
+                        // Use an arbitrary but aligned non-null value as the pointer
+                        slice::from_raw_parts_mut(ptr::NonNull::<$char_t>::dangling().as_ptr(), 0)
                     } else {
                         slice::from_raw_parts_mut($begin_writing(self), len)
                     }
                 }
             }
 
             /// Get a `&mut` reference to the backing data for this string.
             /// This method will allocate and copy if the current backing buffer
             /// is immutable or shared.
             ///
             /// Returns `Ok(&mut [T])` on success, and `Err(())` if the
             /// allocation failed.
             pub fn fallible_to_mut(&mut self) -> Result<&mut [$char_t], ()> {
                 unsafe {
                     let len = self.len();
                     if len == 0 {
-                        // Use an arbitrary non-null value as the pointer
-                        Ok(slice::from_raw_parts_mut(0x1 as *mut $char_t, 0))
+                        // Use an arbitrary but aligned non-null value as the pointer
+                        Ok(slice::from_raw_parts_mut(
+                            ptr::NonNull::<$char_t>::dangling().as_ptr() as *mut $char_t, 0))
                     } else {
                         let ptr = $fallible_begin_writing(self);
                         if ptr.is_null() {
                             Err(())
                         } else {
                             Ok(slice::from_raw_parts_mut(ptr, len))
                         }
                     }
                 }
             }
 
+            /// Unshares the buffer of the string and returns a handle
+            /// from which a writable slice whose length is the rounded-up
+            /// capacity can be obtained.
+            ///
+            /// Also fails with `Err(())` if `capacity` doesn't fit in 32 bits.
+            ///
+            /// # Safety
+            ///
+            /// Unsafe because the handle exposes uninitialized memory.
+            pub unsafe fn bulk_write(&mut self,
+                                     capacity: usize,
+                                     units_to_preserve: usize,
+                                     allow_shrinking: bool) -> Result<$BulkWriteHandle, ()> {
+                let capacity =
+                    self.start_bulk_write_impl(capacity, units_to_preserve, allow_shrinking)?;
+                Ok($BulkWriteHandle::new(self, capacity))
+            }
+
+            unsafe fn start_bulk_write_impl(&mut self,
+                                            capacity: usize,
+                                            units_to_preserve: usize,
+                                            allow_shrinking: bool) -> Result<usize, ()> {
+                if capacity > u32::max_value() as usize {
+                    Err(()) // requested capacity doesn't fit in 32 bits
+                } else {
+                    let capacity32 = capacity as u32;
+                    let rounded = $start_bulk_write(self,
+                                                    capacity32,
+                                                    units_to_preserve as u32, // NOTE(review): unchecked narrowing
+                                                    allow_shrinking);
+                    if rounded == u32::max_value() { // treated as the failure sentinel
+                        return Err(())
+                    }
+                    Ok(rounded as usize)
+                }
+            }
+
+            fn as_repr(&mut self) -> ptr::NonNull<$StringRepr> { // `&mut self` is never null
+                unsafe { ptr::NonNull::new_unchecked(self as *mut _ as *mut $StringRepr)}
+            }
         }
 
         impl Deref for $AString {
             type Target = [$char_t];
             fn deref(&self) -> &[$char_t] {
                 unsafe {
                     // All $AString values point to a struct prefix which is
                     // identical to $StringRepr, this we can transmute `self`
                     // into $StringRepr to get the reference to the underlying
                     // data.
                     let this: &$StringRepr = mem::transmute(self);
-                    if this.data.is_null() {
-                        debug_assert_eq!(this.length, 0);
-                        // Use an arbitrary non-null value as the pointer
-                        slice::from_raw_parts(0x1 as *const $char_t, 0)
-                    } else {
-                        slice::from_raw_parts(this.data, this.length as usize)
-                    }
+                    slice::from_raw_parts(this.data.as_ptr(), this.length as usize)
                 }
             }
         }
 
         impl AsRef<[$char_t]> for $AString {
             fn as_ref(&self) -> &[$char_t] {
                 self
             }
@@ -473,17 +724,17 @@ macro_rules! define_string_types {
         impl<'a> From<&'a [$char_t]> for $Str<'a> {
             fn from(s: &'a [$char_t]) -> $Str<'a> {
                 assert!(s.len() < (u32::MAX as usize));
                 if s.is_empty() {
                     return $Str::new();
                 }
                 $Str {
                     hdr: $StringRepr {
-                        data: s.as_ptr(),
+                        data: unsafe { ptr::NonNull::new_unchecked(s.as_ptr() as *mut _) },
                         length: s.len() as u32,
                         dataflags: DataFlags::empty(),
                         classflags: ClassFlags::empty(),
                     },
                     _marker: PhantomData,
                 }
             }
         }
@@ -633,24 +884,24 @@ macro_rules! define_string_types {
                 let length = s.len() as u32;
                 s.push(0); // null terminator
 
                 // SAFETY NOTE: This method produces an data_flags::OWNED
                 // ns[C]String from a Box<[$char_t]>. this is only safe
                 // because in the Gecko tree, we use the same allocator for
                 // Rust code as for C++ code, meaning that our box can be
                 // legally freed with libc::free().
-                let ptr = s.as_ptr();
+                let ptr = s.as_mut_ptr();
                 mem::forget(s);
                 unsafe {
                     Gecko_IncrementStringAdoptCount(ptr as *mut _);
                 }
                 $String {
                     hdr: $StringRepr {
-                        data: ptr,
+                        data: unsafe { ptr::NonNull::new_unchecked(ptr) },
                         length: length,
                         dataflags: DataFlags::OWNED | DataFlags::TERMINATED,
                         classflags: ClassFlags::NULL_TERMINATED,
                     }
                 }
             }
         }
 
@@ -722,76 +973,35 @@ macro_rules! define_string_types {
             fn deref(&self) -> &$AString {
                 match *self {
                     $StringAdapter::Borrowed(ref s) => s,
                     $StringAdapter::Abstract(ref s) => s,
                 }
             }
         }
 
-        /// This trait is implemented on types which are `ns[C]String`-like, in
-        /// that they can at very low cost be converted to a borrowed
-        /// `&nsA[C]String`. Unfortunately, the intermediate type
-        /// `ns[C]StringAdapter` is required as well due to types like `&[u8]`
-        /// needing to be (cheaply) wrapped in a `nsCString` on the stack to
-        /// create the `&nsACString`.
-        ///
-        /// This trait is used to DWIM when calling the methods on
-        /// `nsA[C]String`.
-        pub trait $StringLike {
-            fn adapt(&self) -> $StringAdapter;
-        }
-
-        impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T {
-            fn adapt(&self) -> $StringAdapter {
-                <T as $StringLike>::adapt(*self)
-            }
-        }
-
-        impl<'a, T> $StringLike for borrow::Cow<'a, T>
-            where T: $StringLike + borrow::ToOwned + ?Sized {
-            fn adapt(&self) -> $StringAdapter {
-                <T as $StringLike>::adapt(self.as_ref())
+        impl<'a> $StringAdapter<'a> {
+            #[allow(dead_code)]
+            fn is_abstract(&self) -> bool {
+                match *self {
+                    $StringAdapter::Borrowed(_) => false,
+                    $StringAdapter::Abstract(_) => true,
+                }
             }
         }
 
-        impl $StringLike for $AString {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Abstract(self)
-            }
-        }
-
-        impl<'a> $StringLike for $Str<'a> {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Abstract(self)
-            }
-        }
+        string_like! {
+            char_t = $char_t;
 
-        impl $StringLike for $String {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Abstract(self)
-            }
-        }
+            AString = $AString;
+            String = $String;
+            Str = $Str;
 
-        impl $StringLike for [$char_t] {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Borrowed($Str::from(self))
-            }
-        }
-
-        impl $StringLike for Vec<$char_t> {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Borrowed($Str::from(&self[..]))
-            }
-        }
-
-        impl $StringLike for Box<[$char_t]> {
-            fn adapt(&self) -> $StringAdapter {
-                $StringAdapter::Borrowed($Str::from(&self[..]))
-            }
+            StringLike = $StringLike;
+            StringAdapter = $StringAdapter;
         }
     }
 }
 
 ///////////////////////////////////////////
 // Bindings for nsCString (u8 char type) //
 ///////////////////////////////////////////
 
@@ -802,49 +1012,28 @@ define_string_types! {
     String = nsCString;
     Str = nsCStr;
 
     StringLike = nsCStringLike;
     StringAdapter = nsCStringAdapter;
 
     StringRepr = nsCStringRepr;
 
+    BulkWriteHandle = nsACStringBulkWriteHandle;
+
     drop = Gecko_FinalizeCString;
     assign = Gecko_AssignCString, Gecko_FallibleAssignCString;
     take_from = Gecko_TakeFromCString, Gecko_FallibleTakeFromCString;
     append = Gecko_AppendCString, Gecko_FallibleAppendCString;
     set_length = Gecko_SetLengthCString, Gecko_FallibleSetLengthCString;
     begin_writing = Gecko_BeginWritingCString, Gecko_FallibleBeginWritingCString;
+    start_bulk_write = Gecko_StartBulkWriteCString;
 }
 
 impl nsACString {
-    pub fn assign_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) {
-        self.truncate();
-        self.append_utf16(other);
-    }
-
-    pub fn fallible_assign_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        self.truncate();
-        self.fallible_append_utf16(other)
-    }
-
-    pub fn append_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) {
-        unsafe {
-            Gecko_AppendUTF16toCString(self, other.adapt().as_ptr());
-        }
-    }
-
-    pub fn fallible_append_utf16<T: nsStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        if unsafe { Gecko_FallibleAppendUTF16toCString(self, other.adapt().as_ptr()) } {
-            Ok(())
-        } else {
-            Err(())
-        }
-    }
-
     pub unsafe fn as_str_unchecked(&self) -> &str {
         str::from_utf8_unchecked(self)
     }
 }
 
 impl<'a> From<&'a str> for nsCStr<'a> {
     fn from(s: &'a str) -> nsCStr<'a> {
         s.as_bytes().into()
@@ -920,64 +1109,58 @@ impl nsCStringLike for String {
 }
 
 impl nsCStringLike for Box<str> {
     fn adapt(&self) -> nsCStringAdapter {
         nsCStringAdapter::Borrowed(nsCStr::from(&self[..]))
     }
 }
 
+/// This trait is implemented on types which are Latin1 `nsCString`-like,
+/// in that they can at very low cost be converted to a borrowed
+/// `&nsACString` and do not denote UTF-8ness in the Rust type system.
+///
+/// This trait is used to DWIM when calling the methods on
+/// `nsACString`.
+string_like! {
+    char_t = u8;
+
+    AString = nsACString;
+    String = nsCString;
+    Str = nsCStr;
+
+    StringLike = Latin1StringLike;
+    StringAdapter = nsCStringAdapter;
+}
+
 ///////////////////////////////////////////
 // Bindings for nsString (u16 char type) //
 ///////////////////////////////////////////
 
 define_string_types! {
     char_t = u16;
 
     AString = nsAString;
     String = nsString;
     Str = nsStr;
 
     StringLike = nsStringLike;
     StringAdapter = nsStringAdapter;
 
     StringRepr = nsStringRepr;
 
+    BulkWriteHandle = nsAStringBulkWriteHandle;
+
     drop = Gecko_FinalizeString;
     assign = Gecko_AssignString, Gecko_FallibleAssignString;
     take_from = Gecko_TakeFromString, Gecko_FallibleTakeFromString;
     append = Gecko_AppendString, Gecko_FallibleAppendString;
     set_length = Gecko_SetLengthString, Gecko_FallibleSetLengthString;
     begin_writing = Gecko_BeginWritingString, Gecko_FallibleBeginWritingString;
-}
-
-impl nsAString {
-    pub fn assign_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
-        self.truncate();
-        self.append_utf8(other);
-    }
-
-    pub fn fallible_assign_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        self.truncate();
-        self.fallible_append_utf8(other)
-    }
-
-    pub fn append_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) {
-        unsafe {
-            Gecko_AppendUTF8toString(self, other.adapt().as_ptr());
-        }
-    }
-
-    pub fn fallible_append_utf8<T: nsCStringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> {
-        if unsafe { Gecko_FallibleAppendUTF8toString(self, other.adapt().as_ptr()) } {
-            Ok(())
-        } else {
-            Err(())
-        }
-    }
+    start_bulk_write = Gecko_StartBulkWriteString;
 }
 
 // NOTE: The From impl for a string slice for nsString produces a <'static>
 // lifetime, as it allocates.
 impl<'a> From<&'a str> for nsString {
     fn from(s: &'a str) -> nsString {
         s.encode_utf16().collect::<Vec<u16>>().into()
     }
@@ -989,17 +1172,17 @@ impl<'a> From<&'a String> for nsString {
     }
 }
 
 // Support for the write!() macro for writing to nsStrings
 impl fmt::Write for nsAString {
     fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
         // Directly invoke gecko's routines for appending utf8 strings to
         // nsAString values, to avoid as much overhead as possible
-        self.append_utf8(s);
+        self.append_str(s);
         Ok(())
     }
 }
 
 impl fmt::Display for nsAString {
     fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
         fmt::Display::fmt(&String::from_utf16_lossy(&self[..]), f)
     }
@@ -1033,52 +1216,58 @@ extern "C" {
     fn Gecko_AppendCString(this: *mut nsACString, other: *const nsACString);
     fn Gecko_SetLengthCString(this: *mut nsACString, length: u32);
     fn Gecko_BeginWritingCString(this: *mut nsACString) -> *mut u8;
     fn Gecko_FallibleAssignCString(this: *mut nsACString, other: *const nsACString) -> bool;
     fn Gecko_FallibleTakeFromCString(this: *mut nsACString, other: *mut nsACString) -> bool;
     fn Gecko_FallibleAppendCString(this: *mut nsACString, other: *const nsACString) -> bool;
     fn Gecko_FallibleSetLengthCString(this: *mut nsACString, length: u32) -> bool;
     fn Gecko_FallibleBeginWritingCString(this: *mut nsACString) -> *mut u8;
+    fn Gecko_StartBulkWriteCString(
+        this: *mut nsACString,
+        capacity: u32,
+        units_to_preserve: u32,
+        allow_shrinking: bool,
+    ) -> u32;
 
     fn Gecko_FinalizeString(this: *mut nsAString);
 
     fn Gecko_AssignString(this: *mut nsAString, other: *const nsAString);
     fn Gecko_TakeFromString(this: *mut nsAString, other: *mut nsAString);
     fn Gecko_AppendString(this: *mut nsAString, other: *const nsAString);
     fn Gecko_SetLengthString(this: *mut nsAString, length: u32);
     fn Gecko_BeginWritingString(this: *mut nsAString) -> *mut u16;
     fn Gecko_FallibleAssignString(this: *mut nsAString, other: *const nsAString) -> bool;
     fn Gecko_FallibleTakeFromString(this: *mut nsAString, other: *mut nsAString) -> bool;
     fn Gecko_FallibleAppendString(this: *mut nsAString, other: *const nsAString) -> bool;
     fn Gecko_FallibleSetLengthString(this: *mut nsAString, length: u32) -> bool;
     fn Gecko_FallibleBeginWritingString(this: *mut nsAString) -> *mut u16;
-
-    // Gecko implementation in nsReadableUtils.cpp
-    fn Gecko_AppendUTF16toCString(this: *mut nsACString, other: *const nsAString);
-    fn Gecko_AppendUTF8toString(this: *mut nsAString, other: *const nsACString);
-    fn Gecko_FallibleAppendUTF16toCString(this: *mut nsACString, other: *const nsAString) -> bool;
-    fn Gecko_FallibleAppendUTF8toString(this: *mut nsAString, other: *const nsACString) -> bool;
+    fn Gecko_StartBulkWriteString(
+        this: *mut nsAString,
+        capacity: u32,
+        units_to_preserve: u32,
+        allow_shrinking: bool,
+    ) -> u32;
 }
 
 //////////////////////////////////////
 // Repr Validation Helper Functions //
 //////////////////////////////////////
 
 pub mod test_helpers {
     //! This module only exists to help with ensuring that the layout of the
     //! structs inside of rust and C++ are identical.
     //!
     //! It is public to ensure that these testing functions are available to
     //! gtest code.
 
-    use std::mem;
-    use super::{ClassFlags, DataFlags};
     use super::{nsCStr, nsCString, nsCStringRepr};
     use super::{nsStr, nsString, nsStringRepr};
+    use super::{ClassFlags, DataFlags};
+    use std::mem;
 
     /// Generates an #[no_mangle] extern "C" function which returns the size and
     /// alignment of the given type with the given name.
     macro_rules! size_align_check {
         ($T:ty, $fname:ident) => {
             #[no_mangle]
             #[allow(non_snake_case)]
             pub extern fn $fname(size: *mut usize, align: *mut usize) {
--- a/toolkit/xre/nsWindowsRestart.cpp
+++ b/toolkit/xre/nsWindowsRestart.cpp
@@ -23,24 +23,25 @@
 /**
  * Convert UTF8 to UTF16 without using the normal XPCOM goop, which we
  * can't link to updater.exe.
  */
 static char16_t*
 AllocConvertUTF8toUTF16(const char *arg)
 {
   // UTF16 can't be longer in units than UTF8
-  int len = strlen(arg);
+  size_t len = strlen(arg);
   char16_t *s = new char16_t[(len + 1) * sizeof(char16_t)];
   if (!s)
     return nullptr;
 
-  ConvertUTF8toUTF16 convert(s);
-  convert.write(arg, len);
-  convert.write_terminator();
+  size_t dstLen = ::MultiByteToWideChar(
+    CP_UTF8, 0, arg, len, reinterpret_cast<wchar_t*>(s), len);
+  s[dstLen] = 0;
+
   return s;
 }
 
 static void
 FreeAllocStrings(int argc, wchar_t **argv)
 {
   while (argc) {
     --argc;
--- a/toolkit/xre/nsWindowsWMain.cpp
+++ b/toolkit/xre/nsWindowsWMain.cpp
@@ -72,24 +72,27 @@ SanitizeEnvironmentVariables()
     delete[] originalPath;
   }
 }
 
 static char*
 AllocConvertUTF16toUTF8(char16ptr_t arg)
 {
   // be generous... UTF16 units can expand up to 3 UTF8 units
-  int len = wcslen(arg);
-  char *s = new char[len * 3 + 1];
+  size_t len = wcslen(arg);
+  // ConvertUTF16toUTF8 requires +1. Let's do that here, too, lacking
+  // knowledge of Windows internals.
+  size_t dstLen = len * 3 + 1;
+  char* s = new char[dstLen + 1]; // Another +1 for zero terminator
   if (!s)
     return nullptr;
 
-  ConvertUTF16toUTF8 convert(s);
-  convert.write(arg, len);
-  convert.write_terminator();
+  int written =
+    ::WideCharToMultiByte(CP_UTF8, 0, arg, len, s, dstLen, nullptr, nullptr);
+  s[written] = 0;
   return s;
 }
 
 static void
 FreeAllocStrings(int argc, char **argv)
 {
   while (argc) {
     --argc;
--- a/xpcom/base/nsAlgorithm.h
+++ b/xpcom/base/nsAlgorithm.h
@@ -54,22 +54,9 @@ NS_COUNT(InputIterator& aFirst, const In
   uint32_t result = 0;
   for (; aFirst != aLast; ++aFirst)
     if (*aFirst == aValue) {
       ++result;
     }
   return result;
 }
 
-template <class InputIterator, class OutputIterator>
-inline OutputIterator&
-copy_string(const InputIterator& aFirst, const InputIterator& aLast,
-            OutputIterator& aResult)
-{
-  typedef nsCharSourceTraits<InputIterator> source_traits;
-  typedef nsCharSinkTraits<OutputIterator>  sink_traits;
-
-  sink_traits::write(aResult, source_traits::read(aFirst),
-                     source_traits::readable_distance(aFirst, aLast));
-  return aResult;
-}
-
 #endif // !defined(nsAlgorithm_h___)
--- a/xpcom/ds/nsAtomTable.cpp
+++ b/xpcom/ds/nsAtomTable.cpp
@@ -191,28 +191,25 @@ struct AtomTableKey
     : mUTF16String(aUTF16String)
     , mUTF8String(nullptr)
     , mLength(aLength)
   {
     mHash = HashString(mUTF16String, mLength);
     *aHashOut = mHash;
   }
 
-  AtomTableKey(const char* aUTF8String, uint32_t aLength, uint32_t* aHashOut)
+  AtomTableKey(const char* aUTF8String,
+               uint32_t aLength,
+               uint32_t* aHashOut,
+               bool* aErr)
     : mUTF16String(nullptr)
     , mUTF8String(aUTF8String)
     , mLength(aLength)
   {
-    bool err;
-    mHash = HashUTF8AsUTF16(mUTF8String, mLength, &err);
-    if (err) {
-      mUTF8String = nullptr;
-      mLength = 0;
-      mHash = 0;
-    }
+    mHash = HashUTF8AsUTF16(mUTF8String, mLength, aErr);
     *aHashOut = mHash;
   }
 
   const char16_t* mUTF16String;
   const char* mUTF8String;
   uint32_t mLength;
   uint32_t mHash;
 };
@@ -328,20 +325,22 @@ AtomTableGetHash(const void* aKey)
 
 static bool
 AtomTableMatchKey(const PLDHashEntryHdr* aEntry, const void* aKey)
 {
   const AtomTableEntry* he = static_cast<const AtomTableEntry*>(aEntry);
   const AtomTableKey* k = static_cast<const AtomTableKey*>(aKey);
 
   if (k->mUTF8String) {
-    return
-      CompareUTF8toUTF16(nsDependentCSubstring(k->mUTF8String,
-                                               k->mUTF8String + k->mLength),
-                         nsDependentAtomString(he->mAtom)) == 0;
+    bool err = false;
+    return (CompareUTF8toUTF16(nsDependentCSubstring(
+                                 k->mUTF8String, k->mUTF8String + k->mLength),
+                               nsDependentAtomString(he->mAtom),
+                               &err) == 0) &&
+           !err;
   }
 
   return he->mAtom->Equals(k->mUTF16String, k->mLength);
 }
 
 void
 nsAtomTable::AtomTableClearEntry(PLDHashTable* aTable, PLDHashEntryHdr* aEntry)
 {
@@ -682,17 +681,26 @@ NS_Atomize(const char* aUTF8String)
   MOZ_ASSERT(gAtomTable);
   return gAtomTable->Atomize(nsDependentCString(aUTF8String));
 }
 
 already_AddRefed<nsAtom>
 nsAtomTable::Atomize(const nsACString& aUTF8String)
 {
   uint32_t hash;
-  AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &hash);
+  bool err;
+  AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &hash, &err);
+  if (MOZ_UNLIKELY(err)) {
+    MOZ_ASSERT_UNREACHABLE("Tried to atomize invalid UTF-8.");
+    // The input was invalid UTF-8. Let's replace the errors with U+FFFD
+    // and atomize the result.
+    nsString str;
+    CopyUTF8toUTF16(aUTF8String, str);
+    return Atomize(str);
+  }
   nsAtomSubTable& table = SelectSubTable(key);
   MutexAutoLock lock(table.mLock);
   AtomTableEntry* he = table.Add(key);
 
   if (he->mAtom) {
     RefPtr<nsAtom> atom = he->mAtom;
 
     return atom.forget();
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -44,21 +44,9 @@ UNIFIED_SOURCES += [
     'nsStringComparator.cpp',
     'nsStringObsolete.cpp',
     'nsSubstring.cpp',
     'nsTextFormatter.cpp',
     'nsTSubstringTuple.cpp',
     'precompiled_templates.cpp',
 ]
 
-# Are we targeting x86 or x86-64?  If so, compile the SSE2 functions for
-# nsUTF8Utils.cpp and nsReadableUtils.cpp.
-if CONFIG['INTEL_ARCHITECTURE']:
-    SOURCES += ['nsUTF8UtilsSSE2.cpp']
-    SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
-    SOURCES += ['nsReadableUtilsSSE2.cpp']
-    SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
-
-if CONFIG['HAVE_ARM_NEON'] or CONFIG['CPU_ARCH'] == 'aarch64':
-    SOURCES += ['nsUTF8UtilsNEON.cpp']
-    SOURCES['nsUTF8UtilsNEON.cpp'].flags += CONFIG['NEON_FLAGS']
-
 FINAL_LIBRARY = 'xul'
--- a/xpcom/string/nsReadableUtils.cpp
+++ b/xpcom/string/nsReadableUtils.cpp
@@ -1,793 +1,246 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsReadableUtils.h"
-#include "nsReadableUtilsImpl.h"
 
 #include <algorithm>
 
 #include "mozilla/CheckedInt.h"
 
 #include "nscore.h"
 #include "nsMemory.h"
 #include "nsString.h"
 #include "nsTArray.h"
 #include "nsUTF8Utils.h"
 
-using mozilla::IsASCII;
-
-/**
- * Fallback implementation for finding the first non-ASCII character in a
- * UTF-16 string.
- */
-static inline int32_t
-FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
-{
-  typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
-  const size_t kMask = p::mask();
-  const uintptr_t kAlignMask = p::alignMask();
-  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
-
-  const char16_t* idx = aBegin;
-
-  // Align ourselves to a word boundary.
-  for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  // Check one word at a time.
-  const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
-  for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
-    const size_t word = *reinterpret_cast<const size_t*>(idx);
-    if (word & kMask) {
-      return idx - aBegin;
-    }
-  }
-
-  // Take care of the remainder one character at a time.
-  for (; idx != aEnd; idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  return -1;
-}
-
-/*
- * This function returns -1 if all characters in str are ASCII characters.
- * Otherwise, it returns a value less than or equal to the index of the first
- * ASCII character in str. For example, if first non-ASCII character is at
- * position 25, it may return 25, 24, or 16. But it guarantees
- * there are only ASCII characters before returned value.
- */
-static inline int32_t
-FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
-{
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-  if (mozilla::supports_sse2()) {
-    return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
-  }
-#endif
-
-  return FirstNonASCIIUnvectorized(aBegin, aEnd);
-}
-
-void
-LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
-{
-  aDest.Truncate();
-  LossyAppendUTF16toASCII(aSource, aDest);
-}
-
-void
-CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  if (!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible)) {
-    // Note that this may wildly underestimate the allocation that failed, as
-    // we report the length of aSource as UTF-16 instead of UTF-8.
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
-                 const mozilla::fallible_t& aFallible)
-{
-  aDest.Truncate();
-  return AppendASCIItoUTF16(aSource, aDest, aFallible);
-}
-
-void
-LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
-{
-  aDest.Truncate();
-  if (aSource) {
-    LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
-  }
-}
-
-void
-CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
-{
-  aDest.Truncate();
-  if (aSource) {
-    AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
-  }
-}
-
-void
-CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
-{
-  if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
-    // Note that this may wildly underestimate the allocation that failed, as
-    // we report the length of aSource as UTF-16 instead of UTF-8.
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
-                const mozilla::fallible_t& aFallible)
-{
-  aDest.Truncate();
-  if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) {
-    return false;
-  }
-  return true;
-}
-
-void
-CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  aDest.Truncate();
-  AppendUTF8toUTF16(aSource, aDest);
-}
-
-void
-CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
-{
-  aDest.Truncate();
-  AppendUTF16toUTF8(aSource, aDest);
-}
-
-void
-CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
-{
-  aDest.Truncate();
-  AppendUTF8toUTF16(aSource, aDest);
-}
-
-void
-LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
-{
-  uint32_t old_dest_length = aDest.Length();
-  aDest.SetLength(old_dest_length + aSource.Length());
-
-  nsAString::const_iterator fromBegin, fromEnd;
-
-  nsACString::iterator dest;
-  aDest.BeginWriting(dest);
-
-  dest.advance(old_dest_length);
-
-  // right now, this won't work on multi-fragment destinations
-  LossyConvertEncoding16to8 converter(dest.get());
-
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
-}
-
-void
-AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) {
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
-                   const mozilla::fallible_t& aFallible)
-{
-  uint32_t old_dest_length = aDest.Length();
-  if (!aDest.SetLength(old_dest_length + aSource.Length(),
-                       aFallible)) {
-    return false;
-  }
-
-  nsACString::const_iterator fromBegin, fromEnd;
-
-  nsAString::iterator dest;
-  aDest.BeginWriting(dest);
-
-  dest.advance(old_dest_length);
-
-  // right now, this won't work on multi-fragment destinations
-  LossyConvertEncoding8to16 converter(dest.get());
-
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
-  return true;
-}
-
-void
-LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
-{
-  if (aSource) {
-    LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
-  }
-}
-
-bool
-AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible)
-{
-  if (aSource) {
-    return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible);
-  }
-
-  return true;
-}
-
-void
-AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
-{
-  if (aSource) {
-    AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
-  }
-}
-
-void
-AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
-{
-  if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
-    // Note that this may wildly underestimate the allocation that failed, as
-    // we report the length of aSource as UTF-16 instead of UTF-8.
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
-                  const mozilla::fallible_t& aFallible)
-{
-  // At 16 characters analysis showed better performance of both the all ASCII
-  // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
-  // that length.
-  const nsAString::size_type kFastPathMinLength = 16;
-
-  int32_t firstNonASCII = 0;
-  if (aSource.Length() >= kFastPathMinLength) {
-    firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
-  }
-
-  if (firstNonASCII == -1) {
-    // This is all ASCII, we can use the more efficient lossy append.
-    mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
-    new_length += aDest.Length();
-
-    if (!new_length.isValid() ||
-        !aDest.SetCapacity(new_length.value(), aFallible)) {
-      return false;
-    }
-
-    LossyAppendUTF16toASCII(aSource, aDest);
-    return true;
-  }
-
-  nsAString::const_iterator source_start, source_end;
-  CalculateUTF8Size calculator;
-  aSource.BeginReading(source_start);
-  aSource.EndReading(source_end);
-
-  // Skip the characters that we know are single byte.
-  source_start.advance(firstNonASCII);
-
-  copy_string(source_start,
-              source_end, calculator);
-
-  // Include the ASCII characters that were skipped in the count.
-  size_t count = calculator.Size() + firstNonASCII;
-
-  if (count) {
-    auto old_dest_length = aDest.Length();
-    // Grow the buffer if we need to.
-    mozilla::CheckedInt<nsACString::size_type> new_length(count);
-    new_length += old_dest_length;
-
-    if (!new_length.isValid() ||
-        !aDest.SetLength(new_length.value(), aFallible)) {
-      return false;
-    }
-
-    // All ready? Time to convert
-
-    nsAString::const_iterator ascii_end;
-    aSource.BeginReading(ascii_end);
-
-    if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
-      // Use the more efficient lossy converter for the ASCII portion.
-      LossyConvertEncoding16to8 lossy_converter(
-          aDest.BeginWriting() + old_dest_length);
-      nsAString::const_iterator ascii_start;
-      aSource.BeginReading(ascii_start);
-      ascii_end.advance(firstNonASCII);
-
-      copy_string(ascii_start, ascii_end, lossy_converter);
-    } else {
-      // Not using the lossy shortcut, we need to include the leading ASCII
-      // chars.
-      firstNonASCII = 0;
-    }
-
-    ConvertUTF16toUTF8 converter(
-        aDest.BeginWriting() + old_dest_length + firstNonASCII);
-    copy_string(ascii_end,
-                aSource.EndReading(source_end), converter);
-
-    NS_ASSERTION(converter.Size() == count - firstNonASCII,
-                 "Unexpected disparity between CalculateUTF8Size and "
-                 "ConvertUTF16toUTF8");
-  }
-
-  return true;
-}
-
-void
-AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
-{
-  if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) {
-    aDest.AllocFailed(aDest.Length() + aSource.Length());
-  }
-}
-
-bool
-AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
-                  const mozilla::fallible_t& aFallible)
-{
-  nsACString::const_iterator source_start, source_end;
-  CalculateUTF8Length calculator;
-  copy_string(aSource.BeginReading(source_start),
-              aSource.EndReading(source_end), calculator);
-
-  uint32_t count = calculator.Length();
-
-  // Avoid making the string mutable if we're appending an empty string
-  if (count) {
-    uint32_t old_dest_length = aDest.Length();
-
-    // Grow the buffer if we need to.
-    if (!aDest.SetLength(old_dest_length + count, aFallible)) {
-      return false;
-    }
-
-    // All ready? Time to convert
-
-    ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
-    copy_string(aSource.BeginReading(source_start),
-                aSource.EndReading(source_end), converter);
-
-    NS_ASSERTION(converter.ErrorEncountered() ||
-                 converter.Length() == count,
-                 "CalculateUTF8Length produced the wrong length");
-
-    if (converter.ErrorEncountered()) {
-      NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
-      aDest.SetLength(old_dest_length);
-    }
-  }
-
-  return true;
-}
-
-void
-AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
-{
-  if (aSource) {
-    AppendUTF16toUTF8(nsDependentString(aSource), aDest);
-  }
-}
-
-void
-AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
-{
-  if (aSource) {
-    AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
-  }
-}
-
+using mozilla::MakeSpan;
 
 /**
  * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
  *
  * @param aSource a string you will eventually be making a copy of
  * @return a new buffer (of the type specified by the second parameter) which you must free with |free|.
  *
  */
 template <class FromStringT, class ToCharT>
 inline
 ToCharT*
 AllocateStringCopy(const FromStringT& aSource, ToCharT*)
 {
-  return static_cast<ToCharT*>(moz_xmalloc(
-    (aSource.Length() + 1) * sizeof(ToCharT)));
+  // Can't overflow due to the definition of nsTSubstring<T>::kMaxCapacity
+  return static_cast<ToCharT*>(
+    moz_xmalloc((size_t(aSource.Length()) + 1) * sizeof(ToCharT)));
 }
 
 
 char*
 ToNewCString(const nsAString& aSource)
 {
-  char* result = AllocateStringCopy(aSource, (char*)0);
-  if (!result) {
+  char* dest = AllocateStringCopy(aSource, (char*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsAString::const_iterator fromBegin, fromEnd;
-  LossyConvertEncoding16to8 converter(result);
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter).write_terminator();
-  return result;
+  auto len = aSource.Length();
+  LossyConvertUTF16toLatin1(aSource, MakeSpan(dest, len));
+  dest[len] = 0;
+  return dest;
 }
 
 char*
 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
 {
-  nsAString::const_iterator start, end;
-  CalculateUTF8Size calculator;
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              calculator);
-
-  if (aUTF8Count) {
-    *aUTF8Count = calculator.Size();
+  auto len = aSource.Length();
+  // The uses of this function seem temporary enough that it's not
+  // worthwhile to be fancy about the allocation size. Let's just use
+  // the worst case.
+  // Times 3 plus 2, because ConvertUTF16toUTF8 requires times 3 plus 1 and
+  // then we have the terminator.
+  mozilla::CheckedInt<size_t> destLen(len);
+  destLen *= 3;
+  destLen += 2;
+  if (!destLen.isValid()) {
+    return nullptr;
   }
-
-  char* result = static_cast<char*>
-                 (moz_xmalloc(calculator.Size() + 1));
-  if (!result) {
+  size_t destLenVal = destLen.value();
+  if (destLenVal > UINT32_MAX) {
+    return nullptr;
+  }
+  char* dest = static_cast<char*>(moz_xmalloc(destLenVal));
+  if (!dest) {
     return nullptr;
   }
 
-  ConvertUTF16toUTF8 converter(result);
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              converter).write_terminator();
-  NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
+  size_t written = ConvertUTF16toUTF8(aSource, MakeSpan(dest, destLenVal));
+  dest[written] = 0;
 
-  return result;
+  if (aUTF8Count) {
+    *aUTF8Count = written;
+  }
+
+  return dest;
 }
 
 char*
 ToNewCString(const nsACString& aSource)
 {
   // no conversion needed, just allocate a buffer of the correct length and copy into it
 
-  char* result = AllocateStringCopy(aSource, (char*)0);
-  if (!result) {
+  char* dest = AllocateStringCopy(aSource, (char*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsACString::const_iterator fromBegin, fromEnd;
-  char* toBegin = result;
-  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-               toBegin) = char(0);
-  return result;
+  auto len = aSource.Length();
+  memcpy(dest, aSource.BeginReading(), len * sizeof(char));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 ToNewUnicode(const nsAString& aSource)
 {
   // no conversion needed, just allocate a buffer of the correct length and copy into it
 
-  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
-  if (!result) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsAString::const_iterator fromBegin, fromEnd;
-  char16_t* toBegin = result;
-  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-               toBegin) = char16_t(0);
-  return result;
+  auto len = aSource.Length();
+  memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 ToNewUnicode(const nsACString& aSource)
 {
-  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
-  if (!result) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsACString::const_iterator fromBegin, fromEnd;
-  LossyConvertEncoding8to16 converter(result);
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter).write_terminator();
-  return result;
-}
-
-uint32_t
-CalcUTF8ToUnicodeLength(const nsACString& aSource)
-{
-  nsACString::const_iterator start, end;
-  CalculateUTF8Length calculator;
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              calculator);
-  return calculator.Length();
-}
-
-char16_t*
-UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
-                    uint32_t* aUTF16Count)
-{
-  nsACString::const_iterator start, end;
-  ConvertUTF8toUTF16 converter(aBuffer);
-  copy_string(aSource.BeginReading(start),
-              aSource.EndReading(end),
-              converter).write_terminator();
-  if (aUTF16Count) {
-    *aUTF16Count = converter.Length();
-  }
-  return aBuffer;
+  auto len = aSource.Length();
+  ConvertLatin1toUTF16(aSource, MakeSpan(dest, len));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
 {
-  const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
-  const size_t buffer_size = (length + 1) * sizeof(char16_t);
-  char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
-  if (!buffer) {
+  // Compute length plus one as required by ConvertUTF8toUTF16
+  uint32_t lengthPlusOne = aSource.Length() + 1; // Can't overflow
+
+  mozilla::CheckedInt<size_t> allocLength(lengthPlusOne);
+  // Add space for zero-termination
+  allocLength += 1;
+  // We need UTF-16 units
+  allocLength *= sizeof(char16_t);
+
+  if (!allocLength.isValid()) {
     return nullptr;
   }
 
-  uint32_t copied;
-  UTF8ToUnicodeBuffer(aSource, buffer, &copied);
-  NS_ASSERTION(length == copied, "length mismatch");
+  char16_t* dest = (char16_t*)moz_xmalloc(allocLength.value());
+  if (!dest) {
+    return nullptr;
+  }
+
+  size_t written = ConvertUTF8toUTF16(aSource, MakeSpan(dest, lengthPlusOne));
+  dest[written] = 0;
 
   if (aUTF16Count) {
-    *aUTF16Count = copied;
+    *aUTF16Count = written;
   }
-  return buffer;
+
+  return dest;
 }
 
 char16_t*
 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
               uint32_t aLength)
 {
-  nsAString::const_iterator fromBegin, fromEnd;
-  char16_t* toBegin = aDest;
-  copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
-              aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
-              toBegin);
+  MOZ_ASSERT(aSrcOffset + aLength <= aSource.Length());
+  memcpy(aDest,
+         aSource.BeginReading() + aSrcOffset,
+         size_t(aLength) * sizeof(char16_t));
   return aDest;
 }
 
 void
-CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
-              const nsAString::const_iterator& aSrcEnd,
-              nsAString& aDest)
-{
-  aDest.SetLength(Distance(aSrcStart, aSrcEnd));
-
-  nsAString::char_iterator dest = aDest.BeginWriting();
-  nsAString::const_iterator fromBegin(aSrcStart);
-
-  copy_string(fromBegin, aSrcEnd, dest);
-}
-
-void
-AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
-                const nsAString::const_iterator& aSrcEnd,
-                nsAString& aDest)
-{
-  uint32_t oldLength = aDest.Length();
-  aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
-
-  nsAString::char_iterator dest = aDest.BeginWriting() + oldLength;
-  nsAString::const_iterator fromBegin(aSrcStart);
-
-  copy_string(fromBegin, aSrcEnd, dest);
-}
-
-bool
-IsASCII(const nsAString& aString)
-{
-  static const char16_t NOT_ASCII = char16_t(~0x007F);
-
-
-  // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
-
-  nsAString::const_iterator iter, done_reading;
-  aString.BeginReading(iter);
-  aString.EndReading(done_reading);
-
-  const char16_t* c = iter.get();
-  const char16_t* end = done_reading.get();
-
-  while (c < end) {
-    if (*c++ & NOT_ASCII) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-/**
- * A character sink for in-place case conversion.
- */
-class ConvertToUpperCase
-{
-public:
-  typedef char value_type;
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    char* cp = const_cast<char*>(aSource);
-    const char* end = aSource + aSourceLength;
-    while (cp != end) {
-      char ch = *cp;
-      if (ch >= 'a' && ch <= 'z') {
-        *cp = ch - ('a' - 'A');
-      }
-      ++cp;
-    }
-    return aSourceLength;
-  }
-};
-
-void
 ToUpperCase(nsACString& aCString)
 {
-  ConvertToUpperCase converter;
-  char* start;
-  converter.write(aCString.BeginWriting(start), aCString.Length());
-}
-
-/**
- * A character sink for copying with case conversion.
- */
-class CopyToUpperCase
-{
-public:
-  typedef char value_type;
-
-  explicit CopyToUpperCase(nsACString::iterator& aDestIter,
-                           const nsACString::iterator& aEndIter)
-    : mIter(aDestIter)
-    , mEnd(aEndIter)
-  {
+  char* cp = aCString.BeginWriting();
+  char* end = cp + aCString.Length();
+  while (cp != end) {
+    char ch = *cp;
+    if (ch >= 'a' && ch <= 'z') {
+      *cp = ch - ('a' - 'A');
+    }
+    ++cp;
   }
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
-    char* cp = mIter.get();
-    const char* end = aSource + len;
-    while (aSource != end) {
-      char ch = *aSource;
-      if ((ch >= 'a') && (ch <= 'z')) {
-        *cp = ch - ('a' - 'A');
-      } else {
-        *cp = ch;
-      }
-      ++aSource;
-      ++cp;
-    }
-    mIter.advance(len);
-    return len;
-  }
-
-protected:
-  nsACString::iterator& mIter;
-  const nsACString::iterator& mEnd;
-};
+}
 
 void
 ToUpperCase(const nsACString& aSource, nsACString& aDest)
 {
-  nsACString::const_iterator fromBegin, fromEnd;
-  nsACString::iterator toBegin, toEnd;
   aDest.SetLength(aSource.Length());
-
-  CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
+  const char* src = aSource.BeginReading();
+  const char* end = src + aSource.Length();
+  char* dst = aDest.BeginWriting();
+  while (src != end) {
+    char ch = *src;
+    if (ch >= 'a' && ch <= 'z') {
+      *dst = ch - ('a' - 'A');
+    } else {
+      *dst = ch;
+    }
+    ++src;
+    ++dst;
+  }
 }
 
-/**
- * A character sink for case conversion.
- */
-class ConvertToLowerCase
-{
-public:
-  typedef char value_type;
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    char* cp = const_cast<char*>(aSource);
-    const char* end = aSource + aSourceLength;
-    while (cp != end) {
-      char ch = *cp;
-      if ((ch >= 'A') && (ch <= 'Z')) {
-        *cp = ch + ('a' - 'A');
-      }
-      ++cp;
-    }
-    return aSourceLength;
-  }
-};
-
 void
 ToLowerCase(nsACString& aCString)
 {
-  ConvertToLowerCase converter;
-  char* start;
-  converter.write(aCString.BeginWriting(start), aCString.Length());
-}
-
-/**
- * A character sink for copying with case conversion.
- */
-class CopyToLowerCase
-{
-public:
-  typedef char value_type;
-
-  explicit CopyToLowerCase(nsACString::iterator& aDestIter,
-                           const nsACString::iterator& aEndIter)
-    : mIter(aDestIter)
-    , mEnd(aEndIter)
-  {
+  char* cp = aCString.BeginWriting();
+  char* end = cp + aCString.Length();
+  while (cp != end) {
+    char ch = *cp;
+    if (ch >= 'A' && ch <= 'Z') {
+      *cp = ch + ('a' - 'A');
+    }
+    ++cp;
   }
-
-  uint32_t
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-    uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
-    char* cp = mIter.get();
-    const char* end = aSource + len;
-    while (aSource != end) {
-      char ch = *aSource;
-      if ((ch >= 'A') && (ch <= 'Z')) {
-        *cp = ch + ('a' - 'A');
-      } else {
-        *cp = ch;
-      }
-      ++aSource;
-      ++cp;
-    }
-    mIter.advance(len);
-    return len;
-  }
-
-protected:
-  nsACString::iterator& mIter;
-  const nsACString::iterator& mEnd;
-};
+}
 
 void
 ToLowerCase(const nsACString& aSource, nsACString& aDest)
 {
-  nsACString::const_iterator fromBegin, fromEnd;
-  nsACString::iterator toBegin, toEnd;
   aDest.SetLength(aSource.Length());
-
-  CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter);
+  const char* src = aSource.BeginReading();
+  const char* end = src + aSource.Length();
+  char* dst = aDest.BeginWriting();
+  while (src != end) {
+    char ch = *src;
+    if (ch >= 'A' && ch <= 'Z') {
+      *dst = ch + ('a' - 'A');
+    } else {
+      *dst = ch;
+    }
+    ++src;
+    ++dst;
+  }
 }
 
 bool
 ParseString(const nsACString& aSource, char aDelimiter,
             nsTArray<nsCString>& aArray)
 {
   nsACString::const_iterator start, end;
   aSource.BeginReading(start);
@@ -1178,117 +631,56 @@ VoidCString()
 {
   static const nsCString sNull(mozilla::detail::StringDataFlags::VOIDED);
 
   return sNull;
 }
 
 int32_t
 CompareUTF8toUTF16(const nsACString& aUTF8String,
-                   const nsAString& aUTF16String)
+                   const nsAString& aUTF16String,
+                   bool* aErr)
 {
-  static const uint32_t NOT_ASCII = uint32_t(~0x7F);
-
   const char* u8;
   const char* u8end;
   aUTF8String.BeginReading(u8);
   aUTF8String.EndReading(u8end);
 
   const char16_t* u16;
   const char16_t* u16end;
   aUTF16String.BeginReading(u16);
   aUTF16String.EndReading(u16end);
 
-  while (u8 != u8end && u16 != u16end) {
-    // Cast away the signedness of *u8 to prevent signextension when
-    // converting to uint32_t
-    uint32_t c8_32 = (uint8_t)*u8;
-
-    if (c8_32 & NOT_ASCII) {
-      bool err;
-      c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
-      if (err) {
-        return INT32_MIN;
+  for (;;) {
+    if (u8 == u8end) {
+      if (u16 == u16end) {
+        return 0;
       }
-
-      uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
-      // The above UTF16CharEnumerator::NextChar() calls can
-      // fail, but if it does for anything other than no data to
-      // look at (which can't happen here), it returns the
-      // Unicode replacement character 0xFFFD for the invalid
-      // data they were fed. Ignore that error and treat invalid
-      // UTF16 as 0xFFFD.
-      //
-      // This matches what our UTF16 to UTF8 conversion code
-      // does, and thus a UTF8 string that came from an invalid
-      // UTF16 string will compare equal to the invalid UTF16
-      // string it came from. Same is true for any other UTF16
-      // string differs only in the invalid part of the string.
-
-      if (c8_32 != c16_32) {
-        return c8_32 < c16_32 ? -1 : 1;
-      }
-    } else {
-      if (c8_32 != *u16) {
-        return c8_32 > *u16 ? 1 : -1;
-      }
-
-      ++u8;
-      ++u16;
+      return -1;
+    }
+    if (u16 == u16end) {
+      return 1;
     }
-  }
-
-  if (u8 != u8end) {
-    // We get to the end of the UTF16 string, but no to the end of
-    // the UTF8 string. The UTF8 string is longer than the UTF16
-    // string
-
+    // No need for ASCII optimization, since both NextChar()
+    // calls get inlined.
+    uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr);
+    uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr);
+    if (scalar16 == scalar8) {
+      continue;
+    }
+    if (scalar8 < scalar16) {
+      return -1;
+    }
     return 1;
   }
-
-  if (u16 != u16end) {
-    // We get to the end of the UTF8 string, but no to the end of
-    // the UTF16 string. The UTF16 string is longer than the UTF8
-    // string
-
-    return -1;
-  }
-
-  // The two strings match.
-
-  return 0;
 }
 
 void
 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
 {
   NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
   if (IS_IN_BMP(aSource)) {
     aDest.Append(char16_t(aSource));
   } else {
     aDest.Append(H_SURROGATE(aSource));
     aDest.Append(L_SURROGATE(aSource));
   }
 }
-
-extern "C" {
-
-void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
-{
-  AppendUTF16toUTF8(*aOther, *aThis);
-}
-
-void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
-{
-  AppendUTF8toUTF16(*aOther, *aThis);
-}
-
-bool Gecko_FallibleAppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
-{
-  return AppendUTF16toUTF8(*aOther, *aThis, mozilla::fallible);
-}
-
-bool Gecko_FallibleAppendUTF8toString(nsAString* aThis, const nsACString* aOther)
-{
-  return AppendUTF8toUTF16(*aOther, *aThis, mozilla::fallible);
-}
-
-}
--- a/xpcom/string/nsReadableUtils.h
+++ b/xpcom/string/nsReadableUtils.h
@@ -15,107 +15,391 @@
 
 #include "mozilla/Assertions.h"
 #include "nsAString.h"
 
 #include "nsTArrayForwardDeclare.h"
 
 // Can't include mozilla/Encoding.h here
 extern "C" {
-  size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
-  size_t encoding_ascii_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+  size_t
+  encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_ascii(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_basic_latin(char16_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_utf8_latin1(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_str_latin1(uint8_t const* buffer, size_t buffer_len);
+
+  bool
+  encoding_mem_is_utf16_latin1(char16_t const* buffer, size_t buffer_len);
+
+  void
+  encoding_mem_convert_utf16_to_latin1_lossy(const char16_t* src,
+                                             size_t src_len,
+                                             char* dst,
+                                             size_t dst_len);
+
+  size_t
+  encoding_mem_convert_utf8_to_latin1_lossy(const char* src,
+                                            size_t src_len,
+                                            char* dst,
+                                            size_t dst_len);
+
+  void
+  encoding_mem_convert_latin1_to_utf16(const char* src,
+                                       size_t src_len,
+                                       char16_t* dst,
+                                       size_t dst_len);
+
+  size_t
+  encoding_mem_convert_utf16_to_utf8(const char16_t* src,
+                                     size_t src_len,
+                                     char* dst,
+                                     size_t dst_len);
+
+  size_t
+  encoding_mem_convert_utf8_to_utf16(const char* src,
+                                     size_t src_len,
+                                     char16_t* dst,
+                                     size_t dst_len);
+}
+
+// From the nsstring crate
+extern "C" {
+  bool
+  nsstring_fallible_append_utf8_impl(nsAString* aThis,
+                                     const char* aOther,
+                                     size_t aOtherLen,
+                                     size_t aOldLen);
+
+  bool
+  nsstring_fallible_append_latin1_impl(nsAString* aThis,
+                                       const char* aOther,
+                                       size_t aOtherLen,
+                                       size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis,
+                                               const char16_t*,
+                                               size_t aOtherLen,
+                                               size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis,
+                                                       const char16_t*,
+                                                       size_t aOtherLen,
+                                                       size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_utf8_to_latin1_lossy_check(nsACString* aThis,
+                                                       const nsACString* aOther,
+                                                       size_t aOldLen);
+
+  bool
+  nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis,
+                                                 const nsACString* aOther,
+                                                 size_t aOldLen);
+}
+
+/**
+ * If all the code points in the input are below U+0100, converts to Latin1,
+ * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
+ * there are code points above U+00FF, asserts in debug builds and produces
+ * garbage in release builds. The nature of the garbage depends on the CPU
+ * architecture and must not be relied upon.
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline void
+LossyConvertUTF16toLatin1(mozilla::Span<const char16_t> aSource,
+                          mozilla::Span<char> aDest)
+{
+  encoding_mem_convert_utf16_to_latin1_lossy(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * If all the code points in the input are below U+0100, converts to Latin1,
+ * i.e. unsigned byte value is Unicode scalar value; not windows-1252. If
+ * there are code points above U+00FF, asserts in debug builds and produces
+ * garbage in release builds. The nature of the garbage may depend on the CPU
+ * architecture and must not be relied upon.
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline size_t
+LossyConvertUTF8toLatin1(mozilla::Span<const char> aSource,
+                         mozilla::Span<char> aDest)
+{
+  return encoding_mem_convert_utf8_to_latin1_lossy(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Interprets unsigned byte value as Unicode scalar value (i.e. not
+ * windows-1252!).
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline void
+ConvertLatin1toUTF16(mozilla::Span<const char> aSource,
+                     mozilla::Span<char16_t> aDest)
+{
+  encoding_mem_convert_latin1_to_utf16(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Lone surrogates are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The length of aDest must be at least the length of aSource times three
+ * _plus one_.
+ *
+ * Returns the number of code units written.
+ */
+inline size_t
+ConvertUTF16toUTF8(mozilla::Span<const char16_t> aSource,
+                   mozilla::Span<char> aDest)
+{
+  return encoding_mem_convert_utf16_to_utf8(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The length of aDest must be at least one greater than the length of aSource.
+ *
+ * Returns the number of code units written.
+ */
+inline size_t
+ConvertUTF8toUTF16(mozilla::Span<const char> aSource,
+                   mozilla::Span<char16_t> aDest)
+{
+  return encoding_mem_convert_utf8_to_utf16(
+    aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
 }
 
 inline size_t
 Distance(const nsReadingIterator<char16_t>& aStart,
          const nsReadingIterator<char16_t>& aEnd)
 {
   MOZ_ASSERT(aStart.get() <= aEnd.get());
   return static_cast<size_t>(aEnd.get() - aStart.get());
 }
+
 inline size_t
 Distance(const nsReadingIterator<char>& aStart,
          const nsReadingIterator<char>& aEnd)
 {
   MOZ_ASSERT(aStart.get() <= aEnd.get());
   return static_cast<size_t>(aEnd.get() - aStart.get());
 }
 
-void LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest);
-void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
-MOZ_MUST_USE bool CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
-                                   const mozilla::fallible_t&);
+// UTF-8 to UTF-16
+// Invalid UTF-8 byte sequences are replaced with the REPLACEMENT CHARACTER.
+
+inline MOZ_MUST_USE bool
+CopyUTF8toUTF16(mozilla::Span<const char> aSource,
+                nsAString& aDest,
+                const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest);
-void CopyASCIItoUTF16(const char* aSource, nsAString& aDest);
+inline void
+CopyUTF8toUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!CopyUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+AppendUTF8toUTF16(mozilla::Span<const char> aSource,
+                  nsAString& aDest,
+                  const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+AppendUTF8toUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
-void CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
-MOZ_MUST_USE bool CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
-                                  const mozilla::fallible_t&);
-void CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
+// Latin1 to UTF-16
+// Interpret each incoming unsigned byte value as a Unicode scalar value (not
+// windows-1252!). The function names say "ASCII" instead of "Latin1" for
+// legacy reasons.
+
+inline MOZ_MUST_USE bool
+CopyASCIItoUTF16(mozilla::Span<const char> aSource,
+                 nsAString& aDest,
+                 const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_latin1_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest);
-void CopyUTF8toUTF16(const char* aSource, nsAString& aDest);
+inline void
+CopyASCIItoUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!CopyASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+AppendASCIItoUTF16(mozilla::Span<const char> aSource,
+                   nsAString& aDest,
+                   const mozilla::fallible_t&)
+{
+  return nsstring_fallible_append_latin1_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+AppendASCIItoUTF16(mozilla::Span<const char> aSource, nsAString& aDest)
+{
+  if (MOZ_UNLIKELY(!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
-void LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest);
-void AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
-MOZ_MUST_USE bool AppendASCIItoUTF16(const nsACString& aSource,
-                                     nsAString& aDest,
-                                     const mozilla::fallible_t&);
+// UTF-16 to UTF-8
+// Unpaired surrogates are replaced with the REPLACEMENT CHARACTER.
+
+inline MOZ_MUST_USE bool
+CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource,
+                nsACString& aDest,
+                const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest);
-MOZ_MUST_USE bool AppendASCIItoUTF16(const char* aSource,
-                                     nsAString& aDest,
-                                     const mozilla::fallible_t&);
-void AppendASCIItoUTF16(const char* aSource, nsAString& aDest);
+inline void
+CopyUTF16toUTF8(mozilla::Span<const char16_t> aSource, nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource,
+                  nsACString& aDest,
+                  const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_utf8_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+AppendUTF16toUTF8(mozilla::Span<const char16_t> aSource, nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
-void AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
-MOZ_MUST_USE bool AppendUTF16toUTF8(const nsAString& aSource,
-                                    nsACString& aDest,
-                                    const mozilla::fallible_t&);
-void AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
-MOZ_MUST_USE bool AppendUTF8toUTF16(const nsACString& aSource,
-                                    nsAString& aDest,
-                                    const mozilla::fallible_t&);
+// UTF-16 to Latin1
+// If all code points in the input are below U+0100, represents each scalar
+// value as an unsigned byte. (This is not windows-1252!) If there are code
+// points above U+00FF, asserts in debug builds and memory-safely produces
+// garbage in release builds. The nature of the garbage may differ based on
+// CPU architecture and must not be relied upon. The names say "ASCII" instead
+// of "Latin1" for legacy reasons.
+
+inline MOZ_MUST_USE bool
+LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource,
+                      nsACString& aDest,
+                      const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
+    &aDest, aSource.Elements(), aSource.Length(), 0);
+}
 
-void AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest);
-void AppendUTF8toUTF16(const char* aSource, nsAString& aDest);
+inline void
+LossyCopyUTF16toASCII(mozilla::Span<const char16_t> aSource, nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(!LossyCopyUTF16toASCII(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aSource.Length());
+  }
+}
+
+inline MOZ_MUST_USE bool
+LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
+                        nsACString& aDest,
+                        const mozilla::fallible_t&)
+{
+  return nscstring_fallible_append_utf16_to_latin1_lossy_impl(
+    &aDest, aSource.Elements(), aSource.Length(), aDest.Length());
+}
+
+inline void
+LossyAppendUTF16toASCII(mozilla::Span<const char16_t> aSource,
+                        nsACString& aDest)
+{
+  if (MOZ_UNLIKELY(
+        !LossyAppendUTF16toASCII(aSource, aDest, mozilla::fallible))) {
+    aDest.AllocFailed(aDest.Length() + aSource.Length());
+  }
+}
 
 /**
  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with |free|.
- * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
- * This conversion is not well defined; but it reproduces legacy string behavior.
- * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
+ * Performs a conversion with LossyConvertUTF16toLatin1() writing into the
+ * newly-allocated buffer.
+ *
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
  *
  * @param aSource a 16-bit wide string
  * @return a new |char| buffer you must free with |free|.
  */
 char* ToNewCString(const nsAString& aSource);
 
-
 /**
  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with |free|.
- * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
+ *
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
  *
  * @param aSource an 8-bit wide string
  * @return a new |char| buffer you must free with |free|.
  */
 char* ToNewCString(const nsACString& aSource);
 
 /**
  * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with
  * |free|.
- * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
- * copying |aSource| to your new buffer.
+ * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
+ * while copying |aSource| to your new buffer; unpaired surrogates are
+ * replaced with the REPLACEMENT CHARACTER.
+ *
  * The new buffer is zero-terminated, but that may not help you if |aSource|
  * contains embedded nulls.
  *
  * @param aSource a UTF-16 string (made of char16_t's)
  * @param aUTF8Count the number of 8-bit units that was returned
  * @return a new |char| buffer you must free with |free|.
  */
 
@@ -123,79 +407,56 @@ char* ToNewUTF8String(const nsAString& a
 
 
 /**
  * Returns a new |char16_t| buffer containing a zero-terminated copy of
  * |aSource|.
  *
  * Allocates and returns a new |char16_t| buffer which you must free with
  * |free|.
+ *
  * The new buffer is zero-terminated, but that may not help you if |aSource|
  * contains embedded nulls.
  *
  * @param aSource a UTF-16 string
  * @return a new |char16_t| buffer you must free with |free|.
  */
 char16_t* ToNewUnicode(const nsAString& aSource);
 
 
 /**
- * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
+ * Returns a new |char16_t| buffer containing a zero-terminated copy of
+ * |aSource|.
+ *
+ * Allocates and returns a new |char16_t| buffer which you must free with
+ * |free|.
  *
- * Allocates and returns a new |char16_t| buffer which you must free with |free|.
- * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
- * This conversion is not well defined; but it reproduces legacy string behavior.
- * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
+ * Performs an encoding conversion by 0-padding 8-bit wide characters up to
+ * 16-bits wide (i.e. Latin1 to UTF-16 conversion) while copying |aSource|
+ * to your new buffer.
  *
- * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
+ *
+ * @param aSource a Latin1 string
  * @return a new |char16_t| buffer you must free with |free|.
  */
 char16_t* ToNewUnicode(const nsACString& aSource);
 
 /**
- * Returns the required length for a char16_t buffer holding
- * a copy of aSource, using UTF-8 to UTF-16 conversion.
- * The length does NOT include any space for zero-termination.
- *
- * @param aSource an 8-bit wide string, UTF-8 encoded
- * @return length of UTF-16 encoded string copy, not zero-terminated
- */
-uint32_t CalcUTF8ToUnicodeLength(const nsACString& aSource);
-
-/**
- * Copies the source string into the specified buffer, converting UTF-8 to
- * UTF-16 in the process. The conversion is well defined for valid UTF-8
- * strings.
- * The copied string will be zero-terminated! Any embedded nulls will be
- * copied nonetheless. It is the caller's responsiblity to ensure the buffer
- * is large enough to hold the string copy plus one char16_t for
- * zero-termination!
- *
- * @see CalcUTF8ToUnicodeLength( const nsACString& )
- * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
- *
- * @param aSource an 8-bit wide string, UTF-8 encoded
- * @param aBuffer the buffer holding the converted string copy
- * @param aUTF16Count receiving optionally the number of 16-bit units that
- *                    were copied
- * @return aBuffer pointer, for convenience
- */
-char16_t* UTF8ToUnicodeBuffer(const nsACString& aSource,
-                              char16_t* aBuffer,
-                              uint32_t* aUTF16Count = nullptr);
-
-/**
  * Returns a new |char16_t| buffer containing a zero-terminated copy
  * of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with
  * |free|.  Performs an encoding conversion from UTF-8 to UTF-16
- * while copying |aSource| to your new buffer.  This conversion is well defined
- * for a valid UTF-8 string.  The new buffer is zero-terminated, but that
- * may not help you if |aSource| contains embedded nulls.
+ * while copying |aSource| to your new buffer.  Malformed byte sequences
+ * are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The new buffer is zero-terminated, but that may not help you if |aSource|
+ * contains embedded nulls.
  *
  * @param aSource an 8-bit wide string, UTF-8 encoded
  * @param aUTF16Count the number of 16-bit units that was returned
  * @return a new |char16_t| buffer you must free with |free|.
  *         (UTF-16 encoded)
  */
 char16_t* UTF8ToNewUnicode(const nsACString& aSource,
                            uint32_t* aUTF16Count = nullptr);
@@ -212,99 +473,163 @@ char16_t* UTF8ToNewUnicode(const nsACStr
  * @param aLength the number of 16-bit code units to copy
  * @return pointer to destination buffer - identical to |aDest|
  */
 char16_t* CopyUnicodeTo(const nsAString& aSource,
                         uint32_t aSrcOffset,
                         char16_t* aDest,
                         uint32_t aLength);
 
-
 /**
- * Copies 16-bit characters between iterators |aSrcStart| and
- * |aSrcEnd| to the writable string |aDest|. Similar to the
- * |nsString::Mid| method.
- *
- * After this operation |aDest| is not null terminated.
- *
- * @param aSrcStart start source iterator
- * @param aSrcEnd end source iterator
- * @param aDest destination for the copy
- */
-void CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
-                   const nsAString::const_iterator& aSrcEnd,
-                   nsAString& aDest);
-
-/**
- * Appends 16-bit characters between iterators |aSrcStart| and
- * |aSrcEnd| to the writable string |aDest|.
- *
- * After this operation |aDest| is not null terminated.
- *
- * @param aSrcStart start source iterator
- * @param aSrcEnd end source iterator
- * @param aDest destination for the copy
- */
-void AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
-                     const nsAString::const_iterator& aSrcEnd,
-                     nsAString& aDest);
-
-/**
- * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
+ * Returns |true| if |aString| contains only ASCII characters, that is,
+ * characters in the range (0x00, 0x7F).
  *
  * @param aString a 16-bit wide string to scan
  */
-bool IsASCII(const nsAString& aString);
+inline bool
+IsASCII(mozilla::Span<const char16_t> aString)
+{
+  size_t length = aString.Length();
+  const char16_t* ptr = aString.Elements();
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    char16_t accu = 0;
+    for (size_t i = 0; i < length; i++) {
+      accu |= ptr[i];
+    }
+    return accu < 0x80U;
+  }
+  return encoding_mem_is_basic_latin(ptr, length);
+}
 
 /**
- * Returns |true| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
+ * Returns |true| if |aString| contains only ASCII characters, that is,
+ * characters in the range (0x00, 0x7F).
  *
  * @param aString a 8-bit wide string to scan
  */
-inline bool IsASCII(const nsACString& aString)
+inline bool
+IsASCII(mozilla::Span<const char> aString)
 {
   size_t length = aString.Length();
-  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.BeginReading());
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
   // For short strings, calling into Rust is a pessimization, and the SIMD
-  // code won't have a chance to kick in anyway. Additionally, handling the
-  // case of the empty string here makes null-checking ptr unnecessary.
-  // (Passing nullptr to Rust would technically be UB.)
+  // code won't have a chance to kick in anyway.
   if (length < 16) {
-    size_t accu = 0;
+    uint8_t accu = 0;
+    for (size_t i = 0; i < length; i++) {
+      accu |= ptr[i];
+    }
+    return accu < 0x80U;
+  }
+  return encoding_mem_is_ascii(ptr, length);
+}
+
+/**
+ * Returns |true| if |aString| contains only Latin1 characters, that is,
+ * characters in the range (U+0000, U+00FF).
+ *
+ * @param aString a potentially-invalid UTF-16 string to scan
+ */
+inline bool
+IsUTF16Latin1(mozilla::Span<const char16_t> aString)
+{
+  size_t length = aString.Length();
+  const char16_t* ptr = aString.Elements();
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    char16_t accu = 0;
     for (size_t i = 0; i < length; i++) {
       accu |= ptr[i];
     }
-    return accu < 0x80;
+    return accu < 0x100U;
+  }
+  return encoding_mem_is_utf16_latin1(ptr, length);
+}
+
+/**
+ * Returns |true| if |aString| contains only Latin1 characters, that is,
+ * characters in the range (U+0000, U+00FF).
+ *
+ * If you know that the argument is always absolutely guaranteed to be valid
+ * UTF-8, use the faster UnsafeIsValidUTF8Latin1() instead.
+ *
+ * @param aString potentially-invalid UTF-8 string to scan
+ */
+inline bool
+IsUTF8Latin1(mozilla::Span<const char> aString)
+{
+  size_t length = aString.Length();
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    for (size_t i = 0; i < length; i++) {
+      if (ptr[i] >= 0x80U) {
+        ptr += i;
+        length -= i;
+        goto end;
+      }
+    }
+    return true;
   }
-  // This is not quite optimal, because it's not fail-fast when the by-register
-  // check already finds non-ASCII. Also, input to this function is almost
-  // always ASCII, so even the by-register check wouldn't need to be fail-fast
-  // and could be more like the loop above.
-  return length == encoding_ascii_valid_up_to(ptr, length);
+end:
+  return encoding_mem_is_utf8_latin1(ptr, length);
+}
+
+/**
+ * Returns |true| if |aString| contains only Latin1 characters, that is,
+ * characters in the range (U+0000, U+00FF).
+ *
+ * The argument MUST be valid UTF-8. If you are at all unsure, use IsUTF8Latin1
+ * instead!
+ *
+ * @param aString known-valid UTF-8 string to scan
+ */
+inline bool
+UnsafeIsValidUTF8Latin1(mozilla::Span<const char> aString)
+{
+  size_t length = aString.Length();
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
+  // For short strings, calling into Rust is a pessimization, and the SIMD
+  // code won't have a chance to kick in anyway.
+  if (length < 16) {
+    for (size_t i = 0; i < length; i++) {
+      if (ptr[i] >= 0x80U) {
+        ptr += i;
+        length -= i;
+        goto end;
+      }
+    }
+    return true;
+  }
+end:
+  return encoding_mem_is_str_latin1(ptr, length);
 }
 
 /**
  * Returns |true| if |aString| is a valid UTF-8 string.
  *
  * Note that this doesn't check whether the string might look like a valid
  * string in another encoding, too, e.g. ISO-2022-JP.
  *
  * @param aString an 8-bit wide string to scan
  */
-inline bool IsUTF8(const nsACString& aString)
+inline bool
+IsUTF8(mozilla::Span<const char> aString)
 {
   size_t length = aString.Length();
-  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.BeginReading());
+  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(aString.Elements());
   // For short strings, calling into Rust is a pessimization, and the SIMD
-  // code won't have a chance to kick in anyway. Additionally, handling the
-  // case of the empty string here makes null-checking ptr unnecessary.
-  // (Passing nullptr to Rust would technically be UB.)
+  // code won't have a chance to kick in anyway.
   if (length < 16) {
     for (size_t i = 0; i < length; i++) {
-      if (ptr[i] >= 0x80) {
+      if (ptr[i] >= 0x80U) {
         ptr += i;
         length -= i;
         goto end;
       }
     }
     return true;
   }
   end:
@@ -328,22 +653,26 @@ void ToLowerCase(nsACString&);
 /**
  * Converts case from string aSource to aDest.
  */
 void ToUpperCase(const nsACString& aSource, nsACString& aDest);
 
 void ToLowerCase(const nsACString& aSource, nsACString& aDest);
 
 /**
- * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
+ * Finds the leftmost occurrence of |aPattern|, if any in the range
+ * |aSearchStart|..|aSearchEnd|.
  *
- * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
- * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
+ * Returns |true| if a match was found, and adjusts |aSearchStart| and
+ * |aSearchEnd| to point to the match.  If no match was found, returns |false|
+ * and makes |aSearchStart == aSearchEnd|.
  *
- * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
+ * Currently, this is equivalent to the O(m*n) implementation previously on
+ * |ns[C]String|.
+ *
  * If we need something faster, then we can implement that later.
  */
 
 bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
                     nsAString::const_iterator&,
                     const nsStringComparator& = nsDefaultStringComparator());
 bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
                     nsACString::const_iterator&,
@@ -373,19 +702,19 @@ FindInReadable(const nsACString& aPatter
 
 
 bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
                                    nsACString::const_iterator&,
                                    nsACString::const_iterator&);
 
 /**
  * Finds the rightmost occurrence of |aPattern|
- * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
- * point to the match.  If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
- *
+ * Returns |true| if a match was found, and adjusts |aSearchStart| and
+ * |aSearchEnd| to point to the match.  If no match was found, returns |false|
+ * and makes |aSearchStart == aSearchEnd|.
  */
 bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
                      nsAString::const_iterator&,
                      const nsStringComparator& = nsDefaultStringComparator());
 bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
                      nsACString::const_iterator&,
                      const nsCStringComparator& = nsDefaultCStringComparator());
 
@@ -425,25 +754,28 @@ bool StringEndsWith(const nsACString& aS
 
 const nsString& EmptyString();
 const nsCString& EmptyCString();
 
 const nsString& VoidString();
 const nsCString& VoidCString();
 
 /**
-* Compare a UTF-8 string to an UTF-16 string.
-*
-* Returns 0 if the strings are equal, -1 if aUTF8String is less
-* than aUTF16Count, and 1 in the reverse case.  In case of fatal
-* error (eg the strings are not valid UTF8 and UTF16 respectively),
-* this method will return INT32_MIN.
-*/
-int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
-                           const nsAString& aUTF16String);
+ * Compare a UTF-8 string to a UTF-16 string.
+ *
+ * Returns 0 if the strings are equal, -1 if aUTF8String is less
+ * than aUTF16String, and 1 in the reverse case. Errors are replaced
+ * with U+FFFD and then the U+FFFD is compared as if it had occurred
+ * in the input. If aErr is not nullptr, *aErr is set to true if
+ * either string had malformed sequences.
+ */
+int32_t
+CompareUTF8toUTF16(const nsACString& aUTF8String,
+                   const nsAString& aUTF16String,
+                   bool* aErr = nullptr);
 
 void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
 
 template<class T>
 inline bool
 EnsureStringLength(T& aStr, uint32_t aLen)
 {
   aStr.SetLength(aLen);
--- a/xpcom/string/nsSubstring.cpp
+++ b/xpcom/string/nsSubstring.cpp
@@ -454,16 +454,24 @@ char* Gecko_BeginWritingCString(nsACStri
   return aThis->BeginWriting();
 }
 
 char* Gecko_FallibleBeginWritingCString(nsACString* aThis)
 {
   return aThis->BeginWriting(mozilla::fallible);
 }
 
+uint32_t
+Gecko_StartBulkWriteCString(nsACString* aThis,
+                            uint32_t aCapacity,
+                            uint32_t aUnitsToPreserve)
+{
+  return aThis->StartBulkWrite(aCapacity, aUnitsToPreserve);
+}
+
 void Gecko_FinalizeString(nsAString* aThis)
 {
   aThis->~nsAString();
 }
 
 void Gecko_AssignString(nsAString* aThis, const nsAString* aOther)
 {
   aThis->Assign(*aOther);
@@ -509,9 +517,17 @@ char16_t* Gecko_BeginWritingString(nsASt
   return aThis->BeginWriting();
 }
 
 char16_t* Gecko_FallibleBeginWritingString(nsAString* aThis)
 {
   return aThis->BeginWriting(mozilla::fallible);
 }
 
+uint32_t
+Gecko_StartBulkWriteString(nsAString* aThis,
+                           uint32_t aCapacity,
+                           uint32_t aUnitsToPreserve)
+{
+  return aThis->StartBulkWrite(aCapacity, aUnitsToPreserve);
+}
+
 } // extern "C"
--- a/xpcom/string/nsTStringObsolete.cpp
+++ b/xpcom/string/nsTStringObsolete.cpp
@@ -320,24 +320,21 @@ nsTString<T>::ReplaceSubstring(const sel
                "We should have the correct non-matching segment.");
     return true;
   }
 
   // Make sure that we can mutate our buffer.
   // Note that we always allocate at least an this->mLength sized buffer, because the
   // rest of the algorithm relies on having access to all of the original
   // string.  In other words, we over-allocate in the shrinking case.
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!this->MutatePrep(XPCOM_MAX(this->mLength, newLength.value()), &oldData, &oldFlags))
+  uint32_t oldLen = this->mLength;
+  uint32_t capacity =
+    this->StartBulkWrite(XPCOM_MAX(oldLen, newLength.value()), oldLen);
+  if (capacity == UINT32_MAX) {
     return false;
-  if (oldData) {
-    // Copy all of the old data to the new buffer.
-    char_traits::copy(this->mData, oldData, this->mLength);
-    ::ReleaseData(oldData, oldFlags);
   }
 
   if (aTarget.Length() >= aNewValue.Length()) {
     // In the shrinking case, start filling the buffer from the beginning.
     const uint32_t delta = (aTarget.Length() - aNewValue.Length());
     for (i = 1; i < nonMatching.Length(); ++i) {
       // When we move the i'th non-matching segment into position, we need to
       // account for the characters deleted by the previous |i| replacements by
@@ -365,18 +362,17 @@ nsTString<T>::ReplaceSubstring(const sel
       // Write the i'th replacement immediately before the new i'th non-matching
       // segment.
       char_traits::copy(destinationSegmentPtr - aNewValue.Length(),
                         aNewValue.Data(), aNewValue.Length());
     }
   }
 
   // Adjust the length and make sure the string is null terminated.
-  this->mLength = newLength.value();
-  this->mData[this->mLength] = char_type(0);
+  this->FinishBulkWrite(newLength.value());
 
   return true;
 }
 
 /**
  * nsTString::Trim
  */
 
--- a/xpcom/string/nsTSubstring.cpp
+++ b/xpcom/string/nsTSubstring.cpp
@@ -42,55 +42,82 @@ nsTSubstring<T>::nsTSubstring(char_type*
  */
 template <typename T>
 inline const nsTAutoString<T>*
 AsAutoString(const nsTSubstring<T>* aStr)
 {
   return static_cast<const nsTAutoString<T>*>(aStr);
 }
 
-/**
- * this function is called to prepare mData for writing.  the given capacity
- * indicates the required minimum storage size for mData, in sizeof(char_type)
- * increments.  this function returns true if the operation succeeds.  it also
- * returns the old data and old flags members if mData is newly allocated.
- * the old data must be released by the caller.
- */
-template <typename T>
-bool
-nsTSubstring<T>::MutatePrep(size_type aCapacity, char_type** aOldData,
-                            DataFlags* aOldDataFlags)
+template<typename T>
+uint32_t
+nsTSubstring<T>::StartBulkWrite(size_type aCapacity,
+                                size_type aPrefixToPreserve,
+                                bool aAllowShrinking,
+                                size_type aSuffixLength,
+                                size_type aOldSuffixStart,
+                                size_type aNewSuffixStart)
 {
-  // initialize to no old data
-  *aOldData = nullptr;
-  *aOldDataFlags = DataFlags(0);
+  // Note! Capacity does not include room for the terminating null char.
+
+  MOZ_ASSERT(aPrefixToPreserve <= aCapacity,
+             "Requested preservation of an overlong prefix.");
+  MOZ_ASSERT(aNewSuffixStart + aSuffixLength <= aCapacity,
+             "Requesed move of suffix to out-of-bounds location.");
+  // Can't assert aOldSuffixStart, because mLength may not be valid anymore,
+  // since this method allows itself to be called more than once.
 
+  // If zero capacity is requested, set the string to the special empty
+  // string.
+  if (MOZ_UNLIKELY(!aCapacity)) {
+    ::ReleaseData(this->mData, this->mDataFlags);
+    SetToEmptyBuffer();
+    this->mDataFlags &= ~DataFlags::VOIDED; // mutation clears voided flag
+    return 0;
+  }
+
+  // Note! Capacity() returns 0 when the string is immutable.
   size_type curCapacity = Capacity();
 
-  // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be
-  // able to allocate it.  Just bail out in cases like that.  We don't want
-  // to be allocating 2GB+ strings anyway.
-  static_assert((sizeof(nsStringBuffer) & 0x1) == 0,
-                "bad size for nsStringBuffer");
-  if (!CheckCapacity(aCapacity)) {
-      return false;
-  }
-
+  // We've established that aCapacity > 0.
   // |curCapacity == 0| means that the buffer is immutable or 0-sized, so we
   // need to allocate a new buffer. We cannot use the existing buffer even
   // though it might be large enough.
 
-  if (curCapacity != 0) {
-    if (aCapacity <= curCapacity) {
-      this->mDataFlags &= ~DataFlags::VOIDED;  // mutation clears voided flag
-      return true;
-    }
+  if (!aAllowShrinking && aCapacity <= curCapacity) {
+    char_traits::move(this->mData + aNewSuffixStart,
+                      this->mData + aOldSuffixStart,
+                      aSuffixLength);
+    return curCapacity;
   }
 
-  if (curCapacity < aCapacity) {
+  char_type* oldData = this->mData;
+  DataFlags oldFlags = this->mDataFlags;
+
+  char_type* newData;
+  DataFlags newDataFlags;
+  size_type newCapacity;
+
+  // If this is an nsTAutoStringN, it's possible that we can use the inline
+  // buffer.
+  if ((this->mClassFlags & ClassFlags::INLINE) &&
+      (aCapacity <= AsAutoString(this)->mInlineCapacity)) {
+    newCapacity = AsAutoString(this)->mInlineCapacity;
+    newData = (char_type*)AsAutoString(this)->mStorage;
+    newDataFlags = DataFlags::TERMINATED | DataFlags::INLINE;
+  } else {
+    // If |aCapacity > kMaxCapacity|, then our doubling algorithm may not be
+    // able to allocate it.  Just bail out in cases like that.  We don't want
+    // to be allocating 2GB+ strings anyway.
+    static_assert((sizeof(nsStringBuffer) & 0x1) == 0,
+                  "bad size for nsStringBuffer");
+    if (MOZ_UNLIKELY(!CheckCapacity(aCapacity))) {
+      return UINT32_MAX;
+    }
+
     // We increase our capacity so that the allocated buffer grows
     // exponentially, which gives us amortized O(1) appending. Below the
     // threshold, we use powers-of-two. Above the threshold, we grow by at
     // least 1.125, rounding up to the nearest MiB.
     const size_type slowGrowthThreshold = 8 * 1024 * 1024;
 
     // nsStringBuffer allocates sizeof(nsStringBuffer) + passed size, and
     // storageSize below wants extra 1 * sizeof(char_type).
@@ -108,87 +135,70 @@ nsTSubstring<T>::MutatePrep(size_type aC
       const size_t MiB = 1 << 20;
       temp = (MiB * ((temp + MiB - 1) / MiB)) - neededExtraSpace;
     } else {
       // Round up to the next power of two.
       temp =
         mozilla::RoundUpPow2(aCapacity + neededExtraSpace) - neededExtraSpace;
     }
 
-    MOZ_ASSERT(XPCOM_MIN(temp, kMaxCapacity) >= aCapacity,
+    newCapacity = XPCOM_MIN(temp, kMaxCapacity);
+    MOZ_ASSERT(newCapacity >= aCapacity,
                "should have hit the early return at the top");
-    aCapacity = XPCOM_MIN(temp, kMaxCapacity);
-  }
-
-  //
-  // several cases:
-  //
-  //  (1) we have a refcounted shareable buffer (this->mDataFlags &
-  //      DataFlags::REFCOUNTED)
-  //  (2) we have an owned buffer (this->mDataFlags & DataFlags::OWNED)
-  //  (3) we have an inline buffer (this->mDataFlags & DataFlags::INLINE)
-  //  (4) we have a readonly buffer
-  //
-  // requiring that we in some cases preserve the data before creating
-  // a new buffer complicates things just a bit ;-)
-  //
-
-  size_type storageSize = (aCapacity + 1) * sizeof(char_type);
-
-  // case #1
-  if (this->mDataFlags & DataFlags::REFCOUNTED) {
-    nsStringBuffer* hdr = nsStringBuffer::FromData(this->mData);
-    if (!hdr->IsReadonly()) {
-      nsStringBuffer* newHdr = nsStringBuffer::Realloc(hdr, storageSize);
+    // Avoid shrinking if the new buffer is within 300 of the old. Note that
+    // unsigned underflow is defined behavior.
+    if ((curCapacity - newCapacity) <= 300 &&
+        (this->mDataFlags & DataFlags::REFCOUNTED)) {
+      MOZ_ASSERT(aAllowShrinking, "How come we didn't return earlier?");
+      // We're already close enough to the right size.
+      newData = oldData;
+    } else {
+      size_type storageSize = (newCapacity + 1) * sizeof(char_type);
+      // Since we allocate only if we need a different jemalloc bucket
+      // size, it's not useful to use realloc, which may spend time
+      // uselessly copying too much.
+      nsStringBuffer* newHdr = nsStringBuffer::Alloc(storageSize).take();
       if (!newHdr) {
-        return false;  // out-of-memory (original header left intact)
+        return UINT32_MAX; // we are still in a consistent state
       }
 
-      hdr = newHdr;
-      this->mData = (char_type*)hdr->Data();
-      this->mDataFlags &= ~DataFlags::VOIDED;  // mutation clears voided flag
-      return true;
+      newData = (char_type*)newHdr->Data();
     }
-  }
-
-  char_type* newData;
-  DataFlags newDataFlags;
-
-  // If this is an nsTAutoStringN whose inline buffer is sufficiently large,
-  // then use it. This helps avoid heap allocations.
-  if ((this->mClassFlags & ClassFlags::INLINE) &&
-      (aCapacity < AsAutoString(this)->mInlineCapacity)) {
-    newData = (char_type*)AsAutoString(this)->mStorage;
-    newDataFlags = DataFlags::TERMINATED | DataFlags::INLINE;
-  } else {
-    // if we reach here then, we must allocate a new buffer.  we cannot
-    // make use of our DataFlags::OWNED or DataFlags::INLINE buffers because
-    // they are not large enough.
-
-    nsStringBuffer* newHdr =
-      nsStringBuffer::Alloc(storageSize).take();
-    if (!newHdr) {
-      return false;  // we are still in a consistent state
-    }
-
-    newData = (char_type*)newHdr->Data();
     newDataFlags = DataFlags::TERMINATED | DataFlags::REFCOUNTED;
   }
 
-  // save old data and flags
-  *aOldData = this->mData;
-  *aOldDataFlags = this->mDataFlags;
+  this->mData = newData;
+  this->mDataFlags = newDataFlags;
+
+  if (oldData == newData) {
+    char_traits::move(
+      newData + aNewSuffixStart, oldData + aOldSuffixStart, aSuffixLength);
+  } else {
+    char_traits::copy(newData, oldData, aPrefixToPreserve);
+    char_traits::copy(
+      newData + aNewSuffixStart, oldData + aOldSuffixStart, aSuffixLength);
+    ::ReleaseData(oldData, oldFlags);
+  }
 
-  // this->mLength does not change
-  SetData(newData, this->mLength, newDataFlags);
+  return newCapacity;
+}
 
-  // though we are not necessarily terminated at the moment, now is probably
-  // still the best time to set DataFlags::TERMINATED.
-
-  return true;
+template<typename T>
+void
+nsTSubstring<T>::FinishBulkWrite(size_type aLength)
+{
+  MOZ_ASSERT(aLength != UINT32_MAX, "OOM magic value passed as length.");
+  if (aLength) {
+    this->mData[aLength] = char_type(0);
+    this->mLength = aLength;
+  } else {
+    ::ReleaseData(this->mData, this->mDataFlags);
+    SetToEmptyBuffer();
+  }
+  AssertValid();
 }
 
 template <typename T>
 void
 nsTSubstring<T>::Finalize()
 {
   ::ReleaseData(this->mData, this->mDataFlags);
   // this->mData, this->mLength, and this->mDataFlags are purposefully left dangling
@@ -220,58 +230,26 @@ nsTSubstring<T>::ReplacePrep(index_type 
                              newTotalLen.value());
 }
 
 template <typename T>
 bool
 nsTSubstring<T>::ReplacePrepInternal(index_type aCutStart, size_type aCutLen,
                                      size_type aFragLen, size_type aNewLen)
 {
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!MutatePrep(aNewLen, &oldData, &oldFlags)) {
-    return false;  // out-of-memory
-  }
-
-  if (oldData) {
-    // determine whether or not we need to copy part of the old string
-    // over to the new string.
-
-    if (aCutStart > 0) {
-      // copy prefix from old string
-      char_traits::copy(this->mData, oldData, aCutStart);
-    }
+  size_type newSuffixStart = aCutStart + aFragLen;
+  size_type oldSuffixStart = aCutStart + aCutLen;
+  size_type suffixLength = this->mLength - oldSuffixStart;
 
-    if (aCutStart + aCutLen < this->mLength) {
-      // copy suffix from old string to new offset
-      size_type from = aCutStart + aCutLen;
-      size_type fromLen = this->mLength - from;
-      uint32_t to = aCutStart + aFragLen;
-      char_traits::copy(this->mData + to, oldData + from, fromLen);
-    }
-
-    ::ReleaseData(oldData, oldFlags);
-  } else {
-    // original data remains intact
-
-    // determine whether or not we need to move part of the existing string
-    // to make room for the requested hole.
-    if (aFragLen != aCutLen && aCutStart + aCutLen < this->mLength) {
-      uint32_t from = aCutStart + aCutLen;
-      uint32_t fromLen = this->mLength - from;
-      uint32_t to = aCutStart + aFragLen;
-      char_traits::move(this->mData + to, this->mData + from, fromLen);
-    }
+  size_type capacity = StartBulkWrite(
+    aNewLen, aCutStart, true, suffixLength, oldSuffixStart, newSuffixStart);
+  if (capacity == UINT32_MAX) {
+    return false;
   }
-
-  // add null terminator (mutable this->mData always has room for the null-
-  // terminator).
-  this->mData[aNewLen] = char_type(0);
-  this->mLength = aNewLen;
-
+  FinishBulkWrite(aNewLen);
   return true;
 }
 
 template <typename T>
 typename nsTSubstring<T>::size_type
 nsTSubstring<T>::Capacity() const
 {
   // return 0 to indicate an immutable or 0-sized buffer
@@ -556,30 +534,24 @@ nsTSubstring<T>::Assign(const substring_
 {
   if (aTuple.IsDependentOn(this->mData, this->mData + this->mLength)) {
     // take advantage of sharing here...
     return Assign(string_type(aTuple), aFallible);
   }
 
   size_type length = aTuple.Length();
 
-  // don't use ReplacePrep here because it changes the length
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!MutatePrep(length, &oldData, &oldFlags)) {
+  size_type capacity = StartBulkWrite(length);
+  if (capacity == UINT32_MAX) {
     return false;
   }
 
-  if (oldData) {
-    ::ReleaseData(oldData, oldFlags);
-  }
+  aTuple.WriteTo(this->mData, length);
 
-  aTuple.WriteTo(this->mData, length);
-  this->mData[length] = 0;
-  this->mLength = length;
+  FinishBulkWrite(length);
   return true;
 }
 
 template <typename T>
 void
 nsTSubstring<T>::Adopt(char_type* aData, size_type aLength)
 {
   if (aData) {
@@ -765,50 +737,57 @@ nsTSubstring<T>::SetCapacity(size_type a
 }
 
 template <typename T>
 bool
 nsTSubstring<T>::SetCapacity(size_type aCapacity, const fallible_t&)
 {
   // capacity does not include room for the terminating null char
 
-  // if our capacity is reduced to zero, then free our buffer.
-  if (aCapacity == 0) {
-    ::ReleaseData(this->mData, this->mDataFlags);
-    SetToEmptyBuffer();
-    return true;
+  // Sadly, existing callers assume that it's valid to
+  // first call SetCapacity(), then write past mLength
+  // and then call SetLength() with the assumption that
+  // SetLength still preserves the written data past
+  // mLength!!!
+
+  size_type preserve;
+  if (this->mDataFlags & DataFlags::REFCOUNTED) {
+    nsStringBuffer* hdr = nsStringBuffer::FromData(this->mData);
+    preserve = (hdr->StorageSize() / sizeof(char_type)) - 1;
+  } else if (this->mDataFlags & DataFlags::INLINE) {
+    preserve = AsAutoString(this)->mInlineCapacity;
+  } else {
+    preserve = this->mLength;
   }
 
-  char_type* oldData;
-  DataFlags oldFlags;
-  if (!MutatePrep(aCapacity, &oldData, &oldFlags)) {
-    return false;  // out-of-memory
+  if (preserve > aCapacity) {
+    preserve = aCapacity;
   }
 
-  // compute new string length
-  size_type newLen = XPCOM_MIN(this->mLength, aCapacity);
-
-  if (oldData) {
-    // preserve old data
-    if (this->mLength > 0) {
-      char_traits::copy(this->mData, oldData, newLen);
-    }
-
-    ::ReleaseData(oldData, oldFlags);
+  size_type capacity = StartBulkWrite(aCapacity, preserve);
+  if (capacity == UINT32_MAX) {
+    return false;
   }
+  if (capacity) {
+    // In the zero case StartBulkWrite already put the string
+    // in a valid state.
 
-  // adjust this->mLength if our buffer shrunk down in size
-  if (newLen < this->mLength) {
-    this->mLength = newLen;
+    // Otherwise, instead of calling FinishBulkWrite,
+    // intentionally leave the string in the weird state
+    // required by the legacy semantics of this method.
+    if (aCapacity < this->mLength) {
+      // aCapacity not capacity for legacy reasons;
+      // maybe capacity would work, too.
+      this->mLength = aCapacity;
+    }
+    // Note that we can't write a terminator at
+    // mData[mLength], because doing so would overwrite
+    // data when this method is called from SetLength.
+    this->mData[aCapacity] = char_type(0);
   }
-
-  // always null-terminate here, even if the buffer got longer.  this is
-  // for backwards compat with the old string implementation.
-  this->mData[aCapacity] = char_type(0);
-
   return true;
 }
 
 template <typename T>
 void
 nsTSubstring<T>::SetLength(size_type aLength)
 {
   SetCapacity(aLength);
--- a/xpcom/string/nsTSubstring.h
+++ b/xpcom/string/nsTSubstring.h
@@ -895,38 +895,74 @@ protected:
 
   /**
    * this function releases mData and does not change the value of
    * any of its member variables.  in other words, this function acts
    * like a destructor.
    */
   void NS_FASTCALL Finalize();
 
+public:
   /**
-   * this function prepares mData to be mutated.
+   * Prepares mData to be mutated such that the capacity of the string
+   * (not counting the zero-terminator) is at least aCapacity.
+   * Returns the actual capacity, which may be larger than what was
+   * requested, or UINT32_MAX on allocation failure.
+   *
+   * mLength is ignored by this method. If the buffer is reallocated,
+   * aPrefixToPreserve specifies how many code units to copy over to
+   * the new buffer. The old buffer is freed if applicable.
    *
-   * @param aCapacity    specifies the required capacity of mData
-   * @param aOldData     returns null or the old value of mData
-   * @param aOldFlags    returns 0 or the old value of mDataFlags
+   * Unless the return value is UINT32_MAX to signal failure or 0 to
+   * signal that the string has been set to the special empty state,
+   * this method leaves the string in an invalid state! The caller is
+   * responsible for calling FinishBulkWrite() (or in Rust calling
+   * nsA[C]StringBulkWriteHandle::finish()), which puts the string
+   * into a valid state by setting mLength and zero-terminating.
+   * This method sets the flag to claim that the string is
+   * zero-terminated before it actually is.
+   *
+   * Once this method has been called and before FinishBulkWrite()
+   * has been called, only calls to Data() or this method again
+   * are valid. Do not call any other methods between calling this
+   * method and FinishBulkWrite().
    *
-   * if mData is already mutable and of sufficient capacity, then this
-   * function will return immediately.  otherwise, it will either resize
-   * mData or allocate a new shared buffer.  if it needs to allocate a
-   * new buffer, then it will return the old buffer and the corresponding
-   * flags.  this allows the caller to decide when to free the old data.
+   * @param aCapacity The requested capacity. The return value
+   *                  will be greater than or equal to this value.
+   * @param aPrefixToPreserve The number of code units at the start
+   *                          of the old buffer to copy into the
+   *                          new buffer.
+   * @param aAllowShrinking If true, an allocation may be performed
+   *                        if the requested capacity is smaller
+   *                        than the current capacity.
+   * @param aSuffixLength The length, in code units, of a suffix
+   *                      to move.
+   * @param aOldSuffixStart The old start index of the suffix to
+   *                        move.
+   * @param aNewSuffixStart The new start index of the suffix to
+   *                        move.
    *
-   * this function returns false if is unable to allocate sufficient
-   * memory.
-   *
-   * XXX we should expose a way for subclasses to free old_data.
    */
-  bool NS_FASTCALL MutatePrep(size_type aCapacity,
-                              char_type** aOldData, DataFlags* aOldDataFlags);
+  uint32_t NS_FASTCALL StartBulkWrite(size_type aCapacity,
+                                      size_type aPrefixToPreserve = 0,
+                                      bool aAllowShrinking = true,
+                                      size_type aSuffixLength = 0,
+                                      size_type aOldSuffixStart = 0,
+                                      size_type aNewSuffixStart = 0);
 
   /**
+   * Restores the string to a valid state after a call to StartBulkWrite()
+   * that returned a non-UINT32_MAX value. The argument to this method
+   * must be less than or equal to the non-UINT32_MAX value returned by
+   * the most recent StartBulkWrite() call.
+   */
+  void NS_FASTCALL FinishBulkWrite(size_type aLength);
+
+protected:
+  /**
    * this function prepares a section of mData to be modified.  if
    * necessary, this function will reallocate mData and possibly move
    * existing data to open up the specified section.
    *
    * @param aCutStart    specifies the starting offset of the section
    * @param aCutLength   specifies the length of the section to be replaced
    * @param aNewLength   specifies the length of the new section
    *
--- a/xpcom/string/nsUTF8Utils.h
+++ b/xpcom/string/nsUTF8Utils.h
@@ -6,20 +6,18 @@
 #ifndef nsUTF8Utils_h_
 #define nsUTF8Utils_h_
 
 // This file may be used in two ways: if MOZILLA_INTERNAL_API is defined, this
 // file will provide signatures for the Mozilla abstract string types. It will
 // use XPCOM assertion/debugging macros, etc.
 
 #include "nscore.h"
-#include "mozilla/arm.h"
 #include "mozilla/Assertions.h"
 #include "mozilla/EndianUtils.h"
-#include "mozilla/SSE.h"
 #include "mozilla/TypeTraits.h"
 
 #include "nsCharTraits.h"
 
 #ifdef MOZILLA_INTERNAL_API
 #define UTF8UTILS_WARNING(msg) NS_WARNING(msg)
 #else
 #define UTF8UTILS_WARNING(msg)
@@ -66,721 +64,196 @@ public:
       return 2;
     }
     if (is3byte(aChar)) {
       return 3;
     }
     if (is4byte(aChar)) {
       return 4;
     }
-    if (is5byte(aChar)) {
-      return 5;
-    }
-    if (is6byte(aChar)) {
-      return 6;
-    }
     MOZ_ASSERT_UNREACHABLE("should not be used for in-sequence characters");
     return 1;
   }
 };
 
 /**
- * Extract the next UCS-4 character from the buffer and return it.  The
+ * Extract the next Unicode scalar value from the buffer and return it. The
  * pointer passed in is advanced to the start of the next character in the
- * buffer.  If non-null, the parameters err and overlong are filled in to
- * indicate that the character was represented by an overlong sequence, or
- * that an error occurred.
+ * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced
+ * over the maximal valid prefix and *aErr is set to true (if aErr is not
+ * null).
+ *
+ * Note: This method never sets *aErr to false to allow error accumulation
+ * across multiple calls.
+ *
+ * Precondition: *aBuffer < aEnd
  */
-
 class UTF8CharEnumerator
 {
 public:
-  static uint32_t NextChar(const char** aBuffer, const char* aEnd, bool* aErr)
+  static inline char32_t NextChar(const char** aBuffer,
+                                  const char* aEnd,
+                                  bool* aErr = nullptr)
   {
-    NS_ASSERTION(aBuffer && *aBuffer, "null buffer!");
-
-    const char* p = *aBuffer;
-    *aErr = false;
-
-    if (p >= aEnd) {
-      *aErr = true;
-
-      return 0;
-    }
-
-    char c = *p++;
-
-    if (UTF8traits::isASCII(c)) {
-      *aBuffer = p;
-      return c;
-    }
-
-    uint32_t ucs4;
-    uint32_t minUcs4;
-    int32_t state = 0;
+    MOZ_ASSERT(aBuffer, "null buffer pointer pointer");
+    MOZ_ASSERT(aEnd, "null end pointer");
 
-    if (!CalcState(c, ucs4, minUcs4, state)) {
-      NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
-      *aErr = true;
-
-      return 0;
-    }
+    const unsigned char* p = reinterpret_cast<const unsigned char*>(*aBuffer);
+    const unsigned char* end = reinterpret_cast<const unsigned char*>(aEnd);
 
-    while (state--) {
-      if (p == aEnd) {
-        *aErr = true;
-
-        return 0;
-      }
+    MOZ_ASSERT(p, "null buffer");
+    MOZ_ASSERT(p < end, "Bogus range");
 
-      c = *p++;
-
-      if (!AddByte(c, state, ucs4)) {
-        *aErr = true;
+    unsigned char first = *p++;
 
-        return 0;
-      }
-    }
-
-    if (ucs4 < minUcs4) {
-      // Overlong sequence
-      ucs4 = UCS2_REPLACEMENT_CHAR;
-    } else if (ucs4 >= 0xD800 &&
-               (ucs4 <= 0xDFFF || ucs4 >= UCS_END)) {
-      // Surrogates and code points outside the Unicode range.
-      ucs4 = UCS2_REPLACEMENT_CHAR;
+    if (MOZ_LIKELY(first < 0x80U)) {
+      *aBuffer = reinterpret_cast<const char*>(p);
+      return first;
     }
 
-    *aBuffer = p;
-    return ucs4;
-  }
-
-private:
-  static bool CalcState(char aChar, uint32_t& aUcs4, uint32_t& aMinUcs4,
-                        int32_t& aState)
-  {
-    if (UTF8traits::is2byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 6) & 0x000007C0L;
-      aState = 1;
-      aMinUcs4 = 0x00000080;
-    } else if (UTF8traits::is3byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 12) & 0x0000F000L;
-      aState = 2;
-      aMinUcs4 = 0x00000800;
-    } else if (UTF8traits::is4byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 18) & 0x001F0000L;
-      aState = 3;
-      aMinUcs4 = 0x00010000;
-    } else if (UTF8traits::is5byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 24) & 0x03000000L;
-      aState = 4;
-      aMinUcs4 = 0x00200000;
-    } else if (UTF8traits::is6byte(aChar)) {
-      aUcs4 = (uint32_t(aChar) << 30) & 0x40000000L;
-      aState = 5;
-      aMinUcs4 = 0x04000000;
-    } else {
-      return false;
+    // Unsigned underflow is defined behavior
+    if (MOZ_UNLIKELY((p == end) || ((first - 0xC2U) >= (0xF5U - 0xC2U)))) {
+      *aBuffer = reinterpret_cast<const char*>(p);
+      if (aErr) {
+        *aErr = true;
+      }
+      return 0xFFFDU;
     }
 
-    return true;
-  }
-
-  static bool AddByte(char aChar, int32_t aState, uint32_t& aUcs4)
-  {
-    if (UTF8traits::isInSeq(aChar)) {
-      int32_t shift = aState * 6;
-      aUcs4 |= (uint32_t(aChar) & 0x3F) << shift;
-      return true;
-    }
-
-    return false;
-  }
-};
-
+    unsigned char second = *p;
 
-/**
- * Extract the next UCS-4 character from the buffer and return it.  The
- * pointer passed in is advanced to the start of the next character in the
- * buffer.  If non-null, the err parameter is filled in if an error occurs.
- *
- * If an error occurs that causes UCS2_REPLACEMENT_CHAR to be returned, then
- * the buffer will be updated to move only a single UCS-2 character.
- *
- * Any other error returns 0 and does not move the buffer position.
- */
-
-
-class UTF16CharEnumerator
-{
-public:
-  static uint32_t NextChar(const char16_t** aBuffer, const char16_t* aEnd,
-                           bool* aErr = nullptr)
-  {
-    NS_ASSERTION(aBuffer && *aBuffer, "null buffer!");
-
-    const char16_t* p = *aBuffer;
-
-    if (p >= aEnd) {
-      NS_ERROR("No input to work with");
+    if (first < 0xE0U) {
+      // Two-byte
+      if (MOZ_LIKELY((second & 0xC0U) == 0x80U)) {
+        *aBuffer = reinterpret_cast<const char*>(++p);
+        return ((uint32_t(first) & 0x1FU) << 6) | (uint32_t(second) & 0x3FU);
+      }
+      *aBuffer = reinterpret_cast<const char*>(p);
       if (aErr) {
         *aErr = true;
       }
-
-      return 0;
+      return 0xFFFDU;
     }
 
-    char16_t c = *p++;
-
-    if (!IS_SURROGATE(c)) { // U+0000 - U+D7FF,U+E000 - U+FFFF
-      if (aErr) {
-        *aErr = false;
-      }
-      *aBuffer = p;
-      return c;
-    } else if (NS_IS_HIGH_SURROGATE(c)) { // U+D800 - U+DBFF
-      if (p == aEnd) {
-        // Found a high surrogate at the end of the buffer. Flag this
-        // as an error and return the Unicode replacement
-        // character 0xFFFD.
-
-        UTF8UTILS_WARNING("Unexpected end of buffer after high surrogate");
-
-        if (aErr) {
-          *aErr = true;
-        }
-        *aBuffer = p;
-        return 0xFFFD;
+    if (MOZ_LIKELY(first < 0xF0U)) {
+      // Three-byte
+      unsigned char lower = 0x80U;
+      unsigned char upper = 0xBFU;
+      if (first == 0xE0U) {
+        lower = 0xA0U;
+      } else if (first == 0xEDU) {
+        upper = 0x9FU;
       }
-
-      // D800- DBFF - High Surrogate
-      char16_t h = c;
-
-      c = *p++;
-
-      if (NS_IS_LOW_SURROGATE(c)) {
-        // DC00- DFFF - Low Surrogate
-        // N = (H - D800) *400 + 10000 + (L - DC00)
-        uint32_t ucs4 = SURROGATE_TO_UCS4(h, c);
-        if (aErr) {
-          *aErr = false;
+      if (MOZ_LIKELY(second >= lower && second <= upper)) {
+        if (MOZ_LIKELY(++p != end)) { // step past second, then bounds-check
+          unsigned char third = *p; // in bounds: p != end was just verified
+          if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) {
+            *aBuffer = reinterpret_cast<const char*>(++p);
+            return ((uint32_t(first) & 0xFU) << 12) |
+                   ((uint32_t(second) & 0x3FU) << 6) |
+                   (uint32_t(third) & 0x3FU);
+          }
         }
-        *aBuffer = p;
-        return ucs4;
-      } else {
-        // Found a high surrogate followed by something other than
-        // a low surrogate. Flag this as an error and return the
-        // Unicode replacement character 0xFFFD.  Note that the
-        // pointer to the next character points to the second 16-bit
-        // value, not beyond it, as per Unicode 5.0.0 Chapter 3 C10,
-        // only the first code unit of an illegal sequence must be
-        // treated as an illegally terminated code unit sequence
-        // (also Chapter 3 D91, "isolated [not paired and ill-formed]
-        // UTF-16 code units in the range D800..DFFF are ill-formed").
-        UTF8UTILS_WARNING("got a High Surrogate but no low surrogate");
-
-        if (aErr) {
-          *aErr = true;
-        }
-        *aBuffer = p - 1;
-        return 0xFFFD;
       }
-    } else { // U+DC00 - U+DFFF
-      // DC00- DFFF - Low Surrogate
-
-      // Found a low surrogate w/o a preceding high surrogate. Flag
-      // this as an error and return the Unicode replacement
-      // character 0xFFFD.
-
-      UTF8UTILS_WARNING("got a low Surrogate but no high surrogate");
+      *aBuffer = reinterpret_cast<const char*>(p);
       if (aErr) {
         *aErr = true;
       }
-      *aBuffer = p;
-      return 0xFFFD;
+      return 0xFFFDU;
     }
 
-    MOZ_ASSERT_UNREACHABLE("Impossible UCS-2 character value.");
-  }
-};
-
-
-/**
- * A character sink (see |copy_string| in nsAlgorithm.h) for converting
- * UTF-8 to UTF-16
- */
-class ConvertUTF8toUTF16
-{
-public:
-  typedef char value_type;
-  typedef char16_t buffer_type;
-
-  explicit ConvertUTF8toUTF16(buffer_type* aBuffer)
-    : mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(false)
-  {
-  }
-
-  size_t Length() const
-  {
-    return mBuffer - mStart;
-  }
-
-  bool ErrorEncountered() const
-  {
-    return mErrorEncountered;
-  }
-
-  void write(const value_type* aStart, uint32_t aN)
-  {
-    if (mErrorEncountered) {
-      return;
+    // Four-byte
+    unsigned char lower = 0x80U;
+    unsigned char upper = 0xBFU;
+    if (first == 0xF0U) {
+      lower = 0x90U;
+    } else if (first == 0xF4U) {
+      upper = 0x8FU;
     }
-
-    // algorithm assumes utf8 units won't
-    // be spread across fragments
-    const value_type* p = aStart;
-    const value_type* end = aStart + aN;
-    buffer_type* out = mBuffer;
-    for (; p != end /* && *p */;) {
-      bool err;
-      uint32_t ucs4 = UTF8CharEnumerator::NextChar(&p, end, &err);
-
-      if (err) {
-        mErrorEncountered = true;
-        mBuffer = out;
-        return;
-      }
-
-      if (ucs4 >= PLANE1_BASE) {
-        *out++ = (buffer_type)H_SURROGATE(ucs4);
-        *out++ = (buffer_type)L_SURROGATE(ucs4);
-      } else {
-        *out++ = ucs4;
+    if (MOZ_LIKELY(second >= lower && second <= upper)) {
+      if (MOZ_LIKELY(++p != end)) { // step past second, then bounds-check
+        unsigned char third = *p; // in bounds: p != end was just verified
+        if (MOZ_LIKELY((third & 0xC0U) == 0x80U)) {
+          if (MOZ_LIKELY(++p != end)) { // step past third, then bounds-check
+            unsigned char fourth = *p; // in bounds: p != end was just verified
+            if (MOZ_LIKELY((fourth & 0xC0U) == 0x80U)) {
+              *aBuffer = reinterpret_cast<const char*>(++p);
+              return ((uint32_t(first) & 0x7U) << 18) |
+                     ((uint32_t(second) & 0x3FU) << 12) |
+                     ((uint32_t(third) & 0x3FU) << 6) |
+                     (uint32_t(fourth) & 0x3FU);
+            }
+          }
+        }
       }
     }
-    mBuffer = out;
+    *aBuffer = reinterpret_cast<const char*>(p);
+    if (aErr) {
+      *aErr = true;
+    }
+    return 0xFFFDU;
   }
-
-  void write_terminator()
-  {
-    *mBuffer = buffer_type(0);
-  }
-
-private:
-  buffer_type* const mStart;
-  buffer_type* mBuffer;
-  bool mErrorEncountered;
 };
 
 /**
- * A character sink (see |copy_string| in nsAlgorithm.h) for computing
- * the length of the UTF-16 string equivalent to a UTF-8 string.
+ * Extract the next Unicode scalar value from the buffer and return it. The
+ * pointer passed in is advanced to the start of the next character in the
+ * buffer. Upon error, the return value is 0xFFFD, *aBuffer is advanced over
+ * the unpaired surrogate and *aErr is set to true (if aErr is not null).
+ *
+ * Note: This method never sets *aErr to false to allow error accumulation
+ * across multiple calls.
+ *
+ * Precondition: *aBuffer < aEnd
  */
-class CalculateUTF8Length
+class UTF16CharEnumerator
 {
 public:
-  typedef char value_type;
-
-  CalculateUTF8Length()
-    : mLength(0), mErrorEncountered(false)
-  {
-  }
-
-  size_t Length() const
-  {
-    return mLength;
-  }
-
-  void write(const value_type* aStart, uint32_t aN)
+  static inline char32_t NextChar(const char16_t** aBuffer,
+                                  const char16_t* aEnd,
+                                  bool* aErr = nullptr)
   {
-    // ignore any further requests
-    if (mErrorEncountered) {
-      return;
-    }
-
-    // algorithm assumes utf8 units won't
-    // be spread across fragments
-    const value_type* p = aStart;
-    const value_type* end = aStart + aN;
-    for (; p < end /* && *p */; ++mLength) {
-      if (UTF8traits::isASCII(*p)) {
-        p += 1;
-      } else if (UTF8traits::is2byte(*p)) {
-        p += 2;
-      } else if (UTF8traits::is3byte(*p)) {
-        p += 3;
-      } else if (UTF8traits::is4byte(*p)) {
-        // Because a UTF-8 sequence of 4 bytes represents a codepoint
-        // greater than 0xFFFF, it will become a surrogate pair in the
-        // UTF-16 string, so add 1 more to mLength.
-        // This doesn't happen with is5byte and is6byte because they
-        // are illegal UTF-8 sequences (greater than 0x10FFFF) so get
-        // converted to a single replacement character.
-
-        // However, there is one case when a 4 byte UTF-8 sequence will
-        // only generate 2 UTF-16 bytes. If we have a properly encoded
-        // sequence, but with an invalid value (too small or too big),
-        // that will result in a replacement character being written
-        // This replacement character is encoded as just 1 single
-        // UTF-16 character, which is 2 bytes.
+    MOZ_ASSERT(aBuffer, "null buffer pointer pointer");
+    MOZ_ASSERT(aEnd, "null end pointer");
 
-        // The below code therefore only adds 1 to mLength if the UTF8
-        // data will produce a decoded character which is greater than
-        // or equal to 0x010000 and less than 0x0110000.
-
-        // A 4byte UTF8 character is encoded as
-        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-        // Bit 1-3 on the first byte, and bit 5-6 on the second byte,
-        // map to bit 17-21 in the final result. If these bits are
-        // between 0x01 and 0x11, that means that the final result is
-        // between 0x010000 and 0x110000. The below code reads these
-        // bits out and assigns them to c, but shifted up 4 bits to
-        // avoid having to shift twice.
-
-        // It doesn't matter what to do in the case where p + 4 > end
-        // since no UTF16 characters will be written in that case by
-        // ConvertUTF8toUTF16. Likewise it doesn't matter what we do if
-        // any of the surrogate bits are wrong since no UTF16
-        // characters will be written in that case either.
+    const char16_t* p = *aBuffer;
 
-        if (p + 4 <= end) {
-          uint32_t c = ((uint32_t)(p[0] & 0x07)) << 6 |
-                       ((uint32_t)(p[1] & 0x30));
-          if (c >= 0x010 && c < 0x110) {
-            ++mLength;
-          }
-        }
+    MOZ_ASSERT(p, "null buffer");
+    MOZ_ASSERT(p < aEnd, "Bogus range");
 
-        p += 4;
-      } else if (UTF8traits::is5byte(*p)) {
-        p += 5;
-      } else if (UTF8traits::is6byte(*p)) {
-        p += 6;
-      } else { // error
-        ++mLength; // to account for the decrement below
-        break;
-      }
-    }
-    if (p != end) {
-      NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
-      --mLength; // The last multi-byte char wasn't complete, discard it.
-      mErrorEncountered = true;
-    }
-  }
-
-private:
-  size_t mLength;
-  bool mErrorEncountered;
-};
+    char16_t c = *p++;
 
-/**
- * A character sink (see |copy_string| in nsAlgorithm.h) for
- * converting UTF-16 to UTF-8. Treats invalid UTF-16 data as 0xFFFD
- * (0xEFBFBD in UTF-8).
- */
-class ConvertUTF16toUTF8
-{
-public:
-  typedef char16_t value_type;
-  typedef char buffer_type;
-
-  // The error handling here is more lenient than that in
-  // |ConvertUTF8toUTF16|, but it's that way for backwards
-  // compatibility.
-
-  explicit ConvertUTF16toUTF8(buffer_type* aBuffer)
-    : mStart(aBuffer), mBuffer(aBuffer)
-  {
-  }
-
-  size_t Size() const
-  {
-    return mBuffer - mStart;
-  }
-
-  void write(const value_type* aStart, uint32_t aN)
-  {
-    buffer_type* out = mBuffer; // gcc isn't smart enough to do this!
-
-    for (const value_type* p = aStart, *end = aStart + aN; p < end; ++p) {
-      value_type c = *p;
-      if (!(c & 0xFF80)) { // U+0000 - U+007F
-        *out++ = (char)c;
-      } else if (!(c & 0xF800)) { // U+0100 - U+07FF
-        *out++ = 0xC0 | (char)(c >> 6);
-        *out++ = 0x80 | (char)(0x003F & c);
-      } else if (!IS_SURROGATE(c)) { // U+0800 - U+D7FF,U+E000 - U+FFFF
-        *out++ = 0xE0 | (char)(c >> 12);
-        *out++ = 0x80 | (char)(0x003F & (c >> 6));
-        *out++ = 0x80 | (char)(0x003F & c);
-      } else if (NS_IS_HIGH_SURROGATE(c)) { // U+D800 - U+DBFF
-        // D800- DBFF - High Surrogate
-        value_type h = c;
-
-        ++p;
-        if (p == end) {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          *out++ = '\xEF';
-          *out++ = '\xBF';
-          *out++ = '\xBD';
-
-          UTF8UTILS_WARNING("String ending in half a surrogate pair!");
-
-          break;
+    // Let's use encoding_rs-style code golf here.
+    // Unsigned underflow is defined behavior
+    char16_t cMinusSurrogateStart = c - 0xD800U;
+    if (MOZ_LIKELY(cMinusSurrogateStart > (0xDFFFU - 0xD800U))) {
+      *aBuffer = p;
+      return c;
+    }
+    if (MOZ_LIKELY(cMinusSurrogateStart <= (0xDBFFU - 0xD800U))) {
+      // High surrogate
+      if (MOZ_LIKELY(p != aEnd)) {
+        char16_t second = *p;
+        // Unsigned underflow is defined behavior
+        if (MOZ_LIKELY((second - 0xDC00U) <= (0xDFFFU - 0xDC00U))) {
+          *aBuffer = ++p;
+          return (uint32_t(c) << 10) + uint32_t(second) -
+                 (((0xD800U << 10) - 0x10000U) + 0xDC00U);
         }
-        c = *p;
-
-        if (NS_IS_LOW_SURROGATE(c)) {
-          // DC00- DFFF - Low Surrogate
-          // N = (H - D800) *400 + 10000 + ( L - DC00 )
-          uint32_t ucs4 = SURROGATE_TO_UCS4(h, c);
-
-          // 0001 0000-001F FFFF
-          *out++ = 0xF0 | (char)(ucs4 >> 18);
-          *out++ = 0x80 | (char)(0x003F & (ucs4 >> 12));
-          *out++ = 0x80 | (char)(0x003F & (ucs4 >> 6));
-          *out++ = 0x80 | (char)(0x003F & ucs4);
-        } else {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          *out++ = '\xEF';
-          *out++ = '\xBF';
-          *out++ = '\xBD';
-
-          // The pointer to the next character points to the second
-          // 16-bit value, not beyond it, as per Unicode 5.0.0
-          // Chapter 3 C10, only the first code unit of an illegal
-          // sequence must be treated as an illegally terminated
-          // code unit sequence (also Chapter 3 D91, "isolated [not
-          // paired and ill-formed] UTF-16 code units in the range
-          // D800..DFFF are ill-formed").
-          p--;
-
-          UTF8UTILS_WARNING("got a High Surrogate but no low surrogate");
-        }
-      } else { // U+DC00 - U+DFFF
-        // Treat broken characters as the Unicode replacement
-        // character 0xFFFD (0xEFBFBD in UTF-8)
-        *out++ = '\xEF';
-        *out++ = '\xBF';
-        *out++ = '\xBD';
-
-        // DC00- DFFF - Low Surrogate
-        UTF8UTILS_WARNING("got a low Surrogate but no high surrogate");
       }
     }
-
-    mBuffer = out;
-  }
-
-  void write_terminator()
-  {
-    *mBuffer = buffer_type(0);
-  }
-
-private:
-  buffer_type* const mStart;
-  buffer_type* mBuffer;
-};
-
-/**
- * A character sink (see |copy_string| in nsAlgorithm.h) for computing
- * the number of bytes a UTF-16 would occupy in UTF-8. Treats invalid
- * UTF-16 data as 0xFFFD (0xEFBFBD in UTF-8).
- */
-class CalculateUTF8Size
-{
-public:
-  typedef char16_t value_type;
-
-  CalculateUTF8Size()
-    : mSize(0)
-  {
-  }
-
-  size_t Size() const
-  {
-    return mSize;
+    // Unpaired surrogate
+    *aBuffer = p;
+    if (aErr) {
+      *aErr = true;
+    }
+    return 0xFFFDU;
   }
-
-  void write(const value_type* aStart, uint32_t aN)
-  {
-    // Assume UCS2 surrogate pairs won't be spread across fragments.
-    for (const value_type* p = aStart, *end = aStart + aN; p < end; ++p) {
-      value_type c = *p;
-      if (!(c & 0xFF80)) { // U+0000 - U+007F
-        mSize += 1;
-      } else if (!(c & 0xF800)) { // U+0100 - U+07FF
-        mSize += 2;
-      } else if (0xD800 != (0xF800 & c)) { // U+0800 - U+D7FF,U+E000 - U+FFFF
-        mSize += 3;
-      } else if (0xD800 == (0xFC00 & c)) { // U+D800 - U+DBFF
-        ++p;
-        if (p == end) {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          mSize += 3;
-
-          UTF8UTILS_WARNING("String ending in half a surrogate pair!");
-
-          break;
-        }
-        c = *p;
-
-        if (0xDC00 == (0xFC00 & c)) {
-          mSize += 4;
-        } else {
-          // Treat broken characters as the Unicode
-          // replacement character 0xFFFD (0xEFBFBD in
-          // UTF-8)
-          mSize += 3;
-
-          // The next code unit is the second 16-bit value, not
-          // the one beyond it, as per Unicode 5.0.0 Chapter 3 C10,
-          // only the first code unit of an illegal sequence must
-          // be treated as an illegally terminated code unit
-          // sequence (also Chapter 3 D91, "isolated [not paired and
-          // ill-formed] UTF-16 code units in the range D800..DFFF
-          // are ill-formed").
-          p--;
-
-          UTF8UTILS_WARNING("got a high Surrogate but no low surrogate");
-        }
-      } else { // U+DC00 - U+DFFF
-        // Treat broken characters as the Unicode replacement
-        // character 0xFFFD (0xEFBFBD in UTF-8)
-        mSize += 3;
-
-        UTF8UTILS_WARNING("got a low Surrogate but no high surrogate");
-      }
-    }
-  }
-
-private:
-  size_t mSize;
 };
 
-#ifdef MOZILLA_INTERNAL_API
-/**
- * A character sink that performs a |reinterpret_cast|-style conversion
- * from char to char16_t.
- */
-class LossyConvertEncoding8to16
-{
-public:
-  typedef char value_type;
-  typedef char input_type;
-  typedef char16_t output_type;
-
-public:
-  explicit LossyConvertEncoding8to16(char16_t* aDestination) :
-    mDestination(aDestination)
-  {
-  }
-
-  void
-  write(const char* aSource, uint32_t aSourceLength)
-  {
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-    if (mozilla::supports_sse2()) {
-      write_sse2(aSource, aSourceLength);
-      return;
-    }
-#endif
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-    if (mozilla::supports_neon()) {
-      write_neon(aSource, aSourceLength);
-      return;
-    }
-#endif
-    const char* done_writing = aSource + aSourceLength;
-    while (aSource < done_writing) {
-      *mDestination++ = (char16_t)(unsigned char)(*aSource++);
-    }
-  }
-
-  void
-  write_sse2(const char* aSource, uint32_t aSourceLength);
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-  void
-  write_neon(const char* aSource, uint32_t aSourceLength);
-#endif
-
-  void
-  write_terminator()
-  {
-    *mDestination = (char16_t)(0);
-  }
-
-private:
-  char16_t* mDestination;
-};
-
-/**
- * A character sink that performs a |reinterpret_cast|-style conversion
- * from char16_t to char.
- */
-class LossyConvertEncoding16to8
-{
-public:
-  typedef char16_t value_type;
-  typedef char16_t input_type;
-  typedef char output_type;
-
-  explicit LossyConvertEncoding16to8(char* aDestination)
-    : mDestination(aDestination)
-  {
-  }
-
-  void
-  write(const char16_t* aSource, uint32_t aSourceLength)
-  {
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-    if (mozilla::supports_sse2()) {
-      write_sse2(aSource, aSourceLength);
-      return;
-    }
-#endif
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-    if (mozilla::supports_neon()) {
-      write_neon(aSource, aSourceLength);
-      return;
-    }
-#endif
-    const char16_t* done_writing = aSource + aSourceLength;
-    while (aSource < done_writing) {
-      *mDestination++ = (char)(*aSource++);
-    }
-  }
-
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-  void
-  write_sse2(const char16_t* aSource, uint32_t aSourceLength);
-#endif
-#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
-  void
-  write_neon(const char16_t* aSource, uint32_t aSourceLength);
-#endif
-
-  void
-  write_terminator()
-  {
-    *mDestination = '\0';
-  }
-
-private:
-  char* mDestination;
-};
-#endif // MOZILLA_INTERNAL_API
-
-
 template<typename Char, typename UnsignedT>
 inline UnsignedT
 RewindToPriorUTF8Codepoint(const Char* utf8Chars, UnsignedT index)
 {
   static_assert(mozilla::IsSame<Char, char>::value ||
                 mozilla::IsSame<Char, unsigned char>::value ||
                 mozilla::IsSame<Char, signed char>::value,
                 "UTF-8 data must be in 8-bit units");
--- a/xpcom/tests/gtest/TestAtoms.cpp
+++ b/xpcom/tests/gtest/TestAtoms.cpp
@@ -77,39 +77,40 @@ TEST(Atoms, Invalid)
 
     {
       RefPtr<nsAtom> atom16 = NS_Atomize(Invalid16Strings[i].m16);
       EXPECT_TRUE(atom16->Equals(nsDependentString(Invalid16Strings[i].m16)));
     }
 
     EXPECT_EQ(count, NS_GetNumberOfAtoms());
   }
-
+#ifndef DEBUG
+// Don't run this test in debug builds as that intentionally asserts.
   for (unsigned int i = 0; i < ArrayLength(Invalid8Strings); ++i) {
     nsrefcnt count = NS_GetNumberOfAtoms();
 
     {
       RefPtr<nsAtom> atom8 = NS_Atomize(Invalid8Strings[i].m8);
       RefPtr<nsAtom> atom16 = NS_Atomize(Invalid8Strings[i].m16);
       EXPECT_EQ(atom16, atom8);
       EXPECT_TRUE(atom16->Equals(nsDependentString(Invalid8Strings[i].m16)));
     }
 
     EXPECT_EQ(count, NS_GetNumberOfAtoms());
   }
 
-// Don't run this test in debug builds as that intentionally asserts.
-#ifndef DEBUG
-  RefPtr<nsAtom> emptyAtom = NS_Atomize("");
-
   for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) {
     nsrefcnt count = NS_GetNumberOfAtoms();
 
-    RefPtr<nsAtom> atom8 = NS_Atomize(Malformed8Strings[i]);
-    EXPECT_EQ(atom8, emptyAtom);
+    {
+      RefPtr<nsAtom> atom8 = NS_Atomize(Malformed8Strings[i].m8);
+      RefPtr<nsAtom> atom16 = NS_Atomize(Malformed8Strings[i].m16);
+      EXPECT_EQ(atom8, atom16);
+    }
+
     EXPECT_EQ(count, NS_GetNumberOfAtoms());
   }
 #endif
 }
 
 #define FIRST_ATOM_STR "first static atom. Hello!"
 #define SECOND_ATOM_STR "second static atom. @World!"
 #define THIRD_ATOM_STR "third static atom?!"
--- a/xpcom/tests/gtest/TestStrings.cpp
+++ b/xpcom/tests/gtest/TestStrings.cpp
@@ -764,22 +764,20 @@ TEST_F(Strings, replace_substr)
 
   s.AssignLiteral("foofoofoo");
   s.ReplaceSubstring("of", "fo");
   EXPECT_STREQ(s.get(), "fofoofooo");
 }
 
 TEST_F(Strings, replace_substr_2)
 {
-  const char *oldName = nullptr;
   const char *newName = "user";
   nsString acctName; acctName.AssignLiteral("forums.foo.com");
   nsAutoString newAcctName, oldVal, newVal;
-  CopyASCIItoUTF16(oldName, oldVal);
-  CopyASCIItoUTF16(newName, newVal);
+  CopyASCIItoUTF16(mozilla::MakeStringSpan(newName), newVal);
   newAcctName.Assign(acctName);
 
   // here, oldVal is empty.  we are testing that this function
   // does not hang.  see bug 235355.
   newAcctName.ReplaceSubstring(oldVal, newVal);
 
   // we expect that newAcctName will be unchanged.
   EXPECT_TRUE(newAcctName.Equals(acctName));
@@ -1282,16 +1280,45 @@ TEST(String, strip_chars)
   test_strip_chars_helper(u"foo",
                           u"foo",
                           NS_LITERAL_STRING(""));
   test_strip_chars_helper(u" foo",
                           u" ",
                           NS_LITERAL_STRING("foo"));
 }
 
+TEST_F(Strings, append_with_capacity)
+{
+  nsAutoString s;
+  const char16_t* origPtr = s.BeginReading();
+  s.SetCapacity(100);
+  const char16_t* ptr = s.BeginReading();
+  EXPECT_NE(origPtr, ptr);
+  for (int i = 0; i < 100; i++) {
+    s.Append(u'a');
+    EXPECT_EQ(s.BeginReading(), ptr);
+  }
+}
+
+TEST_F(Strings, append_string_with_capacity)
+{
+  nsAutoString aa;
+  aa.Append(u'a');
+  aa.Append(u'a');
+  nsAutoString s;
+  const char16_t* origPtr = s.BeginReading();
+  s.SetCapacity(200);
+  const char16_t* ptr = s.BeginReading();
+  EXPECT_NE(origPtr, ptr);
+  for (int i = 0; i < 100; i++) {
+    s.Append(aa);
+    EXPECT_EQ(s.BeginReading(), ptr);
+  }
+}
+
 TEST_F(Strings, huge_capacity)
 {
   nsString a, b, c, d, e, f, g, h, i, j, k, l, m, n;
   nsCString n1;
 
   // Ignore the result if the address space is less than 64-bit because
   // some of the allocations above will exhaust the address space.
   if (sizeof(void*) >= 8) {
--- a/xpcom/tests/gtest/TestTextFormatter.cpp
+++ b/xpcom/tests/gtest/TestTextFormatter.cpp
@@ -12,17 +12,16 @@ TEST(TextFormatter, Tests)
   nsAutoString fmt(NS_LITERAL_STRING("%3$s %4$S %1$d %2$d %2$d %3$s"));
   char utf8[] = "Hello";
   char16_t ucs2[]={'W', 'o', 'r', 'l', 'd', 0x4e00, 0xAc00, 0xFF45, 0x0103, 0x00};
   int d=3;
 
   char16_t buf[256];
   nsTextFormatter::snprintf(buf, 256, fmt.get(), d, 333, utf8, ucs2);
   nsAutoString out(buf);
-  ASSERT_STREQ("Hello World", NS_LossyConvertUTF16toASCII(out).get());
 
   const char16_t *uout = out.get();
   const char16_t expected[] = {0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20,
                                 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x4E00,
                                 0xAC00, 0xFF45, 0x0103, 0x20, 0x33,
                                 0x20, 0x33, 0x33, 0x33, 0x20, 0x33,
                                 0x33, 0x33, 0x20, 0x48, 0x65, 0x6C,
                                 0x6C, 0x6F};
--- a/xpcom/tests/gtest/TestUTF.cpp
+++ b/xpcom/tests/gtest/TestUTF.cpp
@@ -9,16 +9,17 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "nsString.h"
 #include "nsStringBuffer.h"
 #include "nsReadableUtils.h"
 #include "UTFStrings.h"
 #include "nsUnicharUtils.h"
 #include "mozilla/HashFunctions.h"
+#include "nsUTF8Utils.h"
 
 #include "gtest/gtest.h"
 
 using namespace mozilla;
 
 namespace TestUTF {
 
 TEST(UTF, Valid)
@@ -72,59 +73,53 @@ TEST(UTF, Invalid8)
     EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16));
 
     EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0);
   }
 }
 
 TEST(UTF, Malformed8)
 {
-// Don't run this test in debug builds as that intentionally asserts.
-#ifndef DEBUG
   for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) {
-    nsDependentCString str8(Malformed8Strings[i]);
+    nsDependentString str16(Malformed8Strings[i].m16);
+    nsDependentCString str8(Malformed8Strings[i].m8);
 
-    EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).IsEmpty());
+    EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).Equals(str16));
 
-    nsString tmp16(NS_LITERAL_STRING("string"));
+    nsString tmp16(NS_LITERAL_STRING("string "));
     AppendUTF8toUTF16(str8, tmp16);
-    EXPECT_TRUE(tmp16.EqualsLiteral("string"));
+    EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16));
 
-    EXPECT_NE(CompareUTF8toUTF16(str8, EmptyString()), 0);
+    EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0);
   }
-#endif
 }
 
 TEST(UTF, Hash16)
 {
   for (unsigned int i = 0; i < ArrayLength(ValidStrings); ++i) {
     nsDependentCString str8(ValidStrings[i].m8);
     bool err;
     EXPECT_EQ(HashString(ValidStrings[i].m16),
               HashUTF8AsUTF16(str8.get(), str8.Length(), &err));
     EXPECT_FALSE(err);
   }
 
   for (unsigned int i = 0; i < ArrayLength(Invalid8Strings); ++i) {
     nsDependentCString str8(Invalid8Strings[i].m8);
     bool err;
-    EXPECT_EQ(HashString(Invalid8Strings[i].m16),
-              HashUTF8AsUTF16(str8.get(), str8.Length(), &err));
-    EXPECT_FALSE(err);
+    EXPECT_EQ(HashUTF8AsUTF16(str8.get(), str8.Length(), &err), 0u);
+    EXPECT_TRUE(err);
   }
 
-// Don't run this test in debug builds as that intentionally asserts.
-#ifndef DEBUG
   for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) {
-    nsDependentCString str8(Malformed8Strings[i]);
+    nsDependentCString str8(Malformed8Strings[i].m8);
     bool err;
     EXPECT_EQ(HashUTF8AsUTF16(str8.get(), str8.Length(), &err), 0u);
     EXPECT_TRUE(err);
   }
-#endif
 }
 
 /**
  * This tests the handling of a non-ascii character at various locations in a
  * UTF-16 string that is being converted to UTF-8.
  */
 void NonASCII16_helper(const size_t aStrSize)
 {
@@ -173,19 +168,81 @@ void NonASCII16_helper(const size_t aStr
 
     // And finish with the trailing ASCII chars.
     expected.Append(asciiCString.BeginReading() + i + 1, kTestSize - i - 1);
 
     EXPECT_STREQ(dest.BeginReading(), expected.BeginReading());
   }
 }
 
-TEST(UTF, NonASCII16)
+TEST(UTF, UTF8CharEnumerator)
 {
-  // Test with various string sizes to catch any special casing.
-  NonASCII16_helper(1);
-  NonASCII16_helper(8);
-  NonASCII16_helper(16);
-  NonASCII16_helper(32);
-  NonASCII16_helper(512);
+  const char* p = "\x61\xC0\xC2\xC2\x80\xE0\x80\x80\xE0\xA0\x80\xE1\x80\x80\xED\xBF\xBF\xED\x9F\xBF\xEE\x80\x80\xEE\x80\xFF\xF0\x90\x80\x80\xF0\x80\x80\x80\xF1\x80\x80\x80\xF4\x8F\xBF\xF4\x8F\xBF\xBF\xF4\xBF\xBF\xBF";
+  const char* end = p + 49;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0061U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0080U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0800U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x1000U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xD7FFU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xE000U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x10000U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x40000U);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x10FFFFU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(p, end);
+  p = "\xC2";
+  end = p + 1;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(p, end);
+  p = "\xE1\x80";
+  end = p + 2;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(p, end);
+  p = "\xF1\x80\x80";
+  end = p + 3;
+  EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(p, end);
+}
+
+TEST(UTF, UTF16CharEnumerator)
+{
+  const char16_t* p = u"\u0061\U0001F4A9";
+  const char16_t* end = p + 3;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x0061U);
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x1F4A9U);
+  EXPECT_EQ(p, end);
+  const char16_t loneHigh = 0xD83D;
+  p = &loneHigh;
+  end = p + 1;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(p, end);
+  const char16_t loneLow = 0xDCA9;
+  p = &loneLow;
+  end = p + 1;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(p, end);
+  const char16_t loneHighStr[] = { 0xD83D, 0x0061 };
+  p = loneHighStr;
+  end = p + 2;
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU);
+  EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x0061U);
+  EXPECT_EQ(p, end);
 }
 
 } // namespace TestUTF
--- a/xpcom/tests/gtest/UTFStrings.h
+++ b/xpcom/tests/gtest/UTFStrings.h
@@ -56,57 +56,73 @@ static const UTFStringsStringPair Invali
     { { 0xDC00, 0xD800, 0xDC00, 0xD800 },
       { char(0xEF), char(0xBF), char(0xBD), char(0xF0), char(0x90), char(0x80), char(0x80), char(0xEF), char(0xBF), char(0xBD) } },
     { { 0xDC00, 0xD800, 0xD800, 0xDC00 },
       { char(0xEF), char(0xBF), char(0xBD), char(0xEF), char(0xBF), char(0xBD), char(0xF0), char(0x90), char(0x80), char(0x80) } },
   };
 
 static const UTFStringsStringPair Invalid8Strings[] =
   {
-    { { 'a', 0xFFFD, 'b' },
+    { { 'a', 0xFFFD, 0xFFFD, 'b' },
       { 'a', char(0xC0), char(0x80), 'b' } },
-    { { 0xFFFD, 0x80 },
+    { { 0xFFFD, 0xFFFD, 0x80 },
       { char(0xC1), char(0xBF), char(0xC2), char(0x80) } },
-    { { 0xFFFD },
+    { { 0xFFFD, 0xFFFD },
       { char(0xC1), char(0xBF) } },
-    { { 0xFFFD, 'x', 0x0800 },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0x0800 },
       { char(0xE0), char(0x80), char(0x80), 'x', char(0xE0), char(0xA0), char(0x80) } },
-    { { 0xFFFD, 'x', 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF0), char(0x80), char(0x80), char(0x80), 'x', char(0xF0), char(0x80), char(0x8F), char(0x80) } },
-    { { 0xFFFD, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF4), char(0x90), char(0x80), char(0x80), char(0xF7), char(0xBF), char(0xBF), char(0xBF) } },
-    { { 0xFFFD, 'x', 0xD800, 0xDC00, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xD800, 0xDC00, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF0), char(0x8F), char(0xBF), char(0xBF), 'x', char(0xF0), char(0x90), char(0x80), char(0x80), char(0xF0), char(0x8F), char(0xBF), char(0xBF) } },
-    { { 0xFFFD, 'x', 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'x', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xF8), char(0x80), char(0x80), char(0x80), char(0x80), 'x', char(0xF8), char(0x88), char(0x80), char(0x80), char(0x80) } },
-    { { 0xFFFD, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xFB), char(0xBF), char(0xBF), char(0xBF), char(0xBF), char(0xFC), char(0xA0), char(0x80), char(0x80), char(0x80), char(0x80) } },
-    { { 0xFFFD, 0xFFFD },
+    { { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
       { char(0xFC), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), char(0xFD), char(0xBF), char(0xBF), char(0xBF), char(0xBF), char(0xBF) } },
   };
 
-// Don't use this array in debug builds as that intentionally asserts.
-#ifndef DEBUG
-static const char Malformed8Strings[][16] =
+static const UTFStringsStringPair Malformed8Strings[] =
   {
-    { char(0x80) },
-    { 'a', char(0xC8), 'c' },
-    { 'a', char(0xC0) },
-    { 'a', char(0xE8), 'c' },
-    { 'a', char(0xE8), char(0x80), 'c' },
-    { 'a', char(0xE8), char(0x80) },
-    { char(0xE8), 0x7F, char(0x80) },
-    { 'a', char(0xE8), char(0xE8), char(0x80) },
-    { 'a', char(0xF4) },
-    { 'a', char(0xF4), char(0x80), char(0x80), 'c', 'c' },
-    { 'a', char(0xF4), char(0x80), 'x', char(0x80) },
-    { char(0xF4), char(0x80), char(0x80), char(0x80), char(0x80) },
-    { 'a', char(0xFA), 'c' },
-    { 'a', char(0xFA), char(0x80), char(0x80), 0x7F, char(0x80), 'c' },
-    { 'a', char(0xFA), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), 'c' },
-    { 'a', char(0xFD) },
-    { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), 'c' },
-    { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80) },
-    { 'a', char(0xFC), char(0x80), char(0x80), 0x40, char(0x80), char(0x80), 'c' },
+    { { 0xFFFD },
+      { char(0x80) } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xC8), 'c' } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xC8) } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xE8), 'c' } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xE8), char(0x80), 'c' } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xE8), char(0x80) } },
+    { { 0xFFFD, 0x7F, 0xFFFD },
+      { char(0xE8), 0x7F, char(0x80) } },
+    { { 'a', 0xFFFD, 0xFFFD },
+      { 'a', char(0xE8), char(0xE8), char(0x80) } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xF4) } },
+    { { 'a', 0xFFFD, 'c', 'c' },
+      { 'a', char(0xF4), char(0x80), char(0x80), 'c', 'c' } },
+    { { 'a', 0xFFFD, 'x', 0xFFFD },
+      { 'a', char(0xF4), char(0x80), 'x', char(0x80) } },
+    { { 0xDBC0, 0xDC00, 0xFFFD },
+      { char(0xF4), char(0x80), char(0x80), char(0x80), char(0x80) } },
+    { { 'a', 0xFFFD, 'c' },
+      { 'a', char(0xFA), 'c' } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0x7F, 0xFFFD, 'c' },
+      { 'a', char(0xFA), char(0x80), char(0x80), 0x7F, char(0x80), 'c' } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'c' },
+      { 'a', char(0xFA), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), 'c' } },
+    { { 'a', 0xFFFD },
+      { 'a', char(0xFD) } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 'c' },
+      { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), 'c' } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD },
+      { 'a', char(0xFD), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80), char(0x80) } },
+    { { 'a', 0xFFFD, 0xFFFD, 0xFFFD, 0x40, 0xFFFD, 0xFFFD, 'c' },
+      { 'a', char(0xFD), char(0x80), char(0x80), 0x40, char(0x80), char(0x80), 'c' } },
   };
-#endif
 
 #endif
author	Henri Sivonen <hsivonen@hsivonen.fi>
	Fri, 06 Jul 2018 10:44:43 +0300
changeset 827195	70e8b1fa104560990a0b7f25bea82cb880879791
parent 827194	afdf7d11181f3b90578f94fd3824f7064827a85a
child 827197	6378a57d5c9b04102600861ef1205af643be5df0
push id	118488
push user	bmo:hsivonen@hsivonen.fi
push date	Tue, 07 Aug 2018 12:28:14 +0000
bugs	1402247, 1472113
milestone	63.0a1
Cargo.lock		file \| annotate \| diff \| comparison \| revisions
intl/encoding_glue/src/lib.rs		file \| annotate \| diff \| comparison \| revisions
js/xpconnect/src/XPCConvert.cpp		file \| annotate \| diff \| comparison \| revisions
servo/ports/geckolib/glue.rs		file \| annotate \| diff \| comparison \| revisions
servo/support/gecko/nsstring/Cargo.toml		file \| annotate \| diff \| comparison \| revisions
servo/support/gecko/nsstring/src/conversions.rs		file \| annotate \| diff \| comparison \| revisions
servo/support/gecko/nsstring/src/lib.rs		file \| annotate \| diff \| comparison \| revisions
toolkit/xre/nsWindowsRestart.cpp		file \| annotate \| diff \| comparison \| revisions
toolkit/xre/nsWindowsWMain.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/base/nsAlgorithm.h		file \| annotate \| diff \| comparison \| revisions
xpcom/ds/nsAtomTable.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/string/moz.build		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsReadableUtils.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsReadableUtils.h		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsSubstring.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsTStringObsolete.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsTSubstring.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsTSubstring.h		file \| annotate \| diff \| comparison \| revisions
xpcom/string/nsUTF8Utils.h		file \| annotate \| diff \| comparison \| revisions
xpcom/tests/gtest/TestAtoms.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/tests/gtest/TestStrings.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/tests/gtest/TestTextFormatter.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/tests/gtest/TestUTF.cpp		file \| annotate \| diff \| comparison \| revisions
xpcom/tests/gtest/UTFStrings.h		file \| annotate \| diff \| comparison \| revisions