Bug 1402247 part 4 - Use encoding_rs::mem for string conversion that yield plain C strings. draft
authorHenri Sivonen <hsivonen@hsivonen.fi>
Tue, 06 Feb 2018 17:02:37 +0200
changeset 768030 1e0741a3568da43dd13df3724966e06ccebe46da
parent 768029 a1a06fd30435879dbd04856f41361f63dd2fe4ce
child 768031 f41bb7a7483db4c29cda50cc7bf60b2fca037622
push id102784
push userbmo:hsivonen@hsivonen.fi
push dateThu, 15 Mar 2018 16:05:17 +0000
bugs1402247
milestone61.0a1
Bug 1402247 part 4 - Use encoding_rs::mem for string conversion that yield plain C strings. MozReview-Commit-ID: 5x2UhkxiE6H
intl/encoding_glue/src/lib.rs
xpcom/string/moz.build
xpcom/string/nsReadableUtils.cpp
xpcom/string/nsReadableUtils.h
xpcom/string/nsReadableUtilsImpl.h
xpcom/string/nsReadableUtilsSSE2.cpp
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -595,8 +595,28 @@ pub unsafe extern "C" fn encoding_mem_is
 pub unsafe extern "C" fn encoding_mem_is_ascii(buffer: *const u8, len: usize) -> bool {
     encoding_rs::mem::is_ascii(::std::slice::from_raw_parts(buffer, len))
 }
 
 #[no_mangle]
 pub unsafe extern "C" fn encoding_mem_is_basic_latin(buffer: *const u16, len: usize) -> bool {
     encoding_rs::mem::is_basic_latin(::std::slice::from_raw_parts(buffer, len))
 }
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_latin1_lossy(src: *const u16, src_len: usize, dst: *mut u8, dst_len: usize) {
+    encoding_rs::mem::convert_utf16_to_latin1_lossy(::std::slice::from_raw_parts(src, src_len), ::std::slice::from_raw_parts_mut(dst, dst_len));
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_latin1_to_utf16(src: *const u8, src_len: usize, dst: *mut u16, dst_len: usize) {
+    encoding_rs::mem::convert_latin1_to_utf16(::std::slice::from_raw_parts(src, src_len), ::std::slice::from_raw_parts_mut(dst, dst_len));
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf16_to_utf8(src: *const u16, src_len: usize, dst: *mut u8, dst_len: usize) -> usize {
+    encoding_rs::mem::convert_utf16_to_utf8(::std::slice::from_raw_parts(src, src_len), ::std::slice::from_raw_parts_mut(dst, dst_len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_convert_utf8_to_utf16(src: *const u8, src_len: usize, dst: *mut u16, dst_len: usize) -> usize {
+    encoding_rs::mem::convert_utf8_to_utf16(::std::slice::from_raw_parts(src, src_len), ::std::slice::from_raw_parts_mut(dst, dst_len))
+}
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -49,18 +49,16 @@ UNIFIED_SOURCES += [
     'precompiled_templates.cpp',
 ]
 
 # Are we targeting x86 or x86-64?  If so, compile the SSE2 functions for
 # nsUTF8Utils.cpp and nsReadableUtils.cpp.
 if CONFIG['INTEL_ARCHITECTURE']:
     SOURCES += ['nsUTF8UtilsSSE2.cpp']
     SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
-    SOURCES += ['nsReadableUtilsSSE2.cpp']
-    SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
 
 if CONFIG['BUILD_ARM_NEON'] or CONFIG['CPU_ARCH'] == 'aarch64':
     SOURCES += ['nsUTF8UtilsNEON.cpp']
     SOURCES['nsUTF8UtilsNEON.cpp'].flags += CONFIG['NEON_FLAGS']
 
 # MSVC 2017 has a bug that incorrectly generates C5037 warning which
 # hits the template string code. We need to disable this warning as a
 # workaround. See https://developercommunity.visualstudio.com/
--- a/xpcom/string/nsReadableUtils.cpp
+++ b/xpcom/string/nsReadableUtils.cpp
@@ -1,87 +1,28 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsReadableUtils.h"
-#include "nsReadableUtilsImpl.h"
 
 #include <algorithm>
 
 #include "mozilla/CheckedInt.h"
 
 #include "nscore.h"
 #include "nsMemory.h"
 #include "nsString.h"
 #include "nsTArray.h"
 #include "nsUTF8Utils.h"
 
-using mozilla::IsASCII;
-
-/**
- * Fallback implementation for finding the first non-ASCII character in a
- * UTF-16 string.
- */
-static inline int32_t
-FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
-{
-  typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
-  const size_t kMask = p::mask();
-  const uintptr_t kAlignMask = p::alignMask();
-  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
-
-  const char16_t* idx = aBegin;
-
-  // Align ourselves to a word boundary.
-  for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  // Check one word at a time.
-  const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
-  for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
-    const size_t word = *reinterpret_cast<const size_t*>(idx);
-    if (word & kMask) {
-      return idx - aBegin;
-    }
-  }
-
-  // Take care of the remainder one character at a time.
-  for (; idx != aEnd; idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  return -1;
-}
-
-/*
- * This function returns -1 if all characters in str are ASCII characters.
- * Otherwise, it returns a value less than or equal to the index of the first
- * ASCII character in str. For example, if first non-ASCII character is at
- * position 25, it may return 25, 24, or 16. But it guarantees
- * there are only ASCII characters before returned value.
- */
-static inline int32_t
-FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
-{
-#ifdef MOZILLA_MAY_SUPPORT_SSE2
-  if (mozilla::supports_sse2()) {
-    return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
-  }
-#endif
-
-  return FirstNonASCIIUnvectorized(aBegin, aEnd);
-}
+using mozilla::MakeSpan;
+using mozilla::AsWritableBytes;
 
 /**
  * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
  *
  * @param aSource an string you will eventually be making a copy of
  * @return a new buffer (of the type specified by the second parameter) which you must free with |free|.
  *
  */
@@ -93,101 +34,105 @@ AllocateStringCopy(const FromStringT& aS
   return static_cast<ToCharT*>(moz_xmalloc(
     (aSource.Length() + 1) * sizeof(ToCharT)));
 }
 
 
 char*
 ToNewCString(const nsAString& aSource)
 {
-  char* result = AllocateStringCopy(aSource, (char*)0);
-  if (!result) {
+  char* dest = AllocateStringCopy(aSource, (char*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsAString::const_iterator fromBegin, fromEnd;
-  LossyConvertEncoding16to8 converter(result);
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter).write_terminator();
-  return result;
+  auto len = aSource.Length();
+  LossyConvertUTF16toLatin1(aSource, AsWritableBytes(MakeSpan(dest, len)));
+  dest[len] = 0;
+  return dest;
 }
 
 char*
 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
 {
-  nsAString::const_iterator start, end;
-  CalculateUTF8Size calculator;
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              calculator);
-
-  if (aUTF8Count) {
-    *aUTF8Count = calculator.Size();
+  auto len = aSource.Length();
+  // The uses of this function seem temporary enough that it's not
+  // worthwhile to be fancy about the allocation size. Let's just use
+  // the worst case.
+  // Times 3 plus 2, because ConvertUTF16toUTF8Func requires times 3 plus 1 and
+  // then we have the terminator.
+  mozilla::CheckedInt<size_t> destLen(len);
+  destLen *= 3;
+  destLen += 2;
+  if (!destLen.isValid()) {
+    return nullptr;
   }
-
-  char* result = static_cast<char*>
-                 (moz_xmalloc(calculator.Size() + 1));
-  if (!result) {
+  size_t destLenVal = destLen.value();
+  if (destLenVal > UINT32_MAX) {
+    return nullptr;
+  }
+  char* dest = static_cast<char*>(moz_xmalloc(destLenVal));
+  if (!dest) {
     return nullptr;
   }
 
-  ConvertUTF16toUTF8 converter(result);
-  copy_string(aSource.BeginReading(start), aSource.EndReading(end),
-              converter).write_terminator();
-  NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
+  size_t written = ConvertUTF16toUTF8Func(aSource, AsWritableBytes(MakeSpan(dest, destLenVal)));
+  dest[written] = 0;
 
-  return result;
+  if (aUTF8Count) {
+    *aUTF8Count = written;
+  }
+
+  return dest;
 }
 
 char*
 ToNewCString(const nsACString& aSource)
 {
   // no conversion needed, just allocate a buffer of the correct length and copy into it
 
-  char* result = AllocateStringCopy(aSource, (char*)0);
-  if (!result) {
+  char* dest = AllocateStringCopy(aSource, (char*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsACString::const_iterator fromBegin, fromEnd;
-  char* toBegin = result;
-  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-               toBegin) = char(0);
-  return result;
+  auto len = aSource.Length();
+  memcpy(dest, aSource.BeginReading(), len * sizeof(char));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 ToNewUnicode(const nsAString& aSource)
 {
   // no conversion needed, just allocate a buffer of the correct length and copy into it
 
-  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
-  if (!result) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsAString::const_iterator fromBegin, fromEnd;
-  char16_t* toBegin = result;
-  *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-               toBegin) = char16_t(0);
-  return result;
+  auto len = aSource.Length();
+  memcpy(dest, aSource.BeginReading(), len * sizeof(char16_t));
+  dest[len] = 0;
+  return dest;
 }
 
 char16_t*
 ToNewUnicode(const nsACString& aSource)
 {
-  char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
-  if (!result) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  nsACString::const_iterator fromBegin, fromEnd;
-  LossyConvertEncoding8to16 converter(result);
-  copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
-              converter).write_terminator();
-  return result;
+  auto len = aSource.Length();
+  ConvertLatin1toUTF16(aSource, MakeSpan(dest, len));
+  dest[len] = 0;
+  return dest;
 }
 
 uint32_t
 CalcUTF8ToUnicodeLength(const nsACString& aSource)
 {
   nsACString::const_iterator start, end;
   CalculateUTF8Length calculator;
   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
@@ -208,31 +153,30 @@ UTF8ToUnicodeBuffer(const nsACString& aS
     *aUTF16Count = converter.Length();
   }
   return aBuffer;
 }
 
 char16_t*
 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
 {
-  const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
-  const size_t buffer_size = (length + 1) * sizeof(char16_t);
-  char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
-  if (!buffer) {
+  char16_t* dest = AllocateStringCopy(aSource, (char16_t*)nullptr);
+  if (!dest) {
     return nullptr;
   }
 
-  uint32_t copied;
-  UTF8ToUnicodeBuffer(aSource, buffer, &copied);
-  NS_ASSERTION(length == copied, "length mismatch");
+  auto len = aSource.Length();
+  size_t written = ConvertUTF8toUTF16Func(aSource, MakeSpan(dest, len));
+  dest[written] = 0;
 
   if (aUTF16Count) {
-    *aUTF16Count = copied;
+    *aUTF16Count = written;
   }
-  return buffer;
+
+  return dest;
 }
 
 char16_t*
 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
               uint32_t aLength)
 {
   nsAString::const_iterator fromBegin, fromEnd;
   char16_t* toBegin = aDest;
--- a/xpcom/string/nsReadableUtils.h
+++ b/xpcom/string/nsReadableUtils.h
@@ -18,28 +18,82 @@
 
 #include "nsTArrayForwardDeclare.h"
 
 // Can't include mozilla/Encoding.h here
 extern "C" {
   size_t encoding_utf8_valid_up_to(uint8_t const* buffer, size_t buffer_len);
   bool encoding_mem_is_ascii(uint8_t const* buffer, size_t buffer_len);
   bool encoding_mem_is_basic_latin(char16_t const* buffer, size_t buffer_len);
+  void encoding_mem_convert_utf16_to_latin1_lossy(const char16_t* src, size_t src_len, uint8_t* dst, size_t dst_len);
+  void encoding_mem_convert_latin1_to_utf16(const uint8_t* src, size_t src_len, char16_t* dst, size_t dst_len);
+  size_t encoding_mem_convert_utf16_to_utf8(const char16_t* src, size_t src_len, uint8_t* dst, size_t dst_len);
+  size_t encoding_mem_convert_utf8_to_utf16(const uint8_t* src, size_t src_len, char16_t* dst, size_t dst_len);
 }
 
 // From the nsstring crate
 extern "C" {
   bool nsstring_fallible_append_utf8_impl(nsAString* aThis, const uint8_t* aOther, size_t aOtherLen, size_t aOldLen);
   bool nsstring_fallible_append_latin1_impl(nsAString* aThis, const uint8_t* aOther, size_t aOtherLen, size_t aOldLen);
   bool nscstring_fallible_append_utf16_to_utf8_impl(nsACString* aThis, const char16_t*, size_t aOtherLen, size_t aOldLen);
   bool nscstring_fallible_append_utf16_to_latin1_lossy_impl(nsACString* aThis, const char16_t*, size_t aOtherLen, size_t aOldLen);
   bool nscstring_fallible_append_utf8_to_latin1_lossy_check(nsACString* aThis, const nsACString* aOther, size_t aOldLen);
   bool nscstring_fallible_append_latin1_to_utf8_check(nsACString* aThis, const nsACString* aOther, size_t aOldLen);
 }
 
+/**
+ * If all the code points in the input are below U+0100, converts to Latin1, i.e. unsigned byte value is Unicode
+ * scalar value; not windows-1252. If there are code points above U+00FF, asserts in debug builds and produces
+ * garbage in release builds. The nature of the garbage depends on the CPU architecture and must not be relied upon.
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline void
+LossyConvertUTF16toLatin1(mozilla::Span<const char16_t> aSource, mozilla::Span<uint8_t> aDest)
+{
+  encoding_mem_convert_utf16_to_latin1_lossy(aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Interprets unsigned byte value as Unicode scalar value (i.e. not windows-1252!).
+ *
+ * The length of aDest must not be less than the length of aSource.
+ */
+inline void
+ConvertLatin1toUTF16(mozilla::Span<const uint8_t> aSource, mozilla::Span<char16_t> aDest)
+{
+  encoding_mem_convert_latin1_to_utf16(aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Lone surrogates are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The length of aDest must be at least the length of aSource times three _plus one_.
+ *
+ * Returns the number of code units written.
+ */
+inline size_t
+ConvertUTF16toUTF8Func(mozilla::Span<const char16_t> aSource, mozilla::Span<uint8_t> aDest)
+{
+  return encoding_mem_convert_utf16_to_utf8(aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
+/**
+ * Malformed byte sequences are replaced with the REPLACEMENT CHARACTER.
+ *
+ * The length of aDest must not be less than the length of aSource.
+ *
+ * Returns the number of code units written.
+ */
+inline size_t
+ConvertUTF8toUTF16Func(mozilla::Span<const uint8_t> aSource, mozilla::Span<char16_t> aDest)
+{
+  return encoding_mem_convert_utf8_to_utf16(aSource.Elements(), aSource.Length(), aDest.Elements(), aDest.Length());
+}
+
 inline size_t
 Distance(const nsReadingIterator<char16_t>& aStart,
          const nsReadingIterator<char16_t>& aEnd)
 {
   MOZ_ASSERT(aStart.get() <= aEnd.get());
   return static_cast<size_t>(aEnd.get() - aStart.get());
 }
 inline size_t
@@ -96,16 +150,17 @@ inline MOZ_MUST_USE bool CopyUTF8toUTF16
 inline void CopyUTF8toUTF16(const char* aSource,
                             nsAString& aDest)
 {
   if (aSource) {
     CopyUTF8toUTF16(mozilla::AsBytes(mozilla::MakeStringSpan(aSource)), aDest);
   }
 }
 
+
 inline MOZ_MUST_USE bool AppendUTF8toUTF16(const char* aSource,
                                            nsAString& aDest,
                                            const mozilla::fallible_t&)
 {
   if (aSource) {
     return AppendUTF8toUTF16(mozilla::AsBytes(mozilla::MakeStringSpan(aSource)), aDest, mozilla::fallible);
   }
   return true;
@@ -436,19 +491,20 @@ char16_t* UTF8ToUnicodeBuffer(const nsAC
                               uint32_t* aUTF16Count = nullptr);
 
 /**
  * Returns a new |char16_t| buffer containing a zero-terminated copy
  * of |aSource|.
  *
  * Allocates and returns a new |char| buffer which you must free with
  * |free|.  Performs an encoding conversion from UTF-8 to UTF-16
- * while copying |aSource| to your new buffer.  This conversion is well defined
- * for a valid UTF-8 string.  The new buffer is zero-terminated, but that
- * may not help you if |aSource| contains embedded nulls.
+ * while copying |aSource| to your new buffer.  Malformed byte sequences
+ * are replaced with the REPLACEMENT CHARACTER.  The new buffer is
+ * zero-terminated, but that may not help you if |aSource| contains
+ * embedded nulls.
  *
  * @param aSource an 8-bit wide string, UTF-8 encoded
  * @param aUTF16Count the number of 16-bit units that was returned
  * @return a new |char16_t| buffer you must free with |free|.
  *         (UTF-16 encoded)
  */
 char16_t* UTF8ToNewUnicode(const nsACString& aSource,
                            uint32_t* aUTF16Count = nullptr);
deleted file mode 100644
--- a/xpcom/string/nsReadableUtilsImpl.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* vim: set ts=8 sts=2 et sw=2 tw=80: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include <stdint.h>
-
-namespace mozilla {
-
-inline bool IsASCII(char16_t aChar) {
-  return (aChar & 0xFF80) == 0;
-}
-
-/**
- * Provides a pointer before or equal to |aPtr| that is is suitably aligned.
- */
-inline const char16_t* aligned(const char16_t* aPtr, const uintptr_t aMask)
-{
-  return reinterpret_cast<const char16_t*>(
-      reinterpret_cast<uintptr_t>(aPtr) & ~aMask);
-}
-
-/**
- * Structures for word-sized vectorization of ASCII checking for UTF-16
- * strings.
- */
-template<size_t size> struct NonASCIIParameters;
-template<> struct NonASCIIParameters<4> {
-  static inline size_t mask() { return 0xff80ff80; }
-  static inline uintptr_t alignMask() { return 0x3; }
-  static inline size_t numUnicharsPerWord() { return 2; }
-};
-
-template<> struct NonASCIIParameters<8> {
-  static inline size_t mask() {
-    static const uint64_t maskAsUint64 = UINT64_C(0xff80ff80ff80ff80);
-    // We have to explicitly cast this 64-bit value to a size_t, or else
-    // compilers for 32-bit platforms will warn about it being too large to fit
-    // in the size_t return type. (Fortunately, this code isn't actually
-    // invoked on 32-bit platforms -- they'll use the <4> specialization above.
-    // So it is, in fact, OK that this value is too large for a 32-bit size_t.)
-    return (size_t)maskAsUint64;
-  }
-  static inline uintptr_t alignMask() { return 0x7; }
-  static inline size_t numUnicharsPerWord() { return 4; }
-};
-
-namespace SSE2 {
-
-int32_t FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd);
-
-} // namespace SSE2
-} // namespace mozilla
deleted file mode 100644
--- a/xpcom/string/nsReadableUtilsSSE2.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* vim: set ts=8 sts=2 et sw=2 tw=80: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include <emmintrin.h>
-
-#include "nsReadableUtilsImpl.h"
-
-namespace mozilla {
-namespace SSE2 {
-
-static inline bool
-is_zero (__m128i x)
-{
-  return
-    _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff;
-}
-
-int32_t
-FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
-{
-  const size_t kNumUnicharsPerVector = sizeof(__m128i) / sizeof(char16_t);
-  typedef NonASCIIParameters<sizeof(size_t)> p;
-  const size_t kMask = p::mask();
-  const uintptr_t kXmmAlignMask = 0xf;
-  const uint16_t kShortMask = 0xff80;
-  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
-
-  const char16_t* idx = aBegin;
-
-  // Align ourselves to a 16-byte boundary as required by _mm_load_si128
-  for (; idx != aEnd && ((uintptr_t(idx) & kXmmAlignMask) != 0); idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  // Check one XMM register (16 bytes) at a time.
-  const char16_t* vectWalkEnd = aligned(aEnd, kXmmAlignMask);
-  __m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(kShortMask));
-  for (; idx != vectWalkEnd; idx += kNumUnicharsPerVector) {
-    const __m128i vect = *reinterpret_cast<const __m128i*>(idx);
-    if (!is_zero(_mm_and_si128(vect, vectmask))) {
-      return idx - aBegin;
-    }
-  }
-
-  // Check one word at a time.
-  const char16_t* wordWalkEnd = aligned(aEnd, p::alignMask());
-  for(; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
-    const size_t word = *reinterpret_cast<const size_t*>(idx);
-    if (word & kMask) {
-      return idx - aBegin;
-    }
-  }
-
-  // Take care of the remainder one character at a time.
-  for (; idx != aEnd; idx++) {
-    if (!IsASCII(*idx)) {
-      return idx - aBegin;
-    }
-  }
-
-  return -1;
-}
-
-} // namespace SSE2
-} // namespace mozilla