Bug 586838 - Add NEON versions of LossyConvertEncoding. r? draft
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>
Fri, 24 Nov 2017 21:11:19 +0900
changeset 706066 68a6c9b563daf216cdfb377d9d9a785a17cca400
parent 706025 a21f4e2ce5186e2dc9ee411b07e9348866b4ef30
child 742553 b05d178dd7ec954af49fceab997341b38625936b
push id91683
push userbmo:m_kato@ga2.so-net.ne.jp
push dateFri, 01 Dec 2017 05:56:40 +0000
bugs586838
milestone59.0a1
Bug 586838 - Add NEON versions of LossyConvertEncoding. r? MozReview-Commit-ID: EKC3eUyi2Ca
xpcom/string/moz.build
xpcom/string/nsUTF8Utils.h
xpcom/string/nsUTF8UtilsNEON.cpp
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -54,16 +54,20 @@ UNIFIED_SOURCES += [
 # Are we targeting x86 or x86-64?  If so, compile the SSE2 functions for
 # nsUTF8Utils.cpp and nsReadableUtils.cpp.
 if CONFIG['INTEL_ARCHITECTURE']:
     SOURCES += ['nsUTF8UtilsSSE2.cpp']
     SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
     SOURCES += ['nsReadableUtilsSSE2.cpp']
     SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
 
+if CONFIG['BUILD_ARM_NEON'] or CONFIG['CPU_ARCH'] == 'aarch64':
+    SOURCES += ['nsUTF8UtilsNEON.cpp']
+    SOURCES['nsUTF8UtilsNEON.cpp'].flags += CONFIG['NEON_FLAGS']
+
 # MSVC 2017 has a bug that incorrectly generates C5037 warning which
 # hits the template string code. We need to disable this warning as a
 # workaround. See https://developercommunity.visualstudio.com/
 # content/problem/81223/incorrect-error-c5037-with-permissive.html
 if CONFIG['_MSC_VER']:
     CXXFLAGS += ['-wd5037']
 
 FINAL_LIBRARY = 'xul'
--- a/xpcom/string/nsUTF8Utils.h
+++ b/xpcom/string/nsUTF8Utils.h
@@ -6,17 +6,19 @@
 #ifndef nsUTF8Utils_h_
 #define nsUTF8Utils_h_
 
 // This file may be used in two ways: if MOZILLA_INTERNAL_API is defined, this
 // file will provide signatures for the Mozilla abstract string types. It will
 // use XPCOM assertion/debugging macros, etc.
 
 #include "nscore.h"
+#include "mozilla/arm.h"
 #include "mozilla/Assertions.h"
+#include "mozilla/EndianUtils.h"
 #include "mozilla/SSE.h"
 #include "mozilla/TypeTraits.h"
 
 #include "nsCharTraits.h"
 
 #ifdef MOZILLA_INTERNAL_API
 #define UTF8UTILS_WARNING(msg) NS_WARNING(msg)
 #else
@@ -658,24 +660,34 @@ public:
   write(const char* aSource, uint32_t aSourceLength)
   {
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
     if (mozilla::supports_sse2()) {
       write_sse2(aSource, aSourceLength);
       return;
     }
 #endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+    if (mozilla::supports_neon()) {
+      write_neon(aSource, aSourceLength);
+      return;
+    }
+#endif
     const char* done_writing = aSource + aSourceLength;
     while (aSource < done_writing) {
       *mDestination++ = (char16_t)(unsigned char)(*aSource++);
     }
   }
 
   void
   write_sse2(const char* aSource, uint32_t aSourceLength);
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+  void
+  write_neon(const char* aSource, uint32_t aSourceLength);
+#endif
 
   void
   write_terminator()
   {
     *mDestination = (char16_t)(0);
   }
 
 private:
@@ -702,26 +714,36 @@ public:
   write(const char16_t* aSource, uint32_t aSourceLength)
   {
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
     if (mozilla::supports_sse2()) {
       write_sse2(aSource, aSourceLength);
       return;
     }
 #endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+    if (mozilla::supports_neon()) {
+      write_neon(aSource, aSourceLength);
+      return;
+    }
+#endif
     const char16_t* done_writing = aSource + aSourceLength;
     while (aSource < done_writing) {
       *mDestination++ = (char)(*aSource++);
     }
   }
 
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
   void
   write_sse2(const char16_t* aSource, uint32_t aSourceLength);
 #endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+  void
+  write_neon(const char16_t* aSource, uint32_t aSourceLength);
+#endif
 
   void
   write_terminator()
   {
     *mDestination = '\0';
   }
 
 private:
new file mode 100644
--- /dev/null
+++ b/xpcom/string/nsUTF8UtilsNEON.cpp
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+#include "nsAlgorithm.h"
+#include "nsUTF8Utils.h"
+
+#include <arm_neon.h>
+
+void
+LossyConvertEncoding16to8::write_neon(const char16_t* aSource,
+                                      uint32_t aSourceLength)
+{
+  char* dest = mDestination;
+
+  // Align source and destination to a 16-byte boundary.
+  uint32_t i = 0;
+  while (((NS_PTR_TO_INT32(aSource + i) & 0xf) ||
+          (NS_PTR_TO_INT32(dest + i) & 0x7)) &&
+         i < aSourceLength) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+    i++;
+  }
+
+  if ((NS_PTR_TO_INT32(dest + i) & 0xf) && aSourceLength - i > 7) {
+    // source is aligned, but destination isn't aligned by 16-byte
+    uint16x8_t s = vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i));
+    vst1_u8(reinterpret_cast<uint8_t*>(dest + i), vmovn_u16(s));
+    i += 8;
+  }
+
+  // Walk 32 bytes at a time.
+  while (aSourceLength - i > 15) {
+    uint16x8_t low = vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i));
+    uint16x8_t high =
+      vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i + 8));
+    uint8x16_t d = vcombine_u8(vmovn_u16(low), vmovn_u16(high));
+    vst1q_u8(reinterpret_cast<uint8_t*>(dest + i), d);
+    i += 16;
+  }
+
+  if (aSourceLength - i > 7) {
+    uint16x8_t s = vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i));
+    vst1_u8(reinterpret_cast<uint8_t*>(dest + i), vmovn_u16(s));
+    i += 8;
+  }
+
+  // Finish up the rest.
+  for (; i < aSourceLength; ++i) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+  }
+
+  mDestination += i;
+}
+
+void
+LossyConvertEncoding8to16::write_neon(const char* aSource,
+                                      uint32_t aSourceLength)
+{
+  char16_t* dest = mDestination;
+
+  // Align source and destination to a 16-byte boundary.
+  uint32_t i = 0;
+  while (((NS_PTR_TO_INT32(aSource + i) & 0x7) ||
+          (NS_PTR_TO_INT32(dest + i) & 0xf)) && i < aSourceLength) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+    i++;
+  }
+
+  if ((NS_PTR_TO_INT32(aSource + i) & 0xf) && aSourceLength - i > 7) {
+    // destination is aligned, but source isn't aligned by 16-byte
+    uint8x8_t s = vld1_u8(reinterpret_cast<const uint8_t*>(aSource + i));
+    vst1q_u16(reinterpret_cast<uint16_t*>(dest + i), vmovl_u8(s));
+    i += 8;
+  }
+
+  // Walk 16 bytes at a time.
+  while (aSourceLength - i > 15) {
+    uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t*>(aSource + i));
+    uint16x8_t low = vmovl_u8(vget_low_u8(s));
+    uint16x8_t high = vmovl_u8(vget_high_u8(s));
+    vst1q_u16(reinterpret_cast<uint16_t*>(dest + i), low);
+    vst1q_u16(reinterpret_cast<uint16_t*>(dest + i + 8), high);
+    i += 16;
+  }
+
+  if (aSourceLength - i > 7) {
+    uint8x8_t s = vld1_u8(reinterpret_cast<const uint8_t*>(aSource + i));
+    vst1q_u16(reinterpret_cast<uint16_t*>(dest + i), vmovl_u8(s));
+    i += 8;
+  }
+
+  // Finish up whatever's left.
+  for (; i < aSourceLength; ++i) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+  }
+
+  mDestination += i;
+}