Bug 586838 - Add NEON versions of LossyConvertEncoding. r?
MozReview-Commit-ID: EKC3eUyi2Ca
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -54,16 +54,20 @@ UNIFIED_SOURCES += [
# Are we targeting x86 or x86-64? If so, compile the SSE2 functions for
# nsUTF8Utils.cpp and nsReadableUtils.cpp.
if CONFIG['INTEL_ARCHITECTURE']:
SOURCES += ['nsUTF8UtilsSSE2.cpp']
SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
SOURCES += ['nsReadableUtilsSSE2.cpp']
SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
+if CONFIG['BUILD_ARM_NEON'] or CONFIG['CPU_ARCH'] == 'aarch64':
+ SOURCES += ['nsUTF8UtilsNEON.cpp']
+ SOURCES['nsUTF8UtilsNEON.cpp'].flags += CONFIG['NEON_FLAGS']
+
# MSVC 2017 has a bug that incorrectly generates C5037 warning which
# hits the template string code. We need to disable this warning as a
# workaround. See https://developercommunity.visualstudio.com/
# content/problem/81223/incorrect-error-c5037-with-permissive.html
if CONFIG['_MSC_VER']:
CXXFLAGS += ['-wd5037']
FINAL_LIBRARY = 'xul'
--- a/xpcom/string/nsUTF8Utils.h
+++ b/xpcom/string/nsUTF8Utils.h
@@ -6,17 +6,19 @@
#ifndef nsUTF8Utils_h_
#define nsUTF8Utils_h_
// This file may be used in two ways: if MOZILLA_INTERNAL_API is defined, this
// file will provide signatures for the Mozilla abstract string types. It will
// use XPCOM assertion/debugging macros, etc.
#include "nscore.h"
+#include "mozilla/arm.h"
#include "mozilla/Assertions.h"
+#include "mozilla/EndianUtils.h"
#include "mozilla/SSE.h"
#include "mozilla/TypeTraits.h"
#include "nsCharTraits.h"
#ifdef MOZILLA_INTERNAL_API
#define UTF8UTILS_WARNING(msg) NS_WARNING(msg)
#else
@@ -658,24 +660,34 @@ public:
write(const char* aSource, uint32_t aSourceLength)
{
#ifdef MOZILLA_MAY_SUPPORT_SSE2
if (mozilla::supports_sse2()) {
write_sse2(aSource, aSourceLength);
return;
}
#endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+ if (mozilla::supports_neon()) {
+ write_neon(aSource, aSourceLength);
+ return;
+ }
+#endif
const char* done_writing = aSource + aSourceLength;
while (aSource < done_writing) {
*mDestination++ = (char16_t)(unsigned char)(*aSource++);
}
}
void
write_sse2(const char* aSource, uint32_t aSourceLength);
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+ void
+ write_neon(const char* aSource, uint32_t aSourceLength);
+#endif
void
write_terminator()
{
*mDestination = (char16_t)(0);
}
private:
@@ -702,26 +714,36 @@ public:
write(const char16_t* aSource, uint32_t aSourceLength)
{
#ifdef MOZILLA_MAY_SUPPORT_SSE2
if (mozilla::supports_sse2()) {
write_sse2(aSource, aSourceLength);
return;
}
#endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+ if (mozilla::supports_neon()) {
+ write_neon(aSource, aSourceLength);
+ return;
+ }
+#endif
const char16_t* done_writing = aSource + aSourceLength;
while (aSource < done_writing) {
*mDestination++ = (char)(*aSource++);
}
}
#ifdef MOZILLA_MAY_SUPPORT_SSE2
void
write_sse2(const char16_t* aSource, uint32_t aSourceLength);
#endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+ void
+ write_neon(const char16_t* aSource, uint32_t aSourceLength);
+#endif
void
write_terminator()
{
*mDestination = '\0';
}
private:
new file mode 100644
--- /dev/null
+++ b/xpcom/string/nsUTF8UtilsNEON.cpp
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+#include "nsAlgorithm.h"
+#include "nsUTF8Utils.h"
+
+#include <arm_neon.h>
+
+void
+LossyConvertEncoding16to8::write_neon(const char16_t* aSource,
+ uint32_t aSourceLength)
+{
+ char* dest = mDestination;
+
+ // Align source and destination to a 16-byte boundary.
+ uint32_t i = 0;
+ while (((NS_PTR_TO_INT32(aSource + i) & 0xf) ||
+ (NS_PTR_TO_INT32(dest + i) & 0x7)) &&
+ i < aSourceLength) {
+ dest[i] = static_cast<unsigned char>(aSource[i]);
+ i++;
+ }
+
+ if ((NS_PTR_TO_INT32(dest + i) & 0xf) && aSourceLength - i > 7) {
+ // source is aligned, but destination isn't aligned by 16-byte
+ uint16x8_t s = vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i));
+ vst1_u8(reinterpret_cast<uint8_t*>(dest + i), vmovn_u16(s));
+ i += 8;
+ }
+
+ // Walk 32 bytes at a time.
+ while (aSourceLength - i > 15) {
+ uint16x8_t low = vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i));
+ uint16x8_t high =
+ vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i + 8));
+ uint8x16_t d = vcombine_u8(vmovn_u16(low), vmovn_u16(high));
+ vst1q_u8(reinterpret_cast<uint8_t*>(dest + i), d);
+ i += 16;
+ }
+
+ if (aSourceLength - i > 7) {
+ uint16x8_t s = vld1q_u16(reinterpret_cast<const uint16_t*>(aSource + i));
+ vst1_u8(reinterpret_cast<uint8_t*>(dest + i), vmovn_u16(s));
+ i += 8;
+ }
+
+ // Finish up the rest.
+ for (; i < aSourceLength; ++i) {
+ dest[i] = static_cast<unsigned char>(aSource[i]);
+ }
+
+ mDestination += i;
+}
+
+void
+LossyConvertEncoding8to16::write_neon(const char* aSource,
+ uint32_t aSourceLength)
+{
+ char16_t* dest = mDestination;
+
+ // Align source and destination to a 16-byte boundary.
+ uint32_t i = 0;
+ while (((NS_PTR_TO_INT32(aSource + i) & 0x7) ||
+ (NS_PTR_TO_INT32(dest + i) & 0xf)) && i < aSourceLength) {
+ dest[i] = static_cast<unsigned char>(aSource[i]);
+ i++;
+ }
+
+ if ((NS_PTR_TO_INT32(aSource + i) & 0xf) && aSourceLength - i > 7) {
+ // destination is aligned, but source isn't aligned by 16-byte
+ uint8x8_t s = vld1_u8(reinterpret_cast<const uint8_t*>(aSource + i));
+ vst1q_u16(reinterpret_cast<uint16_t*>(dest + i), vmovl_u8(s));
+ i += 8;
+ }
+
+ // Walk 16 bytes at a time.
+ while (aSourceLength - i > 15) {
+ uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t*>(aSource + i));
+ uint16x8_t low = vmovl_u8(vget_low_u8(s));
+ uint16x8_t high = vmovl_u8(vget_high_u8(s));
+ vst1q_u16(reinterpret_cast<uint16_t*>(dest + i), low);
+ vst1q_u16(reinterpret_cast<uint16_t*>(dest + i + 8), high);
+ i += 16;
+ }
+
+ if (aSourceLength - i > 7) {
+ uint8x8_t s = vld1_u8(reinterpret_cast<const uint8_t*>(aSource + i));
+ vst1q_u16(reinterpret_cast<uint16_t*>(dest + i), vmovl_u8(s));
+ i += 8;
+ }
+
+ // Finish up whatever's left.
+ for (; i < aSourceLength; ++i) {
+ dest[i] = static_cast<unsigned char>(aSource[i]);
+ }
+
+ mDestination += i;
+}