Bug 877662 - Use SSE2 versions of AudioNodeEngine functions r?padenot
MozReview-Commit-ID: AJ2f5YBobPv
--- a/dom/media/webaudio/AudioNodeEngine.cpp
+++ b/dom/media/webaudio/AudioNodeEngine.cpp
@@ -4,16 +4,19 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioNodeEngine.h"
#ifdef BUILD_ARM_NEON
#include "mozilla/arm.h"
#include "AudioNodeEngineNEON.h"
#endif
+#ifdef USE_SSE2
+#include "AudioNodeEngineSSE2.h"
+#endif
namespace mozilla {
already_AddRefed<ThreadSharedFloatArrayBufferList>
ThreadSharedFloatArrayBufferList::Create(uint32_t aChannelCount,
size_t aLength,
const mozilla::fallible_t&)
{
@@ -66,16 +69,24 @@ void AudioBufferAddWithScale(const float
uint32_t aSize)
{
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
AudioBufferAddWithScale_NEON(aInput, aScale, aOutput, aSize);
return;
}
#endif
+
+#ifdef USE_SSE2
+ if (mozilla::supports_sse2()) {
+ AudioBufferAddWithScale_SSE(aInput, aScale, aOutput, aSize);
+ return;
+ }
+#endif
+
if (aScale == 1.0f) {
for (uint32_t i = 0; i < aSize; ++i) {
aOutput[i] += aInput[i];
}
} else {
for (uint32_t i = 0; i < aSize; ++i) {
aOutput[i] += aInput[i]*aScale;
}
@@ -99,16 +110,24 @@ AudioBlockCopyChannelWithScale(const flo
memcpy(aOutput, aInput, WEBAUDIO_BLOCK_SIZE*sizeof(float));
} else {
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
return;
}
#endif
+
+#ifdef USE_SSE2
+ if (mozilla::supports_sse2()) {
+ AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
+ return;
+ }
+#endif
+
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
aOutput[i] = aInput[i]*aScale;
}
}
}
void
BufferComplexMultiply(const float* aInput,
@@ -147,16 +166,24 @@ AudioBlockCopyChannelWithScale(const flo
float aOutput[WEBAUDIO_BLOCK_SIZE])
{
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
return;
}
#endif
+
+#ifdef USE_SSE2
+ if (mozilla::supports_sse2()) {
+ AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
+ return;
+ }
+#endif
+
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
aOutput[i] = aInput[i]*aScale[i];
}
}
void
AudioBlockInPlaceScale(float aBlock[WEBAUDIO_BLOCK_SIZE],
float aScale)
@@ -173,16 +200,24 @@ AudioBufferInPlaceScale(float* aBlock,
return;
}
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
return;
}
#endif
+
+#ifdef USE_SSE2
+ if (mozilla::supports_sse2()) {
+ AudioBufferInPlaceScale_SSE(aBlock, aScale, aSize);
+ return;
+ }
+#endif
+
for (uint32_t i = 0; i < aSize; ++i) {
*aBlock++ *= aScale;
}
}
void
AudioBlockPanMonoToStereo(const float aInput[WEBAUDIO_BLOCK_SIZE],
float aGainL[WEBAUDIO_BLOCK_SIZE],
@@ -215,16 +250,25 @@ AudioBlockPanStereoToStereo(const float
if (mozilla::supports_neon()) {
AudioBlockPanStereoToStereo_NEON(aInputL, aInputR,
aGainL, aGainR, aIsOnTheLeft,
aOutputL, aOutputR);
return;
}
#endif
+#ifdef USE_SSE2
+ if (mozilla::supports_sse2()) {
+ AudioBlockPanStereoToStereo_SSE(aInputL, aInputR,
+ aGainL, aGainR, aIsOnTheLeft,
+ aOutputL, aOutputR);
+ return;
+ }
+#endif
+
uint32_t i;
if (aIsOnTheLeft) {
for (i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
aOutputL[i] = aInputL[i] + aInputR[i] * aGainL;
aOutputR[i] = aInputR[i] * aGainR;
}
} else {
--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
@@ -91,22 +91,40 @@ int ReverbAccumulationBuffer::accumulate
float* destination = m_buffer.Elements();
bool isSafe = writeIndex <= bufferLength && numberOfFrames1 + writeIndex <= bufferLength && numberOfFrames2 <= bufferLength;
MOZ_ASSERT(isSafe);
if (!isSafe)
return 0;
- AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
+#ifdef USE_SSE2
+ // The source is unlikely to be aligned, and the number of values is
+ // unlikely to be a multiple of 16, so we just add them here rather than
+ // calling AudioBufferAddWithScale.
+ //
+ // TODO: Ideally we would use scalar calls when necessary and switch
+ // to vector calls when we have aligned sources and destinations.
+ // See Bug 1263910.
+ for (uint32_t i = 0; i < numberOfFrames1; ++i) {
+ destination[writeIndex + i] += source[i];
+ }
- // Handle wrap-around if necessary
+ // Handle wrap-around if necessary.
+ if (numberOfFrames2 > 0) {
+ for (uint32_t i = 0; i < numberOfFrames2; ++i) {
+ destination[i] += source[numberOfFrames1 + i];
+ }
+ }
+#else
+ AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
if (numberOfFrames2 > 0) {
AudioBufferAddWithScale(source + numberOfFrames1, 1.0f, destination, numberOfFrames2);
}
+#endif
return writeIndex;
}
void ReverbAccumulationBuffer::reset()
{
PodZero(m_buffer.Elements(), m_buffer.Length());
m_readIndex = 0;