Bug 877662 - Use SSE2 versions of AudioNodeEngine functions r?padenot draft
author Dan Minor <dminor@mozilla.com>
Fri, 18 Mar 2016 16:24:02 -0400
changeset 350445 b0706b7a8e7e88e82a63df1c563a1af886e5802e
parent 350444 c440eeba3492bbc01d3762f3139c128e9701f81b
child 351945 7e2e4736f83f3c516a2d1d0dc8de90ee2763e2ac
push id 15349
push user dminor@mozilla.com
push date Wed, 13 Apr 2016 19:34:21 +0000
reviewers padenot
bugs 877662
milestone 48.0a1
Bug 877662 - Use SSE2 versions of AudioNodeEngine functions r?padenot MozReview-Commit-ID: AJ2f5YBobPv
dom/media/webaudio/AudioNodeEngine.cpp
dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
--- a/dom/media/webaudio/AudioNodeEngine.cpp
+++ b/dom/media/webaudio/AudioNodeEngine.cpp
@@ -4,16 +4,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioNodeEngine.h"
 #ifdef BUILD_ARM_NEON
 #include "mozilla/arm.h"
 #include "AudioNodeEngineNEON.h"
 #endif
+#ifdef USE_SSE2
+#include "AudioNodeEngineSSE2.h"
+#endif
 
 namespace mozilla {
 
 already_AddRefed<ThreadSharedFloatArrayBufferList>
 ThreadSharedFloatArrayBufferList::Create(uint32_t aChannelCount,
                                          size_t aLength,
                                          const mozilla::fallible_t&)
 {
@@ -66,16 +69,24 @@ void AudioBufferAddWithScale(const float
                              uint32_t aSize)
 {
 #ifdef BUILD_ARM_NEON
   if (mozilla::supports_neon()) {
     AudioBufferAddWithScale_NEON(aInput, aScale, aOutput, aSize);
     return;
   }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBufferAddWithScale_SSE(aInput, aScale, aOutput, aSize);
+    return;
+  }
+#endif
+
   if (aScale == 1.0f) {
     for (uint32_t i = 0; i < aSize; ++i) {
       aOutput[i] += aInput[i];
     }
   } else {
     for (uint32_t i = 0; i < aSize; ++i) {
       aOutput[i] += aInput[i]*aScale;
     }
@@ -99,16 +110,24 @@ AudioBlockCopyChannelWithScale(const flo
     memcpy(aOutput, aInput, WEBAUDIO_BLOCK_SIZE*sizeof(float));
   } else {
 #ifdef BUILD_ARM_NEON
     if (mozilla::supports_neon()) {
       AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
       return;
     }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
+    return;
+  }
+#endif
+
     for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
       aOutput[i] = aInput[i]*aScale;
     }
   }
 }
 
 void
 BufferComplexMultiply(const float* aInput,
@@ -147,16 +166,24 @@ AudioBlockCopyChannelWithScale(const flo
                                float aOutput[WEBAUDIO_BLOCK_SIZE])
 {
 #ifdef BUILD_ARM_NEON
   if (mozilla::supports_neon()) {
     AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
     return;
   }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
+    return;
+  }
+#endif
+
   for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
     aOutput[i] = aInput[i]*aScale[i];
   }
 }
 
 void
 AudioBlockInPlaceScale(float aBlock[WEBAUDIO_BLOCK_SIZE],
                        float aScale)
@@ -173,16 +200,24 @@ AudioBufferInPlaceScale(float* aBlock,
     return;
   }
 #ifdef BUILD_ARM_NEON
   if (mozilla::supports_neon()) {
     AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
     return;
   }
 #endif
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBufferInPlaceScale_SSE(aBlock, aScale, aSize);
+    return;
+  }
+#endif
+
   for (uint32_t i = 0; i < aSize; ++i) {
     *aBlock++ *= aScale;
   }
 }
 
 void
 AudioBlockPanMonoToStereo(const float aInput[WEBAUDIO_BLOCK_SIZE],
                           float aGainL[WEBAUDIO_BLOCK_SIZE],
@@ -215,16 +250,25 @@ AudioBlockPanStereoToStereo(const float 
   if (mozilla::supports_neon()) {
     AudioBlockPanStereoToStereo_NEON(aInputL, aInputR,
                                      aGainL, aGainR, aIsOnTheLeft,
                                      aOutputL, aOutputR);
     return;
   }
 #endif
 
+#ifdef USE_SSE2
+  if (mozilla::supports_sse2()) {
+    AudioBlockPanStereoToStereo_SSE(aInputL, aInputR,
+                                    aGainL, aGainR, aIsOnTheLeft,
+                                    aOutputL, aOutputR);
+    return;
+  }
+#endif
+
   uint32_t i;
 
   if (aIsOnTheLeft) {
     for (i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
       aOutputL[i] = aInputL[i] + aInputR[i] * aGainL;
       aOutputR[i] = aInputR[i] * aGainR;
     }
   } else {
--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
@@ -91,22 +91,40 @@ int ReverbAccumulationBuffer::accumulate
 
     float* destination = m_buffer.Elements();
 
     bool isSafe = writeIndex <= bufferLength && numberOfFrames1 + writeIndex <= bufferLength && numberOfFrames2 <= bufferLength;
     MOZ_ASSERT(isSafe);
     if (!isSafe)
         return 0;
 
-    AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
+#ifdef USE_SSE2
+    // It is unlikely that the source is aligned or that the number of values
+    // is a multiple of 16, so we add them with a plain loop here instead of
+    // calling AudioBufferAddWithScale.
+    //
+    // TODO: Ideally we would use scalar calls when necessary and switch
+    //       to vector calls when we have aligned sources and destinations.
+    //       See Bug 1263910.
+    for (uint32_t i = 0; i < numberOfFrames1; ++i) {
+      destination[writeIndex + i] += source[i];
+    }
 
-    // Handle wrap-around if necessary
+    // Handle wrap-around if necessary.
+    if (numberOfFrames2 > 0) {
+        for (uint32_t i = 0; i < numberOfFrames2; ++i) {
+          destination[i] += source[numberOfFrames1 + i];
+        }
+    }
+#else
+    AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
     if (numberOfFrames2 > 0) {
         AudioBufferAddWithScale(source + numberOfFrames1, 1.0f, destination, numberOfFrames2);
     }
+#endif
 
     return writeIndex;
 }
 
 void ReverbAccumulationBuffer::reset()
 {
     PodZero(m_buffer.Elements(), m_buffer.Length());
     m_readIndex = 0;