Bug 1263910 - Make AudioBufferAddWithScale handle unaligned buffers; r=padenot draft
author Dan Minor <dminor@mozilla.com>
Tue, 10 May 2016 06:37:45 -0400
changeset 366365 0c9970807262c8a13be5ad866e470d78ff6c1bb9
parent 364815 043082cb7bd8490c60815f67fbd1f33323ad7663
child 520758 ea8734bc002de5bd8423932d721a7917356f0616
push id 17964
push user dminor@mozilla.com
push date Thu, 12 May 2016 15:41:41 +0000
reviewers padenot
bugs 1263910
milestone 49.0a1
Bug 1263910 - Make AudioBufferAddWithScale handle unaligned buffers; r=padenot

ReverbAccumulationBuffer often produces unaligned buffers due to the way it wraps around results. This modifies AudioBufferAddWithScale on SSE2 platforms to handle unaligned buffers by performing scalar operations until both the input and output buffers are aligned to 16 bytes. It then does as many vector operations as possible and switches back to scalar operations for anything that is left over.

This could also be done within the ReverbAccumulationBuffer code, but doing it directly within the AudioNodeEngine code makes it available to other callers in the future, at the cost of a few extra branches in the case where everything was aligned anyway.

MozReview-Commit-ID: Ky0uIe5LMVq
dom/media/webaudio/AudioNodeEngine.cpp
dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
--- a/dom/media/webaudio/AudioNodeEngine.cpp
+++ b/dom/media/webaudio/AudioNodeEngine.cpp
@@ -74,18 +74,42 @@ void AudioBufferAddWithScale(const float
   if (mozilla::supports_neon()) {
     AudioBufferAddWithScale_NEON(aInput, aScale, aOutput, aSize);
     return;
   }
 #endif
 
 #ifdef USE_SSE2
   if (mozilla::supports_sse2()) {
-    AudioBufferAddWithScale_SSE(aInput, aScale, aOutput, aSize);
-    return;
+    if (aScale == 1.0f) {
+      while (aSize && (!IS_ALIGNED16(aInput) || !IS_ALIGNED16(aOutput))) {
+        *aOutput += *aInput;
+        ++aOutput;
+        ++aInput;
+        --aSize;
+      }
+    } else {
+      while (aSize && (!IS_ALIGNED16(aInput) || !IS_ALIGNED16(aOutput))) {
+        *aOutput += *aInput*aScale;
+        ++aOutput;
+        ++aInput;
+        --aSize;
+      }
+    }
+
+    // we need to round aSize down to the nearest multiple of 16
+    uint32_t alignedSize = aSize & ~0x0F;
+    if (alignedSize > 0) {
+      AudioBufferAddWithScale_SSE(aInput, aScale, aOutput, alignedSize);
+
+      // adjust parameters for use with scalar operations below
+      aInput += alignedSize;
+      aOutput += alignedSize;
+      aSize -= alignedSize;
+    }
   }
 #endif
 
   if (aScale == 1.0f) {
     for (uint32_t i = 0; i < aSize; ++i) {
       aOutput[i] += aInput[i];
     }
   } else {
--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
@@ -91,40 +91,20 @@ int ReverbAccumulationBuffer::accumulate
 
     float* destination = m_buffer.Elements();
 
     bool isSafe = writeIndex <= bufferLength && numberOfFrames1 + writeIndex <= bufferLength && numberOfFrames2 <= bufferLength;
     MOZ_ASSERT(isSafe);
     if (!isSafe)
         return 0;
 
-#ifdef USE_SSE2
-    // It is unlikely either the source is aligned or the number of values
-    // is a multiple of 16, so we just add them here rather than calling
-    // AudioBufferAddWithScale.
-    //
-    // TODO: Ideally we would use scalar calls when necessary and switch
-    //       to vector calls when we have aligned sources and destinations.
-    //       See Bug 1263910.
-    for (uint32_t i = 0; i < numberOfFrames1; ++i) {
-      destination[writeIndex + i] += source[i];
-    }
-
-    // Handle wrap-around if necessary.
-    if (numberOfFrames2 > 0) {
-        for (uint32_t i = 0; i < numberOfFrames2; ++i) {
-          destination[i] += source[numberOfFrames1 + i];
-        }
-    }
-#else
     AudioBufferAddWithScale(source, 1.0f, destination + writeIndex, numberOfFrames1);
     if (numberOfFrames2 > 0) {
         AudioBufferAddWithScale(source + numberOfFrames1, 1.0f, destination, numberOfFrames2);
     }
-#endif
 
     return writeIndex;
 }
 
 void ReverbAccumulationBuffer::reset()
 {
     PodZero(m_buffer.Elements(), m_buffer.Length());
     m_readIndex = 0;