Bug 881587 - Add SSE2 version of AudioNodeEngine.cpp routines added in bug 815643. r=tterribe draft
author Paul Adenot <paul@paul.cx>
Wed, 12 Jun 2013 02:56:44 +0200
changeset 351945 7e2e4736f83f3c516a2d1d0dc8de90ee2763e2ac
parent 350445 b0706b7a8e7e88e82a63df1c563a1af886e5802e
child 351946 c706a971fc370521ad10e05ba63181818db7a99c
push id 15558
push user dminor@mozilla.com
push date Fri, 15 Apr 2016 09:23:03 +0000
reviewers tterribe
bugs 881587, 815643
milestone 48.0a1
Bug 881587 - Add SSE2 version of AudioNodeEngine.cpp routines added in bug 815643. r=tterribe MozReview-Commit-ID: 4aTdyTQEuuj
dom/media/webaudio/AudioNodeEngineSSE2.cpp
dom/media/webaudio/AudioNodeEngineSSE2.h
--- a/dom/media/webaudio/AudioNodeEngineSSE2.cpp
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
@@ -207,9 +207,101 @@ AudioBlockPanStereoToStereo_SSE(const fl
       vscaled1 = _mm_mul_ps(vinl1, vgainr);
       vout0 = _mm_add_ps(vscaled0, vinr0);
       vout1 = _mm_add_ps(vscaled1, vinr1);
       _mm_store_ps(&aOutputR[i], vout0);
       _mm_store_ps(&aOutputR[i+4], vout1);
     }
   }
 }
+// Multiplies two buffers of interleaved complex floats (re, im, re, im, ...) element-wise, eight complex values per loop iteration.
+void BufferComplexMultiply_SSE(const float* aInput,   // first operand, interleaved (re, im) pairs
+                               const float* aScale,   // second operand, same layout as aInput
+                               float* aOutput,        // receives the interleaved complex products
+                               uint32_t aSize)        // count of complex values; the loop covers aSize * 2 floats
+{
+  unsigned i;
+  __m128 in0, in1, in2, in3,
+         outreal0, outreal1, outreal2, outreal3,
+         outimag0, outimag1, outimag2, outimag3;
+  // NOTE(review): 16 floats per step, no scalar tail, aligned load/store intrinsics; presumably buffers are 16-byte aligned and aSize is a multiple of 8 - confirm.
+  for (i = 0; i < aSize * 2; i += 16) {
+    in0 = _mm_load_ps(&aInput[i]);
+    in1 = _mm_load_ps(&aInput[i + 4]);
+    in2 = _mm_load_ps(&aInput[i + 8]);
+    in3 = _mm_load_ps(&aInput[i + 12]);
+    // De-interleave aInput: even-indexed floats go to outreal0/2, odd-indexed floats to outimag0/2.
+    outreal0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
+    outimag0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
+    outreal2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
+    outimag2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
+    // in0..in3 are reused for the matching 16 floats of aScale.
+    in0 = _mm_load_ps(&aScale[i]);
+    in1 = _mm_load_ps(&aScale[i + 4]);
+    in2 = _mm_load_ps(&aScale[i + 8]);
+    in3 = _mm_load_ps(&aScale[i + 12]);
+    // De-interleave aScale the same way into outreal1/3 and outimag1/3.
+    outreal1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
+    outimag1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
+    outreal3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
+    outimag3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
+    // (a + bi)(c + di): real part ac - bd goes to in0/in2, imaginary part ad + bc goes to in1/in3.
+    in0 = _mm_sub_ps(_mm_mul_ps(outreal0, outreal1),
+                     _mm_mul_ps(outimag0, outimag1));
+    in1 = _mm_add_ps(_mm_mul_ps(outreal0, outimag1),
+                     _mm_mul_ps(outimag0, outreal1));
+    in2 = _mm_sub_ps(_mm_mul_ps(outreal2, outreal3),
+                     _mm_mul_ps(outimag2, outimag3));
+    in3 = _mm_add_ps(_mm_mul_ps(outreal2, outimag3),
+                     _mm_mul_ps(outimag2, outreal3));
+    // Re-interleave the real and imaginary results back into (re, im) pairs; outreal0..3 now hold output data.
+    outreal0 = _mm_unpacklo_ps(in0, in1);
+    outreal1 = _mm_unpackhi_ps(in0, in1);
+    outreal2 = _mm_unpacklo_ps(in2, in3);
+    outreal3 = _mm_unpackhi_ps(in2, in3);
+
+    _mm_store_ps(&aOutput[i], outreal0);
+    _mm_store_ps(&aOutput[i + 4], outreal1);
+    _mm_store_ps(&aOutput[i + 8], outreal2);
+    _mm_store_ps(&aOutput[i + 12], outreal3);
+  }
 }
+// Returns the sum of the squares of the floats in aInput, consuming 16 floats per loop iteration.
+float
+AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength)
+{
+  unsigned i;
+  __m128 in0, in1, in2, in3,
+         acc0, acc1, acc2, acc3;
+  float out[4];
+  // Four independent accumulators, one per 4-float load in the loop body.
+  acc0 = _mm_setzero_ps();
+  acc1 = _mm_setzero_ps();
+  acc2 = _mm_setzero_ps();
+  acc3 = _mm_setzero_ps();
+  // NOTE(review): no scalar tail and aligned loads; presumably aInput is 16-byte aligned and aLength is a multiple of 16 - confirm.
+  for (i = 0; i < aLength; i+=16) {
+    in0 = _mm_load_ps(&aInput[i]);
+    in1 = _mm_load_ps(&aInput[i + 4]);
+    in2 = _mm_load_ps(&aInput[i + 8]);
+    in3 = _mm_load_ps(&aInput[i + 12]);
+    // Square every lane in place.
+    in0 = _mm_mul_ps(in0, in0);
+    in1 = _mm_mul_ps(in1, in1);
+    in2 = _mm_mul_ps(in2, in2);
+    in3 = _mm_mul_ps(in3, in3);
+
+    acc0 = _mm_add_ps(acc0, in0);
+    acc1 = _mm_add_ps(acc1, in1);
+    acc2 = _mm_add_ps(acc2, in2);
+    acc3 = _mm_add_ps(acc3, in3);
+  }
+  // Fold the four accumulators into acc0.
+  acc0 = _mm_add_ps(acc0, acc1);
+  acc0 = _mm_add_ps(acc0, acc2);
+  acc0 = _mm_add_ps(acc0, acc3);
+  // NOTE(review): out is declared without explicit 16-byte alignment, which _mm_store_ps requires; confirm it is guaranteed or use _mm_storeu_ps.
+  _mm_store_ps(out, acc0);
+  // Horizontal sum of the four lanes gives the scalar result.
+  return out[0] + out[1] + out[2] + out[3];
+}
+
+}
--- a/dom/media/webaudio/AudioNodeEngineSSE2.h
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.h
@@ -28,9 +28,18 @@ AudioBufferInPlaceScale_SSE(float* aBloc
                             uint32_t aSize);
 
 void
 AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
                                 const float aInputR[WEBAUDIO_BLOCK_SIZE],
                                 float aGainL, float aGainR, bool aIsOnTheLeft,
                                 float aOutputL[WEBAUDIO_BLOCK_SIZE],
                                 float aOutputR[WEBAUDIO_BLOCK_SIZE]);
+// Sum of the squares of the floats in aInput, computed with SSE intrinsics (defined in AudioNodeEngineSSE2.cpp).
+float
+AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength);
+// Element-wise complex multiply of aInput by aScale into aOutput; buffers hold interleaved (re, im) floats and aSize counts complex values.
+void
+BufferComplexMultiply_SSE(const float* aInput,
+                          const float* aScale,
+                          float* aOutput,
+                          uint32_t aSize);
 }