Bug 877662 - Align audio buffer allocations to 16 byte boundaries r?padenot draft
authorDan Minor <dminor@mozilla.com>
Wed, 13 Apr 2016 15:31:50 -0400
changeset 350442 9649ce342ecd1b5f1564a23827bba659dce77ad8
parent 350361 3fda7cb458aa54d0b32d28cd546ccf052611b133
child 350443 73cb12cdfb3a7a0137b9eefe6619fb2853f38a9d
push id15349
push userdminor@mozilla.com
push dateWed, 13 Apr 2016 19:34:21 +0000
reviewerspadenot
bugs877662
milestone48.0a1
Bug 877662 - Align audio buffer allocations to 16 byte boundaries r?padenot To be able to use SSE2 routines, we need to audio buffers to be allocated on 16 byte boundaries. MozReview-Commit-ID: 2mjxMWqysFd
dom/media/webaudio/AudioBlock.cpp
dom/media/webaudio/AudioDestinationNode.cpp
dom/media/webaudio/AudioNodeExternalInputStream.cpp
dom/media/webaudio/AudioNodeStream.cpp
dom/media/webaudio/AudioNodeStream.h
dom/media/webaudio/BiquadFilterNode.cpp
dom/media/webaudio/ConvolverNode.cpp
dom/media/webaudio/GainNode.cpp
dom/media/webaudio/StereoPannerNode.cpp
dom/media/webaudio/WaveShaperNode.cpp
dom/media/webaudio/blink/ReverbAccumulationBuffer.h
--- a/dom/media/webaudio/AudioBlock.cpp
+++ b/dom/media/webaudio/AudioBlock.cpp
@@ -1,50 +1,53 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioBlock.h"
+#include "AlignmentUtils.h"
 
 namespace mozilla {
 
 /**
  * Heap-allocated buffer of channels of 128-sample float arrays, with
  * threadsafe refcounting.  Typically you would allocate one of these, fill it
  * in, and then treat it as immutable while it's shared.
  *
  * Downstream references are accounted specially so that the creator of the
  * buffer can reuse and modify its contents next iteration if other references
  * are all downstream temporary references held by AudioBlock.
  *
- * This only guarantees 4-byte alignment of the data. For alignment we simply
- * assume that the memory from malloc is at least 4-byte aligned and that
- * AudioBlockBuffer's size is divisible by 4.
+ * We guarantee 16 byte alignment of the channel data.
  */
 class AudioBlockBuffer final : public ThreadSharedObject {
 public:
 
   virtual AudioBlockBuffer* AsAudioBlockBuffer() override { return this; };
 
   float* ChannelData(uint32_t aChannel)
   {
-    return reinterpret_cast<float*>(this + 1) + aChannel * WEBAUDIO_BLOCK_SIZE;
+    float* base = reinterpret_cast<float*>(((uintptr_t)(this + 1) + 15) & ~0x0F);
+    ASSERT_ALIGNED16(base);
+    return base + aChannel * WEBAUDIO_BLOCK_SIZE;
   }
 
   static already_AddRefed<AudioBlockBuffer> Create(uint32_t aChannelCount)
   {
     CheckedInt<size_t> size = WEBAUDIO_BLOCK_SIZE;
     size *= aChannelCount;
     size *= sizeof(float);
     size += sizeof(AudioBlockBuffer);
+    size += 15;  //padding for alignment
     if (!size.isValid()) {
       MOZ_CRASH();
     }
+
     void* m = moz_xmalloc(size.value());
     RefPtr<AudioBlockBuffer> p = new (m) AudioBlockBuffer();
     NS_ASSERTION((reinterpret_cast<char*>(p.get() + 1) - reinterpret_cast<char*>(p.get())) % 4 == 0,
                  "AudioBlockBuffers should be at least 4-byte aligned");
     return p.forget();
   }
 
   // Graph thread only.
@@ -145,18 +148,16 @@ AudioBlock::AllocateChannels(uint32_t aC
     if (buffer && !buffer->HasLastingShares()) {
       MOZ_ASSERT(mBufferFormat == AUDIO_FORMAT_FLOAT32);
       // No need to allocate again.
       mVolume = 1.0f;
       return;
     }
   }
 
-  // XXX for SIMD purposes we should do something here to make sure the
-  // channel buffers are 16-byte aligned.
   RefPtr<AudioBlockBuffer> buffer = AudioBlockBuffer::Create(aChannelCount);
   mChannelData.SetLength(aChannelCount);
   for (uint32_t i = 0; i < aChannelCount; ++i) {
     mChannelData[i] = buffer->ChannelData(i);
   }
   mBuffer = buffer.forget();
   mVolume = 1.0f;
   mBufferFormat = AUDIO_FORMAT_FLOAT32;
--- a/dom/media/webaudio/AudioDestinationNode.cpp
+++ b/dom/media/webaudio/AudioDestinationNode.cpp
@@ -1,15 +1,16 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioDestinationNode.h"
+#include "AlignmentUtils.h"
 #include "AudioContext.h"
 #include "mozilla/dom/AudioDestinationNodeBinding.h"
 #include "mozilla/dom/ScriptSettings.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/Services.h"
 #include "AudioChannelAgent.h"
 #include "AudioChannelService.h"
 #include "AudioNodeEngine.h"
@@ -82,17 +83,17 @@ public:
     const uint32_t duration = std::min(WEBAUDIO_BLOCK_SIZE, mLength - mWriteIndex);
     const uint32_t inputChannelCount = aInput.ChannelCount();
     for (uint32_t i = 0; i < outputChannelCount; ++i) {
       float* outputData = mBuffer->GetDataForWrite(i) + mWriteIndex;
       if (aInput.IsNull() || i >= inputChannelCount) {
         PodZero(outputData, duration);
       } else {
         const float* inputBuffer = static_cast<const float*>(aInput.mChannelData[i]);
-        if (duration == WEBAUDIO_BLOCK_SIZE) {
+        if (duration == WEBAUDIO_BLOCK_SIZE && IS_ALIGNED16(inputBuffer)) {
           // Use the optimized version of the copy with scale operation
           AudioBlockCopyChannelWithScale(inputBuffer, aInput.mVolume,
                                          outputData);
         } else {
           if (aInput.mVolume == 1.0f) {
             PodCopy(outputData, inputBuffer, duration);
           } else {
             for (uint32_t j = 0; j < duration; ++j) {
--- a/dom/media/webaudio/AudioNodeExternalInputStream.cpp
+++ b/dom/media/webaudio/AudioNodeExternalInputStream.cpp
@@ -1,13 +1,15 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include "AlignedTArray.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeExternalInputStream.h"
 #include "AudioChannelFormat.h"
 #include "mozilla/dom/MediaStreamAudioSourceNode.h"
 
 using namespace mozilla::dom;
 
 namespace mozilla {
@@ -85,19 +87,30 @@ static void ConvertSegmentToAudioBlock(A
 {
   NS_ASSERTION(aSegment->GetDuration() == WEBAUDIO_BLOCK_SIZE, "Bad segment duration");
 
   {
     AudioSegment::ChunkIterator ci(*aSegment);
     NS_ASSERTION(!ci.IsEnded(), "Should be at least one chunk!");
     if (ci->GetDuration() == WEBAUDIO_BLOCK_SIZE &&
         (ci->IsNull() || ci->mBufferFormat == AUDIO_FORMAT_FLOAT32)) {
+
+      bool aligned = true;
+      for (size_t i = 0; i < ci->mChannelData.Length(); ++i) {
+        if (!IS_ALIGNED16(ci->mChannelData[i])) {
+            aligned = false;
+            break;
+        }
+      }
+
       // Return this chunk directly to avoid copying data.
-      *aBlock = *ci;
-      return;
+      if (aligned) {
+        *aBlock = *ci;
+        return;
+      }
     }
   }
 
   aBlock->AllocateChannels(aFallbackChannelCount);
 
   uint32_t duration = 0;
   for (AudioSegment::ChunkIterator ci(*aSegment); !ci.IsEnded(); ci.Next()) {
     switch (ci->mBufferFormat) {
@@ -187,17 +200,20 @@ AudioNodeExternalInputStream::ProcessInp
 
     for (AudioSegment::ChunkIterator iter(segment); !iter.IsEnded(); iter.Next()) {
       inputChannels = GetAudioChannelsSuperset(inputChannels, iter->ChannelCount());
     }
   }
 
   uint32_t accumulateIndex = 0;
   if (inputChannels) {
-    AutoTArray<float,GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
+    // TODO: See Bug 1261168. Ideally we would use an aligned version of
+    // AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
+    AlignedTArray<float,16> downmixBuffer;
+    downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
     for (uint32_t i = 0; i < audioSegments.Length(); ++i) {
       AudioBlock tmpChunk;
       ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk, inputChannels);
       if (!tmpChunk.IsNull()) {
         if (accumulateIndex == 0) {
           mLastChunks[0].AllocateChannels(inputChannels);
         }
         AccumulateInputChunk(accumulateIndex, tmpChunk, &mLastChunks[0], &downmixBuffer);
--- a/dom/media/webaudio/AudioNodeStream.cpp
+++ b/dom/media/webaudio/AudioNodeStream.cpp
@@ -448,29 +448,31 @@ AudioNodeStream::ObtainInputBlock(AudioB
   }
 
   if (outputChannelCount == 0) {
     aTmpChunk.SetNull(WEBAUDIO_BLOCK_SIZE);
     return;
   }
 
   aTmpChunk.AllocateChannels(outputChannelCount);
-  // The static storage here should be 1KB, so it's fine
-  AutoTArray<float, GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
+  // TODO: See Bug 1261168. Ideally we would use an aligned version of
+  // AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
+  AlignedTArray<float, 16> downmixBuffer;
+  downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
 
   for (uint32_t i = 0; i < inputChunkCount; ++i) {
     AccumulateInputChunk(i, *inputChunks[i], &aTmpChunk, &downmixBuffer);
   }
 }
 
 void
 AudioNodeStream::AccumulateInputChunk(uint32_t aInputIndex,
                                       const AudioBlock& aChunk,
                                       AudioBlock* aBlock,
-                                      nsTArray<float>* aDownmixBuffer)
+                                      AlignedTArray<float, 16>* aDownmixBuffer)
 {
   AutoTArray<const float*,GUESS_AUDIO_CHANNELS> channels;
   UpMixDownMixChunk(&aChunk, aBlock->ChannelCount(), channels, *aDownmixBuffer);
 
   for (uint32_t c = 0; c < channels.Length(); ++c) {
     const float* inputData = static_cast<const float*>(channels[c]);
     float* outputData = aBlock->ChannelFloatsForWrite(c);
     if (inputData) {
@@ -486,17 +488,17 @@ AudioNodeStream::AccumulateInputChunk(ui
     }
   }
 }
 
 void
 AudioNodeStream::UpMixDownMixChunk(const AudioBlock* aChunk,
                                    uint32_t aOutputChannelCount,
                                    nsTArray<const float*>& aOutputChannels,
-                                   nsTArray<float>& aDownmixBuffer)
+                                   AlignedTArray<float, 16>& aDownmixBuffer)
 {
   for (uint32_t i = 0; i < aChunk->ChannelCount(); i++) {
     aOutputChannels.AppendElement(static_cast<const float*>(aChunk->mChannelData[i]));
   }
   if (aOutputChannels.Length() < aOutputChannelCount) {
     if (mChannelInterpretation == ChannelInterpretation::Speakers) {
       AudioChannelsUpMix<float>(&aOutputChannels, aOutputChannelCount, nullptr);
       NS_ASSERTION(aOutputChannelCount == aOutputChannels.Length(),
--- a/dom/media/webaudio/AudioNodeStream.h
+++ b/dom/media/webaudio/AudioNodeStream.h
@@ -3,16 +3,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef MOZILLA_AUDIONODESTREAM_H_
 #define MOZILLA_AUDIONODESTREAM_H_
 
 #include "MediaStreamGraph.h"
 #include "mozilla/dom/AudioNodeBinding.h"
+#include "AlignedTArray.h"
 #include "AudioBlock.h"
 
 namespace mozilla {
 
 namespace dom {
 struct ThreeDPoint;
 struct AudioTimelineEvent;
 class AudioContext;
@@ -185,20 +186,20 @@ protected:
    * again until SetActive() is called.
    */
   void CheckForInactive();
 
   void AdvanceOutputSegment();
   void FinishOutput();
   void AccumulateInputChunk(uint32_t aInputIndex, const AudioBlock& aChunk,
                             AudioBlock* aBlock,
-                            nsTArray<float>* aDownmixBuffer);
+                            AlignedTArray<float, 16>* aDownmixBuffer);
   void UpMixDownMixChunk(const AudioBlock* aChunk, uint32_t aOutputChannelCount,
                          nsTArray<const float*>& aOutputChannels,
-                         nsTArray<float>& aDownmixBuffer);
+                         AlignedTArray<float, 16>& aDownmixBuffer);
 
   uint32_t ComputedNumberOfChannels(uint32_t aInputChannelCount);
   void ObtainInputBlock(AudioBlock& aTmpChunk, uint32_t aPortIndex);
   void IncrementActiveInputCount();
   void DecrementActiveInputCount();
 
   // The engine that will generate output for this node.
   nsAutoPtr<AudioNodeEngine> mEngine;
--- a/dom/media/webaudio/BiquadFilterNode.cpp
+++ b/dom/media/webaudio/BiquadFilterNode.cpp
@@ -1,15 +1,16 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "BiquadFilterNode.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioDestinationNode.h"
 #include "PlayingRefChangeHandler.h"
 #include "WebAudioUtils.h"
 #include "blink/Biquad.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/UniquePtr.h"
@@ -132,17 +133,19 @@ public:
   }
 
   void ProcessBlock(AudioNodeStream* aStream,
                     GraphTime aFrom,
                     const AudioBlock& aInput,
                     AudioBlock* aOutput,
                     bool* aFinished) override
   {
-    float inputBuffer[WEBAUDIO_BLOCK_SIZE];
+    float inputBuffer[WEBAUDIO_BLOCK_SIZE + 4];
+    float* alignedInputBuffer = ALIGNED16(inputBuffer);
+    ASSERT_ALIGNED16(alignedInputBuffer);
 
     if (aInput.IsNull()) {
       bool hasTail = false;
       for (uint32_t i = 0; i < mBiquads.Length(); ++i) {
         if (mBiquads[i].hasTail()) {
           hasTail = true;
           break;
         }
@@ -186,22 +189,22 @@ public:
     double freq = mFrequency.GetValueAtTime(pos);
     double q = mQ.GetValueAtTime(pos);
     double gain = mGain.GetValueAtTime(pos);
     double detune = mDetune.GetValueAtTime(pos);
 
     for (uint32_t i = 0; i < numberOfChannels; ++i) {
       const float* input;
       if (aInput.IsNull()) {
-        input = inputBuffer;
+        input = alignedInputBuffer;
       } else {
         input = static_cast<const float*>(aInput.mChannelData[i]);
         if (aInput.mVolume != 1.0) {
-          AudioBlockCopyChannelWithScale(input, aInput.mVolume, inputBuffer);
-          input = inputBuffer;
+          AudioBlockCopyChannelWithScale(input, aInput.mVolume, alignedInputBuffer);
+          input = alignedInputBuffer;
         }
       }
       SetParamsOnBiquad(mBiquads[i], aStream->SampleRate(), mType, freq, q, gain, detune);
 
       mBiquads[i].process(input,
                           aOutput->ChannelFloatsForWrite(i),
                           aInput.GetDuration());
     }
--- a/dom/media/webaudio/ConvolverNode.cpp
+++ b/dom/media/webaudio/ConvolverNode.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ConvolverNode.h"
 #include "mozilla/dom/ConvolverNodeBinding.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "blink/Reverb.h"
 #include "PlayingRefChangeHandler.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -256,21 +257,23 @@ ConvolverNode::SetBuffer(JSContext* aCx,
       mBuffer->GetThreadSharedChannelsForRate(aCx);
     if (data && length < WEBAUDIO_BLOCK_SIZE) {
       // For very small impulse response buffers, we need to pad the
       // buffer with 0 to make sure that the Reverb implementation
       // has enough data to compute FFTs from.
       length = WEBAUDIO_BLOCK_SIZE;
       RefPtr<ThreadSharedFloatArrayBufferList> paddedBuffer =
         new ThreadSharedFloatArrayBufferList(data->GetChannels());
-      float* channelData = (float*) malloc(sizeof(float) * length * data->GetChannels());
+      void* channelData = malloc(sizeof(float) * length * data->GetChannels() + 15);
+      float* alignedChannelData = ALIGNED16(channelData);
+      ASSERT_ALIGNED16(alignedChannelData);
       for (uint32_t i = 0; i < data->GetChannels(); ++i) {
-        PodCopy(channelData + length * i, data->GetData(i), mBuffer->Length());
-        PodZero(channelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
-        paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, channelData);
+        PodCopy(alignedChannelData + length * i, data->GetData(i), mBuffer->Length());
+        PodZero(alignedChannelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
+        paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, alignedChannelData);
       }
       data = paddedBuffer;
     }
     SendInt32ParameterToStream(ConvolverNodeEngine::BUFFER_LENGTH, length);
     SendDoubleParameterToStream(ConvolverNodeEngine::SAMPLE_RATE,
                                 mBuffer->SampleRate());
     ns->SetBuffer(data.forget());
   } else {
--- a/dom/media/webaudio/GainNode.cpp
+++ b/dom/media/webaudio/GainNode.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "GainNode.h"
 #include "mozilla/dom/GainNodeBinding.h"
+#include "AlignmentUtils.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioDestinationNode.h"
 #include "WebAudioUtils.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -74,28 +75,30 @@ public:
     } else {
       // First, compute a vector of gains for each track tick based on the
       // timeline at hand, and then for each channel, multiply the values
       // in the buffer with the gain vector.
       aOutput->AllocateChannels(aInput.ChannelCount());
 
       // Compute the gain values for the duration of the input AudioChunk
       StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
-      float computedGain[WEBAUDIO_BLOCK_SIZE];
-      mGain.GetValuesAtTime(tick, computedGain, WEBAUDIO_BLOCK_SIZE);
+      float computedGain[WEBAUDIO_BLOCK_SIZE + 4];
+      float* alignedComputedGain = ALIGNED16(computedGain);
+      ASSERT_ALIGNED16(alignedComputedGain);
+      mGain.GetValuesAtTime(tick, alignedComputedGain, WEBAUDIO_BLOCK_SIZE);
 
       for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
-        computedGain[counter] *= aInput.mVolume;
+        alignedComputedGain[counter] *= aInput.mVolume;
       }
 
       // Apply the gain to the output buffer
       for (size_t channel = 0; channel < aOutput->ChannelCount(); ++channel) {
         const float* inputBuffer = static_cast<const float*> (aInput.mChannelData[channel]);
         float* buffer = aOutput->ChannelFloatsForWrite(channel);
-        AudioBlockCopyChannelWithScale(inputBuffer, computedGain, buffer);
+        AudioBlockCopyChannelWithScale(inputBuffer, alignedComputedGain, buffer);
       }
     }
   }
 
   size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override
   {
     // Not owned:
     // - mDestination (probably)
--- a/dom/media/webaudio/StereoPannerNode.cpp
+++ b/dom/media/webaudio/StereoPannerNode.cpp
@@ -4,16 +4,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "StereoPannerNode.h"
 #include "mozilla/dom/StereoPannerNodeBinding.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "AudioDestinationNode.h"
+#include "AlignmentUtils.h"
 #include "WebAudioUtils.h"
 #include "PanningUtils.h"
 #include "AudioParamTimeline.h"
 #include "AudioParam.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -132,34 +133,36 @@ public:
 
         GetGainValuesForPanning(panning, monoToStereo, gainL, gainR);
         ApplyStereoPanning(aInput, aOutput,
                            gainL * aInput.mVolume,
                            gainR * aInput.mVolume,
                            panning <= 0);
       }
     } else {
-      float computedGain[2][WEBAUDIO_BLOCK_SIZE];
+      float computedGain[2*WEBAUDIO_BLOCK_SIZE + 4];
       bool onLeft[WEBAUDIO_BLOCK_SIZE];
 
       float values[WEBAUDIO_BLOCK_SIZE];
       StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
       mPan.GetValuesAtTime(tick, values, WEBAUDIO_BLOCK_SIZE);
 
+      float* alignedComputedGain = ALIGNED16(computedGain);
+      ASSERT_ALIGNED16(alignedComputedGain);
       for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
         float left, right;
         GetGainValuesForPanning(values[counter], monoToStereo, left, right);
 
-        computedGain[0][counter] = left * aInput.mVolume;
-        computedGain[1][counter] = right * aInput.mVolume;
+        alignedComputedGain[counter] = left * aInput.mVolume;
+        alignedComputedGain[WEBAUDIO_BLOCK_SIZE + counter] = right * aInput.mVolume;
         onLeft[counter] = values[counter] <= 0;
       }
 
       // Apply the gain to the output buffer
-      ApplyStereoPanning(aInput, aOutput, computedGain[0], computedGain[1], onLeft);
+      ApplyStereoPanning(aInput, aOutput, alignedComputedGain, &alignedComputedGain[WEBAUDIO_BLOCK_SIZE], onLeft);
     }
   }
 
   virtual size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override
   {
     return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
   }
 
--- a/dom/media/webaudio/WaveShaperNode.cpp
+++ b/dom/media/webaudio/WaveShaperNode.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "WaveShaperNode.h"
 #include "mozilla/dom/WaveShaperNodeBinding.h"
+#include "AlignmentUtils.h"
 #include "AudioNode.h"
 #include "AudioNodeEngine.h"
 #include "AudioNodeStream.h"
 #include "mozilla/PodOperations.h"
 
 namespace mozilla {
 namespace dom {
 
@@ -226,23 +227,25 @@ public:
       // or the input is null.
       *aOutput = aInput;
       return;
     }
 
     aOutput->AllocateChannels(channelCount);
     for (uint32_t i = 0; i < channelCount; ++i) {
       const float* inputSamples;
-      float scaledInput[WEBAUDIO_BLOCK_SIZE];
+      float scaledInput[WEBAUDIO_BLOCK_SIZE + 4];
+      float* alignedScaledInput = ALIGNED16(scaledInput);
+      ASSERT_ALIGNED16(alignedScaledInput);
       if (aInput.mVolume != 1.0f) {
         AudioBlockCopyChannelWithScale(
             static_cast<const float*>(aInput.mChannelData[i]),
                                       aInput.mVolume,
-                                      scaledInput);
-        inputSamples = scaledInput;
+                                      alignedScaledInput);
+        inputSamples = alignedScaledInput;
       } else {
         inputSamples = static_cast<const float*>(aInput.mChannelData[i]);
       }
       float* outputBuffer = aOutput->ChannelFloatsForWrite(i);
       float* sampleBuffer;
 
       switch (mType) {
       case OverSampleType::None:
--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.h
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.h
@@ -24,23 +24,21 @@
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef ReverbAccumulationBuffer_h
 #define ReverbAccumulationBuffer_h
 
-#include "nsTArray.h"
+#include "AlignedTArray.h"
 #include "mozilla/MemoryReporting.h"
 
 namespace WebCore {
 
-typedef nsTArray<float> AudioFloatArray;
-
 // ReverbAccumulationBuffer is a circular delay buffer with one client reading from it and multiple clients
 // writing/accumulating to it at different delay offsets from the read position.  The read operation will zero the memory
 // just read from the buffer, so it will be ready for accumulation the next time around.
 class ReverbAccumulationBuffer {
 public:
     explicit ReverbAccumulationBuffer(size_t length);
 
     // This will read from, then clear-out numberOfFrames
@@ -60,16 +58,16 @@ public:
     void reset();
 
     size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
     {
         return m_buffer.ShallowSizeOfExcludingThis(aMallocSizeOf);
     }
 
 private:
-    AudioFloatArray m_buffer;
+    AlignedTArray<float, 16> m_buffer;
     size_t m_readIndex;
     size_t m_readTimeFrame; // for debugging (frame on continuous timeline)
 };
 
 } // namespace WebCore
 
 #endif // ReverbAccumulationBuffer_h