Bug 1378070 - Implement multichannel WebAudio. r?padenot draft
authorAlex Chronopoulos <achronop@gmail.com>
Mon, 28 Aug 2017 17:16:20 +0300
changeset 654305 942b1ab187b55e2a794473f907aebb7eaadbd2e4
parent 654304 d275e7fb60ed9742e22c89b7506c5b4052a8d7de
child 728529 b91b63c38f2bac9f6bcb816d808253d030482521
push id: 76533
push user: achronop@gmail.com
push date: Mon, 28 Aug 2017 14:27:14 +0000
reviewers: padenot
bugs: 1378070
milestone: 57.0a1
Bug 1378070 - Implement multichannel WebAudio. r?padenot * * * [mq]: rebase MozReview-Commit-ID: KJwH3ZeJn55
dom/media/AudioBufferUtils.h
dom/media/GraphDriver.cpp
dom/media/GraphDriver.h
dom/media/MediaStreamGraphImpl.h
dom/media/gtest/TestAudioBuffers.cpp
dom/media/webaudio/AudioContext.cpp
dom/media/webaudio/AudioDestinationNode.cpp
dom/media/webrtc/AudioOutputObserver.h
dom/media/webrtc/MediaEngineWebRTCAudio.cpp
--- a/dom/media/AudioBufferUtils.h
+++ b/dom/media/AudioBufferUtils.h
@@ -24,55 +24,59 @@ static inline uint32_t SamplesToFrames(u
   return aSamples / aChannels;
 }
 
 /**
  * Class that gets a buffer pointer from an audio callback and provides a safe
  * interface to manipulate this buffer, and to ensure we are not missing frames
  * by the end of the callback.
  */
-template<typename T, uint32_t CHANNELS>
+template<typename T>
 class AudioCallbackBufferWrapper
 {
 public:
-  AudioCallbackBufferWrapper()
+  explicit AudioCallbackBufferWrapper(uint32_t aChannels)
     : mBuffer(nullptr),
       mSamples(0),
-      mSampleWriteOffset(1)
-  {}
+      mSampleWriteOffset(1),
+      mChannels(aChannels)
+
+  {
+    MOZ_ASSERT(aChannels);
+  }
   /**
    * Set the buffer in this wrapper. This is to be called at the beginning of
    * the callback.
    */
   void SetBuffer(T* aBuffer, uint32_t aFrames) {
     MOZ_ASSERT(!mBuffer && !mSamples,
         "SetBuffer called twice.");
     mBuffer = aBuffer;
-    mSamples = FramesToSamples(CHANNELS, aFrames);
+    mSamples = FramesToSamples(mChannels, aFrames);
     mSampleWriteOffset = 0;
   }
 
   /**
    * Write some frames to the internal buffer. Free space in the buffer should
    * be check prior to calling this.
    */
   void WriteFrames(T* aBuffer, uint32_t aFrames) {
     MOZ_ASSERT(aFrames <= Available(),
         "Writing more that we can in the audio buffer.");
 
-    PodCopy(mBuffer + mSampleWriteOffset, aBuffer, FramesToSamples(CHANNELS,
+    PodCopy(mBuffer + mSampleWriteOffset, aBuffer, FramesToSamples(mChannels,
                                                                    aFrames));
-    mSampleWriteOffset += FramesToSamples(CHANNELS, aFrames);
+    mSampleWriteOffset += FramesToSamples(mChannels, aFrames);
   }
 
   /**
    * Number of frames that can be written to the buffer.
    */
   uint32_t Available() {
-    return SamplesToFrames(CHANNELS, mSamples - mSampleWriteOffset);
+    return SamplesToFrames(mChannels, mSamples - mSampleWriteOffset);
   }
 
   /**
    * Check that the buffer is completly filled, and reset internal state so this
    * instance can be reused.
    */
   void BufferFilled() {
     // It's okay to have exactly zero samples here, it can happen we have an
@@ -83,84 +87,92 @@ public:
     // all the streams were ended (no mixer callback occured).
     // XXX Remove this warning, or find a way to avoid it if the mixer callback
     // isn't called.
     NS_WARNING_ASSERTION(
       Available() == 0 || mSampleWriteOffset == 0,
       "Audio Buffer is not full by the end of the callback.");
     // Make sure the data returned is always set and not random!
     if (Available()) {
-      PodZero(mBuffer + mSampleWriteOffset, FramesToSamples(CHANNELS, Available()));
+      PodZero(mBuffer + mSampleWriteOffset, FramesToSamples(mChannels, Available()));
     }
     MOZ_ASSERT(mSamples, "Buffer not set.");
     mSamples = 0;
     mSampleWriteOffset = 0;
     mBuffer = nullptr;
   }
 
 private:
   /* This is not an owned pointer, but the pointer passed to use via the audio
    * callback. */
   T* mBuffer;
   /* The number of samples of this audio buffer. */
   uint32_t mSamples;
   /* The position at which new samples should be written. We want to return to
    * the audio callback iff this is equal to mSamples. */
   uint32_t mSampleWriteOffset;
+  uint32_t const mChannels;
 };
 
 /**
  * This is a class that interfaces with the AudioCallbackBufferWrapper, and is
  * responsible for storing the excess of data produced by the MediaStreamGraph
  * because of different rounding constraints, to be used the next time the audio
  * backend calls back.
  */
-template<typename T, uint32_t BLOCK_SIZE, uint32_t CHANNELS>
+template<typename T, uint32_t BLOCK_SIZE>
 class SpillBuffer
 {
 public:
-  SpillBuffer()
+  explicit SpillBuffer(uint32_t aChannels)
   : mPosition(0)
+  , mChannels(aChannels)
   {
-    PodArrayZero(mBuffer);
+    MOZ_ASSERT(aChannels);
+    mBuffer = MakeUnique<T[]>(BLOCK_SIZE * mChannels);
+    PodZero(mBuffer.get(), BLOCK_SIZE * mChannels);
   }
+
   /* Empty the spill buffer into the buffer of the audio callback. This returns
    * the number of frames written. */
-  uint32_t Empty(AudioCallbackBufferWrapper<T, CHANNELS>& aBuffer) {
+  uint32_t Empty(AudioCallbackBufferWrapper<T>& aBuffer) {
     uint32_t framesToWrite = std::min(aBuffer.Available(),
-                                      SamplesToFrames(CHANNELS, mPosition));
+                                      SamplesToFrames(mChannels, mPosition));
 
-    aBuffer.WriteFrames(mBuffer, framesToWrite);
+    aBuffer.WriteFrames(mBuffer.get(), framesToWrite);
 
-    mPosition -= FramesToSamples(CHANNELS, framesToWrite);
+    mPosition -= FramesToSamples(mChannels, framesToWrite);
     // If we didn't empty the spill buffer for some reason, shift the remaining data down
     if (mPosition > 0) {
-      PodMove(mBuffer, mBuffer + FramesToSamples(CHANNELS, framesToWrite),
+      MOZ_ASSERT(FramesToSamples(mChannels, framesToWrite) + mPosition <= BLOCK_SIZE * mChannels);
+      PodMove(mBuffer.get(), mBuffer.get() + FramesToSamples(mChannels, framesToWrite),
               mPosition);
     }
 
     return framesToWrite;
   }
   /* Fill the spill buffer from aInput, containing aFrames frames, return the
    * number of frames written to the spill buffer */
   uint32_t Fill(T* aInput, uint32_t aFrames) {
     uint32_t framesToWrite = std::min(aFrames,
-                                      BLOCK_SIZE - SamplesToFrames(CHANNELS,
+                                      BLOCK_SIZE - SamplesToFrames(mChannels,
                                                                    mPosition));
 
-    PodCopy(mBuffer + mPosition, aInput, FramesToSamples(CHANNELS,
+    MOZ_ASSERT(FramesToSamples(mChannels, framesToWrite) + mPosition <= BLOCK_SIZE * mChannels);
+    PodCopy(mBuffer.get() + mPosition, aInput, FramesToSamples(mChannels,
                                                          framesToWrite));
 
-    mPosition += FramesToSamples(CHANNELS, framesToWrite);
+    mPosition += FramesToSamples(mChannels, framesToWrite);
 
     return framesToWrite;
   }
 private:
   /* The spilled data. */
-  T mBuffer[BLOCK_SIZE * CHANNELS];
+  UniquePtr<T[]> mBuffer;
   /* The current write position, in samples, in the buffer when filling, or the
    * amount of buffer filled when emptying. */
   uint32_t mPosition;
+  uint32_t const mChannels;
 };
 
 } // namespace mozilla
 
 #endif // MOZILLA_SCRATCHBUFFER_H_
--- a/dom/media/GraphDriver.cpp
+++ b/dom/media/GraphDriver.cpp
@@ -555,16 +555,19 @@ StreamAndPromiseForOperation::StreamAndP
   , mPromise(aPromise)
   , mOperation(aOperation)
 {
   // MOZ_ASSERT(aPromise);
 }
 
 AudioCallbackDriver::AudioCallbackDriver(MediaStreamGraphImpl* aGraphImpl)
   : GraphDriver(aGraphImpl)
+  , mOuputChannels(mGraphImpl->AudioChannelCount())
+  , mScratchBuffer(mOuputChannels)
+  , mBuffer(mOuputChannels)
   , mSampleRate(0)
   , mInputChannels(1)
   , mIterationDurationMS(MEDIA_GRAPH_TARGET_PERIOD_MS)
   , mStarted(false)
   , mAudioInput(nullptr)
   , mAddedMixer(false)
   , mInCallback(false)
   , mMicrophoneActive(false)
@@ -621,25 +624,24 @@ AudioCallbackDriver::Init()
   uint32_t latency_frames;
   bool firstStream = CubebUtils::GetFirstStream();
 
   MOZ_ASSERT(!NS_IsMainThread(),
       "This is blocking and should never run on the main thread.");
 
   mSampleRate = output.rate = CubebUtils::PreferredSampleRate();
 
-  output.channels = mGraphImpl->AudioChannelCount();
   if (AUDIO_OUTPUT_FORMAT == AUDIO_FORMAT_S16) {
     output.format = CUBEB_SAMPLE_S16NE;
   } else {
     output.format = CUBEB_SAMPLE_FLOAT32NE;
   }
 
-  // Graphs are always stereo for now.
-  output.layout = CUBEB_LAYOUT_STEREO;
+  output.channels = mOuputChannels;
+  output.layout = CUBEB_LAYOUT_UNDEFINED;
 
   Maybe<uint32_t> latencyPref = CubebUtils::GetCubebMSGLatencyInFrames();
   if (latencyPref) {
     latency_frames = latencyPref.value();
   } else {
     if (cubeb_get_min_latency(cubebContext, &output, &latency_frames) != CUBEB_OK) {
       NS_WARNING("Could not get minimal latency from cubeb.");
     }
@@ -918,17 +920,17 @@ AudioCallbackDriver::DataCallback(const 
   GraphTime stateComputedTime = StateComputedTime();
   if (stateComputedTime == 0) {
     MonitorAutoLock mon(mGraphImpl->GetMonitor());
     // Because this function is called during cubeb_stream_init (to prefill the
     // audio buffers), it can be that we don't have a message here (because this
     // driver is the first one for this graph), and the graph would exit. Simply
     // return here until we have messages.
     if (!mGraphImpl->MessagesQueued()) {
-      PodZero(aOutputBuffer, aFrames * mGraphImpl->AudioChannelCount());
+      PodZero(aOutputBuffer, aFrames * mOuputChannels);
       return aFrames;
     }
     mGraphImpl->SwapMessageQueues();
   }
 
   uint32_t durationMS = aFrames * 1000 / mSampleRate;
 
   // For now, simply average the duration with the previous
@@ -1005,17 +1007,17 @@ AudioCallbackDriver::DataCallback(const 
   mBuffer.BufferFilled();
 
   // Callback any observers for the AEC speaker data.  Note that one
   // (maybe) of these will be full-duplex, the others will get their input
   // data off separate cubeb callbacks.  Take care with how stuff is
   // removed/added to this list and TSAN issues, but input and output will
   // use separate callback methods.
   mGraphImpl->NotifyOutputData(aOutputBuffer, static_cast<size_t>(aFrames),
-                               mSampleRate, ChannelCount);
+                               mSampleRate, mOuputChannels);
 
   bool switching = false;
   {
     MonitorAutoLock mon(mGraphImpl->GetMonitor());
     switching = !!NextDriver();
   }
 
   if (switching && stillProcessing) {
--- a/dom/media/GraphDriver.h
+++ b/dom/media/GraphDriver.h
@@ -465,28 +465,28 @@ private:
   void PanOutputIfNeeded(bool aMicrophoneActive);
   /**
    * This is called when the output device used by the cubeb stream changes. */
   void DeviceChangedCallback();
   /* Start the cubeb stream */
   bool StartStream();
   friend class AsyncCubebTask;
   bool Init();
-  /* MediaStreamGraphs are always down/up mixed to stereo for now. */
-  static const uint32_t ChannelCount = 2;
+  /* Number of output channels the MediaStreamGraph is down/up mixed to. */
+  uint32_t mOuputChannels;
   /* The size of this buffer comes from the fact that some audio backends can
    * call back with a number of frames lower than one block (128 frames), so we
    * need to keep at most two block in the SpillBuffer, because we always round
    * up to block boundaries during an iteration.
    * This is only ever accessed on the audio callback thread. */
-  SpillBuffer<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 2, ChannelCount> mScratchBuffer;
+  SpillBuffer<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 2> mScratchBuffer;
   /* Wrapper to ensure we write exactly the number of frames we need in the
    * audio buffer cubeb passes us. This is only ever accessed on the audio
    * callback thread. */
-  AudioCallbackBufferWrapper<AudioDataValue, ChannelCount> mBuffer;
+  AudioCallbackBufferWrapper<AudioDataValue> mBuffer;
   /* cubeb stream for this graph. This is guaranteed to be non-null after Init()
    * has been called, and is synchronized internaly. */
   nsAutoRef<cubeb_stream> mAudioStream;
   /* The sample rate for the aforementionned cubeb stream. This is set on
    * initialization and can be read safely afterwards. */
   uint32_t mSampleRate;
   /* The number of input channels from cubeb.  Should be set before opening cubeb
    * and then be static. */
--- a/dom/media/MediaStreamGraphImpl.h
+++ b/dom/media/MediaStreamGraphImpl.h
@@ -448,18 +448,20 @@ public:
   /**
    * Mark the media stream order as dirty.
    */
   void SetStreamOrderDirty()
   {
     mStreamOrderDirty = true;
   }
 
-  // Always stereo for now.
-  uint32_t AudioChannelCount() const { return 2; }
+  uint32_t AudioChannelCount() const
+  {
+    return std::min<uint32_t>(8, CubebUtils::MaxNumberOfChannels());
+  }
 
   double MediaTimeToSeconds(GraphTime aTime) const
   {
     NS_ASSERTION(aTime > -STREAM_TIME_MAX && aTime <= STREAM_TIME_MAX,
                  "Bad time");
     return static_cast<double>(aTime)/GraphRate();
   }
 
--- a/dom/media/gtest/TestAudioBuffers.cpp
+++ b/dom/media/gtest/TestAudioBuffers.cpp
@@ -1,57 +1,60 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include <stdint.h>
 #include "AudioBufferUtils.h"
 #include "gtest/gtest.h"
+#include <vector>
 
 const uint32_t FRAMES = 256;
-const uint32_t CHANNELS = 2;
-const uint32_t SAMPLES = CHANNELS * FRAMES;
 
-TEST(AudioBuffers, Test)
+void test_for_number_of_channels(const uint32_t channels)
 {
-  mozilla::AudioCallbackBufferWrapper<float, CHANNELS> mBuffer;
-  mozilla::SpillBuffer<float, 128, CHANNELS> b;
-  float fromCallback[SAMPLES];
-  float other[SAMPLES];
+  const uint32_t samples = channels * FRAMES;
 
-  for (uint32_t i = 0; i < SAMPLES; i++) {
-    other[i] = 1.0;
-    fromCallback[i] = 0.0;
-  }
+  mozilla::AudioCallbackBufferWrapper<float> mBuffer(channels);
+  mozilla::SpillBuffer<float, 128> b(channels);
+  std::vector<float> fromCallback(samples, 0.0);
+  std::vector<float> other(samples, 1.0);
 
   // Set the buffer in the wrapper from the callback
-  mBuffer.SetBuffer(fromCallback, FRAMES);
+  mBuffer.SetBuffer(fromCallback.data(), FRAMES);
 
   // Fill the SpillBuffer with data.
-  ASSERT_TRUE(b.Fill(other, 15) == 15);
-  ASSERT_TRUE(b.Fill(other, 17) == 17);
-  for (uint32_t i = 0; i < 32 * CHANNELS; i++) {
+  ASSERT_TRUE(b.Fill(other.data(), 15) == 15);
+  ASSERT_TRUE(b.Fill(other.data(), 17) == 17);
+  for (uint32_t i = 0; i < 32 * channels; i++) {
     other[i] = 0.0;
   }
 
   // Empty it in the AudioCallbackBufferWrapper
   ASSERT_TRUE(b.Empty(mBuffer) == 32);
 
   // Check available return something reasonnable
   ASSERT_TRUE(mBuffer.Available() == FRAMES - 32);
 
   // Fill the buffer with the rest of the data
-  mBuffer.WriteFrames(other + 32 * CHANNELS, FRAMES - 32);
+  mBuffer.WriteFrames(other.data() + 32 * channels, FRAMES - 32);
 
   // Check the buffer is now full
   ASSERT_TRUE(mBuffer.Available() == 0);
 
-  for (uint32_t i = 0 ; i < SAMPLES; i++) {
+  for (uint32_t i = 0 ; i < samples; i++) {
     ASSERT_TRUE(fromCallback[i] == 1.0) <<
       "Difference at " << i << " (" << fromCallback[i] << " != " << 1.0 <<
       ")\n";
   }
 
-  ASSERT_TRUE(b.Fill(other, FRAMES) == 128);
-  ASSERT_TRUE(b.Fill(other, FRAMES) == 0);
+  ASSERT_TRUE(b.Fill(other.data(), FRAMES) == 128);
+  ASSERT_TRUE(b.Fill(other.data(), FRAMES) == 0);
   ASSERT_TRUE(b.Empty(mBuffer) == 0);
 }
+
+TEST(AudioBuffers, Test)
+{
+  for (uint32_t ch = 1; ch <= 8; ++ch) {
+    test_for_number_of_channels(ch);
+  }
+}
--- a/dom/media/webaudio/AudioContext.cpp
+++ b/dom/media/webaudio/AudioContext.cpp
@@ -200,18 +200,20 @@ AudioContext::Constructor(const GlobalOb
                           ErrorResult& aRv)
 {
   nsCOMPtr<nsPIDOMWindowInner> window = do_QueryInterface(aGlobal.GetAsSupports());
   if (!window) {
     aRv.Throw(NS_ERROR_FAILURE);
     return nullptr;
   }
 
+  uint32_t maxChannelCount = std::min<uint32_t>(WebAudioUtils::MaxChannelCount,
+      CubebUtils::MaxNumberOfChannels());
   RefPtr<AudioContext> object =
-    new AudioContext(window, false);
+    new AudioContext(window, false, maxChannelCount);
   aRv = object->Init();
   if (NS_WARN_IF(aRv.Failed())) {
      return nullptr;
   }
 
   RegisterWeakMemoryReporter(object);
 
   return object.forget();
@@ -617,17 +619,18 @@ AudioContext::UpdatePannerSource()
   for (auto iter = mPannerNodes.Iter(); !iter.Done(); iter.Next()) {
     iter.Get()->GetKey()->FindConnectedSources();
   }
 }
 
 uint32_t
 AudioContext::MaxChannelCount() const
 {
-  return mIsOffline ? mNumberOfChannels : CubebUtils::MaxNumberOfChannels();
+  return std::min<uint32_t>(WebAudioUtils::MaxChannelCount,
+      mIsOffline ? mNumberOfChannels : CubebUtils::MaxNumberOfChannels());
 }
 
 uint32_t
 AudioContext::ActiveNodeCount() const
 {
   return mActiveNodes.Count();
 }
 
--- a/dom/media/webaudio/AudioDestinationNode.cpp
+++ b/dom/media/webaudio/AudioDestinationNode.cpp
@@ -320,17 +320,17 @@ NS_INTERFACE_MAP_END_INHERITING(AudioNod
 
 NS_IMPL_ADDREF_INHERITED(AudioDestinationNode, AudioNode)
 NS_IMPL_RELEASE_INHERITED(AudioDestinationNode, AudioNode)
 
 AudioDestinationNode::AudioDestinationNode(AudioContext* aContext,
                                            bool aIsOffline,
                                            uint32_t aNumberOfChannels,
                                            uint32_t aLength, float aSampleRate)
-  : AudioNode(aContext, aIsOffline ? aNumberOfChannels : 2,
+  : AudioNode(aContext, aNumberOfChannels,
               ChannelCountMode::Explicit, ChannelInterpretation::Speakers)
   , mFramesToProduce(aLength)
   , mIsOffline(aIsOffline)
   , mAudioChannelSuspended(false)
   , mCaptured(false)
   , mAudible(AudioChannelService::AudibleState::eAudible)
 {
   nsPIDOMWindowInner* window = aContext->GetParentObject();
--- a/dom/media/webrtc/AudioOutputObserver.h
+++ b/dom/media/webrtc/AudioOutputObserver.h
@@ -3,16 +3,17 @@
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef AUDIOOUTPUTOBSERVER_H_
 #define AUDIOOUTPUTOBSERVER_H_
 
 #include "mozilla/StaticPtr.h"
 #include "nsAutoPtr.h"
 #include "AudioMixer.h"
+#include "MediaData.h"
 
 namespace webrtc {
 class SingleRwFifo;
 }
 
 namespace mozilla {
 
 typedef struct FarEndAudioChunk_ {
@@ -45,15 +46,16 @@ private:
   uint32_t mPlayoutChannels;
 
   nsAutoPtr<webrtc::SingleRwFifo> mPlayoutFifo;
   uint32_t mChunkSize;
 
   // chunking to 10ms support
   FarEndAudioChunk *mSaved; // can't be nsAutoPtr since we need to use free(), not delete
   uint32_t mSamplesSaved;
+  AlignedAudioBuffer mDownmixBuffer;
 };
 
 extern StaticRefPtr<AudioOutputObserver> gFarendObserver;
 
 }
 
 #endif
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -4,16 +4,17 @@
 
 #include "MediaEngineWebRTC.h"
 #include <stdio.h>
 #include <algorithm>
 #include "mozilla/Assertions.h"
 #include "MediaTrackConstraints.h"
 #include "mtransport/runnable_utils.h"
 #include "nsAutoPtr.h"
+#include "AudioConverter.h"
 
 // scoped_ptr.h uses FF
 #ifdef FF
 #undef FF
 #endif
 #include "webrtc/modules/audio_device/opensl/single_rw_fifo.h"
 
 #define CHANNELS 1
@@ -58,16 +59,17 @@ ScopedCustomReleasePtr<webrtc::VoENetwor
 ScopedCustomReleasePtr<webrtc::VoEAudioProcessing> MediaEngineWebRTCMicrophoneSource::mVoEProcessing;
 
 AudioOutputObserver::AudioOutputObserver()
   : mPlayoutFreq(0)
   , mPlayoutChannels(0)
   , mChunkSize(0)
   , mSaved(nullptr)
   , mSamplesSaved(0)
+  , mDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100)
 {
   // Buffers of 10ms chunks
   mPlayoutFifo = new webrtc::SingleRwFifo(MAX_AEC_FIFO_DEPTH/10);
 }
 
 AudioOutputObserver::~AudioOutputObserver()
 {
   Clear();
@@ -96,23 +98,29 @@ AudioOutputObserver::Size()
   return mPlayoutFifo->size();
 }
 
 // static
 void
 AudioOutputObserver::InsertFarEnd(const AudioDataValue *aBuffer, uint32_t aFrames, bool aOverran,
                                   int aFreq, int aChannels)
 {
+  // Prepare for downmix if needed
+  int channels = aChannels;
+  if (aChannels > MAX_CHANNELS) {
+    channels = MAX_CHANNELS;
+  }
+
   if (mPlayoutChannels != 0) {
-    if (mPlayoutChannels != static_cast<uint32_t>(aChannels)) {
+    if (mPlayoutChannels != static_cast<uint32_t>(channels)) {
       MOZ_CRASH();
     }
   } else {
-    MOZ_ASSERT(aChannels <= MAX_CHANNELS);
-    mPlayoutChannels = static_cast<uint32_t>(aChannels);
+    MOZ_ASSERT(channels <= MAX_CHANNELS);
+    mPlayoutChannels = static_cast<uint32_t>(channels);
   }
   if (mPlayoutFreq != 0) {
     if (mPlayoutFreq != static_cast<uint32_t>(aFreq)) {
       MOZ_CRASH();
     }
   } else {
     MOZ_ASSERT(aFreq <= MAX_SAMPLING_FREQ);
     MOZ_ASSERT(!(aFreq % 100), "Sampling rate for far end data should be multiple of 100.");
@@ -130,28 +138,34 @@ AudioOutputObserver::InsertFarEnd(const 
     aOverran = false;
   }
   // Rechunk to 10ms.
   // The AnalyzeReverseStream() and WebRtcAec_BufferFarend() functions insist on 10ms
   // samples per call.  Annoying...
   while (aFrames) {
     if (!mSaved) {
       mSaved = (FarEndAudioChunk *) moz_xmalloc(sizeof(FarEndAudioChunk) +
-                                                (mChunkSize * aChannels - 1)*sizeof(int16_t));
+                                                (mChunkSize * channels - 1)*sizeof(int16_t));
       mSaved->mSamples = mChunkSize;
       mSaved->mOverrun = aOverran;
       aOverran = false;
     }
     uint32_t to_copy = mChunkSize - mSamplesSaved;
     if (to_copy > aFrames) {
       to_copy = aFrames;
     }
 
-    int16_t *dest = &(mSaved->mData[mSamplesSaved * aChannels]);
-    ConvertAudioSamples(aBuffer, dest, to_copy * aChannels);
+    int16_t* dest = &(mSaved->mData[mSamplesSaved * channels]);
+    if (aChannels > MAX_CHANNELS) {
+      AudioConverter converter(AudioConfig(aChannels, 0), AudioConfig(channels, 0));
+      converter.Process(mDownmixBuffer, aBuffer, to_copy);
+      ConvertAudioSamples(mDownmixBuffer.Data(), dest, to_copy * channels);
+    } else {
+      ConvertAudioSamples(aBuffer, dest, to_copy * channels);
+    }
 
 #ifdef LOG_FAREND_INSERTION
     if (fp) {
       fwrite(&(mSaved->mData[mSamplesSaved * aChannels]), to_copy * aChannels, sizeof(int16_t), fp);
     }
 #endif
     aFrames -= to_copy;
     mSamplesSaved += to_copy;