Bug 1221587: patch 6 - allow getUserMedia to use full-duplex cubeb streams r?padenot draft
author Paul Adenot <paul@paul.cx>
Tue, 12 Jan 2016 18:11:48 -0500
changeset 321194 566b432289c3c3e37dc43972e5998cefd6d81e5b
parent 321193 24f31e31725cfa875ad6c46e190602965ce7d888
child 321195 d2f80d4053215adb85111fbe4fa5987691b81b7b
push id 9349
push user rjesup@wgate.com
push date Wed, 13 Jan 2016 06:48:48 +0000
reviewers padenot
bugs 1221587
milestone 46.0a1
Bug 1221587: patch 6 - allow getUserMedia to use full-duplex cubeb streams r?padenot
dom/media/AudioStream.cpp
dom/media/AudioStream.h
dom/media/GraphDriver.cpp
dom/media/GraphDriver.h
dom/media/webrtc/MediaEngineWebRTC.cpp
dom/media/webrtc/MediaEngineWebRTC.h
dom/media/webrtc/MediaEngineWebRTCAudio.cpp
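
The hunks below assume the interim full-duplex form of the cubeb API:
cubeb_stream_init() takes separate input and output stream parameters
(input parameters first; nullptr means "no stream on that side"), and the
data callback carries both an input and an output buffer. As a reading
aid, here is a minimal sketch of the shapes these callers assume (the
authoritative declarations live in media/libcubeb/include/cubeb.h):

  /* Duplex init: pass nullptr input params for an output-only stream
   * (AudioStream below), or both sets of params for full duplex
   * (AudioCallbackDriver below). */
  int cubeb_stream_init(cubeb * context, cubeb_stream ** stream,
                        char const * stream_name,
                        cubeb_stream_params * input_stream_params,
                        cubeb_stream_params * output_stream_params,
                        unsigned int latency,
                        cubeb_data_callback data_callback,
                        cubeb_state_callback state_callback,
                        void * user_ptr);

  /* The data callback now receives both buffers; a side opened with
   * nullptr params gets a null buffer. */
  typedef long (* cubeb_data_callback)(cubeb_stream * stream,
                                       void * user_ptr,
                                       void * input_buffer,
                                       void * output_buffer,
                                       long nframes);
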
--- a/dom/media/AudioStream.cpp
+++ b/dom/media/AudioStream.cpp
@@ -377,17 +377,19 @@ AudioStream::OpenCubeb(cubeb_stream_para
 
   // If the latency pref is set, use it. Otherwise, if this stream is intended
   // for low latency playback, try to get the lowest latency possible.
   // Otherwise, for normal streams, use 100ms.
   uint32_t latency = CubebUtils::GetCubebLatency();
 
   {
     cubeb_stream* stream;
-    if (cubeb_stream_init(cubebContext, &stream, "AudioStream", aParams,
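+    // Full-duplex init: the input parameters come first; nullptr keeps
+    // this stream output-only.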
+    if (cubeb_stream_init(cubebContext, &stream, "AudioStream", nullptr, &aParams,
                           latency, DataCallback_S, StateCallback_S, this) == CUBEB_OK) {
       MonitorAutoLock mon(mMonitor);
       MOZ_ASSERT(mState != SHUTDOWN);
       mCubebStream.reset(stream);
     } else {
       MonitorAutoLock mon(mMonitor);
       mState = ERRORED;
       NS_WARNING(nsPrintfCString("AudioStream::OpenCubeb() %p failed to init cubeb", this).get());
--- a/dom/media/AudioStream.h
+++ b/dom/media/AudioStream.h
@@ -233,19 +233,19 @@ protected:
   // opened, of the audio hardware, not adjusted for the changes of playback
   // rate or underrun frames.
   // Caller must own the monitor.
   int64_t GetPositionInFramesUnlocked();
 
 private:
   nsresult OpenCubeb(cubeb_stream_params &aParams);
 
-  static long DataCallback_S(cubeb_stream*, void* aThis, void* aBuffer, long aFrames)
+  static long DataCallback_S(cubeb_stream*, void* aThis, void* /* aInputBuffer */, void* aOutputBuffer, long aFrames)
   {
-    return static_cast<AudioStream*>(aThis)->DataCallback(aBuffer, aFrames);
+    return static_cast<AudioStream*>(aThis)->DataCallback(aOutputBuffer, aFrames);
   }
 
   static void StateCallback_S(cubeb_stream*, void* aThis, cubeb_state aState)
   {
     static_cast<AudioStream*>(aThis)->StateCallback(aState);
   }
 
 
--- a/dom/media/GraphDriver.cpp
+++ b/dom/media/GraphDriver.cpp
@@ -552,54 +552,63 @@ AudioCallbackDriver::AudioCallbackDriver
 AudioCallbackDriver::~AudioCallbackDriver()
 {
   MOZ_ASSERT(mPromisesForOperation.IsEmpty());
 }
 
 void
 AudioCallbackDriver::Init()
 {
-  cubeb_stream_params params;
+  cubeb_stream_params out_params;
+  cubeb_stream_params in_params;
   uint32_t latency;
 
   MOZ_ASSERT(!NS_IsMainThread(),
       "This is blocking and should never run on the main thread.");
 
-  params.devid = nullptr; // XXX take from config for the graph
-  mSampleRate = params.rate = CubebUtils::PreferredSampleRate();
+  out_params.devid = nullptr; // XXX take from config for the graph
+  mSampleRate = out_params.rate = CubebUtils::PreferredSampleRate();
 
 #if defined(__ANDROID__)
 #if defined(MOZ_B2G)
-  params.stream_type = CubebUtils::ConvertChannelToCubebType(mAudioChannel);
+  out_params.stream_type = CubebUtils::ConvertChannelToCubebType(mAudioChannel);
 #else
-  params.stream_type = CUBEB_STREAM_TYPE_MUSIC;
+  out_params.stream_type = CUBEB_STREAM_TYPE_MUSIC;
 #endif
-  if (params.stream_type == CUBEB_STREAM_TYPE_MAX) {
+  if (out_params.stream_type == CUBEB_STREAM_TYPE_MAX) {
     NS_WARNING("Bad stream type");
     return;
   }
 #else
   (void)mAudioChannel;
 #endif
 
-  params.channels = mGraphImpl->AudioChannelCount();
+  out_params.channels = mGraphImpl->AudioChannelCount();
   if (AUDIO_OUTPUT_FORMAT == AUDIO_FORMAT_S16) {
-    params.format = CUBEB_SAMPLE_S16NE;
+    out_params.format = CUBEB_SAMPLE_S16NE;
   } else {
-    params.format = CUBEB_SAMPLE_FLOAT32NE;
+    out_params.format = CUBEB_SAMPLE_FLOAT32NE;
   }
 
-  if (cubeb_get_min_latency(CubebUtils::GetCubebContext(), params, &latency) != CUBEB_OK) {
+  if (cubeb_get_min_latency(CubebUtils::GetCubebContext(), out_params, &latency) != CUBEB_OK) {
     NS_WARNING("Could not get minimal latency from cubeb.");
     return;
   }
 
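+  // The capture side reuses the playback rate and sample format, so both
+  // directions of the duplex callback run at the graph rate.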
+  in_params = out_params;
+  in_params.channels = 1; // change to support optional stereo capture
+
   cubeb_stream* stream;
+  // XXX Only pass input in_params if we have an input listener.  Always
+  // set up output because it's easier, and it will just get silence.
+  // XXX Add support for adding/removing an input listener later.
   if (cubeb_stream_init(CubebUtils::GetCubebContext(), &stream,
-                        "AudioCallbackDriver", params, latency,
+                        "AudioCallbackDriver", &in_params, &out_params, latency,
                         DataCallback_s, StateCallback_s, this) == CUBEB_OK) {
     mAudioStream.own(stream);
   } else {
     NS_WARNING("Could not create a cubeb stream for MediaStreamGraph, falling back to a SystemClockDriver");
     // Fall back to a driver using a normal thread.
     MonitorAutoLock lock(GraphImpl()->GetMonitor());
     SetNextDriver(new SystemClockDriver(GraphImpl()));
     NextDriver()->SetGraphTime(this, mIterationStart, mIterationEnd);
@@ -719,21 +728,22 @@ void
 AudioCallbackDriver::WakeUp()
 {
   mGraphImpl->GetMonitor().AssertCurrentThreadOwns();
   mGraphImpl->GetMonitor().Notify();
 }
 
 /* static */ long
 AudioCallbackDriver::DataCallback_s(cubeb_stream* aStream,
-                                    void* aUser, void* aBuffer,
+                                    void* aUser, void* aInputBuffer, void* aOutputBuffer,
                                     long aFrames)
 {
   AudioCallbackDriver* driver = reinterpret_cast<AudioCallbackDriver*>(aUser);
-  return driver->DataCallback(static_cast<AudioDataValue*>(aBuffer), aFrames);
+  return driver->DataCallback(static_cast<AudioDataValue*>(aInputBuffer),
+                              static_cast<AudioDataValue*>(aOutputBuffer), aFrames);
 }
 
 /* static */ void
 AudioCallbackDriver::StateCallback_s(cubeb_stream* aStream, void * aUser,
                                      cubeb_state aState)
 {
   AudioCallbackDriver* driver = reinterpret_cast<AudioCallbackDriver*>(aUser);
   driver->StateCallback(aState);
@@ -790,23 +800,26 @@ AudioCallbackDriver::OSXDeviceSwitchingW
     return true;
   }
 
   return false;
 }
 #endif // XP_MACOSX
 
 long
-AudioCallbackDriver::DataCallback(AudioDataValue* aBuffer, long aFrames)
+AudioCallbackDriver::DataCallback(AudioDataValue* aInputBuffer,
+                                  AudioDataValue* aOutputBuffer, long aFrames)
 {
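+  // Note: aInputBuffer is not consumed yet; this patch only threads the
+  // full-duplex buffers through. Input delivery comes later in the series.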
   bool stillProcessing;
 
 #ifdef XP_MACOSX
   if (OSXDeviceSwitchingWorkaround()) {
-    PodZero(aBuffer, aFrames * mGraphImpl->AudioChannelCount());
+    PodZero(aOutputBuffer, aFrames * mGraphImpl->AudioChannelCount());
     return aFrames;
   }
 #endif
 
 #ifdef DEBUG
   // DebugOnly<> doesn't work here... it forces an initialization that will cause
   // mInCallback to be set back to false before we exit the statement.  Do it by
   // hand instead.
@@ -816,34 +829,34 @@ AudioCallbackDriver::DataCallback(AudioD
   GraphTime stateComputedTime = StateComputedTime();
   if (stateComputedTime == 0) {
     MonitorAutoLock mon(mGraphImpl->GetMonitor());
     // Because this function is called during cubeb_stream_init (to prefill the
     // audio buffers), it can be that we don't have a message here (because this
     // driver is the first one for this graph), and the graph would exit. Simply
     // return here until we have messages.
     if (!mGraphImpl->MessagesQueued()) {
-      PodZero(aBuffer, aFrames * mGraphImpl->AudioChannelCount());
+      PodZero(aOutputBuffer, aFrames * mGraphImpl->AudioChannelCount());
       return aFrames;
     }
     mGraphImpl->SwapMessageQueues();
   }
 
   uint32_t durationMS = aFrames * 1000 / mSampleRate;
 
   // For now, simply average the duration with the previous
   // duration so there is some damping against sudden changes.
   if (!mIterationDurationMS) {
     mIterationDurationMS = durationMS;
   } else {
     mIterationDurationMS = (mIterationDurationMS*3) + durationMS;
     mIterationDurationMS /= 4;
   }
 
-  mBuffer.SetBuffer(aBuffer, aFrames);
+  mBuffer.SetBuffer(aOutputBuffer, aFrames);
   // fill part or all with leftover data from last iteration (since we
   // align to Audio blocks)
   mScratchBuffer.Empty(mBuffer);
   // if we totally filled the buffer (and mScratchBuffer isn't empty),
   // we don't need to run an iteration and if we do so we may overflow.
   if (mBuffer.Available()) {
 
     // State computed time is decided by the audio callback's buffer length. We
--- a/dom/media/GraphDriver.h
+++ b/dom/media/GraphDriver.h
@@ -148,20 +148,16 @@ public:
   virtual TimeStamp GetCurrentTimeStamp() {
     return mCurrentTimeStamp;
   }
 
   GraphTime IterationEnd() {
     return mIterationEnd;
   }
 
-  virtual void GetAudioBuffer(float** aBuffer, long& aFrames) {
-    MOZ_CRASH("This is not an Audio GraphDriver!");
-  }
-
   virtual AudioCallbackDriver* AsAudioCallbackDriver() {
     return nullptr;
   }
 
   virtual OfflineClockDriver* AsOfflineClockDriver() {
     return nullptr;
   }
 
@@ -386,27 +382,27 @@ public:
   virtual void Stop() override;
   virtual void Resume() override;
   virtual void Revive() override;
   virtual void WaitForNextIteration() override;
   virtual void WakeUp() override;
 
   /* Static wrapper function cubeb calls back. */
   static long DataCallback_s(cubeb_stream * aStream,
-                             void * aUser, void * aBuffer,
+                             void * aUser, void * aInputBuffer, void * aOutputBuffer,
                              long aFrames);
   static void StateCallback_s(cubeb_stream* aStream, void * aUser,
                               cubeb_state aState);
   static void DeviceChangedCallback_s(void * aUser);
   /* This function is called by the underlying audio backend when a refill is
    * needed. This is what drives the whole graph when it is used to output
    * audio. If the return value is exactly aFrames, this function will get
    * called again. If it is less than aFrames, the stream will go in draining
    * mode, and this function will not be called again. */
-  long DataCallback(AudioDataValue* aBuffer, long aFrames);
+  long DataCallback(AudioDataValue* aInputBuffer, AudioDataValue* aOutputBuffer, long aFrames);
   /* This function is called by the underlying audio backend, but is only used
    * for informational purposes at the moment. */
   void StateCallback(cubeb_state aState);
   /* This is an approximation of the number of millisecond there are between two
    * iterations of the graph. */
   virtual uint32_t IterationDuration() override;
 
   /* This function gets called when the graph has produced the audio frames for
--- a/dom/media/webrtc/MediaEngineWebRTC.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTC.cpp
@@ -42,16 +42,17 @@ GetUserMediaLog()
 #undef LOG
 #define LOG(args) MOZ_LOG(GetUserMediaLog(), mozilla::LogLevel::Debug, args)
 
 namespace mozilla {
 
 MediaEngineWebRTC::MediaEngineWebRTC(MediaEnginePrefs &aPrefs)
   : mMutex("mozilla::MediaEngineWebRTC"),
     mVoiceEngine(nullptr),
+    mAudioInput(nullptr),
     mAudioEngineInit(false)
 {
 #ifndef MOZ_B2G_CAMERA
   nsCOMPtr<nsIComponentRegistrar> compMgr;
   NS_GetComponentRegistrar(getter_AddRefs(compMgr));
   if (compMgr) {
     compMgr->IsContractIDRegistered(NS_TABSOURCESERVICE_CONTRACTID, &mHasTabVideoSource);
   }
@@ -227,17 +228,16 @@ MediaEngineWebRTC::EnumerateVideoDevices
 #endif
 }
 
 void
 MediaEngineWebRTC::EnumerateAudioDevices(dom::MediaSourceEnum aMediaSource,
                                          nsTArray<RefPtr<MediaEngineAudioSource> >* aASources)
 {
   ScopedCustomReleasePtr<webrtc::VoEBase> ptrVoEBase;
-  ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw;
   // We spawn threads to handle gUM runnables, so we must protect the member vars
   MutexAutoLock lock(mMutex);
 
   if (aMediaSource == dom::MediaSourceEnum::AudioCapture) {
     RefPtr<MediaEngineWebRTCAudioCaptureSource> audioCaptureSource =
       new MediaEngineWebRTCAudioCaptureSource(nullptr);
     aASources->AppendElement(audioCaptureSource);
     return;
@@ -271,59 +271,62 @@ MediaEngineWebRTC::EnumerateAudioDevices
 
   if (!mAudioEngineInit) {
     if (ptrVoEBase->Init() < 0) {
       return;
     }
     mAudioEngineInit = true;
   }
 
-  ptrVoEHw = webrtc::VoEHardware::GetInterface(mVoiceEngine);
-  if (!ptrVoEHw)  {
-    return;
+  if (!mAudioInput) {
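+    // XXX placeholder; substitute a real platform full-duplex check.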
+    if (true /*platform_supports_full_duplex*/) {
+      mAudioInput = new mozilla::AudioInputCubeb(mVoiceEngine);
+    } else {
+      mAudioInput = new mozilla::AudioInputWebRTC(mVoiceEngine);
+    }
   }
 
   int nDevices = 0;
-  ptrVoEHw->GetNumOfRecordingDevices(nDevices);
+  mAudioInput->GetNumOfRecordingDevices(nDevices);
   int i;
 #if defined(MOZ_WIDGET_ANDROID) || defined(MOZ_WIDGET_GONK)
   i = 0; // Bug 1037025 - let the OS handle defaulting for now on android/b2g
 #else
   // -1 is "default communications device" depending on OS in webrtc.org code
   i = -1;
 #endif
   for (; i < nDevices; i++) {
     // We use constants here because GetRecordingDeviceName takes char[128].
     char deviceName[128];
     char uniqueId[128];
     // paranoia; jingle doesn't bother with this
     deviceName[0] = '\0';
     uniqueId[0] = '\0';
 
-    int error = ptrVoEHw->GetRecordingDeviceName(i, deviceName, uniqueId);
+    int error = mAudioInput->GetRecordingDeviceName(i, deviceName, uniqueId);
     if (error) {
-      LOG((" VoEHardware:GetRecordingDeviceName: Failed %d",
-           ptrVoEBase->LastError() ));
+      LOG((" VoEHardware:GetRecordingDeviceName: Failed %d", error));
       continue;
     }
 
     if (uniqueId[0] == '\0') {
       // Mac and Linux don't set uniqueId!
       MOZ_ASSERT(sizeof(deviceName) == sizeof(uniqueId)); // total paranoia
       strcpy(uniqueId,deviceName); // safe given assert and initialization/error-check
     }
 
     RefPtr<MediaEngineAudioSource> aSource;
     NS_ConvertUTF8toUTF16 uuid(uniqueId);
     if (mAudioSources.Get(uuid, getter_AddRefs(aSource))) {
       // We've already seen this device, just append.
       aASources->AppendElement(aSource.get());
     } else {
-      aSource = new MediaEngineWebRTCMicrophoneSource(mThread, mVoiceEngine, i,
-                                                      deviceName, uniqueId);
+      aSource = new MediaEngineWebRTCMicrophoneSource(mThread, mVoiceEngine, mAudioInput,
+                                                      i, deviceName, uniqueId);
       mAudioSources.Put(uuid, aSource); // Hashtable takes ownership.
       aASources->AppendElement(aSource);
     }
   }
 }
 
 void
 MediaEngineWebRTC::Shutdown()
--- a/dom/media/webrtc/MediaEngineWebRTC.h
+++ b/dom/media/webrtc/MediaEngineWebRTC.h
@@ -2,16 +2,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef MEDIAENGINEWEBRTC_H_
 #define MEDIAENGINEWEBRTC_H_
 
 #include "prcvar.h"
 #include "prthread.h"
+#include "prprf.h"
 #include "nsIThread.h"
 #include "nsIRunnable.h"
 
 #include "mozilla/dom/File.h"
 #include "mozilla/Mutex.h"
 #include "mozilla/Monitor.h"
 #include "nsCOMPtr.h"
 #include "nsThreadUtils.h"
@@ -21,16 +22,18 @@
 #include "nsRefPtrHashtable.h"
 
 #include "VideoUtils.h"
 #include "MediaEngineCameraVideoSource.h"
 #include "VideoSegment.h"
 #include "AudioSegment.h"
 #include "StreamBuffer.h"
 #include "MediaStreamGraph.h"
+#include "cubeb/cubeb.h"
+#include "CubebUtils.h"
 
 #include "MediaEngineWrapper.h"
 #include "mozilla/dom/MediaStreamTrackBinding.h"
 // WebRTC library includes follow
 #include "webrtc/common.h"
 // Audio Engine
 #include "webrtc/voice_engine/include/voe_base.h"
 #include "webrtc/voice_engine/include/voe_codec.h"
@@ -114,43 +117,207 @@ public:
     const nsTArray<const dom::MediaTrackConstraintSet*>& aConstraintSets,
     const nsString& aDeviceId) override;
 
 protected:
   virtual ~MediaEngineWebRTCAudioCaptureSource() { Shutdown(); }
   nsCString mUUID;
 };
 
+// Small subset of VoEHardware
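+// Methods return 0 on success and nonzero on failure, matching the
+// VoEHardware convention the callers check against.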
+class AudioInput
+{
+public:
+  AudioInput(webrtc::VoiceEngine* aVoiceEngine) : mVoiceEngine(aVoiceEngine) {}
+  virtual ~AudioInput() {}
+
+  NS_INLINE_DECL_REFCOUNTING(AudioInput)
+
+  virtual int GetNumOfRecordingDevices(int& aDevices) = 0;
+  virtual int GetRecordingDeviceName(int aIndex, char aStrNameUTF8[128],
+                                     char aStrGuidUTF8[128]) = 0;
+  virtual int GetRecordingDeviceStatus(bool& aIsAvailable) = 0;
+  virtual void StartRecording(MediaStreamGraph *aGraph) = 0;
+  virtual void StopRecording(MediaStreamGraph *aGraph) = 0;
+  virtual int SetRecordingDevice(int aIndex) = 0;
+
+protected:
+  webrtc::VoiceEngine* mVoiceEngine;
+};
+
+class AudioInputCubeb : public AudioInput,
+                        public MediaStreamListener
+{
+public:
+  AudioInputCubeb(webrtc::VoiceEngine* aVoiceEngine) :
+    AudioInput(aVoiceEngine), mDevices(nullptr) {}
+  virtual ~AudioInputCubeb()
+  {
+    if (mDevices) {
+      cubeb_device_collection_destroy(mDevices);
+      mDevices = nullptr;
+    }
+  }
+
+  virtual int GetNumOfRecordingDevices(int& aDevices)
+  {
+    // cubeb has no device-count API; re-enumerate and count the enabled
+    // input devices.  Free any collection left from a previous call.
+    if (mDevices) {
+      cubeb_device_collection_destroy(mDevices);
+      mDevices = nullptr;
+    }
+    if (CUBEB_OK != cubeb_enumerate_devices(CubebUtils::GetCubebContext(),
+                                            CUBEB_DEVICE_TYPE_INPUT,
+                                            &mDevices)) {
+      return 1;
+    }
+    aDevices = 0;
+    for (uint32_t i = 0; i < mDevices->count; i++) {
+      if (mDevices->device[i]->type == CUBEB_DEVICE_TYPE_INPUT && // paranoia
+          mDevices->device[i]->state == CUBEB_DEVICE_STATE_ENABLED)
+      {
+        aDevices++;
+        // XXX to support device changes, we need to identify by name/UUID not index
+      }
+    }
+    return 0;
+  }
+
+  virtual int GetRecordingDeviceName(int aIndex, char aStrNameUTF8[128],
+                                     char aStrGuidUTF8[128])
+  {
+    int devindex = aIndex == -1 ? 0 : aIndex;
+    if (!mDevices || devindex >= (int) mDevices->count) {
+      return 1;
+    }
+    PR_snprintf(aStrNameUTF8, 128, "%s%s", aIndex == -1 ? "default: " : "",
+                mDevices->device[devindex]->friendly_name);
+    aStrGuidUTF8[0] = '\0';
+    return 0;
+  }
+
+  virtual int GetRecordingDeviceStatus(bool& aIsAvailable)
+  {
+    // With cubeb, we only expose devices of type CUBEB_DEVICE_TYPE_INPUT
+    aIsAvailable = true;
+    return 0;
+  }
+
+  virtual void StartRecording(MediaStreamGraph *aGraph)
+  {
+    ScopedCustomReleasePtr<webrtc::VoEExternalMedia> ptrVoERender;
+    ptrVoERender = webrtc::VoEExternalMedia::GetInterface(mVoiceEngine);
+    if (ptrVoERender) {
+      ptrVoERender->SetExternalRecordingStatus(true);
+    }
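+    // nullptr: use the default input device for now.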
+    aGraph->OpenAudioInput(nullptr, this);
+  }
+
+  virtual void StopRecording(MediaStreamGraph *aGraph)
+  {
+    aGraph->CloseAudioInput(this);
+  }
+
+  virtual int SetRecordingDevice(int aIndex)
+  {
+    // No VoEHardware device to select here; succeed so callers proceed.
+    return 0;
+  }
+
+private:
+  cubeb_device_collection* mDevices;
+};
+
+class AudioInputWebRTC : public AudioInput
+{
+public:
+  AudioInputWebRTC(webrtc::VoiceEngine* aVoiceEngine) : AudioInput(aVoiceEngine) {}
+  virtual ~AudioInputWebRTC() {}
+
+  virtual int GetNumOfRecordingDevices(int& aDevices)
+  {
+    ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw;
+    ptrVoEHw = webrtc::VoEHardware::GetInterface(mVoiceEngine);
+    if (!ptrVoEHw)  {
+      return 1;
+    }
+    return ptrVoEHw->GetNumOfRecordingDevices(aDevices);
+  }
+
+  virtual int GetRecordingDeviceName(int aIndex, char aStrNameUTF8[128],
+                                     char aStrGuidUTF8[128])
+  {
+    ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw;
+    ptrVoEHw = webrtc::VoEHardware::GetInterface(mVoiceEngine);
+    if (!ptrVoEHw)  {
+      return 1;
+    }
+    return ptrVoEHw->GetRecordingDeviceName(aIndex, aStrNameUTF8,
+                                            aStrGuidUTF8);
+  }
+
+  virtual int GetRecordingDeviceStatus(bool& aIsAvailable)
+  {
+    ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw;
+    ptrVoEHw = webrtc::VoEHardware::GetInterface(mVoiceEngine);
+    if (!ptrVoEHw)  {
+      return 1;
+    }
+    ptrVoEHw->GetRecordingDeviceStatus(aIsAvailable);
+    return 0;
+  }
+
+  virtual void StartRecording(MediaStreamGraph *aGraph) {}
+  virtual void StopRecording(MediaStreamGraph *aGraph) {}
+
+  virtual int SetRecordingDevice(int aIndex)
+  {
+    ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw;
+    ptrVoEHw = webrtc::VoEHardware::GetInterface(mVoiceEngine);
+    if (!ptrVoEHw)  {
+      return 1;
+    }
+    return ptrVoEHw->SetRecordingDevice(aIndex);
+  }
+};
+
 class MediaEngineWebRTCMicrophoneSource : public MediaEngineAudioSource,
                                           public webrtc::VoEMediaProcess,
                                           private MediaConstraintsHelper
 {
 public:
   MediaEngineWebRTCMicrophoneSource(nsIThread* aThread,
                                     webrtc::VoiceEngine* aVoiceEnginePtr,
+                                    mozilla::AudioInput* aAudioInput,
                                     int aIndex,
                                     const char* name,
                                     const char* uuid)
     : MediaEngineAudioSource(kReleased)
     , mVoiceEngine(aVoiceEnginePtr)
+    , mAudioInput(aAudioInput)
     , mMonitor("WebRTCMic.Monitor")
     , mThread(aThread)
     , mCapIndex(aIndex)
     , mChannel(-1)
     , mNrAllocations(0)
     , mInitDone(false)
     , mStarted(false)
     , mSampleFrequency(MediaEngine::DEFAULT_SAMPLE_RATE)
     , mEchoOn(false), mAgcOn(false), mNoiseOn(false)
     , mEchoCancel(webrtc::kEcDefault)
     , mAGC(webrtc::kAgcDefault)
     , mNoiseSuppress(webrtc::kNsDefault)
     , mPlayoutDelay(0)
     , mNullTransport(nullptr) {
     MOZ_ASSERT(aVoiceEnginePtr);
+    MOZ_ASSERT(aAudioInput);
     mDeviceName.Assign(NS_ConvertUTF8toUTF16(name));
     mDeviceUUID.Assign(uuid);
     Init();
   }
 
   virtual void GetName(nsAString& aName) override;
   virtual void GetUUID(nsACString& aUUID) override;
 
@@ -202,16 +369,18 @@ public:
 
 protected:
   ~MediaEngineWebRTCMicrophoneSource() { Shutdown(); }
 
 private:
   void Init();
 
   webrtc::VoiceEngine* mVoiceEngine;
+  RefPtr<mozilla::AudioInput> mAudioInput;
+
   ScopedCustomReleasePtr<webrtc::VoEBase> mVoEBase;
   ScopedCustomReleasePtr<webrtc::VoEExternalMedia> mVoERender;
   ScopedCustomReleasePtr<webrtc::VoENetwork> mVoENetwork;
   ScopedCustomReleasePtr<webrtc::VoEAudioProcessing> mVoEProcessing;
 
   // mMonitor protects mSources[] access/changes, and transitions of mState
   // from kStarted to kStopped (which are combined with EndTrack()).
   // mSources[] is accessed from webrtc threads.
@@ -260,16 +429,17 @@ private:
     gFarendObserver = nullptr;
   }
 
   nsCOMPtr<nsIThread> mThread;
 
   // gUM runnables can e.g. Enumerate from multiple threads
   Mutex mMutex;
   webrtc::VoiceEngine* mVoiceEngine;
+  RefPtr<mozilla::AudioInput> mAudioInput;
   bool mAudioEngineInit;
 
   bool mHasTabVideoSource;
 
   // Store devices we've already seen in a hashtable for quick return.
   // Maps UUID to MediaEngineSource (one set for audio, one for video).
   nsRefPtrHashtable<nsStringHashKey, MediaEngineVideoSource> mVideoSources;
   nsRefPtrHashtable<nsStringHashKey, MediaEngineAudioSource> mAudioSources;
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -283,18 +283,17 @@ uint32_t MediaEngineWebRTCMicrophoneSour
 nsresult
 MediaEngineWebRTCMicrophoneSource::Allocate(const dom::MediaTrackConstraints &aConstraints,
                                             const MediaEnginePrefs &aPrefs,
                                             const nsString& aDeviceId)
 {
   AssertIsOnOwningThread();
   if (mState == kReleased) {
     if (mInitDone) {
-      ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw(webrtc::VoEHardware::GetInterface(mVoiceEngine));
-      if (!ptrVoEHw || ptrVoEHw->SetRecordingDevice(mCapIndex)) {
+      if (mAudioInput->SetRecordingDevice(mCapIndex)) {
         return NS_ERROR_FAILURE;
       }
       mState = kAllocated;
       LOG(("Audio device %d allocated", mCapIndex));
     } else {
       LOG(("Audio device is not initalized"));
       return NS_ERROR_FAILURE;
     }
@@ -377,16 +376,19 @@ MediaEngineWebRTCMicrophoneSource::Start
   }
   if (mVoEBase->StartSend(mChannel)) {
     return NS_ERROR_FAILURE;
   }
 
   // Attach external media processor, so this::Process will be called.
   mVoERender->RegisterExternalMediaProcessing(mChannel, webrtc::kRecordingPerChannel, *this);
 
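+  // Attaches graph input for cubeb; a no-op for the WebRTC backend.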
+  mAudioInput->StartRecording(aStream->Graph());
+
   return NS_OK;
 }
 
 nsresult
 MediaEngineWebRTCMicrophoneSource::Stop(SourceMediaStream *aSource, TrackID aID)
 {
   AssertIsOnOwningThread();
   {
@@ -407,16 +409,18 @@ MediaEngineWebRTCMicrophoneSource::Stop(
     }
     if (!mVoEBase) {
       return NS_ERROR_FAILURE;
     }
 
     mState = kStopped;
   }
 
+  mAudioInput->StopRecording(aSource->Graph());
+
   mVoERender->DeRegisterExternalMediaProcessing(mChannel, webrtc::kRecordingPerChannel);
 
   if (mVoEBase->StopSend(mChannel)) {
     return NS_ERROR_FAILURE;
   }
   if (mVoEBase->StopReceive(mChannel)) {
     return NS_ERROR_FAILURE;
   }
@@ -470,26 +474,25 @@ MediaEngineWebRTCMicrophoneSource::Init(
   if (mVoENetwork->RegisterExternalTransport(mChannel, *mNullTransport)) {
     return;
   }
 
   mSampleFrequency = MediaEngine::DEFAULT_SAMPLE_RATE;
   LOG(("%s: sampling rate %u", __FUNCTION__, mSampleFrequency));
 
   // Check for availability.
-  ScopedCustomReleasePtr<webrtc::VoEHardware> ptrVoEHw(webrtc::VoEHardware::GetInterface(mVoiceEngine));
-  if (!ptrVoEHw || ptrVoEHw->SetRecordingDevice(mCapIndex)) {
+  if (mAudioInput->SetRecordingDevice(mCapIndex)) {
     return;
   }
 
 #ifndef MOZ_B2G
   // Because of the permission mechanism of B2G, we need to skip the status
   // check here.
   bool avail = false;
-  ptrVoEHw->GetRecordingDeviceStatus(avail);
+  mAudioInput->GetRecordingDeviceStatus(avail);
   if (!avail) {
     return;
   }
 #endif // MOZ_B2G
 
   // Set "codec" to PCM, 32kHz on 1 channel
   ScopedCustomReleasePtr<webrtc::VoECodec> ptrVoECodec(webrtc::VoECodec::GetInterface(mVoiceEngine));
   if (!ptrVoECodec) {
@@ -634,16 +637,17 @@ MediaEngineWebRTCMicrophoneSource::Proce
   return;
 }
 
 void
 MediaEngineWebRTCAudioCaptureSource::GetName(nsAString &aName)
 {
   aName.AssignLiteral("AudioCapture");
 }
+
 void
 MediaEngineWebRTCAudioCaptureSource::GetUUID(nsACString &aUUID)
 {
   nsID uuid;
   char uuidBuffer[NSID_LENGTH];
   nsCString asciiString;
   ErrorResult rv;
 
@@ -691,9 +695,10 @@ MediaEngineWebRTCAudioCaptureSource::Res
 uint32_t
 MediaEngineWebRTCAudioCaptureSource::GetBestFitnessDistance(
     const nsTArray<const dom::MediaTrackConstraintSet*>& aConstraintSets,
     const nsString& aDeviceId)
 {
   // There is only one way of capturing audio for now, and it's always adequate.
   return 0;
 }
+
 }