Bug 1404977 - Part 7 - Make each MediaEngineWebRTCMicrophoneSource independent. r?pehrsons
authorPaul Adenot <paul@paul.cx>
Mon, 30 Apr 2018 15:37:18 +0200
changeset 824648 366945e86efca27c4416a22f7591d190a5a90042
parent 824192 31daa50cd025087a9470ae78413f4beafb48427e
child 824649 2488a5f07872d837d65fc1816278213470dd54ae
push id 117965
push user paul@paul.cx
push date Tue, 31 Jul 2018 14:52:35 +0000
reviewers pehrsons
bugs 1404977
milestone 63.0a1
Bug 1404977 - Part 7 - Make each MediaEngineWebRTCMicrophoneSource independent. r?pehrsons MozReview-Commit-ID: 1RSsT6AV3iI
dom/media/webrtc/MediaEngineWebRTC.cpp
dom/media/webrtc/MediaEngineWebRTC.h
dom/media/webrtc/MediaEngineWebRTCAudio.cpp
--- a/dom/media/webrtc/MediaEngineWebRTC.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTC.cpp
@@ -201,18 +201,17 @@ MediaEngineWebRTC::EnumerateMicrophoneDe
           devices[i]->State(),
           NS_ConvertUTF16toUTF8(devices[i]->Name()).get(),
           devices[i]->DeviceID()));
 
     if (devices[i]->State() == CUBEB_DEVICE_STATE_ENABLED) {
       MOZ_ASSERT(devices[i]->Type() == CUBEB_DEVICE_TYPE_INPUT);
       RefPtr<MediaEngineSource> source =
         new MediaEngineWebRTCMicrophoneSource(
-          mEnumerator,
-          devices[i]->GetDeviceID().ref(),
+          devices[i],
           devices[i]->Name(),
           // Lie and provide the name as UUID
           NS_LossyConvertUTF16toASCII(devices[i]->Name()),
           devices[i]->MaxChannels(),
           mDelayAgnostic,
           mExtendedFilter);
       aDevices->AppendElement(source);
     }
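
The hunk above hands each source its own AudioDeviceInfo instead of the shared enumerator plus a device index, which is the core of making the sources independent. A minimal sketch of the resulting ownership, using the pass-RefPtr-by-value-then-move idiom the new constructor uses (the class is illustrative, not the real source):

// Illustrative only: each source holds a strong, const reference to its own
// device descriptor; nothing is shared between two microphone sources.
class SourceSketch {
public:
  // Taking the RefPtr by value lets callers move in without refcount churn.
  explicit SourceSketch(RefPtr<AudioDeviceInfo> aInfo)
    : mDeviceInfo(std::move(aInfo))
  {
    MOZ_ASSERT(mDeviceInfo->DeviceID());
  }

  CubebUtils::AudioDeviceID DeviceID() const
  {
    return mDeviceInfo->DeviceID(); // const member: safe to read on any thread
  }

private:
  const RefPtr<AudioDeviceInfo> mDeviceInfo;
};
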
--- a/dom/media/webrtc/MediaEngineWebRTC.h
+++ b/dom/media/webrtc/MediaEngineWebRTC.h
@@ -195,26 +195,26 @@ private:
   Mutex mMutex;
   RefPtr<MediaEngineWebRTCMicrophoneSource> mAudioSource;
 };
 
 class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource,
                                           public AudioDataListenerInterface
 {
 public:
-  MediaEngineWebRTCMicrophoneSource(mozilla::AudioInput* aAudioInput,
-                                    int aIndex,
-                                    const char* name,
-                                    const char* uuid,
+  MediaEngineWebRTCMicrophoneSource(RefPtr<AudioDeviceInfo> aInfo,
+                                    const nsString& name,
+                                    const nsCString& uuid,
+                                    uint32_t maxChannelCount,
                                     bool aDelayAgnostic,
                                     bool aExtendedFilter);
 
   bool RequiresSharing() const override
   {
-    return true;
+    return false;
   }
 
   nsString GetName() const override;
   nsCString GetUUID() const override;
 
   nsresult Allocate(const dom::MediaTrackConstraints &aConstraints,
                     const MediaEnginePrefs& aPrefs,
                     const nsString& aDeviceId,
@@ -251,16 +251,21 @@ public:
                         AudioDataValue* aBuffer, size_t aFrames,
                         TrackRate aRate, uint32_t aChannels) override;
   void NotifyInputData(MediaStreamGraph* aGraph,
                        const AudioDataValue* aBuffer, size_t aFrames,
                        TrackRate aRate, uint32_t aChannels) override;
 
   void DeviceChanged() override;
 
+  uint32_t RequestedInputChannelCount(MediaStreamGraphImpl* aGraph) override
+  {
+    return GetRequestedInputChannelCount(aGraph);
+  }
+
   dom::MediaSourceEnum GetMediaSource() const override
   {
     return dom::MediaSourceEnum::Microphone;
   }
 
   nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override
   {
     return NS_ERROR_NOT_IMPLEMENTED;
@@ -364,58 +369,62 @@ private:
                            size_t aFrames,
                            TrackRate aRate,
                            uint32_t aChannels);
 
 
  // This is true when all processing is disabled; we can skip
  // packetization, resampling and other processing passes.
   // Graph thread only.
-  bool PassThrough() const;
+  bool PassThrough(MediaStreamGraphImpl* aGraphImpl) const;
 
   // Graph thread only.
   void SetPassThrough(bool aPassThrough);
+  uint32_t GetRequestedInputChannelCount(MediaStreamGraphImpl* aGraphImpl);
+  void SetRequestedInputChannelCount(uint32_t aRequestedInputChannelCount);
 
   // Owning thread only.
   RefPtr<WebRTCAudioDataListener> mListener;
 
-  // Note: shared across all microphone sources. Owning thread only.
-  static int sChannelsOpen;
+  // Can be shared on any thread.
+  const RefPtr<AudioDeviceInfo> mDeviceInfo;
 
-  const RefPtr<mozilla::AudioInput> mAudioInput;
   const UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
 
   // accessed from the GraphDriver thread except for deletion.
   nsAutoPtr<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
   nsAutoPtr<AudioPacketizer<AudioDataValue, float>> mPacketizerOutput;
 
   // mMutex protects some of our members off the owning thread.
   Mutex mMutex;
 
   // We append an allocation in Allocate() and remove it in Deallocate().
   // Both the array and the Allocation members are modified under mMutex on
   // the owning thread. Accessed under one of the two.
   nsTArray<Allocation> mAllocations;
 
-  // Current state of the shared resource for this source.
-  // Set under mMutex on the owning thread. Accessed under one of the two
-  MediaEngineSourceState mState = kReleased;
+  // Current state of this source. Written on the owning thread, read on
+  // either the owning thread or the MSG thread.
+  Atomic<MediaEngineSourceState> mState;
 
-  int mCapIndex;
   bool mDelayAgnostic;
   bool mExtendedFilter;
   bool mStarted;
 
   const nsString mDeviceName;
   const nsCString mDeviceUUID;
 
   // The current settings for the underlying device.
   // Member access is main thread only after construction.
   const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackSettings>> mSettings;
 
+  // The number of channels asked for by content, after clamping to the range
+  // of legal channel counts for this particular device. This is the number of
+  // channels of the input buffer passed as a parameter in NotifyInputData.
+  uint32_t mRequestedInputChannelCount;
   uint64_t mTotalFrames;
   uint64_t mLastLogFrames;
 
   // mSkipProcessing is true if none of the processing passes are enabled,
   // because of prefs or constraints. This allows simply copying the audio into
   // the MSG, skipping resampling and the whole webrtc.org code.
   // This is read and written to only on the MSG thread.
   bool mSkipProcessing;
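
Two threading changes in this header are worth calling out: mDeviceInfo becomes a const RefPtr readable from any thread, and mState becomes an Atomic because the MSG thread now reads it (in GetRequestedInputChannelCount) while the owning thread writes it. A minimal sketch of that reader/writer split, assuming mozilla::Atomic semantics as used in the patch (the struct below is illustrative):

// Minimal sketch: the owning thread writes, the graph thread reads, no lock.
struct StateSketch {
  Atomic<MediaEngineSourceState> mState; // set to kReleased in the ctor body

  void OwningThreadAllocate() { mState = kAllocated; } // owning thread only
  void OwningThreadRelease()  { mState = kReleased; }  // owning thread only

  uint32_t GraphThreadRequestedChannels(uint32_t aRequested)
  {
    // Real-time safe read, no mMutex on the audio path. A released source
    // reports 0 so it does not constrain the graph's channel count decision.
    return mState == kReleased ? 0 : aRequested;
  }
};
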
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -101,59 +101,61 @@ WebRTCAudioDataListener::Shutdown()
 {
   MutexAutoLock lock(mMutex);
   mAudioSource = nullptr;
 }
 
 /**
  * WebRTC Microphone MediaEngineSource.
  */
-int MediaEngineWebRTCMicrophoneSource::sChannelsOpen = 0;
 
 MediaEngineWebRTCMicrophoneSource::Allocation::Allocation(
     const RefPtr<AllocationHandle>& aHandle)
   : mHandle(aHandle)
 {}
 
 MediaEngineWebRTCMicrophoneSource::Allocation::~Allocation() = default;
 
 MediaEngineWebRTCMicrophoneSource::MediaEngineWebRTCMicrophoneSource(
-    mozilla::AudioInput* aAudioInput,
-    int aIndex,
-    const char* aDeviceName,
-    const char* aDeviceUUID,
+    RefPtr<AudioDeviceInfo> aInfo,
+    const nsString& aDeviceName,
+    const nsCString& aDeviceUUID,
+    uint32_t aMaxChannelCount,
     bool aDelayAgnostic,
     bool aExtendedFilter)
-  : mAudioInput(aAudioInput)
+  : mDeviceInfo(std::move(aInfo))
   , mAudioProcessing(AudioProcessing::Create())
   , mMutex("WebRTCMic::Mutex")
-  , mCapIndex(aIndex)
   , mDelayAgnostic(aDelayAgnostic)
   , mExtendedFilter(aExtendedFilter)
   , mStarted(false)
-  , mDeviceName(NS_ConvertUTF8toUTF16(aDeviceName))
+  , mDeviceName(aDeviceName)
   , mDeviceUUID(aDeviceUUID)
   , mSettings(
       new nsMainThreadPtrHolder<media::Refcountable<dom::MediaTrackSettings>>(
         "MediaEngineWebRTCMicrophoneSource::mSettings",
         new media::Refcountable<dom::MediaTrackSettings>(),
         // Non-strict means it won't assert main thread for us.
         // It would be great if it did but we're already on the media thread.
         /* aStrict = */ false))
+  , mRequestedInputChannelCount(aMaxChannelCount)
   , mTotalFrames(0)
   , mLastLogFrames(0)
   , mSkipProcessing(false)
   , mInputDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100)
 {
-  MOZ_ASSERT(aAudioInput);
+  MOZ_ASSERT(mDeviceInfo->DeviceID());
+
+  // We'll init lazily as needed
   mSettings->mEchoCancellation.Construct(0);
   mSettings->mAutoGainControl.Construct(0);
   mSettings->mNoiseSuppression.Construct(0);
   mSettings->mChannelCount.Construct(0);
-  // We'll init lazily as needed
+
+  mState = kReleased;
 }
 
 nsString
 MediaEngineWebRTCMicrophoneSource::GetName() const
 {
   return mDeviceName;
 }
 
@@ -421,125 +423,143 @@ MediaEngineWebRTCMicrophoneSource::Updat
   AssertIsOnOwningThread();
 
   FlattenedConstraints c(aNetConstraints);
 
   MediaEnginePrefs prefs = aPrefs;
   prefs.mAecOn = c.mEchoCancellation.Get(prefs.mAecOn);
   prefs.mAgcOn = c.mAutoGainControl.Get(prefs.mAgcOn);
   prefs.mNoiseOn = c.mNoiseSuppression.Get(prefs.mNoiseOn);
-  uint32_t maxChannels = 1;
-  if (mAudioInput->GetMaxAvailableChannels(maxChannels) != 0) {
-    return NS_ERROR_FAILURE;
-  }
-  // Check channelCount violation
-  if (static_cast<int32_t>(maxChannels) < c.mChannelCount.mMin ||
-      static_cast<int32_t>(maxChannels) > c.mChannelCount.mMax) {
+
+  // Determine the channel count to use for this source. Three factors are at
+  // play here: the device's capabilities, the constraints passed in by
+  // content, and a pref that can force the value (for testing).
+  int32_t maxChannels = mDeviceInfo->MaxChannels();
+
+  // First, check for a channelCount violation wrt the constraints. Fail if
+  // the device can't satisfy them.
+  if (c.mChannelCount.mMin > maxChannels) {
     *aOutBadConstraint = "channelCount";
     return NS_ERROR_FAILURE;
   }
-  // Clamp channelCount to a valid value
+  // A pref can force the channel count to use. If the pref has a value of zero
+  // or lower, it has no effect.
   if (prefs.mChannels <= 0) {
-    prefs.mChannels = static_cast<int32_t>(maxChannels);
+    prefs.mChannels = maxChannels;
   }
+
+  // Get the number of channels asked for by content, and clamp it between the
+  // pref and the maximum number of channels that the device supports.
   prefs.mChannels = c.mChannelCount.Get(std::min(prefs.mChannels,
-                                        static_cast<int32_t>(maxChannels)));
-  // Clamp channelCount to a valid value
-  prefs.mChannels = std::max(1, std::min(prefs.mChannels, static_cast<int32_t>(maxChannels)));
+                                        maxChannels));
+  prefs.mChannels = std::max(1, std::min(prefs.mChannels, maxChannels));
 
   LOG(("Audio config: aec: %d, agc: %d, noise: %d, channels: %d",
       prefs.mAecOn ? prefs.mAec : -1,
       prefs.mAgcOn ? prefs.mAgc : -1,
       prefs.mNoiseOn ? prefs.mNoise : -1,
       prefs.mChannels));
 
   switch (mState) {
     case kReleased:
       MOZ_ASSERT(aHandle);
-      if (sChannelsOpen != 0) {
-        // Until we fix (or wallpaper) support for multiple mic input
-        // (Bug 1238038) fail allocation for a second device
-        return NS_ERROR_FAILURE;
-      }
-      if (mAudioInput->SetRecordingDevice(mCapIndex)) {
-         return NS_ERROR_FAILURE;
-      }
-      mAudioInput->SetUserChannelCount(prefs.mChannels);
       {
         MutexAutoLock lock(mMutex);
         mState = kAllocated;
       }
-      sChannelsOpen++;
-      LOG(("Audio device %d allocated", mCapIndex));
-      {
-        // Update with the actual applied channelCount in order
-        // to store it in settings.
-        uint32_t channelCount = 0;
-        mAudioInput->GetChannelCount(channelCount);
-        MOZ_ASSERT(channelCount > 0);
-        prefs.mChannels = channelCount;
-      }
+      LOG(("Audio device %s allocated", NS_ConvertUTF16toUTF8(mDeviceInfo->Name()).get()));
       break;
 
     case kStarted:
     case kStopped:
-      if (prefs.mChannels != mNetPrefs.mChannels) {
-        // If the channel count changed, tell the MSG to open a new driver with
-        // the correct channel count.
-        MOZ_ASSERT(!mAllocations.IsEmpty());
-        RefPtr<SourceMediaStream> stream;
-        for (const Allocation& allocation : mAllocations) {
-          if (allocation.mStream && allocation.mStream->GraphImpl()) {
-            stream = allocation.mStream;
-            break;
-          }
-        }
-        MOZ_ASSERT(stream);
-
-        mAudioInput->SetUserChannelCount(prefs.mChannels);
-        // Get validated number of channel
-        uint32_t channelCount = 0;
-        mAudioInput->GetChannelCount(channelCount);
-        MOZ_ASSERT(channelCount > 0 && mNetPrefs.mChannels > 0);
-        if (!stream->OpenNewAudioCallbackDriver(mListener)) {
-          MOZ_LOG(GetMediaManagerLog(), LogLevel::Error, ("Could not open a new AudioCallbackDriver for input"));
-          return NS_ERROR_FAILURE;
-        }
+      if (prefs == mNetPrefs) {
+        LOG(("UpdateSingleSource: new prefs for %s are the same as the current prefs, returning.",
+             NS_ConvertUTF16toUTF8(mDeviceName).get()));
+        return NS_OK;
       }
       break;
 
     default:
-      LOG(("Audio device %d in ignored state %d", mCapIndex, mState));
+      LOG(("Audio device %s in ignored state %d", NS_ConvertUTF16toUTF8(mDeviceInfo->Name()).get(), MediaEngineSourceState(mState)));
       break;
   }
 
-  if (MOZ_LOG_TEST(GetMediaManagerLog(), LogLevel::Debug)) {
-    if (mAllocations.IsEmpty()) {
-      LOG(("Audio device %d reallocated", mCapIndex));
-    } else {
-      LOG(("Audio device %d allocated shared", mCapIndex));
-    }
-  }
-
-  if (sChannelsOpen > 0) {
+  if (mState != kReleased) {
     UpdateAGCSettingsIfNeeded(prefs.mAgcOn, static_cast<AgcModes>(prefs.mAgc));
     UpdateNSSettingsIfNeeded(prefs.mNoiseOn, static_cast<NsModes>(prefs.mNoise));
     UpdateAECSettingsIfNeeded(prefs.mAecOn, static_cast<EcModes>(prefs.mAec));
 
     webrtc::Config config;
     config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(mExtendedFilter));
     config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(mDelayAgnostic));
     mAudioProcessing->SetExtraOptions(config);
   }
   mNetPrefs = prefs;
   return NS_OK;
 }
 
 #undef HANDLE_APM_ERROR
 
+bool
+MediaEngineWebRTCMicrophoneSource::PassThrough(MediaStreamGraphImpl* aGraph) const
+{
+  MOZ_ASSERT(aGraph->CurrentDriver()->OnThread());
+  return mSkipProcessing;
+}
+
+void
+MediaEngineWebRTCMicrophoneSource::SetPassThrough(bool aPassThrough)
+{
+  {
+    MutexAutoLock lock(mMutex);
+    if (mAllocations.IsEmpty()) {
+    // This can happen, for now, because we mix mutable shared state with
+    // linearization via the message queue. This is temporary.
+      return;
+    }
+
+    // mStream is always valid because it's set right before ::Start is called.
+    // SetPassThrough cannot be called before that, because it's running on the
+    // graph thread, and this cannot happen before the source has been started.
+    MOZ_ASSERT(mAllocations.Length() == 1 &&
+               mAllocations[0].mStream &&
+               mAllocations[0].mStream->GraphImpl()->CurrentDriver()->OnThread(),
+               "Wrong calling pattern, don't call this before ::SetTrack.");
+  }
+  mSkipProcessing = aPassThrough;
+}
+
+uint32_t
+MediaEngineWebRTCMicrophoneSource::GetRequestedInputChannelCount(MediaStreamGraphImpl* aGraphImpl)
+{
+  if (mState == kReleased) {
+    // This source has been released and is waiting for collection. Simply
+    // return 0; this source won't contribute to the channel count decision.
+    // Again, this is temporary.
+    return 0;
+  }
+
+  return mRequestedInputChannelCount;
+}
+
+void
+MediaEngineWebRTCMicrophoneSource::SetRequestedInputChannelCount(
+  uint32_t aRequestedInputChannelCount)
+{
+  MutexAutoLock lock(mMutex);
+  if (mAllocations.IsEmpty()) {
+    return;
+  }
+  MOZ_ASSERT(mAllocations.Length() == 1 &&
+             mAllocations[0].mStream &&
+             mAllocations[0].mStream->GraphImpl()->CurrentDriver()->OnThread(),
+             "Wrong calling pattern, don't call this before ::SetTrack.");
+  mRequestedInputChannelCount = aRequestedInputChannelCount;
+  mAllocations[0].mStream->GraphImpl()->ReevaluateInputDevice();
+}
+
 void
 MediaEngineWebRTCMicrophoneSource::ApplySettings(const MediaEnginePrefs& aPrefs,
                                                  RefPtr<MediaStreamGraphImpl> aGraph)
 {
   AssertIsOnOwningThread();
   MOZ_DIAGNOSTIC_ASSERT(aGraph);
 
   RefPtr<MediaEngineWebRTCMicrophoneSource> that = this;
@@ -547,35 +567,41 @@ MediaEngineWebRTCMicrophoneSource::Apply
     that->mSettings->mEchoCancellation.Value() = aPrefs.mAecOn;
     that->mSettings->mAutoGainControl.Value() = aPrefs.mAgcOn;
     that->mSettings->mNoiseSuppression.Value() = aPrefs.mNoiseOn;
     that->mSettings->mChannelCount.Value() = aPrefs.mChannels;
 
     class Message : public ControlMessage {
     public:
       Message(MediaEngineWebRTCMicrophoneSource* aSource,
-              bool aPassThrough)
+              bool aPassThrough,
+              uint32_t aRequestedInputChannelCount)
         : ControlMessage(nullptr)
         , mMicrophoneSource(aSource)
         , mPassThrough(aPassThrough)
+        , mRequestedInputChannelCount(aRequestedInputChannelCount)
         {}
 
       void Run() override
       {
         mMicrophoneSource->SetPassThrough(mPassThrough);
+        mMicrophoneSource->SetRequestedInputChannelCount(mRequestedInputChannelCount);
       }
 
     protected:
       RefPtr<MediaEngineWebRTCMicrophoneSource> mMicrophoneSource;
       bool mPassThrough;
+      uint32_t mRequestedInputChannelCount;
     };
 
     bool passThrough = !(aPrefs.mAecOn || aPrefs.mAgcOn || aPrefs.mNoiseOn);
     if (graph) {
-      graph->AppendMessage(MakeUnique<Message>(that, passThrough));
+      graph->AppendMessage(MakeUnique<Message>(that,
+                                               passThrough,
+                                               aPrefs.mChannels));
     }
 
     return NS_OK;
   }));
 }
 
 nsresult
 MediaEngineWebRTCMicrophoneSource::Allocate(const dom::MediaTrackConstraints &aConstraints,
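
The Message above ships the new values to the graph thread, and SetRequestedInputChannelCount ends with ReevaluateInputDevice(), the hook through which the graph reconciles sources that ask for different channel counts. A hedged sketch of one plausible reconciliation, consistent with released sources returning 0 from RequestedInputChannelCount (the function below is illustrative, not the real MediaStreamGraphImpl code):

// Illustrative only: choose the input device channel count as the maximum
// over all audio input listeners; released sources report 0 and drop out.
uint32_t PickInputChannelCount(
  const nsTArray<RefPtr<AudioDataListener>>& aListeners,
  MediaStreamGraphImpl* aGraph)
{
  uint32_t channels = 0;
  for (const auto& listener : aListeners) {
    channels = std::max(channels, listener->RequestedInputChannelCount(aGraph));
  }
  return std::max(channels, 1u); // never open a zero-channel input
}
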
@@ -612,60 +638,50 @@ MediaEngineWebRTCMicrophoneSource::Deall
 {
   AssertIsOnOwningThread();
 
   size_t i = mAllocations.IndexOf(aHandle, 0, AllocationHandleComparator());
   MOZ_DIAGNOSTIC_ASSERT(i != mAllocations.NoIndex);
   MOZ_DIAGNOSTIC_ASSERT(!mAllocations[i].mEnabled,
                         "Source should be stopped for the track before removing");
 
-  LOG(("Mic source %p allocation %p Deallocate()", this, aHandle.get()));
-
   if (mAllocations[i].mStream && IsTrackIDExplicit(mAllocations[i].mTrackID)) {
     mAllocations[i].mStream->EndTrack(mAllocations[i].mTrackID);
   }
 
   {
     MutexAutoLock lock(mMutex);
     mAllocations.RemoveElementAt(i);
   }
 
   if (mAllocations.IsEmpty()) {
    // If empty, no callbacks to deliver data should be occurring
     MOZ_ASSERT(mState != kReleased, "Source not allocated");
     MOZ_ASSERT(mState != kStarted, "Source not stopped");
-    MOZ_ASSERT(sChannelsOpen > 0);
-    --sChannelsOpen;
 
     MutexAutoLock lock(mMutex);
     mState = kReleased;
-    LOG(("Audio device %d deallocated", mCapIndex));
+    LOG(("Audio device %s deallocated", NS_ConvertUTF16toUTF8(mDeviceName).get()));
   } else {
-    LOG(("Audio device %d deallocated but still in use", mCapIndex));
+    LOG(("Audio device %s deallocated but still in use", NS_ConvertUTF16toUTF8(mDeviceName).get()));
   }
   return NS_OK;
 }
 
 nsresult
 MediaEngineWebRTCMicrophoneSource::SetTrack(const RefPtr<const AllocationHandle>& aHandle,
                                             const RefPtr<SourceMediaStream>& aStream,
                                             TrackID aTrackID,
                                             const PrincipalHandle& aPrincipal)
 {
   AssertIsOnOwningThread();
   MOZ_ASSERT(aStream);
   MOZ_ASSERT(IsTrackIDExplicit(aTrackID));
 
-  LOG(("Mic source %p allocation %p SetTrack() stream=%p, track=%" PRId32,
-       this, aHandle.get(), aStream.get(), aTrackID));
-
-  // Until we fix bug 1400488 we need to block a second tab (OuterWindow)
-  // from opening an already-open device.  If it's the same tab, they
-  // will share a Graph(), and we can allow it.
-  if (!mAllocations.IsEmpty() &&
+  if (mAllocations.Length() == 1 &&
       mAllocations[0].mStream &&
       mAllocations[0].mStream->Graph() != aStream->Graph()) {
     return NS_ERROR_NOT_AVAILABLE;
   }
 
   size_t i = mAllocations.IndexOf(aHandle, 0, AllocationHandleComparator());
   MOZ_DIAGNOSTIC_ASSERT(i != mAllocations.NoIndex);
   MOZ_ASSERT(!mAllocations[i].mStream);
@@ -692,28 +708,35 @@ MediaEngineWebRTCMicrophoneSource::SetTr
   LOG(("Stream %p registered for microphone capture", aStream.get()));
   return NS_OK;
 }
 
 nsresult
 MediaEngineWebRTCMicrophoneSource::Start(const RefPtr<const AllocationHandle>& aHandle)
 {
   AssertIsOnOwningThread();
-
-  if (sChannelsOpen == 0) {
-    return NS_ERROR_FAILURE;
+  if (mState == kStarted) {
+    return NS_OK;
   }
 
-  LOG(("Mic source %p allocation %p Start()", this, aHandle.get()));
+  MOZ_ASSERT(mState == kAllocated || mState == kStopped);
 
   size_t i = mAllocations.IndexOf(aHandle, 0, AllocationHandleComparator());
   MOZ_DIAGNOSTIC_ASSERT(i != mAllocations.NoIndex,
                         "Can't start track that hasn't been added");
   Allocation& allocation = mAllocations[i];
 
+  CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID();
+  if (allocation.mStream->GraphImpl()->InputDeviceID() &&
+      allocation.mStream->GraphImpl()->InputDeviceID() != deviceID) {
+    // For now, we only allow opening a single audio input device per document,
+    // because we can only have one MSG per document.
+    return NS_ERROR_FAILURE;
+  }
+
   MOZ_ASSERT(!allocation.mEnabled, "Source already started");
   {
     // This spans setting both the enabled state and mState.
     MutexAutoLock lock(mMutex);
     allocation.mEnabled = true;
 
 #ifdef DEBUG
     // Ensure that callback-tracking state is reset when callbacks start coming.
@@ -724,18 +747,17 @@ MediaEngineWebRTCMicrophoneSource::Start
 
     if (!mListener) {
       mListener = new WebRTCAudioDataListener(this);
     }
 
     // Make sure logger starts before capture
     AsyncLatencyLogger::Get(true);
 
-    // Must be *before* StartSend() so it will notice we selected external input (full_duplex)
-    mAudioInput->StartRecording(allocation.mStream, mListener);
+    allocation.mStream->OpenAudioInput(deviceID, mListener);
 
     MOZ_ASSERT(mState != kReleased);
     mState = kStarted;
   }
 
   ApplySettings(mNetPrefs, allocation.mStream->GraphImpl());
 
   return NS_OK;
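
Start() and Stop() now talk to the SourceMediaStream directly: OpenAudioInput on start, CloseAudioInput on stop, replacing the shared mozilla::AudioInput object. A hedged sketch of the pairing (the wrapper functions are illustrative; the parameter types approximate the calls made in this patch):

// Illustrative pairing of the calls introduced above: the stream, not a
// shared AudioInput, owns opening and closing the device.
void StartCapture(SourceMediaStream* aStream,
                  CubebUtils::AudioDeviceID aID,
                  WebRTCAudioDataListener* aListener)
{
  aStream->OpenAudioInput(aID, aListener);
}

void StopCapture(SourceMediaStream* aStream,
                 CubebUtils::AudioDeviceID aID,
                 WebRTCAudioDataListener* aListener)
{
  // CloseAudioInput takes a Maybe<> so a caller can name the device to close.
  Maybe<CubebUtils::AudioDeviceID> id = Some(aID);
  aStream->CloseAudioInput(id, aListener);
}
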
@@ -758,17 +780,19 @@ MediaEngineWebRTCMicrophoneSource::Stop(
     return NS_OK;
   }
 
   {
     // This spans setting both the enabled state and mState.
     MutexAutoLock lock(mMutex);
     allocation.mEnabled = false;
 
-    mAudioInput->StopRecording(allocation.mStream);
+    CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID();
+    Maybe<CubebUtils::AudioDeviceID> id = Some(deviceID);
+    allocation.mStream->CloseAudioInput(id, mListener);
 
     if (HasEnabledTrack()) {
       // Another track is keeping us from stopping
       return NS_OK;
     }
 
     MOZ_ASSERT(mState == kStarted, "Should be started when stopping");
     mState = kStopped;
@@ -830,17 +854,18 @@ MediaEngineWebRTCMicrophoneSource::Pull(
       return;
     }
 
     LOG_FRAMES(("Pulling %" PRId64 " frames of silence for allocation %p",
                 delta, mAllocations[i].mHandle.get()));
 
     // This assertion fails when we append silence here in the same iteration
     // as there were real audio samples already appended by the audio callback.
-    // Note that this is exempted until live samples and a subsequent chunk of silence have been appended to the track. This will cover cases like:
+    // Note that this is exempted until live samples and a subsequent chunk of
+    // silence have been appended to the track. This will cover cases like:
     // - After Start(), there is silence (maybe multiple times) appended before
     //   the first audio callback.
     // - After Start(), there is real data (maybe multiple times) appended
     //   before the first graph iteration.
     // And other combinations of order of audio sample sources.
     MOZ_ASSERT_IF(
       mAllocations[i].mEnabled &&
       mAllocations[i].mLiveFramesAppended &&
@@ -953,17 +978,17 @@ MediaEngineWebRTCMicrophoneSource::Notif
 // Only called if we're not in passthrough mode
 void
 MediaEngineWebRTCMicrophoneSource::PacketizeAndProcess(MediaStreamGraph* aGraph,
                                                        const AudioDataValue* aBuffer,
                                                        size_t aFrames,
                                                        TrackRate aRate,
                                                        uint32_t aChannels)
 {
-  MOZ_ASSERT(!PassThrough(), "This should be bypassed when in PassThrough mode.");
+  MOZ_ASSERT(!PassThrough(aGraph), "This should be bypassed when in PassThrough mode.");
   size_t offset = 0;
 
   if (!mPacketizerInput ||
       mPacketizerInput->PacketSize() != aRate/100u ||
       mPacketizerInput->Channels() != aChannels) {
     // It's ok to drop the audio still in the packetizer here.
     mPacketizerInput =
       new AudioPacketizer<AudioDataValue, float>(aRate/100, aChannels);
@@ -1075,43 +1100,31 @@ MediaEngineWebRTCMicrophoneSource::Packe
                            processedOutputChannelPointersConst,
                            mPacketizerInput->PacketSize(),
                            allocation.mPrincipal);
       allocation.mStream->AppendToTrack(allocation.mTrackID, &segment);
     }
   }
 }
 
-bool
-MediaEngineWebRTCMicrophoneSource::PassThrough() const
-{
-  return mSkipProcessing;
-}
-
-void
-MediaEngineWebRTCMicrophoneSource::SetPassThrough(bool aPassThrough)
-{
-  mSkipProcessing = aPassThrough;
-}
-
 template<typename T>
 void
 MediaEngineWebRTCMicrophoneSource::InsertInGraph(const T* aBuffer,
                                                  size_t aFrames,
                                                  uint32_t aChannels)
 {
   MutexAutoLock lock(mMutex);
 
   if (mState != kStarted) {
     return;
   }
 
   if (MOZ_LOG_TEST(AudioLogModule(), LogLevel::Debug)) {
     mTotalFrames += aFrames;
-    if (!mAllocations.IsEmpty() && mAllocations[0].mStream &&
+    if (mAllocations[0].mStream &&
         mTotalFrames > mLastLogFrames +
                        mAllocations[0].mStream->GraphRate()) { // ~ 1 second
       MOZ_LOG(AudioLogModule(), LogLevel::Debug,
               ("%p: Inserting %zu samples into graph, total frames = %" PRIu64,
                (void*)this, aFrames, mTotalFrames));
       mLastLogFrames = mTotalFrames;
     }
   }
@@ -1191,19 +1204,32 @@ MediaEngineWebRTCMicrophoneSource::Inser
 void
 MediaEngineWebRTCMicrophoneSource::NotifyInputData(MediaStreamGraph* aGraph,
                                                    const AudioDataValue* aBuffer,
                                                    size_t aFrames,
                                                    TrackRate aRate,
                                                    uint32_t aChannels)
 {
   TRACE_AUDIO_CALLBACK();
+
+  {
+    MutexAutoLock lock(mMutex);
+    if (mAllocations.IsEmpty()) {
+      // This can happen because mAllocations is not yet using message passing, and
+      // is access both on the media manager thread and the MSG thread. This is to
+      // be fixed soon.
+      // When deallocating, the listener is removed via message passing, while the
+      // allocation is removed immediately, so there can be a few iterations where
+      // we need to return early here.
+      return;
+    }
+  }
   // If some processing is necessary, packetize and insert in the WebRTC.org
   // code. Otherwise, directly insert the mic data in the MSG, bypassing all processing.
-  if (PassThrough()) {
+  if (PassThrough(aGraph)) {
     InsertInGraph<AudioDataValue>(aBuffer, aFrames, aChannels);
   } else {
     PacketizeAndProcess(aGraph, aBuffer, aFrames, aRate, aChannels);
   }
 }
 
 #define ResetProcessingIfNeeded(_processing)                        \
 do {                                                                \