Bug 1271585 - Part 2 - Synchronously insert audio frames from the microphone in the MSG if possible. r?pehrsons
author Paul Adenot <paul@paul.cx>
Mon, 30 May 2016 11:34:07 +0200
changeset 372771 777f192625fa09f77e2fa6e839ceca2c6d95e5a9
parent 372770 4e02a6e19d817a1917d176ef59c43534c368190c
child 522244 15ad652797d03c12f8019d2f1a91710c89d97e54
push id 19593
push user paul@paul.cx
push date Mon, 30 May 2016 09:34:31 +0000
reviewers pehrsons
bugs 1271585
milestone 49.0a1
Bug 1271585 - Part 2 - Synchronously insert audio frames from the microphone in the MSG if possible. r?pehrsons MozReview-Commit-ID: Fm2woel600v
dom/media/webrtc/MediaEngine.h
dom/media/webrtc/MediaEngineWebRTC.h
dom/media/webrtc/MediaEngineWebRTCAudio.cpp
--- a/dom/media/webrtc/MediaEngine.h
+++ b/dom/media/webrtc/MediaEngine.h
@@ -53,16 +53,20 @@ public:
   static const int DEFAULT_169_VIDEO_WIDTH = 1280;
   static const int DEFAULT_169_VIDEO_HEIGHT = 720;
 
 #ifndef MOZ_B2G
   static const int DEFAULT_SAMPLE_RATE = 32000;
 #else
   static const int DEFAULT_SAMPLE_RATE = 16000;
 #endif
+  // This allows using whatever rate the graph is running at for the
+  // MediaStreamTrack. This is useful for microphone data, as we know it's
+  // already at the correct rate for insertion in the MSG.
+  static const int USE_GRAPH_RATE = 0;
 
   /* Populate an array of video sources in the nsTArray. Also include devices
    * that are currently unavailable. */
   virtual void EnumerateVideoDevices(dom::MediaSourceEnum,
                                      nsTArray<RefPtr<MediaEngineVideoSource> >*) = 0;
 
   /* Populate an array of audio sources in the nsTArray. Also include devices
    * that are currently unavailable. */
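An aside on the USE_GRAPH_RATE sentinel added above: a rate of 0 means "ask the stream for its graph's rate once the stream is known". Below is a minimal standalone sketch of that resolution step; it is not Gecko code, and FakeStream and ResolveRate are hypothetical stand-ins for SourceMediaStream and the logic Start() gains further down.

// Standalone sketch, assuming a hypothetical FakeStream in place of
// SourceMediaStream; mirrors the sentinel resolution done in Start() below.
#include <cstdint>
#include <iostream>

static const uint32_t USE_GRAPH_RATE = 0; // same sentinel value as above

struct FakeStream {
  uint32_t GraphRate() const { return 48000; } // e.g. a 48 kHz graph
};

static uint32_t ResolveRate(uint32_t aConfigured, const FakeStream& aStream)
{
  // Defer to the graph's own rate when the sentinel is set, so microphone
  // data can be appended to the track without resampling.
  return aConfigured == USE_GRAPH_RATE ? aStream.GraphRate() : aConfigured;
}

int main()
{
  FakeStream stream;
  std::cout << ResolveRate(USE_GRAPH_RATE, stream) << std::endl; // 48000
  std::cout << ResolveRate(32000, stream) << std::endl;          // 32000
  return 0;
}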
--- a/dom/media/webrtc/MediaEngineWebRTC.h
+++ b/dom/media/webrtc/MediaEngineWebRTC.h
@@ -429,16 +429,17 @@ public:
     , mThread(aThread)
     , mCapIndex(aIndex)
     , mChannel(-1)
     , mNrAllocations(0)
     , mStarted(false)
     , mSampleFrequency(MediaEngine::DEFAULT_SAMPLE_RATE)
     , mPlayoutDelay(0)
     , mNullTransport(nullptr)
+    , mSkipProcessing(false)
   {
     MOZ_ASSERT(aVoiceEnginePtr);
     MOZ_ASSERT(aAudioInput);
     mDeviceName.Assign(NS_ConvertUTF8toUTF16(name));
     mDeviceUUID.Assign(uuid);
     mListener = new mozilla::WebRTCAudioDataListener(this);
     // We'll init lazily as needed
   }
@@ -510,16 +511,31 @@ protected:
 private:
   // These allocate/configure and release the channel
   bool AllocChannel();
   void FreeChannel();
   // These start/stop VoEBase and associated interfaces
   bool InitEngine();
   void DeInitEngine();
 
+  // This is true when all processing is disabled, so we can skip
+  // packetization, resampling and other processing passes.
+  bool PassThrough() {
+    return mSkipProcessing;
+  }
+  template<typename T>
+  void InsertInGraph(const T* aBuffer,
+                     size_t aFrames,
+                     uint32_t aChannels);
+  void PacketizeAndProcess(MediaStreamGraph* aGraph,
+                           const AudioDataValue* aBuffer,
+                           size_t aFrames,
+                           TrackRate aRate,
+                           uint32_t aChannels);
+
   webrtc::VoiceEngine* mVoiceEngine;
   RefPtr<mozilla::AudioInput> mAudioInput;
   RefPtr<WebRTCAudioDataListener> mListener;
 
   // Note: shared across all microphone sources - we don't want to Terminate()
   // the VoEBase until there are no active captures
   static int sChannelsOpen;
   static ScopedCustomReleasePtr<webrtc::VoEBase> mVoEBase;
@@ -550,16 +566,20 @@ private:
   nsCString mDeviceUUID;
 
   uint32_t mSampleFrequency;
   int32_t mPlayoutDelay;
 
   NullTransport *mNullTransport;
 
   nsTArray<int16_t> mInputBuffer;
+  // mSkipProcessing is true if none of the processing passes are enabled,
+  // because of prefs or constraints. This allows simply copying the audio into
+  // the MSG, skipping resampling and the whole webrtc.org code.
+  bool mSkipProcessing;
 };
 
 class MediaEngineWebRTC : public MediaEngine
 {
 public:
   explicit MediaEngineWebRTC(MediaEnginePrefs& aPrefs);
 
   // Clients should ensure to clean up video/audio sources
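The header above wires up the pass-through machinery (mSkipProcessing, PassThrough(), InsertInGraph, PacketizeAndProcess). As a hedged illustration of the rule Restart() applies below, here is a standalone sketch; ProcessingPrefs and SkipProcessing are hypothetical names, not Gecko APIs.

// Standalone sketch: pass-through is only possible when every processing
// pass (AEC, AGC, noise suppression) is disabled by prefs or constraints.
#include <cstdio>

struct ProcessingPrefs {
  bool mAec;
  bool mAgc;
  bool mNoise;
};

static bool SkipProcessing(const ProcessingPrefs& aPrefs)
{
  // Mirrors mSkipProcessing = !(aec_on || agc_on || noise_on) in the patch.
  return !(aPrefs.mAec || aPrefs.mAgc || aPrefs.mNoise);
}

int main()
{
  // All passes off: microphone data can be copied straight into the MSG.
  printf("%d\n", SkipProcessing({false, false, false})); // 1
  // Any enabled pass forces the full webrtc.org processing path.
  printf("%d\n", SkipProcessing({true, false, false}));  // 0
  return 0;
}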
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -293,16 +293,22 @@ MediaEngineWebRTCMicrophoneSource::Resta
     }
     if (0 != (error = mVoEProcessing->SetAgcStatus(agc_on, (webrtc::AgcModes) aPrefs.mAgc))) {
       LOG(("%s Error setting AGC Status: %d ",__FUNCTION__, error));
     }
     if (0 != (error = mVoEProcessing->SetNsStatus(noise_on, (webrtc::NsModes) aPrefs.mNoise))) {
       LOG(("%s Error setting NoiseSuppression Status: %d ",__FUNCTION__, error));
     }
   }
+
+  mSkipProcessing = !(aec_on || agc_on || noise_on);
+  if (mSkipProcessing) {
+    mSampleFrequency = MediaEngine::USE_GRAPH_RATE;
+  }
+
   return NS_OK;
 }
 
 nsresult
 MediaEngineWebRTCMicrophoneSource::Deallocate()
 {
   AssertIsOnOwningThread();
   --mNrAllocations;
@@ -339,16 +345,19 @@ MediaEngineWebRTCMicrophoneSource::Start
   {
     MonitorAutoLock lock(mMonitor);
     mSources.AppendElement(aStream);
     mPrincipalHandles.AppendElement(aPrincipalHandle);
     MOZ_ASSERT(mSources.Length() == mPrincipalHandles.Length());
   }
 
   AudioSegment* segment = new AudioSegment();
+  if (mSampleFrequency == MediaEngine::USE_GRAPH_RATE) {
+    mSampleFrequency = aStream->GraphRate();
+  }
   aStream->AddAudioTrack(aID, mSampleFrequency, 0, segment, SourceMediaStream::ADDTRACK_QUEUED);
 
   // XXX Make this based on the pref.
   aStream->RegisterForAudioMixing();
   LOG(("Start audio for stream %p", aStream));
 
   if (!mListener) {
     mListener = new mozilla::WebRTCAudioDataListener(this);
@@ -452,45 +461,105 @@ void
 MediaEngineWebRTCMicrophoneSource::NotifyOutputData(MediaStreamGraph* aGraph,
                                                     AudioDataValue* aBuffer,
                                                     size_t aFrames,
                                                     TrackRate aRate,
                                                     uint32_t aChannels)
 {
 }
 
+void
+MediaEngineWebRTCMicrophoneSource::PacketizeAndProcess(MediaStreamGraph* aGraph,
+                                                       const AudioDataValue* aBuffer,
+                                                       size_t aFrames,
+                                                       TrackRate aRate,
+                                                       uint32_t aChannels)
+{
+  // This will call Process() with data coming out of the AEC/NS/AGC/etc chain
+  if (!mPacketizer ||
+      mPacketizer->PacketSize() != aRate/100u ||
+      mPacketizer->Channels() != aChannels) {
+    // It's ok to drop the audio still in the packetizer here.
+    mPacketizer =
+      new AudioPacketizer<AudioDataValue, int16_t>(aRate/100, aChannels);
+  }
+
+  mPacketizer->Input(aBuffer, static_cast<uint32_t>(aFrames));
+
+  while (mPacketizer->PacketsAvailable()) {
+    uint32_t samplesPerPacket = mPacketizer->PacketSize() *
+                                mPacketizer->Channels();
+    if (mInputBuffer.Length() < samplesPerPacket) {
+      mInputBuffer.SetLength(samplesPerPacket);
+    }
+    int16_t* packet = mInputBuffer.Elements();
+    mPacketizer->Output(packet);
+
+    mVoERender->ExternalRecordingInsertData(packet, samplesPerPacket,
+                                            aRate, 0);
+  }
+}
+
+template<typename T>
+void
+MediaEngineWebRTCMicrophoneSource::InsertInGraph(const T* aBuffer,
+                                                 size_t aFrames,
+                                                 uint32_t aChannels)
+{
+  if (mState != kStarted) {
+    return;
+  }
+
+  uint32_t len = mSources.Length();
+  for (uint32_t i = 0; i < len; i++) {
+    if (!mSources[i]) {
+      continue;
+    }
+    RefPtr<SharedBuffer> buffer =
+      SharedBuffer::Create(aFrames * aChannels * sizeof(T));
+    PodCopy(static_cast<T*>(buffer->Data()),
+            aBuffer, aFrames * aChannels);
+
+    nsAutoPtr<AudioSegment> segment(new AudioSegment());
+    AutoTArray<const T*, 1> channels;
+    // XXX handle stereo
+    MOZ_ASSERT(aChannels == 1);
+    channels.AppendElement(static_cast<T*>(buffer->Data()));
+    segment->AppendFrames(buffer.forget(), channels, aFrames,
+                          mPrincipalHandles[i]);
+    TimeStamp insertTime;
+    segment->GetStartTime(insertTime);
+    // Make sure we include the stream and the track.
+    // The 0:1 is a flag to note when we've done the final insert for a given input block.
+    LogTime(AsyncLatencyLogger::AudioTrackInsertion,
+            LATENCY_STREAM_ID(mSources[i].get(), mTrackID),
+            (i+1 < len) ? 0 : 1, insertTime);
+
+    RUN_ON_THREAD(mThread,
+                  WrapRunnable(mSources[i], &SourceMediaStream::AppendToTrack,
+                               mTrackID, segment, (AudioSegment*)nullptr),
+                  NS_DISPATCH_NORMAL);
+  }
+}
+
 // Called back on GraphDriver thread!
 // Note this can be called back after ::Shutdown()
 void
 MediaEngineWebRTCMicrophoneSource::NotifyInputData(MediaStreamGraph* aGraph,
                                                    const AudioDataValue* aBuffer,
                                                    size_t aFrames,
                                                    TrackRate aRate,
                                                    uint32_t aChannels)
 {
-  // This will call Process() with data coming out of the AEC/NS/AGC/etc chain
-  if (!mPacketizer ||
-      mPacketizer->PacketSize() != aRate/100u ||
-      mPacketizer->Channels() != aChannels) {
-    // It's ok to drop the audio still in the packetizer here.
-    mPacketizer = new AudioPacketizer<AudioDataValue, int16_t>(aRate/100, aChannels);
-  }
-
-  mPacketizer->Input(aBuffer, static_cast<uint32_t>(aFrames));
-
-  while (mPacketizer->PacketsAvailable()) {
-    uint32_t samplesPerPacket = mPacketizer->PacketSize() *
-                                mPacketizer->Channels();
-    if (mInputBuffer.Length() < samplesPerPacket) {
-      mInputBuffer.SetLength(samplesPerPacket);
-    }
-    int16_t* packet = mInputBuffer.Elements();
-    mPacketizer->Output(packet);
-
-    mVoERender->ExternalRecordingInsertData(packet, samplesPerPacket, aRate, 0);
+  // If any processing is necessary, packetize the data and insert it into the
+  // webrtc.org code. Otherwise, insert the mic data directly into the MSG, bypassing all processing.
+  if (!PassThrough()) {
+    PacketizeAndProcess(aGraph, aBuffer, aFrames, aRate, aChannels);
+  } else {
+    InsertInGraph<AudioDataValue>(aBuffer, aFrames, aChannels);
   }
 }
 
 #define ResetProcessingIfNeeded(_processing)                        \
 do {                                                                \
   webrtc::_processing##Modes mode;                                  \
   int rv = mVoEProcessing->Get##_processing##Status(enabled, mode); \
   if (rv) {                                                         \
@@ -673,16 +742,17 @@ MediaEngineWebRTCMicrophoneSource::Shutd
 typedef int16_t sample;
 
 void
 MediaEngineWebRTCMicrophoneSource::Process(int channel,
                                            webrtc::ProcessingTypes type,
                                            sample *audio10ms, int length,
                                            int samplingFreq, bool isStereo)
 {
+  MOZ_ASSERT(!PassThrough(), "This should be bypassed when in PassThrough mode.");
   // On initial capture, throw away all far-end data except the most recent sample
   // since it's already irrelevant and we want to avoid confusing the AEC far-end
   // input code with "old" audio.
   if (!mStarted) {
     mStarted  = true;
     while (gFarendObserver->Size() > 1) {
       free(gFarendObserver->Pop()); // only call if size() > 0
     }
@@ -705,43 +775,16 @@ MediaEngineWebRTCMicrophoneSource::Proce
   }
 
   MonitorAutoLock lock(mMonitor);
   if (mState != kStarted)
     return;
 
-  uint32_t len = mSources.Length();
-  for (uint32_t i = 0; i < len; i++) {
-    RefPtr<SharedBuffer> buffer = SharedBuffer::Create(length * sizeof(sample));
-
-    sample* dest = static_cast<sample*>(buffer->Data());
-    memcpy(dest, audio10ms, length * sizeof(sample));
-
-    nsAutoPtr<AudioSegment> segment(new AudioSegment());
-    AutoTArray<const sample*,1> channels;
-    channels.AppendElement(dest);
-    segment->AppendFrames(buffer.forget(), channels, length,
-                          mPrincipalHandles[i]);
-    TimeStamp insertTime;
-    segment->GetStartTime(insertTime);
-
-    if (mSources[i]) {
-      // Make sure we include the stream and the track.
-      // The 0:1 is a flag to note when we've done the final insert for a given input block.
-      LogTime(AsyncLatencyLogger::AudioTrackInsertion, LATENCY_STREAM_ID(mSources[i].get(), mTrackID),
-              (i+1 < len) ? 0 : 1, insertTime);
-
-      // This is safe from any thread, and is safe if the track is Finished
-      // or Destroyed.
-      // Note: due to evil magic, the nsAutoPtr<AudioSegment>'s ownership transfers to
-      // the Runnable (AutoPtr<> = AutoPtr<>)
-      RUN_ON_THREAD(mThread, WrapRunnable(mSources[i], &SourceMediaStream::AppendToTrack,
-                                          mTrackID, segment, (AudioSegment *) nullptr),
-                    NS_DISPATCH_NORMAL);
-    }
-  }
+  // InsertInGraph iterates over the sources itself, so call it once.
+  MOZ_ASSERT(!isStereo);
+  InsertInGraph<int16_t>(audio10ms, length, 1);
 
   return;
 }
 
 void
 MediaEngineWebRTCAudioCaptureSource::GetName(nsAString &aName)
 {
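A final note on the packetization path kept for the processed case: PacketizeAndProcess cuts the input into aRate/100-frame packets, i.e. 10 ms of audio, the granularity the webrtc.org chain works in. A standalone sketch of that arithmetic follows, with SamplesPerPacket as a hypothetical helper rather than part of the patch.

// Standalone sketch of the 10 ms packet math used by PacketizeAndProcess.
#include <cstddef>
#include <cstdio>

static size_t SamplesPerPacket(size_t aRate, size_t aChannels)
{
  // aRate / 100 frames per packet (10 ms of audio), times the channel count.
  return (aRate / 100) * aChannels;
}

int main()
{
  printf("%zu\n", SamplesPerPacket(48000, 1)); // 480 samples per packet
  printf("%zu\n", SamplesPerPacket(44100, 2)); // 882 samples per packet
  return 0;
}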