Bug 1262753: P6. Perform downmixing in DecodeAudioDataSink. r?kinetik draft
author: Jean-Yves Avenard <jyavenard@mozilla.com>
date: Mon, 11 Apr 2016 21:16:17 +1000
changeset 352410 a354297a5ce5231a20a275457f63f38f25c01d4a
parent 352409 114f5055a774cd6d4b5d9572b9739b965be8a012
child 352411 9825d071d68c590fd6ab552fe20c998fa0478f65
push id: 15703
push user: bmo:jyavenard@mozilla.com
push date: Mon, 18 Apr 2016 07:44:49 +0000
reviewers: kinetik
bugs: 1262753
milestone: 48.0a1
Bug 1262753: P6. Perform downmixing in DecodeAudioDataSink. r?kinetik Rather than performing the audio processing a few frames at a time, we perform the operation when popping the audio data block. The only downmixing task left to AudioStream is to handle the force mono preference. Resampling is currently disabled. MozReview-Commit-ID: GNQA1h0xcp7
dom/media/AudioStream.cpp
dom/media/mediasink/DecodedAudioDataSink.cpp
dom/media/mediasink/DecodedAudioDataSink.h
--- a/dom/media/AudioStream.cpp
+++ b/dom/media/AudioStream.cpp
@@ -326,17 +326,17 @@ AudioStream::Init(uint32_t aNumChannels,
   if (!CubebUtils::GetCubebContext()) {
     return NS_ERROR_FAILURE;
   }
 
   MOZ_LOG(gAudioStreamLog, LogLevel::Debug,
     ("%s  channels: %d, rate: %d for %p", __FUNCTION__, aNumChannels, aRate, this));
   mInRate = mOutRate = aRate;
   mChannels = aNumChannels;
-  mOutChannels = (aNumChannels > 2) ? 2 : aNumChannels;
+  mOutChannels = mIsMonoAudioEnabled ? 1 : aNumChannels;
 
   mDumpFile = OpenDumpFile(this);
 
   cubeb_stream_params params;
   params.rate = aRate;
   params.channels = mOutChannels;
 #if defined(__ANDROID__)
 #if defined(MOZ_B2G)
@@ -348,19 +348,21 @@ AudioStream::Init(uint32_t aNumChannels,
   if (params.stream_type == CUBEB_STREAM_TYPE_MAX) {
     return NS_ERROR_INVALID_ARG;
   }
 #endif
 
   params.format = ToCubebFormat<AUDIO_OUTPUT_FORMAT>::value;
   mAudioClock.Init();
 
-  AudioConfig inConfig(mChannels, mInRate);
-  AudioConfig outConfig(mOutChannels, mOutRate);
-  mAudioConverter = MakeUnique<AudioConverter>(inConfig, outConfig);
+  if (mIsMonoAudioEnabled) {
+    AudioConfig inConfig(mChannels, mInRate);
+    AudioConfig outConfig(mOutChannels, mOutRate);
+    mAudioConverter = MakeUnique<AudioConverter>(inConfig, outConfig);
+  }
   return OpenCubeb(params);
 }
 
 // This code used to live inside AudioStream::Init(), but on Mac (others?)
 // it has been known to take 300-800 (or even 8500) ms to execute(!)
 nsresult
 AudioStream::OpenCubeb(cubeb_stream_params &aParams)
 {
@@ -556,25 +558,20 @@ AudioStream::Downmix(Chunk* aChunk)
     LOGW("mismatched sample %u, mInRate=%u", aChunk->Rate(), mInRate);
     return false;
   }
 
   if (aChunk->Channels() > 8) {
     return false;
   }
 
-  if (aChunk->Channels() > 2) {
-    MOZ_ASSERT(mAudioConverter);
+  if (mAudioConverter) {
     mAudioConverter->Process(aChunk->GetWritable(), aChunk->Frames());
   }
 
-  if (aChunk->Channels() >= 2 && mIsMonoAudioEnabled) {
-    DownmixStereoToMono(aChunk->GetWritable(), aChunk->Frames());
-  }
-
   return true;
 }
 
 void
 AudioStream::GetUnprocessed(AudioBufferWriter& aWriter)
 {
   mMonitor.AssertCurrentThreadOwns();
 
--- a/dom/media/mediasink/DecodedAudioDataSink.cpp
+++ b/dom/media/mediasink/DecodedAudioDataSink.cpp
@@ -3,16 +3,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsPrintfCString.h"
 #include "MediaQueue.h"
 #include "DecodedAudioDataSink.h"
 #include "VideoUtils.h"
+#include "AudioConverter.h"
 
 #include "mozilla/CheckedInt.h"
 #include "mozilla/DebugOnly.h"
 
 namespace mozilla {
 
 extern LazyLogModule gMediaDecoderLog;
 #define SINK_LOG(msg, ...) \
@@ -34,16 +35,19 @@ DecodedAudioDataSink::DecodedAudioDataSi
   : AudioSink(aAudioQueue)
   , mStartTime(aStartTime)
   , mWritten(0)
   , mLastGoodPosition(0)
   , mInfo(aInfo)
   , mChannel(aChannel)
   , mPlaying(true)
   , mPlaybackComplete(false)
+  , mConverter(MakeUnique<AudioConverter>(
+                AudioConfig(mInfo.mChannels, mInfo.mRate),
+                AudioConfig(mInfo.mChannels > 2 ? 2 : mInfo.mChannels, mInfo.mRate)))
 {
 }
 
 DecodedAudioDataSink::~DecodedAudioDataSink()
 {
 }
 
 RefPtr<GenericPromise>
@@ -131,17 +135,19 @@ DecodedAudioDataSink::SetPlaying(bool aP
   }
   mPlaying = aPlaying;
 }
 
 nsresult
 DecodedAudioDataSink::InitializeAudioStream(const PlaybackParams& aParams)
 {
   mAudioStream = new AudioStream(*this);
-  nsresult rv = mAudioStream->Init(mInfo.mChannels, mInfo.mRate, mChannel);
+  nsresult rv = mAudioStream->Init(mConverter->OutputConfig().Channels(),
+                                   mConverter->OutputConfig().Rate(),
+                                   mChannel);
   if (NS_FAILED(rv)) {
     mAudioStream->Shutdown();
     mAudioStream = nullptr;
     return rv;
   }
 
   // Set playback params before calling Start() so they can take effect
   // as soon as the 1st DataCallback of the AudioStream fires.
@@ -151,17 +157,18 @@ DecodedAudioDataSink::InitializeAudioStr
   mAudioStream->Start();
 
   return NS_OK;
 }
 
 int64_t
 DecodedAudioDataSink::GetEndTime() const
 {
-  CheckedInt64 playedUsecs = FramesToUsecs(mWritten, mInfo.mRate) + mStartTime;
+  CheckedInt64 playedUsecs =
+    FramesToUsecs(mWritten, mConverter->OutputConfig().Rate()) + mStartTime;
   if (!playedUsecs.isValid()) {
     NS_WARNING("Int overflow calculating audio end time");
     return -1;
   }
   return playedUsecs.value();
 }
 
 UniquePtr<AudioStream::Chunk>
@@ -226,40 +233,61 @@ DecodedAudioDataSink::PopFrames(uint32_t
       RefPtr<MediaData> releaseMe = AudioQueue().PopFront();
       continue;
     }
 
     // See if there's a gap in the audio. If there is, push silence into the
     // audio hardware, so we can play across the gap.
     // Calculate the timestamp of the next chunk of audio in numbers of
     // samples.
-    CheckedInt64 sampleTime = UsecsToFrames(AudioQueue().PeekFront()->mTime, mInfo.mRate);
+    CheckedInt64 sampleTime = UsecsToFrames(AudioQueue().PeekFront()->mTime,
+                                            mConverter->OutputConfig().Rate());
     // Calculate the number of frames that have been pushed onto the audio hardware.
-    CheckedInt64 playedFrames = UsecsToFrames(mStartTime, mInfo.mRate) +
+    CheckedInt64 playedFrames = UsecsToFrames(mStartTime,
+                                              mConverter->OutputConfig().Rate()) +
                                 static_cast<int64_t>(mWritten);
     CheckedInt64 missingFrames = sampleTime - playedFrames;
 
     if (!missingFrames.isValid() || !sampleTime.isValid()) {
       NS_WARNING("Int overflow in DecodedAudioDataSink");
       mErrored = true;
       return MakeUnique<Chunk>();
     }
 
+    const uint32_t rate = mConverter->OutputConfig().Rate();
+    const uint32_t channels = mConverter->OutputConfig().Channels();
+
     if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
       // The next audio chunk begins some time after the end of the last chunk
       // we pushed to the audio hardware. We must push silence into the audio
       // hardware so that the next audio chunk begins playback at the correct
       // time.
       missingFrames = std::min<int64_t>(UINT32_MAX, missingFrames.value());
       auto framesToPop = std::min<uint32_t>(missingFrames.value(), aFrames);
       mWritten += framesToPop;
-      return MakeUnique<SilentChunk>(framesToPop, mInfo.mChannels, mInfo.mRate);
+      return MakeUnique<SilentChunk>(framesToPop, channels, rate);
     }
 
-    mCurrentData = dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
+    RefPtr<AudioData> data =
+      dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
+    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+      AlignedAudioBuffer convertedData =
+        mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
+      mCurrentData =
+        new AudioData(data->mOffset,
+                      data->mTime,
+                      data->mDuration,
+                      convertedData.Length() / channels,
+                      Move(convertedData),
+                      channels,
+                      rate);
+    } else {
+      mCurrentData = Move(data);
+    }
+
     mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->mAudioData.get(),
                                             mCurrentData->mChannels,
                                             mCurrentData->mFrames);
     MOZ_ASSERT(mCurrentData->mFrames > 0);
   }
 
   auto framesToPop = std::min(aFrames, mCursor->Available());
 
--- a/dom/media/mediasink/DecodedAudioDataSink.h
+++ b/dom/media/mediasink/DecodedAudioDataSink.h
@@ -16,16 +16,18 @@
 #include "mozilla/dom/AudioChannelBinding.h"
 #include "mozilla/Atomics.h"
 #include "mozilla/Maybe.h"
 #include "mozilla/MozPromise.h"
 #include "mozilla/ReentrantMonitor.h"
 
 namespace mozilla {
 
+class AudioConverter;
+
 namespace media {
 
 class DecodedAudioDataSink : public AudioSink,
                              private AudioStream::DataSource {
 public:
   DecodedAudioDataSink(MediaQueue<MediaData>& aAudioQueue,
                        int64_t aStartTime,
                        const AudioInfo& aInfo,
@@ -92,21 +94,23 @@ private:
   MozPromiseHolder<GenericPromise> mEndPromise;
 
   /*
    * Members to implement AudioStream::DataSource.
    * Used on the callback thread of cubeb.
    */
   // The AudioData at which AudioStream::DataSource is reading.
   RefPtr<AudioData> mCurrentData;
-  // Keep track of the read positoin of mCurrentData.
+  // Keep track of the read position of mCurrentData.
   UniquePtr<AudioBufferCursor> mCursor;
   // True if there is any error in processing audio data like overflow.
   bool mErrored = false;
 
   // Set on the callback thread of cubeb once the stream has drained.
   Atomic<bool> mPlaybackComplete;
+
+  UniquePtr<AudioConverter> mConverter;
 };
 
 } // namespace media
 } // namespace mozilla
 
 #endif