Bug 1264199: P1. Perform audio conversion in the MSDM taskqueue and ahead of use. r=kinetik draft
author Jean-Yves Avenard <jyavenard@mozilla.com>
Wed, 13 Apr 2016 17:17:54 +1000
changeset 356280 ae5183656326f1429951c53306fb1ac9acba8b9e
parent 356279 ad5d064e03fb123f6bdceff00df385ac75321a7a
child 356281 c1b416e1c4f22a71c5a8fb00e9146ee7c1d5b5ed
push id 16486
push user bmo:jyavenard@mozilla.com
push date Tue, 26 Apr 2016 02:36:37 +0000
reviewers kinetik
bugs 1264199
milestone 49.0a1
Bug 1264199: P1. Perform audio conversion in the MSDM taskqueue and ahead of use. r=kinetik This will allow us to easily detect audio configuration changes prior to immediate playback. MozReview-Commit-ID: IwDUGOd17ND
dom/media/MediaDecoderStateMachine.cpp
dom/media/mediasink/DecodedAudioDataSink.cpp
dom/media/mediasink/DecodedAudioDataSink.h
--- a/dom/media/MediaDecoderStateMachine.cpp
+++ b/dom/media/MediaDecoderStateMachine.cpp
@@ -360,17 +360,17 @@ MediaDecoderStateMachine::Initialization
 
 media::MediaSink*
 MediaDecoderStateMachine::CreateAudioSink()
 {
   RefPtr<MediaDecoderStateMachine> self = this;
   auto audioSinkCreator = [self] () {
     MOZ_ASSERT(self->OnTaskQueue());
     return new DecodedAudioDataSink(
-      self->mAudioQueue, self->GetMediaTime(),
+      self->mTaskQueue, self->mAudioQueue, self->GetMediaTime(),
       self->mInfo.mAudio, self->mAudioChannel);
   };
   return new AudioSinkWrapper(mTaskQueue, audioSinkCreator);
 }
 
 already_AddRefed<media::MediaSink>
 MediaDecoderStateMachine::CreateMediaSink(bool aAudioCaptured)
 {
--- a/dom/media/mediasink/DecodedAudioDataSink.cpp
+++ b/dom/media/mediasink/DecodedAudioDataSink.cpp
@@ -24,46 +24,68 @@ extern LazyLogModule gMediaDecoderLog;
   MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, \
   ("DecodedAudioDataSink=%p " msg, this, ##__VA_ARGS__))
 
 namespace media {
 
 // The amount of audio frames that is used to fuzz rounding errors.
 static const int64_t AUDIO_FUZZ_FRAMES = 1;
 
-DecodedAudioDataSink::DecodedAudioDataSink(MediaQueue<MediaData>& aAudioQueue,
+// Amount of audio (in microseconds) we will be processing ahead of use.
+static const int32_t LOW_AUDIO_USECS = 300000;
+
+DecodedAudioDataSink::DecodedAudioDataSink(AbstractThread* aThread,
+                                           MediaQueue<MediaData>& aAudioQueue,
                                            int64_t aStartTime,
                                            const AudioInfo& aInfo,
                                            dom::AudioChannel aChannel)
   : AudioSink(aAudioQueue)
   , mStartTime(aStartTime)
   , mWritten(0)
   , mLastGoodPosition(0)
   , mInfo(aInfo)
   , mChannel(aChannel)
   , mPlaying(true)
+  , mErrored(false)
   , mPlaybackComplete(false)
+  , mOwnerThread(aThread)
+  , mProcessedQueueLength(0)
+  , mFramesParsed(0)
+  , mLastEndTime(0)
 {
   bool resampling = gfxPrefs::AudioSinkResampling();
   uint32_t resamplingRate = gfxPrefs::AudioSinkResampleRate();
+  mOutputRate = resampling ? resamplingRate : mInfo.mRate;
+  mOutputChannels = mInfo.mChannels > 2 && gfxPrefs::AudioSinkForceStereo()
+                      ? 2 : mInfo.mChannels;
   mConverter =
     MakeUnique<AudioConverter>(
       AudioConfig(mInfo.mChannels, mInfo.mRate),
-      AudioConfig(mInfo.mChannels > 2 && gfxPrefs::AudioSinkForceStereo()
-                    ? 2 : mInfo.mChannels,
-                  resampling ? resamplingRate : mInfo.mRate));
+      AudioConfig(mOutputChannels, mOutputRate));
 }
 
 DecodedAudioDataSink::~DecodedAudioDataSink()
 {
 }
 
 RefPtr<GenericPromise>
 DecodedAudioDataSink::Init(const PlaybackParams& aParams)
 {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  mAudioQueueListener = mAudioQueue.PushEvent().Connect(
+    mOwnerThread, this, &DecodedAudioDataSink::OnAudioPushed);
+  mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
+    mOwnerThread, this, &DecodedAudioDataSink::NotifyAudioNeeded);
+  mProcessedQueueListener = mProcessedQueue.PopEvent().Connect(
+    mOwnerThread, this, &DecodedAudioDataSink::OnAudioPopped);
+
+  // Ensure that at least one audio packet will be popped from AudioQueue
+  // and be ready to be played.
+  NotifyAudioNeeded();
   RefPtr<GenericPromise> p = mEndPromise.Ensure(__func__);
   nsresult rv = InitializeAudioStream(aParams);
   if (NS_FAILED(rv)) {
     mEndPromise.Reject(rv, __func__);
   }
   return p;
 }
 
@@ -84,26 +106,35 @@ DecodedAudioDataSink::GetPosition()
   return mStartTime + mLastGoodPosition;
 }
 
 bool
 DecodedAudioDataSink::HasUnplayedFrames()
 {
   // Experimentation suggests that GetPositionInFrames() is zero-indexed,
   // so we need to add 1 here before comparing it to mWritten.
-  return mAudioStream && mAudioStream->GetPositionInFrames() + 1 < mWritten;
+  return mProcessedQueue.GetSize() ||
+         (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < mWritten);
 }
 
 void
 DecodedAudioDataSink::Shutdown()
 {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  mAudioQueueListener.Disconnect();
+  mAudioQueueFinishListener.Disconnect();
+  mProcessedQueueListener.Disconnect();
+
   if (mAudioStream) {
     mAudioStream->Shutdown();
     mAudioStream = nullptr;
   }
+  mProcessedQueue.Reset();
+  mProcessedQueue.Finish();
   mEndPromise.ResolveIfExists(true, __func__);
 }
 
 void
 DecodedAudioDataSink::SetVolume(double aVolume)
 {
   if (mAudioStream) {
     mAudioStream->SetVolume(aVolume);
@@ -141,19 +172,17 @@ DecodedAudioDataSink::SetPlaying(bool aP
   }
   mPlaying = aPlaying;
 }
 
 nsresult
 DecodedAudioDataSink::InitializeAudioStream(const PlaybackParams& aParams)
 {
   mAudioStream = new AudioStream(*this);
-  nsresult rv = mAudioStream->Init(mConverter->OutputConfig().Channels(),
-                                   mConverter->OutputConfig().Rate(),
-                                   mChannel);
+  nsresult rv = mAudioStream->Init(mOutputChannels, mOutputRate, mChannel);
   if (NS_FAILED(rv)) {
     mAudioStream->Shutdown();
     mAudioStream = nullptr;
     return rv;
   }
 
   // Set playback params before calling Start() so they can take effect
   // as soon as the 1st DataCallback of the AudioStream fires.
@@ -163,23 +192,24 @@ DecodedAudioDataSink::InitializeAudioStr
   mAudioStream->Start();
 
   return NS_OK;
 }
 
 int64_t
 DecodedAudioDataSink::GetEndTime() const
 {
-  CheckedInt64 playedUsecs =
-    FramesToUsecs(mWritten, mConverter->OutputConfig().Rate()) + mStartTime;
+  CheckedInt64 playedUsecs = FramesToUsecs(mWritten, mOutputRate) + mStartTime;
   if (!playedUsecs.isValid()) {
     NS_WARNING("Int overflow calculating audio end time");
     return -1;
   }
-  return playedUsecs.value();
+  // As we may be resampling, rounding errors may occur. Ensure we never get
+  // past the original end time.
+  return std::min<int64_t>(mLastEndTime, playedUsecs.value());
 }
 
 UniquePtr<AudioStream::Chunk>
 DecodedAudioDataSink::PopFrames(uint32_t aFrames)
 {
   class Chunk : public AudioStream::Chunk {
   public:
     Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData)
@@ -212,123 +242,211 @@ DecodedAudioDataSink::PopFrames(uint32_t
     AudioDataValue* GetWritable() const { return mData.get(); }
   private:
     const uint32_t mFrames;
     const uint32_t mChannels;
     const uint32_t mRate;
     UniquePtr<AudioDataValue[]> mData;
   };
 
-  while (!mCurrentData) {
+  bool needPopping = false;
+  if (!mCurrentData) {
     // No data in the queue. Return an empty chunk.
-    if (AudioQueue().GetSize() == 0) {
+    if (!mProcessedQueue.GetSize()) {
       return MakeUnique<Chunk>();
     }
 
-    RefPtr<AudioData> a = AudioQueue().PeekFront()->As<AudioData>();
-
-    // Ignore the element with 0 frames and try next.
-    if (a->mFrames == 0) {
-      RefPtr<MediaData> releaseMe = AudioQueue().PopFront();
-      continue;
-    }
-
-    // Ignore invalid samples.
-    if (a->mRate != mInfo.mRate || a->mChannels != mInfo.mChannels) {
-      NS_WARNING(nsPrintfCString(
-        "mismatched sample format, data=%p rate=%u channels=%u frames=%u",
-        a->mAudioData.get(), a->mRate, a->mChannels, a->mFrames).get());
-      RefPtr<MediaData> releaseMe = AudioQueue().PopFront();
-      continue;
-    }
-
-    // See if there's a gap in the audio. If there is, push silence into the
-    // audio hardware, so we can play across the gap.
-    // Calculate the timestamp of the next chunk of audio in numbers of
-    // samples.
-    CheckedInt64 sampleTime = UsecsToFrames(AudioQueue().PeekFront()->mTime,
-                                            mConverter->OutputConfig().Rate());
-    // Calculate the number of frames that have been pushed onto the audio hardware.
-    CheckedInt64 playedFrames = UsecsToFrames(mStartTime,
-                                              mConverter->OutputConfig().Rate()) +
-                                static_cast<int64_t>(mWritten);
-    CheckedInt64 missingFrames = sampleTime - playedFrames;
-
-    if (!missingFrames.isValid() || !sampleTime.isValid()) {
-      NS_WARNING("Int overflow in DecodedAudioDataSink");
-      mErrored = true;
-      return MakeUnique<Chunk>();
-    }
-
-    const uint32_t rate = mConverter->OutputConfig().Rate();
-    const uint32_t channels = mConverter->OutputConfig().Channels();
-
-    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
-      // The next audio chunk begins some time after the end of the last chunk
-      // we pushed to the audio hardware. We must push silence into the audio
-      // hardware so that the next audio chunk begins playback at the correct
-      // time.
-      missingFrames = std::min<int64_t>(UINT32_MAX, missingFrames.value());
-      auto framesToPop = std::min<uint32_t>(missingFrames.value(), aFrames);
-      mWritten += framesToPop;
-      return MakeUnique<SilentChunk>(framesToPop, channels, rate);
-    }
-
-    RefPtr<AudioData> data =
-      dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
-    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
-      AlignedAudioBuffer convertedData =
-        mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
-      mCurrentData =
-        new AudioData(data->mOffset,
-                      data->mTime,
-                      data->mDuration,
-                      convertedData.Length() / channels,
-                      Move(convertedData),
-                      channels,
-                      rate);
-    } else {
-      mCurrentData = Move(data);
-    }
-
+    // We need to update our values prior to popping the processed queue in
+    // order to prevent the pop event from firing too early (prior to
+    // mProcessedQueueLength being updated) and to prevent HasUnplayedFrames
+    // from incorrectly returning true during the time interval between
+    // when mProcessedQueue is read and mWritten is updated.
+    needPopping = true;
+    mCurrentData = mProcessedQueue.PeekFront();
     mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->mAudioData.get(),
                                             mCurrentData->mChannels,
                                             mCurrentData->mFrames);
     MOZ_ASSERT(mCurrentData->mFrames > 0);
+    mProcessedQueueLength -=
+      FramesToUsecs(mCurrentData->mFrames, mOutputRate).value();
   }
 
   auto framesToPop = std::min(aFrames, mCursor->Available());
 
   SINK_LOG_V("playing audio at time=%lld offset=%u length=%u",
              mCurrentData->mTime, mCurrentData->mFrames - mCursor->Available(), framesToPop);
 
   UniquePtr<AudioStream::Chunk> chunk =
     MakeUnique<Chunk>(mCurrentData, framesToPop, mCursor->Ptr());
 
   mWritten += framesToPop;
   mCursor->Advance(framesToPop);
 
   // All frames are popped. Reset mCurrentData so we can pop new elements from
   // the audio queue in next calls to PopFrames().
-  if (mCursor->Available() == 0) {
+  if (!mCursor->Available()) {
     mCurrentData = nullptr;
   }
 
+  if (needPopping) {
+    // We can now safely pop the audio packet from the processed queue.
+    // This will fire the popped event, triggering a call to NotifyAudioNeeded.
+    RefPtr<AudioData> releaseMe = mProcessedQueue.PopFront();
+  }
+
   return chunk;
 }
 
 bool
 DecodedAudioDataSink::Ended() const
 {
   // Return true when error encountered so AudioStream can start draining.
-  return AudioQueue().IsFinished() || mErrored;
+  return mProcessedQueue.IsFinished() || mErrored;
 }
 
 void
 DecodedAudioDataSink::Drained()
 {
   SINK_LOG("Drained");
   mPlaybackComplete = true;
   mEndPromise.ResolveIfExists(true, __func__);
 }
 
+void
+DecodedAudioDataSink::OnAudioPopped(const RefPtr<MediaData>& aSample)
+{
+  SINK_LOG_V("AudioStream has used an audio packet.");
+  NotifyAudioNeeded();
+}
+
+void
+DecodedAudioDataSink::OnAudioPushed(const RefPtr<MediaData>& aSample)
+{
+  SINK_LOG_V("One new audio packet available.");
+  NotifyAudioNeeded();
+}
+
+void
+DecodedAudioDataSink::NotifyAudioNeeded()
+{
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
+             "Not called from the owner's thread");
+
+  // Always ensure we have two processed audio packets pending to allow for
+  // processing latency.
+  while (AudioQueue().GetSize() && (AudioQueue().IsFinished() ||
+                                    mProcessedQueueLength < LOW_AUDIO_USECS ||
+                                    mProcessedQueue.GetSize() < 2)) {
+    RefPtr<AudioData> data =
+      dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
+
+    // Ignore the element with 0 frames and try next.
+    if (!data->mFrames) {
+      continue;
+    }
+
+    // Ignore invalid samples.
+    if (data->mRate != mConverter->InputConfig().Rate() ||
+        data->mChannels != mConverter->InputConfig().Channels()) {
+      NS_WARNING(nsPrintfCString(
+        "mismatched sample format, data=%p rate=%u channels=%u frames=%u",
+        data->mAudioData.get(), data->mRate, data->mChannels, data->mFrames).get());
+      continue;
+    }
+
+    // See if there's a gap in the audio. If there is, push silence into the
+    // audio hardware, so we can play across the gap.
+    // Calculate the timestamp of the next chunk of audio in numbers of
+    // samples.
+    CheckedInt64 sampleTime = UsecsToFrames(data->mTime - mStartTime,
+                                            data->mRate);
+    // Calculate the number of frames that have been pushed onto the audio hardware.
+    CheckedInt64 missingFrames = sampleTime - mFramesParsed;
+
+    if (!missingFrames.isValid()) {
+      NS_WARNING("Int overflow in DecodedAudioDataSink");
+      mErrored = true;
+      return;
+    }
+
+    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
+      // The next audio packet begins some time after the end of the last packet
+      // we pushed to the audio hardware. We must push silence into the audio
+      // hardware so that the next audio packet begins playback at the correct
+      // time.
+      missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
+      mFramesParsed += missingFrames.value();
+
+      // We need to calculate how many frames are missing at the output rate.
+      missingFrames =
+        SaferMultDiv(missingFrames.value(), mOutputRate, data->mRate);
+      if (!missingFrames.isValid()) {
+        NS_WARNING("Int overflow in DecodedAudioDataSink");
+        mErrored = true;
+        return;
+      }
+
+      if (missingFrames.value()) {
+        AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
+        if (!silenceData) {
+          NS_WARNING("OOM in DecodedAudioDataSink");
+          mErrored = true;
+          return;
+        }
+        RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
+        PushProcessedAudio(silence);
+      }
+    }
+
+    mLastEndTime = data->GetEndTime();
+    mFramesParsed += data->mFrames;
+
+    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+      AlignedAudioBuffer convertedData =
+        mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
+      data = CreateAudioFromBuffer(Move(convertedData), data);
+    }
+    PushProcessedAudio(data);
+  }
+
+  if (AudioQueue().IsFinished()) {
+    mProcessedQueue.Finish();
+  }
+}
+
+uint32_t
+DecodedAudioDataSink::PushProcessedAudio(AudioData* aData)
+{
+  if (!aData || !aData->mFrames) {
+    return 0;
+  }
+  mProcessedQueue.Push(aData);
+  mProcessedQueueLength += FramesToUsecs(aData->mFrames, mOutputRate).value();
+  return aData->mFrames;
+}
+
+already_AddRefed<AudioData>
+DecodedAudioDataSink::CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
+                                            AudioData* aReference)
+{
+  uint32_t frames = aBuffer.Length() / mOutputChannels;
+  if (!frames) {
+    return nullptr;
+  }
+  CheckedInt64 duration = FramesToUsecs(frames, mOutputRate);
+  if (!duration.isValid()) {
+    NS_WARNING("Int overflow in DecodedAudioDataSink");
+    mErrored = true;
+    return nullptr;
+  }
+  RefPtr<AudioData> data =
+    new AudioData(aReference->mOffset,
+                  aReference->mTime,
+                  duration.value(),
+                  frames,
+                  Move(aBuffer),
+                  mOutputChannels,
+                  mOutputRate);
+  return data.forget();
+}
+
 } // namespace media
 } // namespace mozilla
--- a/dom/media/mediasink/DecodedAudioDataSink.h
+++ b/dom/media/mediasink/DecodedAudioDataSink.h
@@ -23,17 +23,18 @@ namespace mozilla {
 
 class AudioConverter;
 
 namespace media {
 
 class DecodedAudioDataSink : public AudioSink,
                              private AudioStream::DataSource {
 public:
-  DecodedAudioDataSink(MediaQueue<MediaData>& aAudioQueue,
+  DecodedAudioDataSink(AbstractThread* aThread,
+                       MediaQueue<MediaData>& aAudioQueue,
                        int64_t aStartTime,
                        const AudioInfo& aInfo,
                        dom::AudioChannel aChannel);
 
   // Return a promise which will be resolved when DecodedAudioDataSink
   // finishes playing, or rejected if any error.
   RefPtr<GenericPromise> Init(const PlaybackParams& aParams) override;
 
@@ -97,20 +98,45 @@ private:
    * Members to implement AudioStream::DataSource.
    * Used on the callback thread of cubeb.
    */
   // The AudioData at which AudioStream::DataSource is reading.
   RefPtr<AudioData> mCurrentData;
   // Keep track of the read position of mCurrentData.
   UniquePtr<AudioBufferCursor> mCursor;
   // True if there is any error in processing audio data like overflow.
-  bool mErrored = false;
+  Atomic<bool> mErrored;
 
   // Set on the callback thread of cubeb once the stream has drained.
   Atomic<bool> mPlaybackComplete;
 
+  const RefPtr<AbstractThread> mOwnerThread;
+
+  // Audio Processing objects and methods
+  void OnAudioPopped(const RefPtr<MediaData>& aSample);
+  void OnAudioPushed(const RefPtr<MediaData>& aSample);
+  void NotifyAudioNeeded();
+  already_AddRefed<AudioData> CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
+                                                    AudioData* aReference);
+  // Add data to the processed queue, update mProcessedQueueLength and
+  // return the number of frames added.
+  uint32_t PushProcessedAudio(AudioData* aData);
   UniquePtr<AudioConverter> mConverter;
+  MediaQueue<AudioData> mProcessedQueue;
+  // Length in microseconds of the ProcessedQueue
+  Atomic<int32_t> mProcessedQueueLength;
+  MediaEventListener mAudioQueueListener;
+  MediaEventListener mAudioQueueFinishListener;
+  MediaEventListener mProcessedQueueListener;
+  // Number of frames processed from AudioQueue(). Used to determine gaps in
+  // the input stream. It indicates the time in frames since playback started
+  // at the current input framerate.
+  int64_t mFramesParsed;
+  int64_t mLastEndTime;
+  // Never modified after construction.
+  uint32_t mOutputRate;
+  uint32_t mOutputChannels;
 };
 
 } // namespace media
 } // namespace mozilla
 
 #endif