Bug 1014393 - MediaEncoder better orders frames passed to the muxer. r?pehrsons draft
author Bryce Van Dyk <bvandyk@mozilla.com>
Tue, 14 Nov 2017 10:34:05 -0500
changeset 698385 36548e9a87439a52d05b4dac1192386bbefdf450
parent 698384 4506fc66b5a9cbfda5c1c4066c085c40d31c9520
child 740367 56a288af8bc5a297dc687ecebdef65843c9edbac
push id 89276
push user bvandyk@mozilla.com
push date Wed, 15 Nov 2017 17:29:54 +0000
reviewers pehrsons
bugs 1014393
milestone 58.0a1
Bug 1014393 - MediaEncoder better orders frames passed to the muxer. r?pehrsons

Update MediaEncoder to pass frames to the muxer in order of their time stamps. This should prevent the currently possible scenario where audio and video frames are written with non-monotonically increasing timestamps (in violation of the webm spec).

MozReview-Commit-ID: BhdlV2kOB0l
dom/media/encoder/MediaEncoder.cpp
--- a/dom/media/encoder/MediaEncoder.cpp
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -883,34 +883,114 @@ MediaEncoder::WriteEncodedDataToMuxer()
   AUTO_PROFILER_LABEL("MediaEncoder::WriteEncodedDataToMuxer", OTHER);
 
   MOZ_ASSERT(mEncoderThread->IsCurrentThreadIn());
 
   MOZ_ASSERT(mVideoEncoder || mAudioEncoder, "Must have atleast one encoder");
 
   nsresult rv;
 
+  // If we have both video and audio encoders, interleave the frames
+  if (mVideoEncoder && mAudioEncoder) {
+    EncodedFrameContainer encodedData;
+
+    RefPtr<EncodedFrame> videoFrame;
+    RefPtr<EncodedFrame> audioFrame;
+    // The times at which we expect our next video and audio frames. These are
+    // based on the time + duration (GetEndTime()) of the last seen frames.
+    // Assumes that the encoders write the correct duration for frames. If the
+    // encoders estimate duration the interleaving logic below may interleave
+    // incorrectly.
+    uint64_t expectedNextVideoTime = 0;
+    uint64_t expectedNextAudioTime = 0;
+    // Interleave frames until we're out of audio or video
+    while (mEncodedVideoFrames.GetSize() > 0 &&
+           mEncodedAudioFrames.GetSize() > 0) {
+      videoFrame = mEncodedVideoFrames.PeekFront();
+      audioFrame = mEncodedAudioFrames.PeekFront();
+      // For any expected time our frames should occur at or after that time
+      MOZ_ASSERT(videoFrame->mTime >= expectedNextVideoTime);
+      MOZ_ASSERT(audioFrame->mTime >= expectedNextAudioTime);
+      if (videoFrame->mTime <= audioFrame->mTime) {
+        expectedNextVideoTime = videoFrame->GetEndTime();
+        RefPtr<EncodedFrame> frame = mEncodedVideoFrames.PopFront();
+        encodedData.AppendEncodedFrame(frame);
+      } else {
+        expectedNextAudioTime = audioFrame->GetEndTime();
+        RefPtr<EncodedFrame> frame = mEncodedAudioFrames.PopFront();
+        encodedData.AppendEncodedFrame(frame);
+      }
+    }
+
+    // If we're out of audio we still may be able to add more video...
+    if (mEncodedAudioFrames.GetSize() == 0) {
+      while (mEncodedVideoFrames.GetSize() > 0) {
+        videoFrame = mEncodedVideoFrames.PeekFront();
+        // If audio encoding is complete or if the video would precede the
+        // next audio sample we can add it
+        if (mAudioEncoder->IsEncodingComplete() ||
+            videoFrame->mTime < expectedNextAudioTime) {
+          RefPtr<EncodedFrame> frame = mEncodedVideoFrames.PopFront();
+          encodedData.AppendEncodedFrame(frame);
+        } else {
+          break;
+        }
+      }
+    }
+
+    // If we're out of video we still may be able to add more audio...
+    if (mEncodedVideoFrames.GetSize() == 0) {
+      while (mEncodedAudioFrames.GetSize() > 0) {
+        audioFrame = mEncodedAudioFrames.PeekFront();
+        // If video encoding is complete or if audio would precede the next
+        // video sample we can add it
+        if (mVideoEncoder->IsEncodingComplete() ||
+            audioFrame->mTime < expectedNextVideoTime) {
+          RefPtr<EncodedFrame> frame = mEncodedAudioFrames.PopFront();
+          encodedData.AppendEncodedFrame(frame);
+        } else {
+          break;
+        }
+      }
+    }
+
+    rv = mWriter->WriteEncodedTrack(encodedData, 0);
+    if (NS_FAILED(rv)) {
+      LOG(LogLevel::Error,
+          ("Error! Fail to write encoded video + audio track "
+           "to the media container."));
+    }
+    return rv;
+  }
+
+  // If we reach here, we have only a single encoder and don't have to worry
+  // about any interleaving
+
+  // Write out our video frames if we have a video encoder
   if (mVideoEncoder) {
+    MOZ_ASSERT(!mAudioEncoder);
     EncodedFrameContainer encodedVideoData;
     while (mEncodedVideoFrames.GetSize() > 0) {
       RefPtr<EncodedFrame> frame = mEncodedVideoFrames.PopFront();
       encodedVideoData.AppendEncodedFrame(frame);
     }
 
     rv = mWriter->WriteEncodedTrack(
       encodedVideoData,
       mVideoEncoder->IsEncodingComplete() ? ContainerWriter::END_OF_STREAM : 0);
     if (NS_FAILED(rv)) {
       LOG(LogLevel::Error,
           ("Failed to write encoded video track to the muxer."));
       return rv;
     }
   }
 
+  // Write out our audio frames if we have an audio encoder
   if (mAudioEncoder) {
+    MOZ_ASSERT(!mVideoEncoder);
     EncodedFrameContainer encodedAudioData;
     while (mEncodedAudioFrames.GetSize() > 0) {
       RefPtr<EncodedFrame> frame = mEncodedAudioFrames.PopFront();
       encodedAudioData.AppendEncodedFrame(frame);
     }
 
     rv = mWriter->WriteEncodedTrack(
       encodedAudioData,