Bug 1014393 - Shift responsibility of adjusting packets with opus codec delay to MediaEncoder. r?pehrsons draft
author Bryce Van Dyk <bvandyk@mozilla.com>
Mon, 13 Nov 2017 14:19:39 -0500
changeset 698382 7e26b6073a842357dad65b1b71ec565031235011
parent 698381 7ea6b9e23e4368432e758d8da87037739c7c0edb
child 698383 fe81b3a0a69efbd18b2cd64eb5d6c6b6902fbe61
push id 89276
push user bvandyk@mozilla.com
push date Wed, 15 Nov 2017 17:29:54 +0000
reviewers pehrsons
bugs 1014393
milestone 58.0a1
Bug 1014393 - Shift responsibility of adjusting packets with opus codec delay to MediaEncoder. r?pehrsons

Move the responsibility of adjusting Opus frame timestamps to the MediaEncoder. This was previously done by the EbmlComposer, but doing so in the MediaEncoder gives us greater control over the handling of timecodes and the interleaving of frames.

MozReview-Commit-ID: 2g9cy1IqOph
dom/media/encoder/MediaEncoder.cpp
dom/media/encoder/MediaEncoder.h
dom/media/webm/EbmlComposer.cpp
dom/media/webm/EbmlComposer.h
--- a/dom/media/encoder/MediaEncoder.cpp
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -862,16 +862,19 @@ MediaEncoder::EncodeData()
     rv = mAudioEncoder->GetEncodedTrack(encodedAudioData);
     if (NS_FAILED(rv)) {
       // Encoding might be canceled.
       LOG(LogLevel::Error, ("Failed to get encoded data from audio encoder."));
       return rv;
     }
     for (const RefPtr<EncodedFrame>& frame :
          encodedAudioData.GetEncodedFrames()) {
+      if (frame->GetFrameType() == EncodedFrame::FrameType::OPUS_AUDIO_FRAME) {
+        frame->SetTimeStamp(frame->GetTimeStamp() + mAudioCodecDelay);
+      }
       mEncodedAudioFrames.AppendElement(frame);
     }
   }
 
   return rv;
 }
 
 nsresult
@@ -936,16 +939,25 @@ MediaEncoder::CopyMetadataToMuxer(TrackE
 
   RefPtr<TrackMetadataBase> meta = aTrackEncoder->GetMetadata();
   if (meta == nullptr) {
     LOG(LogLevel::Error, ("metadata == null"));
     SetError();
     return NS_ERROR_ABORT;
   }
 
+  // In the case of Opus we need to calculate the codec delay
+  if (meta->GetKind() == TrackMetadataBase::MetadataKind::METADATA_OPUS) {
+    // Calculate offset in microseconds
+    OpusMetadata* opusMeta = static_cast<OpusMetadata*>(meta.get());
+    mAudioCodecDelay = static_cast<uint64_t>(
+      LittleEndian::readUint16(opusMeta->mIdHeader.Elements() + 10) *
+      PR_USEC_PER_SEC / 48000);
+  }
+
   nsresult rv = mWriter->SetMetadata(meta);
   if (NS_FAILED(rv)) {
    LOG(LogLevel::Error, ("SetMetadata failed"));
    SetError();
   }
   return rv;
 }
 
--- a/dom/media/encoder/MediaEncoder.h
+++ b/dom/media/encoder/MediaEncoder.h
@@ -273,16 +273,20 @@ private:
   // doesn't contain video on start() or if the input is an AudioNode.
   RefPtr<dom::VideoStreamTrack> mVideoTrack;
 
   // Audio frames that have been encoded and are pending write to the muxer
   nsTArray<RefPtr<EncodedFrame>> mEncodedAudioFrames;
   // Video frames that have been encoded and are pending write to the muxer
   nsTArray<RefPtr<EncodedFrame>> mEncodedVideoFrames;
 
+  // How much each audio time stamp should be delayed in microseconds. Used to
+  // adjust for opus codec delay.
+  uint64_t mAudioCodecDelay = 0;
+
   TimeStamp mStartTime;
   nsString mMIMEType;
   bool mInitialized;
   bool mMetadataEncoded;
   bool mCompleted;
   bool mError;
   bool mCanceled;
   bool mShutdown;
--- a/dom/media/webm/EbmlComposer.cpp
+++ b/dom/media/webm/EbmlComposer.cpp
@@ -48,23 +48,29 @@ void EbmlComposer::GenerateHeader()
                             mWidth, mHeight,
                             mDisplayWidth, mDisplayHeight);
           }
           // Audio
           if (mCodecPrivateData.Length() > 0) {
             // Extract the pre-skip from mCodecPrivateData
             // then convert it to nanoseconds.
             // Details in OpusTrackEncoder.cpp.
-            mCodecDelay =
-              (uint64_t)LittleEndian::readUint16(mCodecPrivateData.Elements() + 10)
-              * PR_NSEC_PER_SEC / 48000;
+            uint64_t codecDelay = (uint64_t)LittleEndian::readUint16(
+                                    mCodecPrivateData.Elements() + 10) *
+                                  PR_NSEC_PER_SEC / 48000;
             // Fixed 80ms, convert into nanoseconds.
             uint64_t seekPreRoll = 80 * PR_NSEC_PER_MSEC;
-            writeAudioTrack(&ebml, 0x2, 0x0, "A_OPUS", mSampleFreq,
-                            mChannels, mCodecDelay, seekPreRoll,
+            writeAudioTrack(&ebml,
+                            0x2,
+                            0x0,
+                            "A_OPUS",
+                            mSampleFreq,
+                            mChannels,
+                            codecDelay,
+                            seekPreRoll,
                             mCodecPrivateData.Elements(),
                             mCodecPrivateData.Length());
           }
         }
         Ebml_EndSubElement(&ebml, &trackLoc);
       }
     }
     // The Recording length is unknown and
@@ -127,18 +133,18 @@ EbmlComposer::WriteSimpleBlock(EncodedFr
   auto frameType = aFrame->GetFrameType();
   bool flush = false;
   bool isVP8IFrame = (frameType == EncodedFrame::FrameType::VP8_I_FRAME);
   if (isVP8IFrame) {
     FinishCluster();
     flush = true;
   } else {
     // Force it to calculate timecode using signed math via cast
-    int64_t timeCode = (aFrame->GetTimeStamp() / ((int) PR_USEC_PER_MSEC) - mClusterTimecode) +
-                       (mCodecDelay / PR_NSEC_PER_MSEC);
+    int64_t timeCode =
+      (aFrame->GetTimeStamp() / ((int)PR_USEC_PER_MSEC) - mClusterTimecode);
     if (timeCode < SHRT_MIN || timeCode > SHRT_MAX ) {
       // We're probably going to overflow (or underflow) the timeCode value later!
       FinishCluster();
       flush = true;
     }
   }
 
   auto block = mClusterBuffs.AppendElement();
@@ -156,19 +162,16 @@ EbmlComposer::WriteSimpleBlock(EncodedFr
     mClusterTimecode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC;
     Ebml_SerializeUnsigned(&ebml, Timecode, mClusterTimecode);
     mFlushState |= FLUSH_CLUSTER;
   }
 
   bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME);
   // Can't underflow/overflow now
   int64_t timeCode = aFrame->GetTimeStamp() / ((int) PR_USEC_PER_MSEC) - mClusterTimecode;
-  if (isOpus) {
-    timeCode += mCodecDelay / PR_NSEC_PER_MSEC;
-  }
   MOZ_ASSERT(timeCode >= SHRT_MIN && timeCode <= SHRT_MAX);
   writeSimpleBlock(&ebml, isOpus ? 0x2 : 0x1, static_cast<short>(timeCode), isVP8IFrame,
                    0, 0, (unsigned char*)aFrame->GetFrameData().Elements(),
                    aFrame->GetFrameData().Length());
   MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE +
              aFrame->GetFrameData().Length(),
              "write more data > EBML_BUFFER_SIZE");
   block->SetLength(ebml.offset);
@@ -216,17 +219,16 @@ EbmlComposer::ExtractBuffer(nsTArray<nsT
   }
   mClusterCanFlushBuffs.Clear();
 }
 
 EbmlComposer::EbmlComposer()
   : mFlushState(FLUSH_NONE)
   , mClusterHeaderIndex(0)
   , mClusterLengthLoc(0)
-  , mCodecDelay(0)
   , mClusterTimecode(0)
   , mWidth(0)
   , mHeight(0)
   , mSampleFreq(0)
   , mChannels(0)
 {}
 
 } // namespace mozilla
--- a/dom/media/webm/EbmlComposer.h
+++ b/dom/media/webm/EbmlComposer.h
@@ -31,17 +31,19 @@ public:
     mCodecPrivateData.AppendElements(aBufs);
   }
   /*
    * Generate the whole WebM header and output to mBuff.
    */
   void GenerateHeader();
   /*
    * Insert media encoded buffer into muxer and it would be package
-   * into SimpleBlock. If no cluster is opened, new cluster will start for writing.
+   * into SimpleBlock. If no cluster is opened, new cluster will start for
+   * writing. Frames passed to this function should already have any codec delay
+   * applied.
    */
   void WriteSimpleBlock(EncodedFrame* aFrame);
   /*
    * Get valid cluster data.
    */
   void ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs,
                      uint32_t aFlag = 0);
 private:
@@ -62,18 +64,16 @@ private:
   };
   uint32_t mFlushState;
   // Indicate the cluster header index in mClusterBuffs.
   uint32_t mClusterHeaderIndex;
   // The cluster length position.
   uint64_t mClusterLengthLoc;
   // Audio codec specific header data.
   nsTArray<uint8_t> mCodecPrivateData;
-  // Codec delay in nanoseconds.
-  uint64_t mCodecDelay;
 
   // The timecode of the cluster.
   uint64_t mClusterTimecode;
 
   // Video configuration
   int mWidth;
   int mHeight;
   int mDisplayWidth;