Bug 1215115 - part2: Mux opus into webm, remove bitdepth. r=rillian draft
author bechen <bechen@mozilla.com>
Fri, 01 Apr 2016 11:04:00 +0800
changeset 346450 32aac8f3341ecf3c9b88acad9f2ddc7cfeb16d5d
parent 345779 2e3351544888841bb73b06b6066d42b8bb42ba18
child 346451 bd0dd38ac6946dec582f4a6dc5e2a115d1ec6678
push id 14378
push user bechen@mozilla.com
push date Fri, 01 Apr 2016 03:15:20 +0000
reviewers rillian
bugs 1215115
milestone 48.0a1
Bug 1215115 - part2: Mux opus into webm, remove bitdepth. r=rillian

MozReview-Commit-ID: 7D74vONbkFP
dom/media/encoder/OpusTrackEncoder.cpp
dom/media/encoder/OpusTrackEncoder.h
dom/media/webm/EbmlComposer.cpp
dom/media/webm/EbmlComposer.h
dom/media/webm/WebMWriter.cpp
dom/media/webm/WebMWriter.h
media/libmkv/EbmlIDs.h
media/libmkv/WebMElement.c
media/libmkv/WebMElement.h
--- a/dom/media/encoder/OpusTrackEncoder.cpp
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@@ -124,16 +124,17 @@ SerializeOpusCommentHeader(const nsCStri
 
 }  // Anonymous namespace.
 
 OpusTrackEncoder::OpusTrackEncoder()
   : AudioTrackEncoder()
   , mEncoder(nullptr)
   , mLookahead(0)
   , mResampler(nullptr)
+  , mOutputTimeStamp(0)
 {
 }
 
 OpusTrackEncoder::~OpusTrackEncoder()
 {
   if (mEncoder) {
     opus_encoder_destroy(mEncoder);
   }
@@ -224,16 +225,18 @@ OpusTrackEncoder::GetMetadata()
     }
   }
 
   if (mCanceled || mEncodingComplete) {
     return nullptr;
   }
 
   RefPtr<OpusMetadata> meta = new OpusMetadata();
+  meta->mChannels = mChannels;
+  meta->mSamplingFrequency = mSamplingRate;
 
   mLookahead = 0;
   int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
   if (error != OPUS_OK) {
     mLookahead = 0;
   }
 
   // The ogg time stamping and pre-skip is always timed at 48000.
@@ -432,13 +435,16 @@ OpusTrackEncoder::GetEncodedTrack(Encode
     if (mResampler) {
       speex_resampler_destroy(mResampler);
       mResampler = nullptr;
     }
     mResampledLeftover.SetLength(0);
   }
 
   audiodata->SwapInFrameData(frameData);
+  mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
+  audiodata->SetTimeStamp(mOutputTimeStamp);
+  LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
   aData.AppendEncodedFrame(audiodata);
   return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
 }
 
 } // namespace mozilla
--- a/dom/media/encoder/OpusTrackEncoder.h
+++ b/dom/media/encoder/OpusTrackEncoder.h
@@ -17,17 +17,18 @@ namespace mozilla {
 // Opus meta data structure
 class OpusMetadata : public TrackMetadataBase
 {
 public:
   // The ID Header of OggOpus. refer to http://wiki.xiph.org/OggOpus.
   nsTArray<uint8_t> mIdHeader;
   // The Comment Header of OggOpus.
   nsTArray<uint8_t> mCommentHeader;
-
+  int32_t mChannels;
+  float mSamplingFrequency;
   MetadataKind GetKind() const override { return METADATA_OPUS; }
 };
 
 class OpusTrackEncoder : public AudioTrackEncoder
 {
 public:
   OpusTrackEncoder();
   virtual ~OpusTrackEncoder();
@@ -75,13 +76,16 @@ private:
    */
   SpeexResamplerState* mResampler;
 
   /**
    * Store the resampled frames that don't fit into an Opus packet duration.
    * They will be prepended to the resampled frames next encoding cycle.
    */
   nsTArray<AudioDataValue> mResampledLeftover;
+
+  // TimeStamp in microseconds.
+  uint64_t mOutputTimeStamp;
 };
 
 } // namespace mozilla
 
 #endif
--- a/dom/media/webm/EbmlComposer.cpp
+++ b/dom/media/webm/EbmlComposer.cpp
@@ -1,15 +1,16 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "EbmlComposer.h"
 #include "mozilla/UniquePtr.h"
+#include "mozilla/Endian.h"
 #include "libmkv/EbmlIDs.h"
 #include "libmkv/EbmlWriter.h"
 #include "libmkv/WebMElement.h"
 #include "prtime.h"
 
 namespace mozilla {
 
 // Timecode scale in nanoseconds
@@ -43,18 +44,27 @@ void EbmlComposer::GenerateHeader()
           // Video
           if (mWidth > 0 && mHeight > 0) {
             writeVideoTrack(&ebml, 0x1, 0, "V_VP8",
                             mWidth, mHeight,
                             mDisplayWidth, mDisplayHeight, mFrameRate);
           }
           // Audio
           if (mCodecPrivateData.Length() > 0) {
-            writeAudioTrack(&ebml, 0x2, 0x0, "A_VORBIS", mSampleFreq,
-                            mChannels, mCodecPrivateData.Elements(),
+            // Extract the pre-skip from mCodecPrivateData
+            // then convert it to nanoseconds.
+            // Details in OpusTrackEncoder.cpp.
+            mCodecDelay =
+              (uint64_t)LittleEndian::readUint16(mCodecPrivateData.Elements() + 10)
+              * PR_NSEC_PER_SEC / 48000;
+            // Fixed 80ms, convert into nanoseconds.
+            uint64_t seekPreRoll = 80 * PR_NSEC_PER_MSEC;
+            writeAudioTrack(&ebml, 0x2, 0x0, "A_OPUS", mSampleFreq,
+                            mChannels, mCodecDelay, seekPreRoll,
+                            mCodecPrivateData.Elements(),
                             mCodecPrivateData.Length());
           }
         }
         Ebml_EndSubElement(&ebml, &trackLoc);
       }
     }
     // The Recording length is unknown and
     // ignore write the whole Segment element size
@@ -130,19 +140,22 @@ EbmlComposer::WriteSimpleBlock(EncodedFr
     // current cluster header array index
     mClusterHeaderIndex = mClusterBuffs.Length() - 1;
     mClusterLengthLoc = ebmlLoc.offset;
     mClusterTimecode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC;
     Ebml_SerializeUnsigned(&ebml, Timecode, mClusterTimecode);
     mFlushState |= FLUSH_CLUSTER;
   }
 
-  bool isVorbis = (frameType == EncodedFrame::FrameType::VORBIS_AUDIO_FRAME);
+  bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME);
   short timeCode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC - mClusterTimecode;
-  writeSimpleBlock(&ebml, isVorbis ? 0x2 : 0x1, timeCode, isVP8IFrame,
+  if (isOpus) {
+    timeCode += mCodecDelay / PR_NSEC_PER_MSEC;
+  }
+  writeSimpleBlock(&ebml, isOpus ? 0x2 : 0x1, timeCode, isVP8IFrame,
                    0, 0, (unsigned char*)aFrame->GetFrameData().Elements(),
                    aFrame->GetFrameData().Length());
   MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE +
              aFrame->GetFrameData().Length(),
              "write more data > EBML_BUFFER_SIZE");
   block->SetLength(ebml.offset);
 }
 
@@ -159,24 +172,21 @@ EbmlComposer::SetVideoConfig(uint32_t aW
   mWidth = aWidth;
   mHeight = aHeight;
   mDisplayWidth = aDisplayWidth;
   mDisplayHeight = aDisplayHeight;
   mFrameRate = aFrameRate;
 }
 
 void
-EbmlComposer::SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels,
-                             uint32_t aBitDepth)
+EbmlComposer::SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels)
 {
   MOZ_ASSERT(aSampleFreq > 0, "SampleFreq should > 0");
-  MOZ_ASSERT(aBitDepth > 0, "BitDepth should > 0");
   MOZ_ASSERT(aChannels > 0, "Channels should > 0");
   mSampleFreq = aSampleFreq;
-  mBitDepth = aBitDepth;
   mChannels = aChannels;
 }
 
 void
 EbmlComposer::ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs,
                             uint32_t aFlag)
 {
   if ((aFlag & ContainerWriter::FLUSH_NEEDED) ||
@@ -194,18 +204,18 @@ EbmlComposer::ExtractBuffer(nsTArray<nsT
   }
   mClusterCanFlushBuffs.Clear();
 }
 
 EbmlComposer::EbmlComposer()
   : mFlushState(FLUSH_NONE)
   , mClusterHeaderIndex(0)
   , mClusterLengthLoc(0)
+  , mCodecDelay(0)
   , mClusterTimecode(0)
   , mWidth(0)
   , mHeight(0)
   , mFrameRate(0)
   , mSampleFreq(0)
-  , mBitDepth(0)
   , mChannels(0)
 {}
 
 } // namespace mozilla
--- a/dom/media/webm/EbmlComposer.h
+++ b/dom/media/webm/EbmlComposer.h
@@ -17,18 +17,17 @@ class EbmlComposer {
 public:
   EbmlComposer();
   /*
    * Assign the parameter which header required.
    */
   void SetVideoConfig(uint32_t aWidth, uint32_t aHeight, uint32_t aDisplayWidth,
                       uint32_t aDisplayHeight, float aFrameRate);
 
-  void SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels,
-                      uint32_t bitDepth);
+  void SetAudioConfig(uint32_t aSampleFreq, uint32_t aChannels);
   /*
    * Set the CodecPrivateData for writing in header.
    */
   void SetAudioCodecPrivateData(nsTArray<uint8_t>& aBufs)
   {
     mCodecPrivateData.AppendElements(aBufs);
   }
   /*
@@ -63,27 +62,28 @@ private:
   };
   uint32_t mFlushState;
   // Indicate the cluster header index in mClusterBuffs.
   uint32_t mClusterHeaderIndex;
   // The cluster length position.
   uint64_t mClusterLengthLoc;
   // Audio codec specific header data.
   nsTArray<uint8_t> mCodecPrivateData;
+  // Codec delay in nanoseconds.
+  uint64_t mCodecDelay;
 
   // The timecode of the cluster.
   uint64_t mClusterTimecode;
 
   // Video configuration
   int mWidth;
   int mHeight;
   int mDisplayWidth;
   int mDisplayHeight;
   float mFrameRate;
   // Audio configuration
   float mSampleFreq;
-  int mBitDepth;
   int mChannels;
 };
 
 } // namespace mozilla
 
 #endif
--- a/dom/media/webm/WebMWriter.cpp
+++ b/dom/media/webm/WebMWriter.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "WebMWriter.h"
 #include "EbmlComposer.h"
 #include "GeckoProfiler.h"
+#include "OpusTrackEncoder.h"
 
 namespace mozilla {
 
 WebMWriter::WebMWriter(uint32_t aTrackTypes) : ContainerWriter()
 {
   mMetadataRequiredFlag = aTrackTypes;
   mEbmlComposer = new EbmlComposer();
 }
@@ -59,20 +60,28 @@ WebMWriter::SetMetadata(TrackMetadataBas
                                   meta->mDisplayWidth, meta->mDisplayHeight,
                                   meta->mEncodedFrameRate);
     mMetadataRequiredFlag = mMetadataRequiredFlag & ~ContainerWriter::CREATE_VIDEO_TRACK;
   }
 
   if (aMetadata->GetKind() == TrackMetadataBase::METADATA_VORBIS) {
     VorbisMetadata* meta = static_cast<VorbisMetadata*>(aMetadata);
     MOZ_ASSERT(meta, "Cannot find vorbis encoder metadata");
-    mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels, meta->mBitDepth);
+    mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels);
     mEbmlComposer->SetAudioCodecPrivateData(meta->mData);
     mMetadataRequiredFlag = mMetadataRequiredFlag & ~ContainerWriter::CREATE_AUDIO_TRACK;
   }
 
+  if (aMetadata->GetKind() == TrackMetadataBase::METADATA_OPUS) {
+    OpusMetadata* meta = static_cast<OpusMetadata*>(aMetadata);
+    MOZ_ASSERT(meta, "Cannot find Opus encoder metadata");
+    mEbmlComposer->SetAudioConfig(meta->mSamplingFrequency, meta->mChannels);
+    mEbmlComposer->SetAudioCodecPrivateData(meta->mIdHeader);
+    mMetadataRequiredFlag = mMetadataRequiredFlag & ~ContainerWriter::CREATE_AUDIO_TRACK;
+  }
+
   if (!mMetadataRequiredFlag) {
     mEbmlComposer->GenerateHeader();
   }
   return NS_OK;
 }
 
 } // namespace mozilla
--- a/dom/media/webm/WebMWriter.h
+++ b/dom/media/webm/WebMWriter.h
@@ -13,17 +13,16 @@ namespace mozilla {
 class EbmlComposer;
 
 // Vorbis meta data structure
 class VorbisMetadata : public TrackMetadataBase
 {
 public:
   nsTArray<uint8_t> mData;
   int32_t mChannels;
-  int32_t mBitDepth;
   float mSamplingFrequency;
   MetadataKind GetKind() const override { return METADATA_VORBIS; }
 };
 
 // VP8 meta data structure
 class VP8Metadata : public TrackMetadataBase
 {
 public:
--- a/media/libmkv/EbmlIDs.h
+++ b/media/libmkv/EbmlIDs.h
@@ -101,16 +101,18 @@ enum mkv {
 /* TrackTimecodeScale = 0x23314F, */
 /* TrackOffset = 0x537F, */
   MaxBlockAdditionID = 0x55EE,
   Name = 0x536E,
   Language = 0x22B59C,
   CodecID = 0x86,
   CodecPrivate = 0x63A2,
   CodecName = 0x258688,
+  CodecDelay = 0x56AA,
+  SeekPreRoll = 0x56BB,
 /* AttachmentLink = 0x7446, */
 /* CodecSettings = 0x3A9697, */
 /* CodecInfoURL = 0x3B4040, */
 /* CodecDownloadURL = 0x26B240, */
 /* CodecDecodeAll = 0xAA, */
 /* TrackOverlay = 0x6FAB, */
 /* TrackTranslate = 0x6624, */
 /* TrackTranslateEditionUID = 0x66FC, */
--- a/media/libmkv/WebMElement.c
+++ b/media/libmkv/WebMElement.c
@@ -82,33 +82,36 @@ void writeVideoTrack(EbmlGlobal *glob, u
     }
     Ebml_SerializeFloat(glob, FrameRate, frameRate);
     Ebml_EndSubElement(glob, &videoStart); // Video
   }
   Ebml_EndSubElement(glob, &start); // Track Entry
 }
 void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
                      const char *codecId, double samplingFrequency, unsigned int channels,
+                     uint64_t codecDelay, uint64_t seekPreRoll,
                      unsigned char *private, unsigned long privateSize) {
   EbmlLoc start;
   UInt64 trackID;
   Ebml_StartSubElement(glob, &start, TrackEntry);
   Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
   trackID = generateTrackID(trackNumber);
   Ebml_SerializeUnsigned(glob, TrackUID, trackID);
   Ebml_SerializeUnsigned(glob, TrackType, 2); // audio is always 2
+  Ebml_SerializeUnsigned(glob, CodecDelay, codecDelay);
+  Ebml_SerializeUnsigned(glob, SeekPreRoll, seekPreRoll);
   // I am using defaults for thesed required fields
   /*  Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
       Ebml_SerializeUnsigned(glob, FlagDefault, 1);
       Ebml_SerializeUnsigned(glob, FlagForced, 1);
       Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
   Ebml_SerializeString(glob, CodecID, codecId);
   Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
 
-  Ebml_SerializeString(glob, CodecName, "VORBIS");  // fixed for now
+  Ebml_SerializeString(glob, CodecName, "OPUS");  // fixed for now
   {
     EbmlLoc AudioStart;
     Ebml_StartSubElement(glob, &AudioStart, Audio);
     Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
     Ebml_SerializeUnsigned(glob, Channels, channels);
     Ebml_EndSubElement(glob, &AudioStart);
   }
   Ebml_EndSubElement(glob, &start);
--- a/media/libmkv/WebMElement.h
+++ b/media/libmkv/WebMElement.h
@@ -20,16 +20,17 @@ void writeHeader(EbmlGlobal *ebml);
 void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo, unsigned long timeCodeScale, double duration);
 // this function is a helper only, it assumes a lot of defaults
 void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing,
                      const char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
                      unsigned int displayWidth, unsigned int displayHeight,
                      double frameRate);
 void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
                      const char *codecId, double samplingFrequency, unsigned int channels,
+                     uint64_t codecDelay, uint64_t seekPreRoll,
                      unsigned char *private_, unsigned long privateSize);
 
 void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber, short timeCode,
                       int isKeyframe, unsigned char lacingFlag, int discardable,
                       unsigned char *data, unsigned long dataLength);
 
 #endif