Bug 1014393 - Shift responsibility of adjusting packets with opus codec delay to MediaEncoder. r?pehrsons
Move the responsibility for adjusting Opus frame timestamps to the MediaEncoder.
This was previously done by the EbmlComposer, but doing it in the MediaEncoder
gives us greater control over the handling of time codes and the interleaving
of frames.
MozReview-Commit-ID: 2g9cy1IqOph
--- a/dom/media/encoder/MediaEncoder.cpp
+++ b/dom/media/encoder/MediaEncoder.cpp
@@ -862,16 +862,19 @@ MediaEncoder::EncodeData()
rv = mAudioEncoder->GetEncodedTrack(encodedAudioData);
if (NS_FAILED(rv)) {
// Encoding might be canceled.
LOG(LogLevel::Error, ("Failed to get encoded data from audio encoder."));
return rv;
}
for (const RefPtr<EncodedFrame>& frame :
encodedAudioData.GetEncodedFrames()) {
+ if (frame->GetFrameType() == EncodedFrame::FrameType::OPUS_AUDIO_FRAME) {
+ frame->SetTimeStamp(frame->GetTimeStamp() + mAudioCodecDelay);
+ }
mEncodedAudioFrames.AppendElement(frame);
}
}
return rv;
}
nsresult
@@ -936,16 +939,25 @@ MediaEncoder::CopyMetadataToMuxer(TrackE
RefPtr<TrackMetadataBase> meta = aTrackEncoder->GetMetadata();
if (meta == nullptr) {
LOG(LogLevel::Error, ("metadata == null"));
SetError();
return NS_ERROR_ABORT;
}
+ // In the case of Opus we need to calculate the codec delay
+ if (meta->GetKind() == TrackMetadataBase::MetadataKind::METADATA_OPUS) {
+ // Calculate offset in microseconds
+ OpusMetadata* opusMeta = static_cast<OpusMetadata*>(meta.get());
+ mAudioCodecDelay = static_cast<uint64_t>(
+ LittleEndian::readUint16(opusMeta->mIdHeader.Elements() + 10) *
+ PR_USEC_PER_SEC / 48000);
+ }
+
nsresult rv = mWriter->SetMetadata(meta);
if (NS_FAILED(rv)) {
LOG(LogLevel::Error, ("SetMetadata failed"));
SetError();
}
return rv;
}
--- a/dom/media/encoder/MediaEncoder.h
+++ b/dom/media/encoder/MediaEncoder.h
@@ -273,16 +273,20 @@ private:
// doesn't contain video on start() or if the input is an AudioNode.
RefPtr<dom::VideoStreamTrack> mVideoTrack;
// Audio frames that have been encoded and are pending write to the muxer
nsTArray<RefPtr<EncodedFrame>> mEncodedAudioFrames;
// Video frames that have been encoded and are pending write to the muxer
nsTArray<RefPtr<EncodedFrame>> mEncodedVideoFrames;
+ // How much each audio time stamp should be delayed in microseconds. Used to
+ // adjust for opus codec delay.
+ uint64_t mAudioCodecDelay = 0;
+
TimeStamp mStartTime;
nsString mMIMEType;
bool mInitialized;
bool mMetadataEncoded;
bool mCompleted;
bool mError;
bool mCanceled;
bool mShutdown;
--- a/dom/media/webm/EbmlComposer.cpp
+++ b/dom/media/webm/EbmlComposer.cpp
@@ -48,23 +48,29 @@ void EbmlComposer::GenerateHeader()
mWidth, mHeight,
mDisplayWidth, mDisplayHeight);
}
// Audio
if (mCodecPrivateData.Length() > 0) {
// Extract the pre-skip from mCodecPrivateData
// then convert it to nanoseconds.
// Details in OpusTrackEncoder.cpp.
- mCodecDelay =
- (uint64_t)LittleEndian::readUint16(mCodecPrivateData.Elements() + 10)
- * PR_NSEC_PER_SEC / 48000;
+ uint64_t codecDelay = (uint64_t)LittleEndian::readUint16(
+ mCodecPrivateData.Elements() + 10) *
+ PR_NSEC_PER_SEC / 48000;
// Fixed 80ms, convert into nanoseconds.
uint64_t seekPreRoll = 80 * PR_NSEC_PER_MSEC;
- writeAudioTrack(&ebml, 0x2, 0x0, "A_OPUS", mSampleFreq,
- mChannels, mCodecDelay, seekPreRoll,
+ writeAudioTrack(&ebml,
+ 0x2,
+ 0x0,
+ "A_OPUS",
+ mSampleFreq,
+ mChannels,
+ codecDelay,
+ seekPreRoll,
mCodecPrivateData.Elements(),
mCodecPrivateData.Length());
}
}
Ebml_EndSubElement(&ebml, &trackLoc);
}
}
// The Recording length is unknown and
@@ -127,18 +133,18 @@ EbmlComposer::WriteSimpleBlock(EncodedFr
auto frameType = aFrame->GetFrameType();
bool flush = false;
bool isVP8IFrame = (frameType == EncodedFrame::FrameType::VP8_I_FRAME);
if (isVP8IFrame) {
FinishCluster();
flush = true;
} else {
// Force it to calculate timecode using signed math via cast
- int64_t timeCode = (aFrame->GetTimeStamp() / ((int) PR_USEC_PER_MSEC) - mClusterTimecode) +
- (mCodecDelay / PR_NSEC_PER_MSEC);
+ int64_t timeCode =
+ (aFrame->GetTimeStamp() / ((int)PR_USEC_PER_MSEC) - mClusterTimecode);
if (timeCode < SHRT_MIN || timeCode > SHRT_MAX ) {
// We're probably going to overflow (or underflow) the timeCode value later!
FinishCluster();
flush = true;
}
}
auto block = mClusterBuffs.AppendElement();
@@ -156,19 +162,16 @@ EbmlComposer::WriteSimpleBlock(EncodedFr
mClusterTimecode = aFrame->GetTimeStamp() / PR_USEC_PER_MSEC;
Ebml_SerializeUnsigned(&ebml, Timecode, mClusterTimecode);
mFlushState |= FLUSH_CLUSTER;
}
bool isOpus = (frameType == EncodedFrame::FrameType::OPUS_AUDIO_FRAME);
// Can't underflow/overflow now
int64_t timeCode = aFrame->GetTimeStamp() / ((int) PR_USEC_PER_MSEC) - mClusterTimecode;
- if (isOpus) {
- timeCode += mCodecDelay / PR_NSEC_PER_MSEC;
- }
MOZ_ASSERT(timeCode >= SHRT_MIN && timeCode <= SHRT_MAX);
writeSimpleBlock(&ebml, isOpus ? 0x2 : 0x1, static_cast<short>(timeCode), isVP8IFrame,
0, 0, (unsigned char*)aFrame->GetFrameData().Elements(),
aFrame->GetFrameData().Length());
MOZ_ASSERT(ebml.offset <= DEFAULT_HEADER_SIZE +
aFrame->GetFrameData().Length(),
"write more data > EBML_BUFFER_SIZE");
block->SetLength(ebml.offset);
@@ -216,17 +219,16 @@ EbmlComposer::ExtractBuffer(nsTArray<nsT
}
mClusterCanFlushBuffs.Clear();
}
EbmlComposer::EbmlComposer()
: mFlushState(FLUSH_NONE)
, mClusterHeaderIndex(0)
, mClusterLengthLoc(0)
- , mCodecDelay(0)
, mClusterTimecode(0)
, mWidth(0)
, mHeight(0)
, mSampleFreq(0)
, mChannels(0)
{}
} // namespace mozilla
--- a/dom/media/webm/EbmlComposer.h
+++ b/dom/media/webm/EbmlComposer.h
@@ -31,17 +31,19 @@ public:
mCodecPrivateData.AppendElements(aBufs);
}
/*
* Generate the whole WebM header and output to mBuff.
*/
void GenerateHeader();
/*
* Insert media encoded buffer into muxer and it would be package
- * into SimpleBlock. If no cluster is opened, new cluster will start for writing.
+ * into SimpleBlock. If no cluster is open, a new cluster is started for
+ * writing. Frames passed to this function should already have any codec delay
+ * applied.
*/
void WriteSimpleBlock(EncodedFrame* aFrame);
/*
* Get valid cluster data.
*/
void ExtractBuffer(nsTArray<nsTArray<uint8_t> >* aDestBufs,
uint32_t aFlag = 0);
private:
@@ -62,18 +64,16 @@ private:
};
uint32_t mFlushState;
// Indicate the cluster header index in mClusterBuffs.
uint32_t mClusterHeaderIndex;
// The cluster length position.
uint64_t mClusterLengthLoc;
// Audio codec specific header data.
nsTArray<uint8_t> mCodecPrivateData;
- // Codec delay in nanoseconds.
- uint64_t mCodecDelay;
// The timecode of the cluster.
uint64_t mClusterTimecode;
// Video configuration
int mWidth;
int mHeight;
int mDisplayWidth;