Bug 1269325: [mp4] Recalculate dts after adjusting cts. r?kentuckyfriedtakahe
CTS are adjusted so that all frames within a moof are contiguous and gapless. This means that the duration of each sample is updated accordingly. In MP4, a sample's duration is defined as the delta between two decode timestamps. As such, when changing the duration, the decode timestamps should be updated accordingly.
MozReview-Commit-ID: 8D8DeNeyzy
--- a/media/libstagefright/binding/MoofParser.cpp
+++ b/media/libstagefright/binding/MoofParser.cpp
@@ -549,18 +549,16 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd,
if (flags & 0x800) {
ctsOffset = reader->Read32();
}
Sample sample;
sample.mByteRange = MediaByteRange(offset, offset + sampleSize);
offset += sampleSize;
- sample.mDecodeTime =
- aMdhd.ToMicroseconds((int64_t)decodeTime - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
sample.mCompositionRange = Interval<Microseconds>(
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset),
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
decodeTime += sampleDuration;
// Sometimes audio streams don't properly mark their samples as keyframes,
// because every audio sample is a keyframe.
sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio;
@@ -578,19 +576,42 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd,
}
ctsOrder.Sort(CtsComparator());
for (size_t i = 0; i < ctsOrder.Length(); i++) {
if (i + 1 < ctsOrder.Length()) {
ctsOrder[i]->mCompositionRange.end = ctsOrder[i + 1]->mCompositionRange.start;
}
}
+ // In MP4, the duration of a sample is defined as the delta between two decode
+ // timestamps. The operation above has updated the duration of each sample
+ // as a Sample's duration is mCompositionRange.end - mCompositionRange.start
+ // MSE's TrackBuffersManager expects dts that increased by the sample's
+ // duration, so we rewrite the dts accordingly.
+ int64_t presentationDuration = ctsOrder.LastElement()->mCompositionRange.end
+ - ctsOrder[0]->mCompositionRange.start;
+ int64_t decodeDuration = aMdhd.ToMicroseconds(decodeTime - *aDecodeTime);
+ float adjust = (float)decodeDuration / presentationDuration;
+ int64_t dtsOffset =
+ aMdhd.ToMicroseconds((int64_t)*aDecodeTime - aEdts.mMediaStart)
+ + aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
+ int64_t compositionDuration = 0;
+ // Adjust the dts, ensuring that the new adjusted dts will never be greater
+ // than decodeTime (the next moof's decode start time).
+ for (auto& sample : mIndex) {
+ sample.mDecodeTime = dtsOffset + compositionDuration * adjust;
+ compositionDuration += sample.mCompositionRange.Length();
+ }
mTimeRange = Interval<Microseconds>(ctsOrder[0]->mCompositionRange.start,
ctsOrder.LastElement()->mCompositionRange.end);
*aDecodeTime = decodeTime;
+ MOZ_ASSERT(aMdhd.ToMicroseconds((int64_t)decodeTime - aEdts.mMediaStart)
+ + aMvhd.ToMicroseconds(aEdts.mEmptyOffset)
+ >= mIndex[mIndex.Length() -1].mDecodeTime,
+ "Adjusted dts is too high");
return true;
}
Tkhd::Tkhd(Box& aBox)
{
BoxReader reader(aBox);
if (!reader->CanReadType<uint32_t>()) {
LOG(Tkhd, "Incomplete Box (missing flags)");