Bug 1271847: Only adjust the samples once we have a complete moof. r?kentuckyfriedtakahe draft
author Jean-Yves Avenard <jyavenard@mozilla.com>
Wed, 11 May 2016 15:59:33 +1000
changeset 365664 534a384e6e310a3b3001574f50483219300cf355
parent 365638 25eb7d4919953d28059045b0aa96c038891075ac
child 520622 01b357cc2579bebadfdf02305a676d63a1c0cbae
push id 17814
push user bmo:jyavenard@mozilla.com
push date Wed, 11 May 2016 07:29:07 +0000
reviewers kentuckyfriedtakahe
bugs 1271847
milestone 49.0a1
Bug 1271847: Only adjust the samples once we have a complete moof. r?kentuckyfriedtakahe MozReview-Commit-ID: 5TO0d20uUhZ
media/libstagefright/binding/MoofParser.cpp
--- a/media/libstagefright/binding/MoofParser.cpp
+++ b/media/libstagefright/binding/MoofParser.cpp
@@ -341,26 +341,76 @@ MoofParser::ParseEncrypted(Box& aBox)
 
       if (mSinf.IsValid()) {
         break;
       }
     }
   }
 }
 
+// Comparator for nsTArray<Sample*>::Sort(), keyed on composition start time.
+class CtsComparator
+{
+public:
+  // True when both samples have the same composition start time.
+  bool Equals(Sample* const aA, Sample* const aB) const {
+    return aA->mCompositionRange.start == aB->mCompositionRange.start;
+  }
+  // True when aA's composition start time precedes aB's.
+  bool LessThan(Sample* const aA, Sample* const aB) const {
+    return aA->mCompositionRange.start < aB->mCompositionRange.start;
+  }
+};
+
 Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
   : mRange(aBox.Range())
   , mMaxRoundingError(35000)
 {
   for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
     if (box.IsType("traf")) {
       ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
     }
   }
   if (IsValid()) {
+    if (mIndex.Length()) {
+      // The moof is complete; ensure the samples are contiguous with no gaps.
+      nsTArray<Sample*> ctsOrder;
+      for (auto& sample : mIndex) {
+        ctsOrder.AppendElement(&sample);
+      }
+      ctsOrder.Sort(CtsComparator());
+      for (size_t i = 1; i < ctsOrder.Length(); i++) {
+        ctsOrder[i-1]->mCompositionRange.end = ctsOrder[i]->mCompositionRange.start;
+      }
+
+      // In MP4 a sample's duration is the delta between two decode timestamps,
+      // but the loop above changed each duration (mCompositionRange.end - start).
+      // MSE's TrackBuffersManager expects dts that increase by the sample's
+      // duration, so we rewrite the dts accordingly.
+      int64_t presentationDuration =
+        ctsOrder.LastElement()->mCompositionRange.end
+        - ctsOrder[0]->mCompositionRange.start;
+      int64_t endDecodeTime =
+        aMdhd.ToMicroseconds((int64_t)*aDecodeTime - aEdts.mMediaStart)
+        + aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
+      int64_t decodeDuration = endDecodeTime - mIndex[0].mDecodeTime;
+      // Scale in double: float cannot represent microsecond timestamps exactly,
+      // and a zero presentationDuration must not produce inf/NaN.
+      double adjust =
+        presentationDuration ? (double)decodeDuration / presentationDuration : 0;
+      int64_t dtsOffset = mIndex[0].mDecodeTime;
+      int64_t compositionDuration = 0;
+      // Adjust the dts so it never exceeds the next moof's decode start time.
+      for (auto& sample : mIndex) {
+        sample.mDecodeTime = dtsOffset + int64_t(compositionDuration * adjust);
+        compositionDuration += sample.mCompositionRange.Length();
+      }
+      mTimeRange = Interval<Microseconds>(ctsOrder[0]->mCompositionRange.start,
+          ctsOrder.LastElement()->mCompositionRange.end);
+    }
     ProcessCenc();
   }
 }
 
 bool
 Moof::GetAuxInfo(AtomType aType, nsTArray<MediaByteRange>* aByteRanges)
 {
   aByteRanges->Clear();
@@ -465,30 +515,16 @@ Moof::ParseTraf(Box& aBox, Trex& aTrex, 
 void
 Moof::FixRounding(const Moof& aMoof) {
   Microseconds gap = aMoof.mTimeRange.start - mTimeRange.end;
   if (gap > 0 && gap <= mMaxRoundingError) {
     mTimeRange.end = aMoof.mTimeRange.start;
   }
 }
 
-class CtsComparator
-{
-public:
-  bool Equals(Sample* const aA, Sample* const aB) const
-  {
-    return aA->mCompositionRange.start == aB->mCompositionRange.start;
-  }
-  bool
-  LessThan(Sample* const aA, Sample* const aB) const
-  {
-    return aA->mCompositionRange.start < aB->mCompositionRange.start;
-  }
-};
-
 bool
 Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio)
 {
   if (!aTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
       !aEdts.IsValid()) {
     LOG(Moof, "Invalid dependencies: aTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
         aTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid());
     return false;
@@ -549,69 +585,36 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, 
     if (flags & 0x800) {
       ctsOffset = reader->Read32();
     }
 
     Sample sample;
     sample.mByteRange = MediaByteRange(offset, offset + sampleSize);
     offset += sampleSize;
 
+    sample.mDecodeTime =
+      aMdhd.ToMicroseconds((int64_t)decodeTime - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
     sample.mCompositionRange = Interval<Microseconds>(
       aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset),
       aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
     decodeTime += sampleDuration;
 
     // Sometimes audio streams don't properly mark their samples as keyframes,
     // because every audio sample is a keyframe.
     sample.mSync = !(sampleFlags & 0x1010000) || aIsAudio;
 
     // FIXME: Make this infallible after bug 968520 is done.
     MOZ_ALWAYS_TRUE(mIndex.AppendElement(sample, fallible));
 
     mMdatRange = mMdatRange.Span(sample.mByteRange);
   }
   mMaxRoundingError += aMdhd.ToMicroseconds(sampleCount);
 
-  nsTArray<Sample*> ctsOrder;
-  for (int i = 0; i < mIndex.Length(); i++) {
-    ctsOrder.AppendElement(&mIndex[i]);
-  }
-  ctsOrder.Sort(CtsComparator());
+  *aDecodeTime = decodeTime;
 
-  for (size_t i = 0; i < ctsOrder.Length(); i++) {
-    if (i + 1 < ctsOrder.Length()) {
-      ctsOrder[i]->mCompositionRange.end = ctsOrder[i + 1]->mCompositionRange.start;
-    }
-  }
-  // In MP4, the duration of a sample is defined as the delta between two decode
-  // timestamps. The operation above has updated the duration of each sample
-  // as a Sample's duration is mCompositionRange.end - mCompositionRange.start
-  // MSE's TrackBuffersManager expects dts that increased by the sample's
-  // duration, so we rewrite the dts accordingly.
-  int64_t presentationDuration = ctsOrder.LastElement()->mCompositionRange.end
-                                 - ctsOrder[0]->mCompositionRange.start;
-  int64_t decodeDuration = aMdhd.ToMicroseconds(decodeTime - *aDecodeTime);
-  float adjust = (float)decodeDuration / presentationDuration;
-  int64_t dtsOffset =
-    aMdhd.ToMicroseconds((int64_t)*aDecodeTime - aEdts.mMediaStart)
-    + aMvhd.ToMicroseconds(aEdts.mEmptyOffset);
-  int64_t compositionDuration = 0;
-  // Adjust the dts, ensuring that the new adjusted dts will never be greater
-  // than decodeTime (the next moof's decode start time).
-  for (auto& sample : mIndex) {
-    sample.mDecodeTime = dtsOffset + compositionDuration * adjust;
-    compositionDuration += sample.mCompositionRange.Length();
-  }
-  mTimeRange = Interval<Microseconds>(ctsOrder[0]->mCompositionRange.start,
-      ctsOrder.LastElement()->mCompositionRange.end);
-  *aDecodeTime = decodeTime;
-  MOZ_ASSERT(aMdhd.ToMicroseconds((int64_t)decodeTime - aEdts.mMediaStart)
-             + aMvhd.ToMicroseconds(aEdts.mEmptyOffset)
-             >= mIndex[mIndex.Length() -1].mDecodeTime,
-             "Adjusted dts is too high");
   return true;
 }
 
 Tkhd::Tkhd(Box& aBox)
 {
   BoxReader reader(aBox);
   if (!reader->CanReadType<uint32_t>()) {
     LOG(Tkhd, "Incomplete Box (missing flags)");