Bug 1351124 - Detect MP4 PSSH boxes in MOOF boxes and dispatch those in 'encrypted' events to content. r=jya
We detect when PSSH boxes are contained in a MOOF and stash them in the
mp4_demuxer::Moof object. When the mp4_demuxer::SampleIterator returns a
sample, we check whether it's the first sample from its MOOF, and if so, we
attach any PSSH boxes from that MOOF to the sample. The TrackBuffersManager
checks samples upon demux, to see whether they have any EME init data attached,
and if so dispatches those to the HTMLMediaElement in 'encrypted' events.
MozReview-Commit-ID: F8GobKOr96F
--- a/dom/media/MediaData.h
+++ b/dom/media/MediaData.h
@@ -586,16 +586,18 @@ public:
class CryptoSample : public CryptoTrack
{
public:
nsTArray<uint16_t> mPlainSizes;
nsTArray<uint32_t> mEncryptedSizes;
nsTArray<uint8_t> mIV;
nsTArray<nsCString> mSessionIds;
+ nsTArray<nsTArray<uint8_t>> mInitDatas;
+ nsString mInitDataType;
};
// MediaRawData is a MediaData container used to store demuxed, still compressed
// samples.
// Use MediaRawData::CreateWriter() to obtain a MediaRawDataWriter object that
// provides methods to modify and manipulate the data.
// Memory allocations are fallible. Methods return a boolean indicating if
// memory allocations were successful. Return values should always be checked.
--- a/dom/media/mediasource/TrackBuffersManager.cpp
+++ b/dom/media/mediasource/TrackBuffersManager.cpp
@@ -58,17 +58,17 @@ AppendStateToStr(SourceBufferAttributes:
}
}
static Atomic<uint32_t> sStreamSourceID(0u);
class DispatchKeyNeededEvent : public Runnable {
public:
DispatchKeyNeededEvent(AbstractMediaDecoder* aDecoder,
- nsTArray<uint8_t>& aInitData,
+ const nsTArray<uint8_t>& aInitData,
const nsString& aInitDataType)
: Runnable("DispatchKeyNeededEvent")
, mDecoder(aDecoder)
, mInitData(aInitData)
, mInitDataType(aInitDataType)
{
}
NS_IMETHOD Run() override {
@@ -1267,22 +1267,39 @@ TrackBuffersManager::DoDemuxVideo()
mVideoTracks.mDemuxer->GetSamples(-1)
->Then(GetTaskQueue(), __func__, this,
&TrackBuffersManager::OnVideoDemuxCompleted,
&TrackBuffersManager::OnVideoDemuxFailed)
->Track(mVideoTracks.mDemuxRequest);
}
void
-TrackBuffersManager::OnVideoDemuxCompleted(RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples)
+TrackBuffersManager::MaybeDispatchEncryptedEvent(
+ const nsTArray<RefPtr<MediaRawData>>& aSamples)
+{
+ // Try and dispatch 'encrypted'. Won't go if ready state still HAVE_NOTHING.
+ for (const RefPtr<MediaRawData>& sample : aSamples) {
+ for (const nsTArray<uint8_t>& initData : sample->mCrypto.mInitDatas) {
+ nsCOMPtr<nsIRunnable> r = new DispatchKeyNeededEvent(
+ mParentDecoder, initData, sample->mCrypto.mInitDataType);
+ mAbstractMainThread->Dispatch(r.forget());
+ }
+ }
+}
+
+void
+TrackBuffersManager::OnVideoDemuxCompleted(
+ RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples)
{
MOZ_ASSERT(OnTaskQueue());
MSE_DEBUG("%" PRIuSIZE " video samples demuxed", aSamples->mSamples.Length());
mVideoTracks.mDemuxRequest.Complete();
mVideoTracks.mQueuedSamples.AppendElements(aSamples->mSamples);
+
+ MaybeDispatchEncryptedEvent(aSamples->mSamples);
DoDemuxAudio();
}
void
TrackBuffersManager::DoDemuxAudio()
{
MOZ_ASSERT(OnTaskQueue());
if (!HasAudio()) {
@@ -1299,16 +1316,18 @@ TrackBuffersManager::DoDemuxAudio()
void
TrackBuffersManager::OnAudioDemuxCompleted(RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples)
{
MOZ_ASSERT(OnTaskQueue());
MSE_DEBUG("%" PRIuSIZE " audio samples demuxed", aSamples->mSamples.Length());
mAudioTracks.mDemuxRequest.Complete();
mAudioTracks.mQueuedSamples.AppendElements(aSamples->mSamples);
CompleteCodedFrameProcessing();
+
+ MaybeDispatchEncryptedEvent(aSamples->mSamples);
}
void
TrackBuffersManager::CompleteCodedFrameProcessing()
{
MOZ_ASSERT(OnTaskQueue());
// 1. For each coded frame in the media segment run the following steps:
--- a/dom/media/mediasource/TrackBuffersManager.h
+++ b/dom/media/mediasource/TrackBuffersManager.h
@@ -253,16 +253,21 @@ private:
void DoDemuxAudio();
void OnAudioDemuxCompleted(RefPtr<MediaTrackDemuxer::SamplesHolder> aSamples);
void OnAudioDemuxFailed(const MediaResult& aError)
{
mAudioTracks.mDemuxRequest.Complete();
OnDemuxFailed(TrackType::kAudioTrack, aError);
}
+ // Dispatches an "encrypted" event if any sample in the array has initData
+ // present.
+ void MaybeDispatchEncryptedEvent(
+ const nsTArray<RefPtr<MediaRawData>>& aSamples);
+
void DoEvictData(const media::TimeUnit& aPlaybackTime, int64_t aSizeToEvict);
struct TrackData
{
TrackData()
: mNumTracks(0)
, mNeedRandomAccessPoint(true)
, mSizeBuffer(0)
--- a/media/libstagefright/binding/Box.cpp
+++ b/media/libstagefright/binding/Box.cpp
@@ -68,16 +68,17 @@ Box::Box(BoxContext* aContext, uint64_t
}
size_t bytes;
if (!mContext->mSource->CachedReadAt(aOffset, header, sizeof(header),
&bytes) ||
bytes != sizeof(header)) {
return;
}
+ mHeader.AppendElements(header, sizeof(header));
uint64_t size = BigEndian::readUint32(header);
if (size == 1) {
uint8_t bigLength[8];
if (aOffset > INT64_MAX - sizeof(header) - sizeof(bigLength)) {
return;
}
MediaByteRange bigLengthRange(headerRange.mEnd,
@@ -86,16 +87,17 @@ Box::Box(BoxContext* aContext, uint64_t
!byteRange->Contains(bigLengthRange) ||
!mContext->mSource->CachedReadAt(aOffset + sizeof(header), bigLength,
sizeof(bigLength), &bytes) ||
bytes != sizeof(bigLength)) {
return;
}
size = BigEndian::readUint64(bigLength);
mBodyOffset = bigLengthRange.mEnd;
+ mHeader.AppendElements(bigLength, sizeof(bigLength));
} else if (size == 0) {
// box extends to end of file.
size = mContext->mByteRanges.LastInterval().mEnd - aOffset;
mBodyOffset = headerRange.mEnd;
} else {
mBodyOffset = headerRange.mEnd;
}
@@ -138,25 +140,25 @@ Box::FirstChild() const
MOZ_ASSERT(IsAvailable());
if (mChildOffset == mRange.mEnd) {
return Box();
}
return Box(mContext, mChildOffset, this);
}
nsTArray<uint8_t>
-Box::Read()
+Box::Read() const
{
nsTArray<uint8_t> out;
Unused << Read(&out, mRange);
return out;
}
bool
-Box::Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange)
+Box::Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const
{
int64_t length;
if (!mContext->mSource->Length(&length)) {
// The HTTP server didn't give us a length to work with.
// Limit the read to kMAX_BOX_READ max.
length = std::min(aRange.mEnd - mChildOffset, kMAX_BOX_READ);
} else {
length = aRange.mEnd - mChildOffset;
--- a/media/libstagefright/binding/Index.cpp
+++ b/media/libstagefright/binding/Index.cpp
@@ -119,16 +119,30 @@ already_AddRefed<MediaRawData> SampleIte
}
size_t bytesRead;
if (!mIndex->mSource->ReadAt(sample->mOffset, writer->Data(), sample->Size(),
&bytesRead) || bytesRead != sample->Size()) {
return nullptr;
}
+ if (mCurrentSample == 0 && mIndex->mMoofParser) {
+ const nsTArray<Moof>& moofs = mIndex->mMoofParser->Moofs();
+ MOZ_ASSERT(mCurrentMoof < moofs.Length());
+ const Moof* currentMoof = &moofs[mCurrentMoof];
+ if (!currentMoof->mPsshes.IsEmpty()) {
+ // This Moof contained crypto init data. Report that. We only report
+ // the init data on the Moof's first sample, to avoid reporting it more
+ // than once per Moof.
+ writer->mCrypto.mValid = true;
+ writer->mCrypto.mInitDatas.AppendElements(currentMoof->mPsshes);
+ writer->mCrypto.mInitDataType = NS_LITERAL_STRING("cenc");
+ }
+ }
+
if (!s->mCencRange.IsEmpty()) {
MoofParser* parser = mIndex->mMoofParser.get();
if (!parser || !parser->mSinf.IsValid()) {
return nullptr;
}
uint8_t ivSize = parser->mSinf.mDefaultIVSize;
--- a/media/libstagefright/binding/MoofParser.cpp
+++ b/media/libstagefright/binding/MoofParser.cpp
@@ -389,21 +389,39 @@ public:
return aA->mCompositionRange.start < aB->mCompositionRange.start;
}
};
Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio)
: mRange(aBox.Range())
, mMaxRoundingError(35000)
{
+ nsTArray<Box> psshBoxes;
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("traf")) {
ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio);
}
+ if (box.IsType("pssh")) {
+ psshBoxes.AppendElement(box);
+ }
}
+
+ // The EME spec requires that PSSH boxes which are contiguous in the
+ // file are dispatched to the media element in a single "encrypted" event.
+ // So append contiguous boxes here.
+ for (size_t i = 0; i < psshBoxes.Length(); ++i) {
+ Box box = psshBoxes[i];
+ if (i == 0 || box.Offset() != psshBoxes[i - 1].NextOffset()) {
+ mPsshes.AppendElement();
+ }
+ nsTArray<uint8_t>& pssh = mPsshes.LastElement();
+ pssh.AppendElements(box.Header());
+ pssh.AppendElements(box.Read());
+ }
+
if (IsValid()) {
if (mIndex.Length()) {
// Ensure the samples are contiguous with no gaps.
nsTArray<Sample*> ctsOrder;
for (auto& sample : mIndex) {
ctsOrder.AppendElement(&sample);
}
ctsOrder.Sort(CtsComparator());
--- a/media/libstagefright/binding/include/mp4_demuxer/Box.h
+++ b/media/libstagefright/binding/include/mp4_demuxer/Box.h
@@ -43,28 +43,31 @@ public:
uint64_t Length() const { return mRange.mEnd - mRange.mStart; }
uint64_t NextOffset() const { return mRange.mEnd; }
const MediaByteRange& Range() const { return mRange; }
const Box* Parent() const { return mParent; }
bool IsType(const char* aType) const { return mType == AtomType(aType); }
Box Next() const;
Box FirstChild() const;
- nsTArray<uint8_t> Read();
- bool Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange);
+ nsTArray<uint8_t> Read() const;
+ bool Read(nsTArray<uint8_t>* aDest, const MediaByteRange& aRange) const;
static const uint64_t kMAX_BOX_READ;
+ const nsTArray<uint8_t>& Header() const { return mHeader; }
+
private:
bool Contains(MediaByteRange aRange) const;
BoxContext* mContext;
mozilla::MediaByteRange mRange;
uint64_t mBodyOffset;
uint64_t mChildOffset;
AtomType mType;
+ nsTArray<uint8_t> mHeader;
const Box* mParent;
};
// BoxReader takes a copy of a box contents and serves through an AutoByteReader.
class MOZ_RAII BoxReader
{
public:
explicit BoxReader(Box& aBox)
--- a/media/libstagefright/binding/include/mp4_demuxer/MoofParser.h
+++ b/media/libstagefright/binding/include/mp4_demuxer/MoofParser.h
@@ -230,16 +230,17 @@ public:
Interval<Microseconds> mTimeRange;
FallibleTArray<Sample> mIndex;
nsTArray<CencSampleEncryptionInfoEntry> mFragmentSampleEncryptionInfoEntries;
nsTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
nsTArray<Saiz> mSaizs;
nsTArray<Saio> mSaios;
+ nsTArray<nsTArray<uint8_t>> mPsshes;
private:
// aDecodeTime is updated to the end of the parsed TRAF on return.
void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio);
// aDecodeTime is updated to the end of the parsed TRUN on return.
bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
void ParseSaiz(Box& aBox);
void ParseSaio(Box& aBox);